From 8c91177935d00d19e703e4694170be856ace5a4e Mon Sep 17 00:00:00 2001
From: zhangdingyun
Date: Fri, 16 Aug 2024 21:53:16 +0800
Subject: [PATCH] feat: update

---
 app.py                        | 68 -----------------------------------
 src/config/argument_config.py |  6 ++--
 src/gradio_pipeline.py        |  4 +--
 3 files changed, 5 insertions(+), 73 deletions(-)

diff --git a/app.py b/app.py
index 12b4dbf..50c7285 100644
--- a/app.py
+++ b/app.py
@@ -126,74 +126,6 @@ retargeting_output_image_paste_back = gr.Image(type="numpy")
 output_video = gr.Video(autoplay=False)
 output_video_paste_back = gr.Video(autoplay=False)
 
-"""
-Expression controlled by each keypoint index and each dimension:
-(0,0): top of head tilts left/right
-(0,1): top of head tilts up/down
-(0,2): top of head shifts forward/backward
-(1,0): eyebrows up/down, eyes left/right
-(1,1): eyebrows up/down, eyes up/down
-(1,2): mouth and eye movement
-(2,0): eyebrows up/down, eyes left/right
-(2,1): eyebrows up/down, eyes up/down
-(2,2): mouth movement
-(3,0): left cheek fuller/thinner, eyebrows up/down
-(3,1): left cheek up/down, eyebrows up/down
-(3,2): left cheek forward/backward, causes distortion
-(4,0): right cheek fuller/thinner
-(4,1): right cheek up/down
-(4,2): right cheek forward/backward, causes distortion
-(5,0): head translates left/right
-(5,1): head translates up/down
-(5,2): mouth movement
-(6,0): mouth movement
-(6,1): mouth movement
-(6,2): mouth movement
-(7,0): right cheek fuller/thinner
-(7,1): right cheek up/down
-(7,2): right cheek forward/backward
-(8,0): right cheek fuller/thinner
-(8,1): right cheek up/down
-(8,2): mouth movement
-(9,0): chin fuller/thinner
-(9,1): mouth movement
-(9,2): eye movement
-(10,0): left side scaling
-(10,1): left side scaling, eye movement
-(10,2): chin scaling
-(11,0): left eye turns left/right
-(11,1): left eye opens/closes
-(11,2): left eye forward/backward
-(12,0): mouth movement
-(12,1): no obvious effect
-(12,2): mouth movement
-(13,0): eye movement
-(13,1): eye movement
-(13,2): eye movement
-(14,0): mouth movement
-(14,1): mouth movement
-(14,2): mouth movement
-(15,0): eye movement
-(15,1): eye movement, mouth movement
-(15,2): eye movement
-(16,0): eyes
-(16,1): right eye opens/closes, mouth movement
-(16,2): eye movement
-(17,0): mouth movement, eye movement
-(17,1): mouth movement, eye movement
-(17,2): pout lips / flatten lips
-(18,0): eye direction
-(18,1): eyes up/down
-(18,2): mouth movement, eye movement
-(19,0): lips curl to the side
-(19,1): mouth opens/closes
-(19,2): lips roll inward/outward
-(20,0): mouth corners turn down
-(20,1): show teeth / close teeth
-(20,2): mouth pulled down, "oh"-shaped mouth
-"""
-
-
 with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta Sans")])) as demo:
     gr.HTML(load_description(title_md))
 
diff --git a/src/config/argument_config.py b/src/config/argument_config.py
index aa482b8..f4a3b7d 100644
--- a/src/config/argument_config.py
+++ b/src/config/argument_config.py
@@ -13,8 +13,8 @@ from .base_config import PrintableConfig, make_abs_path
 @dataclass(repr=False) # use repr from PrintableConfig
 class ArgumentConfig(PrintableConfig):
     ########## input arguments ##########
-    source: Annotated[str, tyro.conf.arg(aliases=["-s"])] = make_abs_path('../../assets/examples/source/s3.jpg') # path to the source portrait (human/animal) or video (human)
-    driving: Annotated[str, tyro.conf.arg(aliases=["-d"])] = make_abs_path('../../assets/examples/driving/d30.jpg') # path to driving video or template (.pkl format)
+    source: Annotated[str, tyro.conf.arg(aliases=["-s"])] = make_abs_path('../../assets/examples/source/s0.jpg') # path to the source portrait (human/animal) or video (human)
+    driving: Annotated[str, tyro.conf.arg(aliases=["-d"])] = make_abs_path('../../assets/examples/driving/d0.mp4') # path to driving video or template (.pkl format)
     output_dir: Annotated[str, tyro.conf.arg(aliases=["-o"])] = 'animations/' # directory to save output video
 
     ########## inference arguments ##########
@@ -34,7 +34,7 @@ class ArgumentConfig(PrintableConfig):
     driving_multiplier: float = 1.0 # be used only when driving_option is "expression-friendly"
     driving_smooth_observation_variance: float = 3e-7 # smooth strength scalar for the animated video when the input is a source video, the larger the number, the smoother the animated video; too much smoothness would result in loss of motion accuracy
     audio_priority: Literal['source', 'driving'] = 'driving' # whether to use the audio from source or driving video
-    animation_region: Literal["exp", "pose", "lip", "eyes", "all"] = "all" # the region where the animation was performed, "exp" means the expression, "pose" means the head pose
+    animation_region: Literal["exp", "pose", "lip", "eyes", "all"] = "exp" # the region where the animation was performed, "exp" means the expression, "pose" means the head pose
     ########## source crop arguments ##########
     det_thresh: float = 0.15 # detection threshold
     scale: float = 2.3 # the ratio of face area is smaller if scale is larger
diff --git a/src/gradio_pipeline.py b/src/gradio_pipeline.py
index 8a67b40..263f14d 100644
--- a/src/gradio_pipeline.py
+++ b/src/gradio_pipeline.py
@@ -431,7 +431,7 @@ class GradioPipeline(LivePortraitPipeline):
         else:
             inference_cfg = self.live_portrait_wrapper.inference_cfg
         f_s_user_lst, x_s_user_lst, x_d_i_new_lst, source_M_c2o_lst, mask_ori_lst, source_rgb_lst, img_crop_256x256_lst, source_fps, n_frames = \
-            self.prepare_video_lip_silence(input_video, device, driving_smooth_observation_variance_retargeting, flag_do_crop=flag_do_crop_input_retargeting_video)
+            self.prepare_video_lip_silence(input_video, device, flag_do_crop=flag_do_crop_input_retargeting_video)
 
         I_p_pstbk_lst = None
         if flag_do_crop_input_retargeting_video:
@@ -540,7 +540,7 @@ class GradioPipeline(LivePortraitPipeline):
             raise gr.Error("Please upload a source video as the retargeting input 🤗🤗🤗", duration=5)
 
     @torch.no_grad()
-    def prepare_video_lip_silence(self, input_video, device, driving_smooth_observation_variance_retargeting, flag_do_crop=True):
+    def prepare_video_lip_silence(self, input_video, device, flag_do_crop=True):
         """ for keeping lips in the source video silent """
         if input_video is not None: