feat: update

zhangdingyun 2024-08-16 21:53:16 +08:00
parent 18ebcc8b61
commit 8c91177935
3 changed files with 5 additions and 73 deletions

app.py (68 changed lines)

@@ -126,74 +126,6 @@ retargeting_output_image_paste_back = gr.Image(type="numpy")
output_video = gr.Video(autoplay=False)
output_video_paste_back = gr.Video(autoplay=False)
"""
每个点和每个维度对应的表情
(0,0): 头顶左右偏
(0,1): 头顶上下偏
(0,2): 头顶前后偏
(1,0): 眉毛上下眼睛左右
(1,1): 眉毛上下眼睛上下
(1,2): 嘴巴和眼睛的动作
(2,0): 眉毛上下眼睛左右
(2,1): 眉毛上下眼睛上下
(2,2): 嘴巴动作
(3,0): 左脸胖瘦, 眉毛上下
(3,1): 左脸上下眉毛上下
(3,2): 左脸前后会变形
(4,0): 右脸胖瘦
(4,1): 右脸上下
(4,2): 右脸前后会变形
(5,0): 头左右平移
(5,1): 头上下平移
(5,2): 嘴部动作
(6,0): 嘴部动作
(6,1): 嘴部动作
(6,2): 嘴部动作
(7,0): 右脸胖瘦
(7,1): 右脸上下
(7,2): 右脸前后
(8,0): 右脸胖瘦
(8,1): 右脸上下
(8,2): 嘴部动作
(9,0): 下巴胖瘦
(9,1): 嘴部动作
(9,2): 眼部动作
(10,0): 左边放缩
(10,1): 左边放缩眼部动作
(10,2): 下巴放缩
(11,0): 左眼左右转
(11,1): 左眼上下睁开闭合
(11,2): 左眼前后
(12,0): 嘴部动作
(12,1): 无明显
(12,2): 嘴部动作
(13,0): 眼部动作
(13,1): 眼部动作
(13,2): 眼部动作
(14,0): 嘴部动作
(14,1): 嘴部动作
(14,2): 嘴部动作
(15,0): 眼部动作
(15,1): 眼部动作嘴部动作
(15,2): 眼部动作
(16,0): 眼睛
(16,1): 右眼睁开闭合嘴部动作
(16,2): 眼部动作
(17,0): 嘴部动作眼部动作
(17,1): 嘴部动作眼部动作
(17,2): 撅嘴拉平嘴
(18,0): 眼部方向
(18,1): 眼部上下
(18,2): 嘴部动作眼部动作
(19,0): 撇嘴
(19,1): 张开闭合嘴
(19,2): 内收外翻嘴
(20,0): 下弯嘴
(20,1): 露牙闭合牙
(20,2): 下拉嘴哦形嘴
"""
with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta Sans")])) as demo:
    gr.HTML(load_description(title_md))
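The deleted notes above describe, for each of the 21 implicit keypoints and each of its three axes, which facial motion that component of the expression tensor drives. As a rough illustration only (not part of this commit; the tensor name and the (1, 21, 3) layout are assumptions inferred from the notes), one could nudge individual (keypoint, dimension) entries like this:

# Hypothetical sketch: adjust single expression components per the (keypoint, dimension) table above.
# Assumes the expression deltas live in a tensor of shape (1, 21, 3); names are illustrative.
import torch

exp_delta = torch.zeros(1, 21, 3)   # neutral expression offsets (assumed layout)
exp_delta[0, 19, 1] += 0.02         # (19,1): open/close the mouth
exp_delta[0, 11, 1] -= 0.01         # (11,1): open/close the left eye
exp_delta[0, 16, 1] -= 0.01         # (16,1): open/close the right eye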

src/config/argument_config.py

@@ -13,8 +13,8 @@ from .base_config import PrintableConfig, make_abs_path
@dataclass(repr=False)  # use repr from PrintableConfig
class ArgumentConfig(PrintableConfig):
    ########## input arguments ##########
-    source: Annotated[str, tyro.conf.arg(aliases=["-s"])] = make_abs_path('../../assets/examples/source/s3.jpg')  # path to the source portrait (human/animal) or video (human)
+    source: Annotated[str, tyro.conf.arg(aliases=["-s"])] = make_abs_path('../../assets/examples/source/s0.jpg')  # path to the source portrait (human/animal) or video (human)
-    driving: Annotated[str, tyro.conf.arg(aliases=["-d"])] = make_abs_path('../../assets/examples/driving/d30.jpg')  # path to driving video or template (.pkl format)
+    driving: Annotated[str, tyro.conf.arg(aliases=["-d"])] = make_abs_path('../../assets/examples/driving/d0.mp4')  # path to driving video or template (.pkl format)
    output_dir: Annotated[str, tyro.conf.arg(aliases=["-o"])] = 'animations/'  # directory to save output video
    ########## inference arguments ##########
@@ -34,7 +34,7 @@ class ArgumentConfig(PrintableConfig):
    driving_multiplier: float = 1.0  # used only when driving_option is "expression-friendly"
    driving_smooth_observation_variance: float = 3e-7  # smooth strength scalar for the animated video when the input is a source video; the larger the number, the smoother the animated video; too much smoothness would result in loss of motion accuracy
    audio_priority: Literal['source', 'driving'] = 'driving'  # whether to use the audio from source or driving video
-    animation_region: Literal["exp", "pose", "lip", "eyes", "all"] = "all"  # the region where the animation is performed, "exp" means the expression, "pose" means the head pose
+    animation_region: Literal["exp", "pose", "lip", "eyes", "all"] = "exp"  # the region where the animation is performed, "exp" means the expression, "pose" means the head pose
    ########## source crop arguments ##########
    det_thresh: float = 0.15  # detection threshold
    scale: float = 2.3  # the ratio of face area is smaller if scale is larger
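These defaults feed a tyro-based CLI, so individual runs can still override them without editing the dataclass. A minimal sketch, assuming the config is importable from the repo's config package (the import path here is an assumption based on the layout implied by make_abs_path('../../assets/...')):

# Sketch only: parse ArgumentConfig from the command line with tyro, then override fields in code.
import tyro
from src.config.argument_config import ArgumentConfig  # assumed import path

args = tyro.cli(ArgumentConfig)   # CLI flags (including the -s/-d/-o aliases) override the defaults
args.animation_region = "all"     # e.g. restore full-head animation instead of the new "exp" default
print(args.source, args.driving, args.animation_region)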

src/gradio_pipeline.py

@@ -431,7 +431,7 @@ class GradioPipeline(LivePortraitPipeline):
else:
    inference_cfg = self.live_portrait_wrapper.inference_cfg
f_s_user_lst, x_s_user_lst, x_d_i_new_lst, source_M_c2o_lst, mask_ori_lst, source_rgb_lst, img_crop_256x256_lst, source_fps, n_frames = \
-    self.prepare_video_lip_silence(input_video, device, driving_smooth_observation_variance_retargeting, flag_do_crop=flag_do_crop_input_retargeting_video)
+    self.prepare_video_lip_silence(input_video, device, flag_do_crop=flag_do_crop_input_retargeting_video)
I_p_pstbk_lst = None
if flag_do_crop_input_retargeting_video:
@@ -540,7 +540,7 @@ class GradioPipeline(LivePortraitPipeline):
raise gr.Error("Please upload a source video as the retargeting input 🤗🤗🤗", duration=5)
@torch.no_grad()
-def prepare_video_lip_silence(self, input_video, device, driving_smooth_observation_variance_retargeting, flag_do_crop=True):
+def prepare_video_lip_silence(self, input_video, device, flag_do_crop=True):
    """ for keeping lips in the source video silent
    """
    if input_video is not None:
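With driving_smooth_observation_variance_retargeting dropped from the signature, callers now pass only the video, device, and crop flag, as the updated call site above shows. A hedged caller sketch (the pipeline instance, path, and device string are illustrative, not from this commit):

# Hypothetical usage: `pipeline` is assumed to be an already-constructed GradioPipeline.
(f_s_user_lst, x_s_user_lst, x_d_i_new_lst, source_M_c2o_lst, mask_ori_lst,
 source_rgb_lst, img_crop_256x256_lst, source_fps, n_frames) = pipeline.prepare_video_lip_silence(
    "assets/examples/driving/d0.mp4",  # illustrative input-video path
    device="cuda",
    flag_do_crop=True,
)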