Mirror of https://github.com/KwaiVGI/LivePortrait.git
feat: update
commit 200f84dd1f
parent 2638f3b10a
@@ -13,8 +13,8 @@ from .base_config import PrintableConfig, make_abs_path
 @dataclass(repr=False)  # use repr from PrintableConfig
 class ArgumentConfig(PrintableConfig):
     ########## input arguments ##########
-    source: Annotated[str, tyro.conf.arg(aliases=["-s"])] = make_abs_path('../../assets/examples/source/s3.jpg')  # path to the source portrait (human/animal) or video (human)
-    driving: Annotated[str, tyro.conf.arg(aliases=["-d"])] = make_abs_path('../../assets/examples/driving/d3.jpg')  # path to driving video or template (.pkl format)
+    source: Annotated[str, tyro.conf.arg(aliases=["-s"])] = make_abs_path('../../assets/examples/driving/d0.mp4')  # path to the source portrait (human/animal) or video (human)
+    driving: Annotated[str, tyro.conf.arg(aliases=["-d"])] = make_abs_path('../../assets/examples/driving/d6.pkl')  # path to driving video or template (.pkl format)
     output_dir: Annotated[str, tyro.conf.arg(aliases=["-o"])] = 'animations/'  # directory to save output video

     ########## inference arguments ##########
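Note that tyro turns each annotated dataclass field into a CLI flag, so the defaults changed above are only fallbacks when no flag is given. A minimal standalone sketch of that mechanism (the trimmed-down config and the script name are illustrative, not from this commit):

# sketch: how tyro maps ArgumentConfig fields to CLI flags (illustrative)
from dataclasses import dataclass
from typing import Annotated
import tyro

@dataclass
class ArgumentConfig:
    # hypothetical trimmed mirror of the real config
    source: Annotated[str, tyro.conf.arg(aliases=["-s"])] = "assets/examples/driving/d0.mp4"
    driving: Annotated[str, tyro.conf.arg(aliases=["-d"])] = "assets/examples/driving/d6.pkl"
    output_dir: Annotated[str, tyro.conf.arg(aliases=["-o"])] = "animations/"

if __name__ == "__main__":
    # e.g. `python parse_demo.py -s my.jpg -d my.mp4` overrides the defaults;
    # running with no flags falls back to the values above
    args = tyro.cli(ArgumentConfig)
    print(args.source, args.driving, args.output_dir)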
@@ -22,7 +22,7 @@ class ArgumentConfig(PrintableConfig):
     flag_crop_driving_video: bool = False  # whether to crop the driving video, if the given driving info is a video
     device_id: int = 0  # gpu device id
     flag_force_cpu: bool = False  # force cpu inference, WIP!
-    flag_normalize_lip: bool = True  # whether to set the lip to the closed state before animation; only takes effect when flag_eye_retargeting and flag_lip_retargeting are False
+    flag_normalize_lip: bool = False  # whether to set the lip to the closed state before animation; only takes effect when flag_eye_retargeting and flag_lip_retargeting are False
     flag_source_video_eye_retargeting: bool = False  # when the input is a source video, whether to make the eye-open scalar of each frame match the first source frame before animation; only takes effect when flag_eye_retargeting and flag_lip_retargeting are False; may cause inter-frame jittering
     flag_video_editing_head_rotation: bool = False  # when the input is a source video, whether to inherit the relative head rotation from the driving video
     flag_eye_retargeting: bool = False  # not recommended to be True, WIP; whether to transfer the eyes-open ratio of each driving frame to the source image or the corresponding source frame
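Per the config comments, flag_normalize_lip only takes effect when both retargeting flags are off. A minimal sketch of that gating condition (the helper name is hypothetical, not the pipeline's actual API):

# sketch: gating for lip normalization, per the config comments (hypothetical helper)
from types import SimpleNamespace

def should_normalize_lip(cfg) -> bool:
    # only takes effect when flag_eye_retargeting and flag_lip_retargeting are False
    return (cfg.flag_normalize_lip
            and not cfg.flag_eye_retargeting
            and not cfg.flag_lip_retargeting)

cfg = SimpleNamespace(flag_normalize_lip=True,
                      flag_eye_retargeting=False,
                      flag_lip_retargeting=False)
assert should_normalize_lip(cfg)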
@@ -35,7 +35,7 @@ class ArgumentConfig(PrintableConfig):
     driving_multiplier: float = 1.0  # used only when driving_option is "expression-friendly"
     driving_smooth_observation_variance: float = 3e-7  # smoothing strength for the animated video when the input is a source video; the larger the number, the smoother the result, though too much smoothness loses motion accuracy
     audio_priority: Literal['source', 'driving'] = 'driving'  # whether to use the audio from the source or the driving video
-    animation_region: Literal["exp", "pose", "lip", "eyes", "all"] = "pose"  # the region where the animation is performed; "exp" means the expression, "pose" means the head pose
+    animation_region: Literal["exp", "pose", "lip", "eyes", "all"] = "eyes"  # the region where the animation is performed; "exp" means the expression, "pose" means the head pose
     ########## source crop arguments ##########
     det_thresh: float = 0.15  # detection threshold
     scale: float = 2.3  # the ratio of the face area is smaller when scale is larger
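Because animation_region is a Literal, tyro validates the value at parse time rather than deep inside the pipeline. A standalone sketch (not from the repo):

# sketch: Literal-typed field validated by tyro (standalone example)
from dataclasses import dataclass
from typing import Literal
import tyro

@dataclass
class RegionConfig:
    # "exp" = expression, "pose" = head pose, "lip"/"eyes" = partial regions
    animation_region: Literal["exp", "pose", "lip", "eyes", "all"] = "eyes"

if __name__ == "__main__":
    # `--animation_region mouth` would be rejected with a parse error
    cfg = tyro.cli(RegionConfig)
    print(cfg.animation_region)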
@@ -290,6 +290,7 @@ class LivePortraitPipeline(object):
                     R_d_0 = R_d_i
                     x_d_0_info = x_d_i_info

+            delta_new = x_s_info['exp'].clone()
             if inf_cfg.flag_relative_motion:
                 if flag_is_source_video:
                     if inf_cfg.flag_video_editing_head_rotation:
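Hoisting delta_new = x_s_info['exp'].clone() above the branches does two things: every region branch can now assume delta_new exists, and .clone() keeps the in-place per-keypoint writes below from mutating the cached source expression. A minimal torch demo of the aliasing hazard (tensor names are illustrative stand-ins):

# sketch: why .clone() is needed before in-place keypoint edits (illustrative)
import torch

x_s_exp = torch.zeros(1, 21, 3)   # stand-in for x_s_info['exp']

alias = x_s_exp                    # no clone: shares storage
alias[:, 14, :] = 1.0              # silently corrupts x_s_exp as well
assert x_s_exp[0, 14, 0] == 1.0

x_s_exp.zero_()
safe = x_s_exp.clone()             # what the commit does
safe[:, 14, :] = 1.0               # x_s_exp stays untouched
assert x_s_exp[0, 14, 0] == 0.0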
@@ -298,21 +299,18 @@ class LivePortraitPipeline(object):
                         R_new = R_s
                 else:
                     if inf_cfg.animation_region == "all" or inf_cfg.animation_region == "pose":
-                        delta_new = x_s_info['exp']

                         R_new = (R_d_i @ R_d_0.permute(0, 2, 1)) @ R_s
                     else:
                         R_new = R_s
                 if inf_cfg.animation_region == "all" or inf_cfg.animation_region == "exp":
                     delta_new = x_d_exp_lst_smooth[i] if flag_is_source_video else x_s_info['exp'] + (x_d_i_info['exp'] - x_d_0_info['exp'])
                 elif inf_cfg.animation_region == "lip":
-                    delta_new = x_s_info['exp']
                     for lip_idx in [14, 17, 19, 20]:
-                        delta_new[:, lip_idx, :] += (x_d_i_info['exp'][:, lip_idx, :] - x_d_0_info['exp'][:, lip_idx, :])
+                        delta_new[:, lip_idx, :] = x_d_exp_lst_smooth[i][lip_idx, :] if flag_is_source_video else (x_s_info['exp'] + (x_d_i_info['exp'] - x_d_0_info['exp']))[:, lip_idx, :]
                 elif inf_cfg.animation_region == "eyes":
-                    delta_new = x_s_info['exp']
                     for eyes_idx in [11, 13, 15, 16]:
-                        delta_new[:, eyes_idx, :] += (x_d_i_info['exp'][:, eyes_idx, :] - x_d_0_info['exp'][:, eyes_idx, :])
+                        delta_new[:, eyes_idx, :] = x_d_exp_lst_smooth[i][eyes_idx, :] if flag_is_source_video else (x_s_info['exp'] + (x_d_i_info['exp'] - x_d_0_info['exp']))[:, eyes_idx, :]
                 if inf_cfg.animation_region == "all":
                     scale_new = x_s_info['scale'] if flag_is_source_video else x_s_info['scale'] * (x_d_i_info['scale'] / x_d_0_info['scale'])
                 else:
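The rewritten lip/eyes branches fold the old two-step update (copy the source expression, then += the driving delta) into a single assignment per keypoint index, preferring the pre-smoothed trajectory when the source is a video. A condensed sketch of that per-index relative update (function and argument names are placeholders):

# sketch: per-index relative update used by the "lip"/"eyes" branches (placeholder names)
import torch

def blend_region(delta_new, src_exp, d_i_exp, d_0_exp, smooth_i, idxs, is_video):
    # delta_new: (B, 21, 3) clone of the source expression
    for k in idxs:                                  # e.g. [14, 17, 19, 20] for lips
        if is_video:
            delta_new[:, k, :] = smooth_i[k, :]     # pre-smoothed per-frame target
        else:
            delta_new[:, k, :] = (src_exp + (d_i_exp - d_0_exp))[:, k, :]
    return delta_new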
@@ -329,20 +327,24 @@ class LivePortraitPipeline(object):
                         R_new = R_s
                 else:
                     if inf_cfg.animation_region == "all" or inf_cfg.animation_region == "pose":
-                        delta_new = x_s_info['exp']
+
                         R_new = R_d_i
                     else:
                         R_new = R_s
                 if inf_cfg.animation_region == "all" or inf_cfg.animation_region == "exp":
-                    delta_new = x_d_exp_lst_smooth[i] if flag_is_source_video else x_d_i_info['exp']
+                    # delta_new = x_d_exp_lst_smooth[i] if flag_is_source_video else x_d_i_info['exp']
+                    for idx in [1,2,6,11,12,13,14,15,16,17,18,19,20]:
+                        delta_new[:, idx, :] = x_d_exp_lst_smooth[i][idx, :] if flag_is_source_video else x_d_i_info['exp'][:, idx, :]
+                    delta_new[:, 3:5, 1] = x_d_exp_lst_smooth[i][3:5, 1] if flag_is_source_video else x_d_i_info['exp'][:, 3:5, 1]
+                    delta_new[:, 5, 2] = x_d_exp_lst_smooth[i][5, 2] if flag_is_source_video else x_d_i_info['exp'][:, 5, 2]
+                    delta_new[:, 8, 2] = x_d_exp_lst_smooth[i][8, 2] if flag_is_source_video else x_d_i_info['exp'][:, 8, 2]
+                    delta_new[:, 9, 1:] = x_d_exp_lst_smooth[i][9, 1:] if flag_is_source_video else x_d_i_info['exp'][:, 9, 1:]
                 elif inf_cfg.animation_region == "lip":
-                    delta_new = x_s_info['exp']
                     for lip_idx in [14, 17, 19, 20]:
-                        delta_new[:, lip_idx, :] = x_d_i_info['exp'][:, lip_idx, :]
+                        delta_new[:, lip_idx, :] = x_d_exp_lst_smooth[i][lip_idx, :] if flag_is_source_video else x_d_i_info['exp'][:, lip_idx, :]
                 elif inf_cfg.animation_region == "eyes":
-                    delta_new = x_s_info['exp']
                     for eyes_idx in [11, 13, 15, 16]:
-                        delta_new[:, eyes_idx, :] = x_d_i_info['exp'][:, eyes_idx, :]
+                        delta_new[:, eyes_idx, :] = x_d_exp_lst_smooth[i][eyes_idx, :] if flag_is_source_video else x_d_i_info['exp'][:, eyes_idx, :]
                 scale_new = x_s_info['scale']
                 if inf_cfg.animation_region == "all" or inf_cfg.animation_region == "pose":
                     t_new = x_d_i_info['t']
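In this non-relative branch, the new "exp" path no longer copies the whole driving expression; it overwrites only selected rows and components of the (B, 21, 3) keypoint-delta tensor, leaving the remaining keypoints anchored to the source. A shape-level sketch of exactly which entries those slices touch (indices copied from the diff; tensor names and the (x, y, z) component order are assumptions):

# sketch: entries of the (B, 21, 3) delta the "exp" branch overwrites
import torch

B = 1
delta_new = torch.zeros(B, 21, 3)          # stand-in for the cloned source expression
drv = torch.ones(B, 21, 3)                 # stand-in for x_d_i_info['exp']

for idx in [1, 2, 6, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]:
    delta_new[:, idx, :] = drv[:, idx, :]  # full xyz for these keypoints
delta_new[:, 3:5, 1] = drv[:, 3:5, 1]      # y component only, keypoints 3-4
delta_new[:, 5, 2] = drv[:, 5, 2]          # z component only, keypoint 5
delta_new[:, 8, 2] = drv[:, 8, 2]          # z component only, keypoint 8
delta_new[:, 9, 1:] = drv[:, 9, 1:]        # y and z components, keypoint 9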
@@ -66,14 +66,14 @@ class Cropper(object):
             providers=face_analysis_wrapper_provider,
         )
         self.face_analysis_wrapper.prepare(ctx_id=device_id, det_size=(512, 512), det_thresh=self.crop_cfg.det_thresh)
-        self.face_analysis_wrapper.warmup()
+        # self.face_analysis_wrapper.warmup()

         self.human_landmark_runner = HumanLandmark(
             ckpt_path=self.crop_cfg.landmark_ckpt_path,
             onnx_provider=device,
             device_id=device_id,
         )
-        self.human_landmark_runner.warmup()
+        # self.human_landmark_runner.warmup()

         if self.image_type == "animal_face":
             from .animal_landmark_runner import XPoseRunner as AnimalLandmarkRunner
@@ -83,7 +83,7 @@ class Cropper(object):
             embeddings_cache_path=self.crop_cfg.xpose_embedding_cache_path,
             flag_use_half_precision=kwargs.get("flag_use_half_precision", True),
         )
-        self.animal_landmark_runner.warmup()
+        # self.animal_landmark_runner.warmup()

     def update_config(self, user_args):
         for k, v in user_args.items():
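Commenting out the three warmup() calls trades higher first-inference latency for a faster Cropper constructor. If warmup is wanted back without paying the cost up front, a lazy wrapper would preserve both properties; this is a hypothetical sketch, not code from the repo:

# sketch: opt-in lazy warmup instead of warming every runner in __init__ (hypothetical)
class LazyWarmup:
    def __init__(self, runner):
        self._runner = runner
        self._warm = False

    def ensure_warm(self):
        # the first call pays the warmup cost; subsequent calls are free
        if not self._warm:
            self._runner.warmup()
            self._warm = True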