From 200f84dd1fb0b91bb684aa72099c5886ea614425 Mon Sep 17 00:00:00 2001
From: zhangdingyun
Date: Tue, 13 Aug 2024 18:23:30 +0800
Subject: [PATCH] feat: update

---
 src/config/argument_config.py |  8 ++++----
 src/live_portrait_pipeline.py | 26 ++++++++++++++------------
 src/utils/cropper.py          |  6 +++---
 3 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/src/config/argument_config.py b/src/config/argument_config.py
index 6599ce5..11fd3df 100644
--- a/src/config/argument_config.py
+++ b/src/config/argument_config.py
@@ -13,8 +13,8 @@ from .base_config import PrintableConfig, make_abs_path
 @dataclass(repr=False) # use repr from PrintableConfig
 class ArgumentConfig(PrintableConfig):
     ########## input arguments ##########
-    source: Annotated[str, tyro.conf.arg(aliases=["-s"])] = make_abs_path('../../assets/examples/source/s3.jpg') # path to the source portrait (human/animal) or video (human)
-    driving: Annotated[str, tyro.conf.arg(aliases=["-d"])] = make_abs_path('../../assets/examples/driving/d3.jpg') # path to driving video or template (.pkl format)
+    source: Annotated[str, tyro.conf.arg(aliases=["-s"])] = make_abs_path('../../assets/examples/driving/d0.mp4') # path to the source portrait (human/animal) or video (human)
+    driving: Annotated[str, tyro.conf.arg(aliases=["-d"])] = make_abs_path('../../assets/examples/driving/d6.pkl') # path to driving video or template (.pkl format)
     output_dir: Annotated[str, tyro.conf.arg(aliases=["-o"])] = 'animations/' # directory to save output video
 
     ########## inference arguments ##########
@@ -22,7 +22,7 @@ class ArgumentConfig(PrintableConfig):
     flag_crop_driving_video: bool = False # whether to crop the driving video, if the given driving info is a video
     device_id: int = 0 # gpu device id
     flag_force_cpu: bool = False # force cpu inference, WIP!
-    flag_normalize_lip: bool = True # whether to let the lip to close state before animation, only take effect when flag_eye_retargeting and flag_lip_retargeting is False
+    flag_normalize_lip: bool = False # whether to let the lip to close state before animation, only take effect when flag_eye_retargeting and flag_lip_retargeting is False
     flag_source_video_eye_retargeting: bool = False # when the input is a source video, whether to let the eye-open scalar of each frame to be the same as the first source frame before the animation, only take effect when flag_eye_retargeting and flag_lip_retargeting is False, may cause the inter-frame jittering
     flag_video_editing_head_rotation: bool = False # when the input is a source video, whether to inherit the relative head rotation from the driving video
     flag_eye_retargeting: bool = False # not recommend to be True, WIP; whether to transfer the eyes-open ratio of each driving frame to the source image or the corresponding source frame
@@ -35,7 +35,7 @@ class ArgumentConfig(PrintableConfig):
     driving_multiplier: float = 1.0 # be used only when driving_option is "expression-friendly"
     driving_smooth_observation_variance: float = 3e-7 # smooth strength scalar for the animated video when the input is a source video, the larger the number, the smoother the animated video; too much smoothness would result in loss of motion accuracy
     audio_priority: Literal['source', 'driving'] = 'driving' # whether to use the audio from source or driving video
-    animation_region: Literal["exp", "pose", "lip", "eyes", "all"] = "pose" # the region where the animation was performed, "exp" means the expression, "pose" means the head pose
+    animation_region: Literal["exp", "pose", "lip", "eyes", "all"] = "eyes" # the region where the animation was performed, "exp" means the expression, "pose" means the head pose
     ########## source crop arguments ##########
     det_thresh: float = 0.15 # detection threshold
     scale: float = 2.3 # the ratio of face area is smaller if scale is larger
diff --git a/src/live_portrait_pipeline.py b/src/live_portrait_pipeline.py
index dd66b89..e5067cc 100644
--- a/src/live_portrait_pipeline.py
+++ b/src/live_portrait_pipeline.py
@@ -290,6 +290,7 @@ class LivePortraitPipeline(object):
                 R_d_0 = R_d_i
                 x_d_0_info = x_d_i_info
 
+            delta_new = x_s_info['exp'].clone()
             if inf_cfg.flag_relative_motion:
                 if flag_is_source_video:
                     if inf_cfg.flag_video_editing_head_rotation:
@@ -298,21 +299,18 @@
                         R_new = R_s
                 else:
                     if inf_cfg.animation_region == "all" or inf_cfg.animation_region == "pose":
-                        delta_new = x_s_info['exp']
+                        R_new = (R_d_i @ R_d_0.permute(0, 2, 1)) @ R_s
                     else:
                         R_new = R_s
 
                 if inf_cfg.animation_region == "all" or inf_cfg.animation_region == "exp":
                     delta_new = x_d_exp_lst_smooth[i] if flag_is_source_video else x_s_info['exp'] + (x_d_i_info['exp'] - x_d_0_info['exp'])
                 elif inf_cfg.animation_region == "lip":
-                    delta_new = x_s_info['exp']
                     for lip_idx in [14, 17, 19, 20]:
-                        delta_new[:, lip_idx, :] += (x_d_i_info['exp'][:, lip_idx, :] - x_d_0_info['exp'][:, lip_idx, :])
+                        delta_new[:, lip_idx, :] = x_d_exp_lst_smooth[i][lip_idx, :] if flag_is_source_video else (x_s_info['exp'] + (x_d_i_info['exp'] - x_d_0_info['exp']))[:, lip_idx, :]
                 elif inf_cfg.animation_region == "eyes":
-                    delta_new = x_s_info['exp']
                     for eyes_idx in [11, 13, 15, 16]:
-                        delta_new[:, eyes_idx, :] += (x_d_i_info['exp'][:, eyes_idx, :] - x_d_0_info['exp'][:, eyes_idx, :])
-
+                        delta_new[:, eyes_idx, :] = x_d_exp_lst_smooth[i][eyes_idx, :] if flag_is_source_video else (x_s_info['exp'] + (x_d_i_info['exp'] - x_d_0_info['exp']))[:, eyes_idx, :]
                 if inf_cfg.animation_region == "all":
                     scale_new = x_s_info['scale'] if flag_is_source_video else x_s_info['scale'] * (x_d_i_info['scale'] / x_d_0_info['scale'])
                 else:
@@ -329,20 +327,24 @@ class LivePortraitPipeline(object):
                         R_new = R_s
                 else:
                     if inf_cfg.animation_region == "all" or inf_cfg.animation_region == "pose":
-                        delta_new = x_s_info['exp']
+                        R_new = R_d_i
                     else:
                         R_new = R_s
 
                 if inf_cfg.animation_region == "all" or inf_cfg.animation_region == "exp":
-                    delta_new = x_d_exp_lst_smooth[i] if flag_is_source_video else x_d_i_info['exp']
+                    # delta_new = x_d_exp_lst_smooth[i] if flag_is_source_video else x_d_i_info['exp']
+                    for idx in [1,2,6,11,12,13,14,15,16,17,18,19,20]:
+                        delta_new[:, idx, :] = x_d_exp_lst_smooth[i][idx, :] if flag_is_source_video else x_d_i_info['exp'][:, idx, :]
+                    delta_new[:, 3:5, 1] = x_d_exp_lst_smooth[i][3:5, 1] if flag_is_source_video else x_d_i_info['exp'][:, 3:5, 1]
+                    delta_new[:, 5, 2] = x_d_exp_lst_smooth[i][5, 2] if flag_is_source_video else x_d_i_info['exp'][:, 5, 2]
+                    delta_new[:, 8, 2] = x_d_exp_lst_smooth[i][8, 2] if flag_is_source_video else x_d_i_info['exp'][:, 8, 2]
+                    delta_new[:, 9, 1:] = x_d_exp_lst_smooth[i][9, 1:] if flag_is_source_video else x_d_i_info['exp'][:, 9, 1:]
                 elif inf_cfg.animation_region == "lip":
-                    delta_new = x_s_info['exp']
                     for lip_idx in [14, 17, 19, 20]:
-                        delta_new[:, lip_idx, :] = x_d_i_info['exp'][:, lip_idx, :]
+                        delta_new[:, lip_idx, :] = x_d_exp_lst_smooth[i][lip_idx, :] if flag_is_source_video else x_d_i_info['exp'][:, lip_idx, :]
                 elif inf_cfg.animation_region == "eyes":
-                    delta_new = x_s_info['exp']
                     for eyes_idx in [11, 13, 15, 16]:
-                        delta_new[:, eyes_idx, :] = x_d_i_info['exp'][:, eyes_idx, :]
+                        delta_new[:, eyes_idx, :] = x_d_exp_lst_smooth[i][eyes_idx, :] if flag_is_source_video else x_d_i_info['exp'][:, eyes_idx, :]
                 scale_new = x_s_info['scale']
                 if inf_cfg.animation_region == "all" or inf_cfg.animation_region == "pose":
                     t_new = x_d_i_info['t']
diff --git a/src/utils/cropper.py b/src/utils/cropper.py
index 97e26c7..10b4370 100644
--- a/src/utils/cropper.py
+++ b/src/utils/cropper.py
@@ -66,14 +66,14 @@ class Cropper(object):
             providers=face_analysis_wrapper_provider,
         )
         self.face_analysis_wrapper.prepare(ctx_id=device_id, det_size=(512, 512), det_thresh=self.crop_cfg.det_thresh)
-        self.face_analysis_wrapper.warmup()
+        # self.face_analysis_wrapper.warmup()
 
         self.human_landmark_runner = HumanLandmark(
             ckpt_path=self.crop_cfg.landmark_ckpt_path,
             onnx_provider=device,
             device_id=device_id,
         )
-        self.human_landmark_runner.warmup()
+        # self.human_landmark_runner.warmup()
 
         if self.image_type == "animal_face":
             from .animal_landmark_runner import XPoseRunner as AnimalLandmarkRunner
@@ -83,7 +83,7 @@
             embeddings_cache_path=self.crop_cfg.xpose_embedding_cache_path,
             flag_use_half_precision=kwargs.get("flag_use_half_precision", True),
         )
-        self.animal_landmark_runner.warmup()
+        # self.animal_landmark_runner.warmup()
 
     def update_config(self, user_args):
         for k, v in user_args.items():
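
Not part of the patch: the live_portrait_pipeline.py hunks above switch from rebuilding delta_new inside each animation_region branch to starting every frame from a clone of the source expression and overwriting only selected implicit-keypoint components. A minimal standalone sketch of the absolute-motion "exp" branch is given below; the tensor shapes and placeholder values are assumptions for illustration (assuming LivePortrait's (1, 21, 3) expression tensors), and only the indexing mirrors the patch.

import torch

# placeholder source/driving expression tensors, shaped like x_s_info['exp'] / x_d_i_info['exp'] (assumed (1, 21, 3))
x_s_exp = torch.zeros(1, 21, 3)
x_d_exp = torch.rand(1, 21, 3)

# start from a copy of the source expression, as the patch now does before the region branches
delta_new = x_s_exp.clone()

# transfer only the selected implicit-keypoint components from the driving expression,
# leaving the remaining source components untouched (mirrors the "exp" branch indexing)
for idx in [1, 2, 6, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]:
    delta_new[:, idx, :] = x_d_exp[:, idx, :]
delta_new[:, 3:5, 1] = x_d_exp[:, 3:5, 1]
delta_new[:, 5, 2] = x_d_exp[:, 5, 2]
delta_new[:, 8, 2] = x_d_exp[:, 8, 2]
delta_new[:, 9, 1:] = x_d_exp[:, 9, 1:]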