From 172b852127b1f2f6595786e4d6af8f0675dd8c25 Mon Sep 17 00:00:00 2001
From: Jianzhu Guo
Date: Thu, 25 Jul 2024 17:56:40 +0800
Subject: [PATCH] fix: v2v (#217)

* chore: fix v2v lip normalization and add smooth to non relative motion

* chore: fix v2v lip normalization and add smooth to non relative motion

* chore: fix v2v lip normalization and add smooth to non relative motion

---------

Co-authored-by: zhangdingyun
---
 src/config/argument_config.py |  4 ++--
 src/live_portrait_pipeline.py | 36 +++++++++++++++++++++++++----------
 2 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/src/config/argument_config.py b/src/config/argument_config.py
index 6653f9c..5930dbe 100644
--- a/src/config/argument_config.py
+++ b/src/config/argument_config.py
@@ -26,8 +26,8 @@ class ArgumentConfig(PrintableConfig):
     flag_normalize_lip: bool = True  # whether to let the lip to close state before animation, only take effect when flag_eye_retargeting and flag_lip_retargeting is False
     flag_source_video_eye_retargeting: bool = False  # when the input is a source video, whether to let the eye-open scalar of each frame to be the same as the first source frame before the animation, only take effect when flag_eye_retargeting and flag_lip_retargeting is False, may cause the inter-frame jittering
     flag_video_editing_head_rotation: bool = False  # when the input is a source video, whether to inherit the relative head rotation from the driving video
-    flag_eye_retargeting: bool = False  # not recommend to be True, WIP
-    flag_lip_retargeting: bool = False  # not recommend to be True, WIP
+    flag_eye_retargeting: bool = False  # not recommend to be True, WIP; whether to transfer the eyes-open ratio of each driving frame to the source image or the corresponding source frame
+    flag_lip_retargeting: bool = False  # not recommend to be True, WIP; whether to transfer the lip-open ratio of each driving frame to the source image or the corresponding source frame
     flag_stitching: bool = True  # recommend to True if head movement is small, False if head movement is large
     flag_relative_motion: bool = True  # whether to use relative motion
     flag_pasteback: bool = True  # whether to paste-back/stitch the animated face cropping from the face-cropping space to the original image space
diff --git a/src/live_portrait_pipeline.py b/src/live_portrait_pipeline.py
index 9eccc8e..95c2a50 100644
--- a/src/live_portrait_pipeline.py
+++ b/src/live_portrait_pipeline.py
@@ -193,12 +193,20 @@ class LivePortraitPipeline(object):
             I_s_lst = self.live_portrait_wrapper.prepare_videos(img_crop_256x256_lst)
             source_template_dct = self.make_motion_template(I_s_lst, c_s_eyes_lst, c_s_lip_lst, output_fps=source_fps)
 
-            x_d_exp_lst = [source_template_dct['motion'][i]['exp'] + driving_template_dct['motion'][i]['exp'] - driving_template_dct['motion'][0]['exp'] for i in range(n_frames)]
-            x_d_exp_lst_smooth = smooth(x_d_exp_lst, source_template_dct['motion'][0]['exp'].shape, device, inf_cfg.driving_smooth_observation_variance)
-            if inf_cfg.flag_video_editing_head_rotation:
-                key_r = 'R' if 'R' in driving_template_dct['motion'][0].keys() else 'R_d'  # compatible with previous keys
-                x_d_r_lst = [(np.dot(driving_template_dct['motion'][i][key_r], driving_template_dct['motion'][0][key_r].transpose(0, 2, 1))) @ source_template_dct['motion'][i]['R'] for i in range(n_frames)]
-                x_d_r_lst_smooth = smooth(x_d_r_lst, source_template_dct['motion'][0]['R'].shape, device, inf_cfg.driving_smooth_observation_variance)
+            key_r = 'R' if 'R' in driving_template_dct['motion'][0].keys() else 'R_d'  # compatible with previous keys
+            if inf_cfg.flag_relative_motion:
+                x_d_exp_lst = [source_template_dct['motion'][i]['exp'] + driving_template_dct['motion'][i]['exp'] - driving_template_dct['motion'][0]['exp'] for i in range(n_frames)]
+                x_d_exp_lst_smooth = smooth(x_d_exp_lst, source_template_dct['motion'][0]['exp'].shape, device, inf_cfg.driving_smooth_observation_variance)
+                if inf_cfg.flag_video_editing_head_rotation:
+                    x_d_r_lst = [(np.dot(driving_template_dct['motion'][i][key_r], driving_template_dct['motion'][0][key_r].transpose(0, 2, 1))) @ source_template_dct['motion'][i]['R'] for i in range(n_frames)]
+                    x_d_r_lst_smooth = smooth(x_d_r_lst, source_template_dct['motion'][0]['R'].shape, device, inf_cfg.driving_smooth_observation_variance)
+            else:
+                x_d_exp_lst = [driving_template_dct['motion'][i]['exp'] for i in range(n_frames)]
+                x_d_exp_lst_smooth = smooth(x_d_exp_lst, source_template_dct['motion'][0]['exp'].shape, device, inf_cfg.driving_smooth_observation_variance)
+                if inf_cfg.flag_video_editing_head_rotation:
+                    x_d_r_lst = [driving_template_dct['motion'][i][key_r] for i in range(n_frames)]
+                    x_d_r_lst_smooth = smooth(x_d_r_lst, source_template_dct['motion'][0]['R'].shape, device, inf_cfg.driving_smooth_observation_variance)
+
         else:  # if the input is a source image, process it only once
             if inf_cfg.flag_do_crop:
                 crop_info = self.cropper.crop_source_image(source_rgb_lst[0], crop_cfg)
@@ -217,7 +225,7 @@ class LivePortraitPipeline(object):
             x_s = self.live_portrait_wrapper.transform_keypoint(x_s_info)

             # let lip-open scalar to be 0 at first
-            if flag_normalize_lip and source_lmk is not None:
+            if flag_normalize_lip and inf_cfg.flag_relative_motion and source_lmk is not None:
                 c_d_lip_before_animation = [0.]
                 combined_lip_ratio_tensor_before_animation = self.live_portrait_wrapper.calc_combined_lip_ratio(c_d_lip_before_animation, source_lmk)
                 if combined_lip_ratio_tensor_before_animation[0][0] >= inf_cfg.lip_normalize_threshold:
@@ -244,11 +252,13 @@ class LivePortraitPipeline(object):
                 x_s = self.live_portrait_wrapper.transform_keypoint(x_s_info)

                 # let lip-open scalar to be 0 at first if the input is a video
-                if flag_normalize_lip and source_lmk is not None:
+                if flag_normalize_lip and inf_cfg.flag_relative_motion and source_lmk is not None:
                     c_d_lip_before_animation = [0.]
                     combined_lip_ratio_tensor_before_animation = self.live_portrait_wrapper.calc_combined_lip_ratio(c_d_lip_before_animation, source_lmk)
                     if combined_lip_ratio_tensor_before_animation[0][0] >= inf_cfg.lip_normalize_threshold:
                         lip_delta_before_animation = self.live_portrait_wrapper.retarget_lip(x_s, combined_lip_ratio_tensor_before_animation)
+                    else:
+                        lip_delta_before_animation = None

                 # let eye-open scalar to be the same as the first frame if the latter is eye-open state
                 if flag_source_video_eye_retargeting and source_lmk is not None:
@@ -284,8 +294,14 @@ class LivePortraitPipeline(object):
                 scale_new = x_s_info['scale'] if flag_is_source_video else x_s_info['scale'] * (x_d_i_info['scale'] / x_d_0_info['scale'])
                 t_new = x_s_info['t'] if flag_is_source_video else x_s_info['t'] + (x_d_i_info['t'] - x_d_0_info['t'])
             else:
-                R_new = R_d_i
-                delta_new = x_d_i_info['exp']
+                if flag_is_source_video:
+                    if inf_cfg.flag_video_editing_head_rotation:
+                        R_new = x_d_r_lst_smooth[i]
+                    else:
+                        R_new = R_s
+                else:
+                    R_new = R_d_i
+                delta_new = x_d_exp_lst_smooth[i] if flag_is_source_video else x_d_i_info['exp']
                 scale_new = x_s_info['scale']
                 t_new = x_d_i_info['t']
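
For reference (not part of the patch): a minimal sketch of the relative vs. absolute expression-target construction that the diff above introduces. moving_average_smooth is a hypothetical stand-in for the pipeline's smooth() helper (which additionally takes a target shape, a device, and an observation variance), the motion-dict layout only mirrors the *_template_dct['motion'] entries shown in the diff, and the (1, 21, 3) shapes are illustrative. The point of the added else branch is that non-relative (absolute) driving motion is now also smoothed, per the commit message "add smooth to non relative motion", to reduce inter-frame jitter.

import numpy as np

def moving_average_smooth(seq, window=3):
    # hypothetical stand-in smoother: simple moving average over the frame axis
    arr = np.stack(seq, axis=0)                 # (n_frames, ...)
    flat = arr.reshape(arr.shape[0], -1)        # flatten everything but the frame axis
    kernel = np.ones(window) / window
    sm = np.apply_along_axis(lambda col: np.convolve(col, kernel, mode='same'), 0, flat)
    return list(sm.reshape(arr.shape))

def build_exp_targets(source_motion, driving_motion, relative=True):
    # source_motion / driving_motion mimic *_template_dct['motion'] in the diff:
    # a list of per-frame dicts holding an 'exp' array
    n_frames = len(driving_motion)
    if relative:
        # relative mode: offset each source frame by the driving delta w.r.t. frame 0
        exp_lst = [source_motion[i]['exp'] + driving_motion[i]['exp'] - driving_motion[0]['exp']
                   for i in range(n_frames)]
    else:
        # absolute mode (the branch this patch adds): take the driving expression
        # directly, but still smooth it to suppress inter-frame jitter
        exp_lst = [driving_motion[i]['exp'] for i in range(n_frames)]
    return moving_average_smooth(exp_lst)

# toy usage with random expression arrays
rng = np.random.default_rng(0)
src = [{'exp': rng.normal(size=(1, 21, 3))} for _ in range(5)]
drv = [{'exp': rng.normal(size=(1, 21, 3))} for _ in range(5)]
x_d_exp_lst_smooth = build_exp_targets(src, drv, relative=False)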