feat: image-driven, regional animation

Author: zhangdingyun
Date:   2024-08-16 22:03:21 +08:00
Parent: 8c91177935
Commit: fbb8830b65
2 changed files with 0 additions and 9 deletions

src/gradio_pipeline.py

@@ -533,7 +533,6 @@ class GradioPipeline(LivePortraitPipeline):
                f_s_user_lst.append(f_s_user); x_s_user_lst.append(x_s_user); lip_delta_retargeting_lst.append(lip_delta_retargeting.cpu().numpy().astype(np.float32))
            lip_delta_retargeting_lst_smooth = smooth(lip_delta_retargeting_lst, lip_delta_retargeting_lst[0].shape, device, driving_smooth_observation_variance_retargeting)
            return f_s_user_lst, x_s_user_lst, source_lmk_crop_lst, source_M_c2o_lst, mask_ori_lst, source_rgb_lst, img_crop_256x256_lst, lip_delta_retargeting_lst_smooth, source_fps, n_frames
        else:
            # when press the clear button, go here
@@ -584,9 +583,7 @@ class GradioPipeline(LivePortraitPipeline):
                I_s = I_s_lst[i]
                f_s_user = self.live_portrait_wrapper.extract_feature_3d(I_s)
                x_d_i_new = x_s_info['scale'] * (x_s_info['kp'] @ x_s_info['R'] + delta_new + torch.from_numpy(inference_cfg.lip_array).to(dtype=torch.float32, device=device)) + x_s_info['t']
                f_s_user_lst.append(f_s_user); x_s_user_lst.append(x_s_user); x_d_i_new_lst.append(x_d_i_new)
            return f_s_user_lst, x_s_user_lst, x_d_i_new_lst, source_M_c2o_lst, mask_ori_lst, source_rgb_lst, img_crop_256x256_lst, source_fps, n_frames
        else:
            # when press the clear button, go here

src/live_portrait_pipeline.py

@@ -319,8 +319,6 @@ class LivePortraitPipeline(object):
            if i == 0:  # cache the first frame
                R_d_0 = R_d_i
                x_d_0_info = x_d_i_info.copy()
-               # if not flag_is_driving_video:
-               #     x_d_0_info['exp'] = 0

            delta_new = x_s_info['exp'].clone()
            if inf_cfg.flag_relative_motion:
@@ -329,7 +327,6 @@ class LivePortraitPipeline(object):
                else:
                    R_new = R_s
                if inf_cfg.animation_region == "all" or inf_cfg.animation_region == "exp":
-                   # delta_new = x_d_exp_lst_smooth[i] if flag_is_source_video else x_s_info['exp'] + (x_d_i_info['exp'] - x_d_0_info['exp'])
                    if flag_is_source_video:
                        for idx in [1,2,6,11,12,13,14,15,16,17,18,19,20]:
                            delta_new[:, idx, :] = x_d_exp_lst_smooth[i][idx, :]
@@ -341,7 +338,6 @@ class LivePortraitPipeline(object):
                        if flag_is_driving_video:
                            delta_new = x_s_info['exp'] + (x_d_i_info['exp'] - x_d_0_info['exp'])
                        else:
-                           # delta_new = x_s_info['exp'] + (x_d_i_info['exp'] - 0) if x_d_i_info['exp'].mean() > 0 else x_s_info['exp'] + (x_d_i_info['exp'] - torch.from_numpy(inf_cfg.lip_array).to(dtype=torch.float32, device=device))
                            delta_new = x_s_info['exp'] + (x_d_i_info['exp'] - torch.from_numpy(inf_cfg.lip_array).to(dtype=torch.float32, device=device))
                elif inf_cfg.animation_region == "lip":
                    for lip_idx in [6, 12, 14, 17, 19, 20]:
@@ -350,7 +346,6 @@ class LivePortraitPipeline(object):
                        elif flag_is_driving_video:
                            delta_new[:, lip_idx, :] = (x_s_info['exp'] + (x_d_i_info['exp'] - x_d_0_info['exp']))[:, lip_idx, :]
                        else:
-                           # delta_new[:, lip_idx, :] = (x_s_info['exp'] + (x_d_i_info['exp'] - 0))[:, lip_idx, :] if x_d_i_info['exp'].mean() > 0 else (x_s_info['exp'] + (x_d_i_info['exp'] - torch.from_numpy(inf_cfg.lip_array).to(dtype=torch.float32, device=device)))[:, lip_idx, :]
                            delta_new[:, lip_idx, :] = (x_s_info['exp'] + (x_d_i_info['exp'] - torch.from_numpy(inf_cfg.lip_array).to(dtype=torch.float32, device=device)))[:, lip_idx, :]
                elif inf_cfg.animation_region == "eyes":
                    for eyes_idx in [11, 13, 15, 16, 18]:
@@ -393,7 +388,6 @@ class LivePortraitPipeline(object):
                t_new = x_s_info['t']

            t_new[..., 2].fill_(0)  # zero tz
-           # x_d_i_new = x_s_info['scale'] * (x_c_s @ R_s) + x_s_info['t']
            x_d_i_new = scale_new * (x_c_s @ R_new + delta_new) + t_new

            if inf_cfg.driving_option == "expression-friendly" and not flag_is_source_video and flag_is_driving_video: