feat: update

zhangdingyun 2024-08-16 21:53:16 +08:00
parent 18ebcc8b61
commit 8c91177935
3 changed files with 5 additions and 73 deletions

app.py (68 changed lines)

@@ -126,74 +126,6 @@ retargeting_output_image_paste_back = gr.Image(type="numpy")
output_video = gr.Video(autoplay=False)
output_video_paste_back = gr.Video(autoplay=False)
"""
每个点和每个维度对应的表情
(0,0): 头顶左右偏
(0,1): 头顶上下偏
(0,2): 头顶前后偏
(1,0): 眉毛上下眼睛左右
(1,1): 眉毛上下眼睛上下
(1,2): 嘴巴和眼睛的动作
(2,0): 眉毛上下眼睛左右
(2,1): 眉毛上下眼睛上下
(2,2): 嘴巴动作
(3,0): 左脸胖瘦, 眉毛上下
(3,1): 左脸上下眉毛上下
(3,2): 左脸前后会变形
(4,0): 右脸胖瘦
(4,1): 右脸上下
(4,2): 右脸前后会变形
(5,0): 头左右平移
(5,1): 头上下平移
(5,2): 嘴部动作
(6,0): 嘴部动作
(6,1): 嘴部动作
(6,2): 嘴部动作
(7,0): 右脸胖瘦
(7,1): 右脸上下
(7,2): 右脸前后
(8,0): 右脸胖瘦
(8,1): 右脸上下
(8,2): 嘴部动作
(9,0): 下巴胖瘦
(9,1): 嘴部动作
(9,2): 眼部动作
(10,0): 左边放缩
(10,1): 左边放缩眼部动作
(10,2): 下巴放缩
(11,0): 左眼左右转
(11,1): 左眼上下睁开闭合
(11,2): 左眼前后
(12,0): 嘴部动作
(12,1): 无明显
(12,2): 嘴部动作
(13,0): 眼部动作
(13,1): 眼部动作
(13,2): 眼部动作
(14,0): 嘴部动作
(14,1): 嘴部动作
(14,2): 嘴部动作
(15,0): 眼部动作
(15,1): 眼部动作嘴部动作
(15,2): 眼部动作
(16,0): 眼睛
(16,1): 右眼睁开闭合嘴部动作
(16,2): 眼部动作
(17,0): 嘴部动作眼部动作
(17,1): 嘴部动作眼部动作
(17,2): 撅嘴拉平嘴
(18,0): 眼部方向
(18,1): 眼部上下
(18,2): 嘴部动作眼部动作
(19,0): 撇嘴
(19,1): 张开闭合嘴
(19,2): 内收外翻嘴
(20,0): 下弯嘴
(20,1): 露牙闭合牙
(20,2): 下拉嘴哦形嘴
"""
with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta Sans")])) as demo:
    gr.HTML(load_description(title_md))
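The deleted notes above describe, for each of the 21 implicit keypoints and each of its three axes, which facial motion that component of the expression tensor drives. As a rough illustration only (not part of this commit; the tensor name and the (1, 21, 3) layout are assumptions inferred from the notes), one could nudge individual (keypoint, dimension) entries like this:

# Hypothetical sketch: adjust single expression components per the (keypoint, dimension) table above.
# Assumes the expression deltas live in a tensor of shape (1, 21, 3); names are illustrative.
import torch

exp_delta = torch.zeros(1, 21, 3)   # neutral expression offsets (assumed layout)
exp_delta[0, 19, 1] += 0.02         # (19,1): open/close the mouth
exp_delta[0, 11, 1] -= 0.01         # (11,1): open/close the left eye
exp_delta[0, 16, 1] -= 0.01         # (16,1): open/close the right eye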

src/config/argument_config.py

@@ -13,8 +13,8 @@ from .base_config import PrintableConfig, make_abs_path
@dataclass(repr=False)  # use repr from PrintableConfig
class ArgumentConfig(PrintableConfig):
    ########## input arguments ##########
-    source: Annotated[str, tyro.conf.arg(aliases=["-s"])] = make_abs_path('../../assets/examples/source/s3.jpg')  # path to the source portrait (human/animal) or video (human)
+    source: Annotated[str, tyro.conf.arg(aliases=["-s"])] = make_abs_path('../../assets/examples/source/s0.jpg')  # path to the source portrait (human/animal) or video (human)
-    driving: Annotated[str, tyro.conf.arg(aliases=["-d"])] = make_abs_path('../../assets/examples/driving/d30.jpg')  # path to driving video or template (.pkl format)
+    driving: Annotated[str, tyro.conf.arg(aliases=["-d"])] = make_abs_path('../../assets/examples/driving/d0.mp4')  # path to driving video or template (.pkl format)
    output_dir: Annotated[str, tyro.conf.arg(aliases=["-o"])] = 'animations/'  # directory to save output video
    ########## inference arguments ##########
@@ -34,7 +34,7 @@ class ArgumentConfig(PrintableConfig):
    driving_multiplier: float = 1.0  # used only when driving_option is "expression-friendly"
    driving_smooth_observation_variance: float = 3e-7  # smooth strength scalar for the animated video when the input is a source video; the larger the number, the smoother the animated video; too much smoothness would result in loss of motion accuracy
    audio_priority: Literal['source', 'driving'] = 'driving'  # whether to use the audio from source or driving video
-    animation_region: Literal["exp", "pose", "lip", "eyes", "all"] = "all"  # the region where the animation is performed, "exp" means the expression, "pose" means the head pose
+    animation_region: Literal["exp", "pose", "lip", "eyes", "all"] = "exp"  # the region where the animation is performed, "exp" means the expression, "pose" means the head pose
    ########## source crop arguments ##########
    det_thresh: float = 0.15  # detection threshold
    scale: float = 2.3  # the ratio of face area is smaller if scale is larger
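These defaults feed a tyro-based CLI, so individual runs can still override them without editing the dataclass. A minimal sketch, assuming the config is importable from the repo's config package (the import path here is an assumption based on the layout implied by make_abs_path('../../assets/...')):

# Sketch only: parse ArgumentConfig from the command line with tyro, then override fields in code.
import tyro
from src.config.argument_config import ArgumentConfig  # assumed import path

args = tyro.cli(ArgumentConfig)   # CLI flags (including the -s/-d/-o aliases) override the defaults
args.animation_region = "all"     # e.g. restore full-head animation instead of the new "exp" default
print(args.source, args.driving, args.animation_region)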

src/gradio_pipeline.py

@@ -431,7 +431,7 @@ class GradioPipeline(LivePortraitPipeline):
else:
    inference_cfg = self.live_portrait_wrapper.inference_cfg
f_s_user_lst, x_s_user_lst, x_d_i_new_lst, source_M_c2o_lst, mask_ori_lst, source_rgb_lst, img_crop_256x256_lst, source_fps, n_frames = \
-    self.prepare_video_lip_silence(input_video, device, driving_smooth_observation_variance_retargeting, flag_do_crop=flag_do_crop_input_retargeting_video)
+    self.prepare_video_lip_silence(input_video, device, flag_do_crop=flag_do_crop_input_retargeting_video)
I_p_pstbk_lst = None
if flag_do_crop_input_retargeting_video:
@@ -540,7 +540,7 @@ class GradioPipeline(LivePortraitPipeline):
raise gr.Error("Please upload a source video as the retargeting input 🤗🤗🤗", duration=5)
@torch.no_grad()
-def prepare_video_lip_silence(self, input_video, device, driving_smooth_observation_variance_retargeting, flag_do_crop=True):
+def prepare_video_lip_silence(self, input_video, device, flag_do_crop=True):
    """ for keeping lips in the source video silent
    """
    if input_video is not None:
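With driving_smooth_observation_variance_retargeting dropped from the signature, callers now pass only the video, device, and crop flag, as the updated call site above shows. A hedged caller sketch (the pipeline instance, path, and device string are illustrative, not from this commit):

# Hypothetical usage: `pipeline` is assumed to be an already-constructed GradioPipeline.
(f_s_user_lst, x_s_user_lst, x_d_i_new_lst, source_M_c2o_lst, mask_ori_lst,
 source_rgb_lst, img_crop_256x256_lst, source_fps, n_frames) = pipeline.prepare_video_lip_silence(
    "assets/examples/driving/d0.mp4",  # illustrative input-video path
    device="cuda",
    flag_do_crop=True,
)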