From 8c91177935d00d19e703e4694170be856ace5a4e Mon Sep 17 00:00:00 2001
From: zhangdingyun
Date: Fri, 16 Aug 2024 21:53:16 +0800
Subject: [PATCH] feat: update

---
 app.py                        | 68 -----------------------------------
 src/config/argument_config.py |  6 ++--
 src/gradio_pipeline.py        |  4 +--
 3 files changed, 5 insertions(+), 73 deletions(-)

diff --git a/app.py b/app.py
index 12b4dbf..50c7285 100644
--- a/app.py
+++ b/app.py
@@ -126,74 +126,6 @@ retargeting_output_image_paste_back = gr.Image(type="numpy")
 output_video = gr.Video(autoplay=False)
 output_video_paste_back = gr.Video(autoplay=False)
 
-"""
-Expression controlled by each keypoint index and each dimension:
-(0,0): top of head tilts left/right
-(0,1): top of head tilts up/down
-(0,2): top of head shifts forward/backward
-(1,0): eyebrows up/down, eyes left/right
-(1,1): eyebrows up/down, eyes up/down
-(1,2): mouth and eye movement
-(2,0): eyebrows up/down, eyes left/right
-(2,1): eyebrows up/down, eyes up/down
-(2,2): mouth movement
-(3,0): left cheek fuller/thinner, eyebrows up/down
-(3,1): left cheek up/down, eyebrows up/down
-(3,2): left cheek forward/backward, causes distortion
-(4,0): right cheek fuller/thinner
-(4,1): right cheek up/down
-(4,2): right cheek forward/backward, causes distortion
-(5,0): head translates left/right
-(5,1): head translates up/down
-(5,2): mouth movement
-(6,0): mouth movement
-(6,1): mouth movement
-(6,2): mouth movement
-(7,0): right cheek fuller/thinner
-(7,1): right cheek up/down
-(7,2): right cheek forward/backward
-(8,0): right cheek fuller/thinner
-(8,1): right cheek up/down
-(8,2): mouth movement
-(9,0): chin fuller/thinner
-(9,1): mouth movement
-(9,2): eye movement
-(10,0): left side scaling
-(10,1): left side scaling, eye movement
-(10,2): chin scaling
-(11,0): left eye turns left/right
-(11,1): left eye opens/closes
-(11,2): left eye forward/backward
-(12,0): mouth movement
-(12,1): no obvious effect
-(12,2): mouth movement
-(13,0): eye movement
-(13,1): eye movement
-(13,2): eye movement
-(14,0): mouth movement
-(14,1): mouth movement
-(14,2): mouth movement
-(15,0): eye movement
-(15,1): eye movement, mouth movement
-(15,2): eye movement
-(16,0): eyes
-(16,1): right eye opens/closes, mouth movement
-(16,2): eye movement
-(17,0): mouth movement, eye movement
-(17,1): mouth movement, eye movement
-(17,2): pout lips / flatten lips
-(18,0): eye direction
-(18,1): eyes up/down
-(18,2): mouth movement, eye movement
-(19,0): lips curl to the side
-(19,1): mouth opens/closes
-(19,2): lips roll inward/outward
-(20,0): mouth corners turn down
-(20,1): show teeth / close teeth
-(20,2): mouth pulled down, "oh"-shaped mouth
-"""
-
-
 with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta Sans")])) as demo:
     gr.HTML(load_description(title_md))
 
diff --git a/src/config/argument_config.py b/src/config/argument_config.py
index aa482b8..f4a3b7d 100644
--- a/src/config/argument_config.py
+++ b/src/config/argument_config.py
@@ -13,8 +13,8 @@ from .base_config import PrintableConfig, make_abs_path
 @dataclass(repr=False) # use repr from PrintableConfig
 class ArgumentConfig(PrintableConfig):
     ########## input arguments ##########
-    source: Annotated[str, tyro.conf.arg(aliases=["-s"])] = make_abs_path('../../assets/examples/source/s3.jpg') # path to the source portrait (human/animal) or video (human)
-    driving: Annotated[str, tyro.conf.arg(aliases=["-d"])] = make_abs_path('../../assets/examples/driving/d30.jpg') # path to driving video or template (.pkl format)
+    source: Annotated[str, tyro.conf.arg(aliases=["-s"])] = make_abs_path('../../assets/examples/source/s0.jpg') # path to the source portrait (human/animal) or video (human)
+    driving: Annotated[str, tyro.conf.arg(aliases=["-d"])] = make_abs_path('../../assets/examples/driving/d0.mp4') # path to driving video or template (.pkl format)
     output_dir: Annotated[str, tyro.conf.arg(aliases=["-o"])] = 'animations/' # directory to save output video
 
     ########## inference arguments ##########
@@ -34,7 +34,7 @@ class ArgumentConfig(PrintableConfig):
     driving_multiplier: float = 1.0 # be used only when driving_option is "expression-friendly"
     driving_smooth_observation_variance: float = 3e-7 # smooth strength scalar for the animated video when the input is a source video, the larger the number, the smoother the animated video; too much smoothness would result in loss of motion accuracy
     audio_priority: Literal['source', 'driving'] = 'driving' # whether to use the audio from source or driving video
-    animation_region: Literal["exp", "pose", "lip", "eyes", "all"] = "all" # the region where the animation was performed, "exp" means the expression, "pose" means the head pose
+    animation_region: Literal["exp", "pose", "lip", "eyes", "all"] = "exp" # the region where the animation was performed, "exp" means the expression, "pose" means the head pose
     ########## source crop arguments ##########
     det_thresh: float = 0.15 # detection threshold
     scale: float = 2.3 # the ratio of face area is smaller if scale is larger
diff --git a/src/gradio_pipeline.py b/src/gradio_pipeline.py
index 8a67b40..263f14d 100644
--- a/src/gradio_pipeline.py
+++ b/src/gradio_pipeline.py
@@ -431,7 +431,7 @@ class GradioPipeline(LivePortraitPipeline):
         else:
             inference_cfg = self.live_portrait_wrapper.inference_cfg
         f_s_user_lst, x_s_user_lst, x_d_i_new_lst, source_M_c2o_lst, mask_ori_lst, source_rgb_lst, img_crop_256x256_lst, source_fps, n_frames = \
-            self.prepare_video_lip_silence(input_video, device, driving_smooth_observation_variance_retargeting, flag_do_crop=flag_do_crop_input_retargeting_video)
+            self.prepare_video_lip_silence(input_video, device, flag_do_crop=flag_do_crop_input_retargeting_video)
 
         I_p_pstbk_lst = None
         if flag_do_crop_input_retargeting_video:
@@ -540,7 +540,7 @@ class GradioPipeline(LivePortraitPipeline):
             raise gr.Error("Please upload a source video as the retargeting input 🤗🤗🤗", duration=5)
 
     @torch.no_grad()
-    def prepare_video_lip_silence(self, input_video, device, driving_smooth_observation_variance_retargeting, flag_do_crop=True):
+    def prepare_video_lip_silence(self, input_video, device, flag_do_crop=True):
         """ for keeping lips in the source video silent """
         if input_video is not None: