From 085b535020a8d8dfff92e7341c1dc88eef6e1ebd Mon Sep 17 00:00:00 2001
From: guojianzhu
Date: Sat, 20 Jul 2024 17:55:25 +0800
Subject: [PATCH] chore: change default audio, smooth, autoplay

---
 app.py                         | 26 +++++++++++++-------------
 src/config/argument_config.py  |  2 +-
 src/config/inference_config.py |  2 +-
 src/gradio_pipeline.py         |  2 +-
 src/live_portrait_pipeline.py  |  6 ++++--
 src/utils/filter.py            |  2 +-
 6 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/app.py b/app.py
index f618263..9b59a4d 100644
--- a/app.py
+++ b/app.py
@@ -69,12 +69,12 @@ data_examples_i2v = [
     [osp.join(example_portrait_dir, "s2.jpg"), osp.join(example_video_dir, "d13.mp4"), True, True, True, True],
 ]
 data_examples_v2v = [
-    [osp.join(example_portrait_dir, "s13.mp4"), osp.join(example_video_dir, "d0.mp4"), True, True, True, False, False, 3e-6],
-    # [osp.join(example_portrait_dir, "s14.mp4"), osp.join(example_video_dir, "d18.mp4"), True, True, True, False, False, 3e-6],
-    # [osp.join(example_portrait_dir, "s15.mp4"), osp.join(example_video_dir, "d19.mp4"), True, True, True, False, False, 3e-6],
-    [osp.join(example_portrait_dir, "s18.mp4"), osp.join(example_video_dir, "d6.mp4"), True, True, True, False, False, 3e-6],
-    # [osp.join(example_portrait_dir, "s19.mp4"), osp.join(example_video_dir, "d6.mp4"), True, True, True, False, False, 3e-6],
-    [osp.join(example_portrait_dir, "s20.mp4"), osp.join(example_video_dir, "d0.mp4"), True, True, True, False, False, 3e-6],
+    [osp.join(example_portrait_dir, "s13.mp4"), osp.join(example_video_dir, "d0.mp4"), True, True, True, False, False, 1e-7],
+    # [osp.join(example_portrait_dir, "s14.mp4"), osp.join(example_video_dir, "d18.mp4"), True, True, True, False, False, 1e-7],
+    # [osp.join(example_portrait_dir, "s15.mp4"), osp.join(example_video_dir, "d19.mp4"), True, True, True, False, False, 1e-7],
+    [osp.join(example_portrait_dir, "s18.mp4"), osp.join(example_video_dir, "d6.mp4"), True, True, True, False, False, 1e-7],
+    # [osp.join(example_portrait_dir, "s19.mp4"), osp.join(example_video_dir, "d6.mp4"), True, True, True, False, False, 1e-7],
+    [osp.join(example_portrait_dir, "s20.mp4"), osp.join(example_video_dir, "d0.mp4"), True, True, True, False, False, 1e-7],
 ]
 
 #################### interface logic ####################
@@ -84,10 +84,10 @@ lip_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, label="tar
 retargeting_input_image = gr.Image(type="filepath")
 output_image = gr.Image(type="numpy")
 output_image_paste_back = gr.Image(type="numpy")
-output_video_i2v = gr.Video(autoplay=True)
-output_video_concat_i2v = gr.Video(autoplay=True)
-output_video_v2v = gr.Video(autoplay=True)
-output_video_concat_v2v = gr.Video(autoplay=True)
+output_video_i2v = gr.Video(autoplay=False)
+output_video_concat_i2v = gr.Video(autoplay=False)
+# output_video_v2v = gr.Video(autoplay=False)
+# output_video_concat_v2v = gr.Video(autoplay=False)
 
 
 with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta Sans")])) as demo:
@@ -135,7 +135,7 @@ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta San
                     with gr.Accordion(open=True, label="Cropping Options for Source Image or Video"):
                         with gr.Row():
                             flag_do_crop_input = gr.Checkbox(value=True, label="do crop (source)")
-                            scale = gr.Number(value=2.3, label="source crop scale", minimum=1.8, maximum=2.9, step=0.05)
+                            scale = gr.Number(value=2.3, label="source crop scale", minimum=1.8, maximum=3.2, step=0.05)
                             vx_ratio = gr.Number(value=0.0, label="source crop x", minimum=-0.5, maximum=0.5, step=0.01)
                             vy_ratio = gr.Number(value=-0.125, label="source crop y", minimum=-0.5, maximum=0.5, step=0.01)
 
@@ -158,7 +158,7 @@ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta San
                     with gr.Accordion(open=True, label="Cropping Options for Driving Video"):
                         with gr.Row():
                             flag_crop_driving_video_input = gr.Checkbox(value=False, label="do crop (driving)")
-                            scale_crop_driving_video = gr.Number(value=2.2, label="driving crop scale", minimum=1.8, maximum=2.9, step=0.05)
+                            scale_crop_driving_video = gr.Number(value=2.2, label="driving crop scale", minimum=1.8, maximum=3.2, step=0.05)
                             vx_ratio_crop_driving_video = gr.Number(value=0.0, label="driving crop x", minimum=-0.5, maximum=0.5, step=0.01)
                             vy_ratio_crop_driving_video = gr.Number(value=-0.1, label="driving crop y", minimum=-0.5, maximum=0.5, step=0.01)
 
@@ -168,7 +168,7 @@ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta San
                     flag_relative_input = gr.Checkbox(value=True, label="relative motion")
                     flag_remap_input = gr.Checkbox(value=True, label="paste-back")
                     flag_video_editing_head_rotation = gr.Checkbox(value=False, label="relative head rotation (v2v)")
-                    driving_smooth_observation_variance = gr.Number(value=3e-6, label="motion smooth strength (v2v)", minimum=1e-11, maximum=1e-2, step=1e-11)
+                    driving_smooth_observation_variance = gr.Number(value=1e-7, label="motion smooth strength (v2v)", minimum=1e-11, maximum=1e-2, step=1e-8)
 
     gr.Markdown(load_description("assets/gradio/gradio_description_animate_clear.md"))
     with gr.Row():
diff --git a/src/config/argument_config.py b/src/config/argument_config.py
index 7a130e8..08d17a7 100644
--- a/src/config/argument_config.py
+++ b/src/config/argument_config.py
@@ -32,7 +32,7 @@ class ArgumentConfig(PrintableConfig):
     flag_relative_motion: bool = True  # whether to use relative motion
     flag_pasteback: bool = True  # whether to paste-back/stitch the animated face cropping from the face-cropping space to the original image space
     flag_do_crop: bool = True  # whether to crop the source portrait or video to the face-cropping space
-    driving_smooth_observation_variance: float = 3e-6  # smooth strength scalar for the animated video when the input is a source video, the larger the number, the smoother the animated video; too much smoothness would result in loss of motion accuracy
+    driving_smooth_observation_variance: float = 1e-7  # smooth strength scalar for the animated video when the input is a source video, the larger the number, the smoother the animated video; too much smoothness would result in loss of motion accuracy
 
     ########## source crop arguments ##########
     scale: float = 2.3  # the ratio of face area is smaller if scale is larger
diff --git a/src/config/inference_config.py b/src/config/inference_config.py
index adb313f..48bf88c 100644
--- a/src/config/inference_config.py
+++ b/src/config/inference_config.py
@@ -41,7 +41,7 @@ class InferenceConfig(PrintableConfig):
     # NOT EXPORTED PARAMS
     lip_normalize_threshold: float = 0.03  # threshold for flag_normalize_lip
     source_video_eye_retargeting_threshold: float = 0.18  # threshold for eyes retargeting if the input is a source video
-    driving_smooth_observation_variance: float = 3e-6  # smooth strength scalar for the animated video when the input is a source video, the larger the number, the smoother the animated video; too much smoothness would result in loss of motion accuracy
+    driving_smooth_observation_variance: float = 1e-7  # smooth strength scalar for the animated video when the input is a source video, the larger the number, the smoother the animated video; too much smoothness would result in loss of motion accuracy
     anchor_frame: int = 0  # TO IMPLEMENT
 
     input_shape: Tuple[int, int] = (256, 256)  # input shape
diff --git a/src/gradio_pipeline.py b/src/gradio_pipeline.py
index 7b01370..cbe898e 100644
--- a/src/gradio_pipeline.py
+++ b/src/gradio_pipeline.py
@@ -48,7 +48,7 @@ class GradioPipeline(LivePortraitPipeline):
         scale_crop_driving_video=2.2,
         vx_ratio_crop_driving_video=0.0,
         vy_ratio_crop_driving_video=-0.1,
-        driving_smooth_observation_variance=3e-6,
+        driving_smooth_observation_variance=1e-7,
         tab_selection=None,
     ):
         """ for video-driven potrait animation or video editing
diff --git a/src/live_portrait_pipeline.py b/src/live_portrait_pipeline.py
index 59fc9e1..e344ffc 100644
--- a/src/live_portrait_pipeline.py
+++ b/src/live_portrait_pipeline.py
@@ -367,7 +367,8 @@ class LivePortraitPipeline(object):
         if flag_source_has_audio or flag_driving_has_audio:
             # final result with concatenation
             wfp_concat_with_audio = osp.join(args.output_dir, f'{basename(args.source)}--{basename(args.driving)}_concat_with_audio.mp4')
-            audio_from_which_video = args.source if flag_source_has_audio else args.driving
+            # audio_from_which_video = args.source if flag_source_has_audio else args.driving  # default source audio
+            audio_from_which_video = args.driving if flag_driving_has_audio else args.source  # default driving audio
             log(f"Audio is selected from {audio_from_which_video}, concat mode")
             add_audio_to_video(wfp_concat, audio_from_which_video, wfp_concat_with_audio)
             os.replace(wfp_concat_with_audio, wfp_concat)
@@ -383,7 +384,8 @@ class LivePortraitPipeline(object):
         ######### build the final result #########
         if flag_source_has_audio or flag_driving_has_audio:
             wfp_with_audio = osp.join(args.output_dir, f'{basename(args.source)}--{basename(args.driving)}_with_audio.mp4')
-            audio_from_which_video = args.source if flag_source_has_audio else args.driving
+            # audio_from_which_video = args.source if flag_source_has_audio else args.driving  # default source audio
+            audio_from_which_video = args.driving if flag_driving_has_audio else args.source  # default driving audio
             log(f"Audio is selected from {audio_from_which_video}")
             add_audio_to_video(wfp, audio_from_which_video, wfp_with_audio)
             os.replace(wfp_with_audio, wfp)
diff --git a/src/utils/filter.py b/src/utils/filter.py
index 2ee6abc..5238f49 100644
--- a/src/utils/filter.py
+++ b/src/utils/filter.py
@@ -5,7 +5,7 @@ import numpy as np
 from pykalman import KalmanFilter
 
 
-def smooth(x_d_lst, shape, device, observation_variance=3e-6, process_variance=1e-5):
+def smooth(x_d_lst, shape, device, observation_variance=1e-7, process_variance=1e-5):
     x_d_lst_reshape = [x.reshape(-1) for x in x_d_lst]
     x_d_stacked = np.vstack(x_d_lst_reshape)
     kf = KalmanFilter(
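
Notes (not part of the patch):

The driving_smooth_observation_variance default drops from 3e-6 to 1e-7 across the configs, the Gradio UI, and src/utils/filter.py. In the Kalman smoother this value is the observation covariance: a smaller value tells the filter to trust the per-frame measurements more, so the animated motion tracks the driving video more tightly; larger values smooth harder at the cost of motion accuracy, as the inline comments warn. A minimal sketch of that trade-off, assuming pykalman is installed and using a synthetic 1-D track instead of the repository's motion tensors (smooth_1d and the sine signal are invented here for illustration):

import numpy as np
from pykalman import KalmanFilter

def smooth_1d(measurements, observation_variance, process_variance=1e-5):
    # Identity dynamics (pykalman's default transition matrix), echoing the
    # setup in src/utils/filter.py: each state is modeled as the previous
    # state plus process noise.
    kf = KalmanFilter(
        initial_state_mean=measurements[0],
        transition_covariance=process_variance * np.eye(1),
        observation_covariance=observation_variance * np.eye(1),
    )
    smoothed_means, _ = kf.smooth(measurements.reshape(-1, 1))
    return smoothed_means.ravel()

rng = np.random.default_rng(0)
t = np.linspace(0.0, 2.0 * np.pi, 120)
raw = np.sin(t) + rng.normal(scale=0.05, size=t.shape)  # a jittery motion track

old_default = smooth_1d(raw, observation_variance=3e-6)  # smooths harder
new_default = smooth_1d(raw, observation_variance=1e-7)  # stays nearer the raw track
print("mean |old - raw|:", np.abs(old_default - raw).mean())
print("mean |new - raw|:", np.abs(new_default - raw).mean())

The new default trades some jitter suppression for motion fidelity, consistent with the inline comment that too much smoothness loses motion accuracy.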
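
The audio default flips as well: the old ternary in src/live_portrait_pipeline.py preferred the source video's audio whenever the source had any; the new one prefers the driving video's audio and falls back to the source. Condensed into a hypothetical helper purely for illustration (pick_audio_source is not a name in the repository):

def pick_audio_source(source, driving, driving_has_audio):
    # New behavior from this patch: the driving video's audio wins when
    # present; otherwise fall back to the source video's audio. Callers
    # guard that at least one of the two carries an audio track.
    return driving if driving_has_audio else source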