mirror of https://github.com/KwaiVGI/LivePortrait.git
synced 2024-12-22 12:22:38 +00:00

chore: change default audio, smooth, autoplay

parent 51543310a2
commit 085b535020

app.py (26 changed lines)
@@ -69,12 +69,12 @@ data_examples_i2v = [
     [osp.join(example_portrait_dir, "s2.jpg"), osp.join(example_video_dir, "d13.mp4"), True, True, True, True],
 ]
 data_examples_v2v = [
-    [osp.join(example_portrait_dir, "s13.mp4"), osp.join(example_video_dir, "d0.mp4"), True, True, True, False, False, 3e-6],
-    # [osp.join(example_portrait_dir, "s14.mp4"), osp.join(example_video_dir, "d18.mp4"), True, True, True, False, False, 3e-6],
-    # [osp.join(example_portrait_dir, "s15.mp4"), osp.join(example_video_dir, "d19.mp4"), True, True, True, False, False, 3e-6],
-    [osp.join(example_portrait_dir, "s18.mp4"), osp.join(example_video_dir, "d6.mp4"), True, True, True, False, False, 3e-6],
-    # [osp.join(example_portrait_dir, "s19.mp4"), osp.join(example_video_dir, "d6.mp4"), True, True, True, False, False, 3e-6],
-    [osp.join(example_portrait_dir, "s20.mp4"), osp.join(example_video_dir, "d0.mp4"), True, True, True, False, False, 3e-6],
+    [osp.join(example_portrait_dir, "s13.mp4"), osp.join(example_video_dir, "d0.mp4"), True, True, True, False, False, 1e-7],
+    # [osp.join(example_portrait_dir, "s14.mp4"), osp.join(example_video_dir, "d18.mp4"), True, True, True, False, False, 1e-7],
+    # [osp.join(example_portrait_dir, "s15.mp4"), osp.join(example_video_dir, "d19.mp4"), True, True, True, False, False, 1e-7],
+    [osp.join(example_portrait_dir, "s18.mp4"), osp.join(example_video_dir, "d6.mp4"), True, True, True, False, False, 1e-7],
+    # [osp.join(example_portrait_dir, "s19.mp4"), osp.join(example_video_dir, "d6.mp4"), True, True, True, False, False, 1e-7],
+    [osp.join(example_portrait_dir, "s20.mp4"), osp.join(example_video_dir, "d0.mp4"), True, True, True, False, False, 1e-7],
 ]

 #################### interface logic ####################

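Only the trailing float of each v2v example row changes, from 3e-6 to 1e-7. Judging from the motion-smooth-strength control updated later in this diff, that column is each example's driving_smooth_observation_variance, so the bundled examples now default to lighter motion smoothing.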
@@ -84,10 +84,10 @@ lip_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, label="tar
 retargeting_input_image = gr.Image(type="filepath")
 output_image = gr.Image(type="numpy")
 output_image_paste_back = gr.Image(type="numpy")
-output_video_i2v = gr.Video(autoplay=True)
-output_video_concat_i2v = gr.Video(autoplay=True)
-output_video_v2v = gr.Video(autoplay=True)
-output_video_concat_v2v = gr.Video(autoplay=True)
+output_video_i2v = gr.Video(autoplay=False)
+output_video_concat_i2v = gr.Video(autoplay=False)
+# output_video_v2v = gr.Video(autoplay=False)
+# output_video_concat_v2v = gr.Video(autoplay=False)


 with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta Sans")])) as demo:
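The two image-to-video outputs stop autoplaying, and the v2v pair is left commented out in the new code. For readers unfamiliar with the flag, here is a minimal, self-contained sketch, not the app's actual layout (the labels are invented), of video outputs that wait for a manual play click; autoplay is a standard gr.Video parameter:

import gradio as gr

with gr.Blocks() as demo:
    # autoplay=False: the browser renders the result paused
    output_video_i2v = gr.Video(autoplay=False, label="animated video")
    output_video_concat_i2v = gr.Video(autoplay=False, label="animated video (concat)")

if __name__ == "__main__":
    demo.launch()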
@@ -135,7 +135,7 @@ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta San
 with gr.Accordion(open=True, label="Cropping Options for Source Image or Video"):
     with gr.Row():
         flag_do_crop_input = gr.Checkbox(value=True, label="do crop (source)")
-        scale = gr.Number(value=2.3, label="source crop scale", minimum=1.8, maximum=2.9, step=0.05)
+        scale = gr.Number(value=2.3, label="source crop scale", minimum=1.8, maximum=3.2, step=0.05)
         vx_ratio = gr.Number(value=0.0, label="source crop x", minimum=-0.5, maximum=0.5, step=0.01)
         vy_ratio = gr.Number(value=-0.125, label="source crop y", minimum=-0.5, maximum=0.5, step=0.01)

@@ -158,7 +158,7 @@ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta San
 with gr.Accordion(open=True, label="Cropping Options for Driving Video"):
     with gr.Row():
         flag_crop_driving_video_input = gr.Checkbox(value=False, label="do crop (driving)")
-        scale_crop_driving_video = gr.Number(value=2.2, label="driving crop scale", minimum=1.8, maximum=2.9, step=0.05)
+        scale_crop_driving_video = gr.Number(value=2.2, label="driving crop scale", minimum=1.8, maximum=3.2, step=0.05)
         vx_ratio_crop_driving_video = gr.Number(value=0.0, label="driving crop x", minimum=-0.5, maximum=0.5, step=0.01)
         vy_ratio_crop_driving_video = gr.Number(value=-0.1, label="driving crop y", minimum=-0.5, maximum=0.5, step=0.01)

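Together with the previous hunk, both crop-scale spinners keep their defaults (2.3 for the source, 2.2 for the driving video) but raise the UI maximum from 2.9 to 3.2. Since a larger scale shrinks the face-area ratio of the crop (per the scale comment in ArgumentConfig below), this permits noticeably wider crops than before.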
@@ -168,7 +168,7 @@ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta San
 flag_relative_input = gr.Checkbox(value=True, label="relative motion")
 flag_remap_input = gr.Checkbox(value=True, label="paste-back")
 flag_video_editing_head_rotation = gr.Checkbox(value=False, label="relative head rotation (v2v)")
-driving_smooth_observation_variance = gr.Number(value=3e-6, label="motion smooth strength (v2v)", minimum=1e-11, maximum=1e-2, step=1e-11)
+driving_smooth_observation_variance = gr.Number(value=1e-7, label="motion smooth strength (v2v)", minimum=1e-11, maximum=1e-2, step=1e-8)

 gr.Markdown(load_description("assets/gradio/gradio_description_animate_clear.md"))
 with gr.Row():
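Two separate changes land in this one control: the default value drops from 3e-6 to 1e-7, and the step widens from 1e-11 to 1e-8. Per the comment on this field in the config hunks below, a larger value gives a smoother animated video at the cost of motion accuracy, so the new default favors fidelity. The coarser step also makes the spinner usable at the new scale: at 1e-11 per click, nudging a 1e-7 value by even ten percent would take a thousand clicks.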
In the ArgumentConfig dataclass (file header lost in this mirror view; likely src/config/argument_config.py):

@@ -32,7 +32,7 @@ class ArgumentConfig(PrintableConfig):
     flag_relative_motion: bool = True  # whether to use relative motion
     flag_pasteback: bool = True  # whether to paste-back/stitch the animated face cropping from the face-cropping space to the original image space
     flag_do_crop: bool = True  # whether to crop the source portrait or video to the face-cropping space
-    driving_smooth_observation_variance: float = 3e-6  # smooth strength scalar for the animated video when the input is a source video; the larger the number, the smoother the animated video, but too much smoothness causes loss of motion accuracy
+    driving_smooth_observation_variance: float = 1e-7  # smooth strength scalar for the animated video when the input is a source video; the larger the number, the smoother the animated video, but too much smoothness causes loss of motion accuracy

     ########## source crop arguments ##########
     scale: float = 2.3  # the ratio of face area is smaller if scale is larger
In the InferenceConfig dataclass (likely src/config/inference_config.py):

@@ -41,7 +41,7 @@ class InferenceConfig(PrintableConfig):
     # NOT EXPORTED PARAMS
     lip_normalize_threshold: float = 0.03  # threshold for flag_normalize_lip
     source_video_eye_retargeting_threshold: float = 0.18  # threshold for eyes retargeting if the input is a source video
-    driving_smooth_observation_variance: float = 3e-6  # smooth strength scalar for the animated video when the input is a source video; the larger the number, the smoother the animated video, but too much smoothness causes loss of motion accuracy
+    driving_smooth_observation_variance: float = 1e-7  # smooth strength scalar for the animated video when the input is a source video; the larger the number, the smoother the animated video, but too much smoothness causes loss of motion accuracy
     anchor_frame: int = 0  # TO IMPLEMENT

     input_shape: Tuple[int, int] = (256, 256)  # input shape
In GradioPipeline (likely src/gradio_pipeline.py):

@@ -48,7 +48,7 @@ class GradioPipeline(LivePortraitPipeline):
         scale_crop_driving_video=2.2,
         vx_ratio_crop_driving_video=0.0,
         vy_ratio_crop_driving_video=-0.1,
-        driving_smooth_observation_variance=3e-6,
+        driving_smooth_observation_variance=1e-7,
         tab_selection=None,
     ):
         """ for video-driven portrait animation or video editing
In LivePortraitPipeline (likely src/live_portrait_pipeline.py):

@@ -367,7 +367,8 @@ class LivePortraitPipeline(object):
         if flag_source_has_audio or flag_driving_has_audio:
             # final result with concatenation
             wfp_concat_with_audio = osp.join(args.output_dir, f'{basename(args.source)}--{basename(args.driving)}_concat_with_audio.mp4')
-            audio_from_which_video = args.source if flag_source_has_audio else args.driving
+            # audio_from_which_video = args.source if flag_source_has_audio else args.driving  # default source audio
+            audio_from_which_video = args.driving if flag_driving_has_audio else args.source  # default driving audio
             log(f"Audio is selected from {audio_from_which_video}, concat mode")
             add_audio_to_video(wfp_concat, audio_from_which_video, wfp_concat_with_audio)
             os.replace(wfp_concat_with_audio, wfp_concat)
@@ -383,7 +384,8 @@ class LivePortraitPipeline(object):
         ######### build the final result #########
         if flag_source_has_audio or flag_driving_has_audio:
             wfp_with_audio = osp.join(args.output_dir, f'{basename(args.source)}--{basename(args.driving)}_with_audio.mp4')
-            audio_from_which_video = args.source if flag_source_has_audio else args.driving
+            # audio_from_which_video = args.source if flag_source_has_audio else args.driving  # default source audio
+            audio_from_which_video = args.driving if flag_driving_has_audio else args.source  # default driving audio
             log(f"Audio is selected from {audio_from_which_video}")
             add_audio_to_video(wfp, audio_from_which_video, wfp_with_audio)
             os.replace(wfp_with_audio, wfp)
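Both audio hunks make the same flip: the old ternary preferred the source's track whenever the source had audio, while the new one prefers the driving video's track whenever the driving video has audio. The two expressions disagree only when both inputs carry audio. A tiny self-contained sketch of the decision (the helper is hypothetical, written only to show the truth table):

def pick_audio(source, driving, source_has_audio, driving_has_audio):
    # old default: prefer the source's audio track
    old = source if source_has_audio else driving
    # new default: prefer the driving video's audio track
    new = driving if driving_has_audio else source
    return old, new

# The two defaults differ only when both inputs have audio:
print(pick_audio("s.mp4", "d.mp4", True, True))   # ('s.mp4', 'd.mp4')
print(pick_audio("s.mp4", "d.mp4", True, False))  # ('s.mp4', 's.mp4')
print(pick_audio("s.mp4", "d.mp4", False, True))  # ('d.mp4', 'd.mp4')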
In the smoothing utility (likely src/utils/filter.py):

@@ -5,7 +5,7 @@ import numpy as np
 from pykalman import KalmanFilter


-def smooth(x_d_lst, shape, device, observation_variance=3e-6, process_variance=1e-5):
+def smooth(x_d_lst, shape, device, observation_variance=1e-7, process_variance=1e-5):
     x_d_lst_reshape = [x.reshape(-1) for x in x_d_lst]
     x_d_stacked = np.vstack(x_d_lst_reshape)
     kf = KalmanFilter(
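For intuition on why a smaller observation_variance means weaker smoothing: in a Kalman smoother the observation covariance encodes how noisy the measurements are assumed to be, so shrinking it tells the filter to trust the raw per-frame motion more and deviate from it less. A minimal sketch in the spirit of the smooth() function above (the helper name and the identity-covariance setup are assumptions; pykalman defaults unspecified transition and observation matrices to identities):

import numpy as np
from pykalman import KalmanFilter

def smooth_sketch(x_d_stacked, observation_variance=1e-7, process_variance=1e-5):
    # x_d_stacked: (n_frames, dim) array of flattened per-frame motion params
    dim = x_d_stacked.shape[1]
    kf = KalmanFilter(
        initial_state_mean=x_d_stacked[0],
        n_dim_obs=dim,
        transition_covariance=process_variance * np.eye(dim),
        observation_covariance=observation_variance * np.eye(dim),
    )
    smoothed_means, _ = kf.smooth(x_d_stacked)
    return smoothed_means

# toy usage: a noisy 1-D ramp, smoothed with both defaults
traj = np.linspace(0, 1, 50)[:, None] + np.random.randn(50, 1) * 0.01
tight = smooth_sketch(traj, observation_variance=1e-7)  # hugs the raw data
loose = smooth_sketch(traj, observation_variance=3e-6)  # smooths harder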