Mirror of https://github.com/KwaiVGI/LivePortrait.git, synced 2024-12-22 04:12:38 +00:00

chore: modify arguments (#249)
commit 3f394785fb, parent 5d1d71b1e2

This commit adds an audio_priority argument that selects whether the output keeps the audio track of the source or the driving video, automatically crops driving videos that are not square, promotes driving_smooth_observation_variance, source_max_dim, and source_division from InferenceConfig's non-exported block to its exported parameters, and drops the unused _transform_img import.
src/config/argument_config.py

@@ -3,11 +3,10 @@
 """
 All configs for user
 """

 from dataclasses import dataclass
 import tyro
 from typing_extensions import Annotated
-from typing import Optional
+from typing import Optional, Literal
 from .base_config import PrintableConfig, make_abs_path

-
@@ -33,13 +32,15 @@ class ArgumentConfig(PrintableConfig):
     flag_pasteback: bool = True  # whether to paste-back/stitch the animated face cropping from the face-cropping space to the original image space
     flag_do_crop: bool = True  # whether to crop the source portrait or video to the face-cropping space
     driving_smooth_observation_variance: float = 3e-7  # smooth strength scalar for the animated video when the input is a source video, the larger the number, the smoother the animated video; too much smoothness would result in loss of motion accuracy
+
+    audio_priority: Literal['source', 'driving'] = 'driving'  # whether to use the audio from source or driving video
     ########## source crop arguments ##########
     det_thresh: float = 0.15  # detection threshold
     scale: float = 2.3  # the ratio of face area is smaller if scale is larger
     vx_ratio: float = 0  # the ratio to move the face to left or right in cropping space
     vy_ratio: float = -0.125  # the ratio to move the face to up or down in cropping space
     flag_do_rot: bool = True  # whether to conduct the rotation when flag_do_crop is True
     source_max_dim: int = 1280  # the max dim of height and width of source image or video, you can change it to a larger number, e.g., 1920
     source_division: int = 2  # make sure the height and width of source image or video can be divided by this number
     ########## driving crop arguments ##########
     scale_crop_driving_video: float = 2.2  # scale factor for cropping driving video
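Since the repo parses its argument dataclass with tyro (imported above), a Literal-typed field like the new audio_priority becomes a constrained CLI choice automatically. A minimal sketch, assuming a stripped-down stand-in dataclass rather than the real ArgumentConfig:

from dataclasses import dataclass
from typing import Literal

import tyro

@dataclass
class Args:
    # Mirrors the field added in this commit; 'driving' is the default.
    audio_priority: Literal['source', 'driving'] = 'driving'

if __name__ == '__main__':
    args = tyro.cli(Args)  # e.g. python demo.py --audio-priority source
    print(args.audio_priority)

tyro renders the Literal values as the only accepted choices and rejects anything else at parse time, so no extra validation code is needed.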
src/config/inference_config.py

@@ -37,11 +37,13 @@ class InferenceConfig(PrintableConfig):
     flag_do_rot: bool = True
     flag_force_cpu: bool = False
     flag_do_torch_compile: bool = False
+    driving_smooth_observation_variance: float = 3e-7  # smooth strength scalar for the animated video when the input is a source video, the larger the number, the smoother the animated video; too much smoothness would result in loss of motion accuracy
+    source_max_dim: int = 1280  # the max dim of height and width of source image or video
+    source_division: int = 2  # make sure the height and width of source image or video can be divided by this number

     # NOT EXPORTED PARAMS
     lip_normalize_threshold: float = 0.03  # threshold for flag_normalize_lip
     source_video_eye_retargeting_threshold: float = 0.18  # threshold for eyes retargeting if the input is a source video
-    driving_smooth_observation_variance: float = 3e-7  # smooth strength scalar for the animated video when the input is a source video, the larger the number, the smoother the animated video; too much smoothness would result in loss of motion accuracy
     anchor_frame: int = 0  # TO IMPLEMENT

     input_shape: Tuple[int, int] = (256, 256)  # input shape
@@ -51,5 +53,3 @@ class InferenceConfig(PrintableConfig):

     mask_crop: ndarray = field(default_factory=lambda: cv2.imread(make_abs_path('../utils/resources/mask_template.png'), cv2.IMREAD_COLOR))
     size_gif: int = 256  # default gif size, TO IMPLEMENT
-    source_max_dim: int = 1280  # the max dim of height and width of source image or video
-    source_division: int = 2  # make sure the height and width of source image or video can be divided by this number
src/live_portrait_pipeline.py

@@ -19,9 +19,9 @@ from .config.crop_config import CropConfig
 from .utils.cropper import Cropper
 from .utils.camera import get_rotation_matrix
 from .utils.video import images2video, concat_frames, get_fps, add_audio_to_video, has_audio_stream
-from .utils.crop import _transform_img, prepare_paste_back, paste_back
+from .utils.crop import prepare_paste_back, paste_back
 from .utils.io import load_image_rgb, load_video, resize_to_limit, dump, load
-from .utils.helper import mkdir, basename, dct2device, is_video, is_template, remove_suffix, is_image
+from .utils.helper import mkdir, basename, dct2device, is_video, is_template, remove_suffix, is_image, is_square_video
 from .utils.filter import smooth
 from .utils.rprint import rlog as log
 # from .utils.viz import viz_lmk
@@ -137,7 +137,7 @@ class LivePortraitPipeline(object):
                 driving_rgb_lst = driving_rgb_lst[:n_frames]
             else:
                 n_frames = driving_n_frames
-            if inf_cfg.flag_crop_driving_video:
+            if inf_cfg.flag_crop_driving_video or (not is_square_video(args.driving)):
                 ret_d = self.cropper.crop_driving_video(driving_rgb_lst)
                 log(f'Driving video is cropped, {len(ret_d["frame_crop_lst"])} frames are processed.')
                 if len(ret_d["frame_crop_lst"]) != n_frames:
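The body of is_square_video is not part of this commit's visible hunks; it lives in .utils.helper. A plausible sketch of such a helper, assuming it reads the stream dimensions with OpenCV (the repo already depends on cv2):

import cv2

def is_square_video(video_path: str) -> bool:
    # A square video reports equal width and height for its frames.
    cap = cv2.VideoCapture(video_path)
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        cap.release()
    return width == height

With this check in the condition, non-square driving videos are routed through crop_driving_video even when flag_crop_driving_video is off, which keeps the driving frames in the square face-cropping space the model expects.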
@@ -382,8 +382,7 @@ class LivePortraitPipeline(object):
         if flag_source_has_audio or flag_driving_has_audio:
             # final result with concatenation
             wfp_concat_with_audio = osp.join(args.output_dir, f'{basename(args.source)}--{basename(args.driving)}_concat_with_audio.mp4')
-            # audio_from_which_video = args.source if flag_source_has_audio else args.driving  # default source audio
-            audio_from_which_video = args.driving if flag_driving_has_audio else args.source  # default driving audio
+            audio_from_which_video = args.driving if ((flag_driving_has_audio and args.audio_priority == 'driving') or (not flag_source_has_audio)) else args.source
             log(f"Audio is selected from {audio_from_which_video}, concat mode")
             add_audio_to_video(wfp_concat, audio_from_which_video, wfp_concat_with_audio)
             os.replace(wfp_concat_with_audio, wfp_concat)
@@ -399,8 +398,7 @@ class LivePortraitPipeline(object):
         ######### build the final result #########
         if flag_source_has_audio or flag_driving_has_audio:
             wfp_with_audio = osp.join(args.output_dir, f'{basename(args.source)}--{basename(args.driving)}_with_audio.mp4')
-            # audio_from_which_video = args.source if flag_source_has_audio else args.driving  # default source audio
-            audio_from_which_video = args.driving if flag_driving_has_audio else args.source  # default driving audio
+            audio_from_which_video = args.driving if ((flag_driving_has_audio and args.audio_priority == 'driving') or (not flag_source_has_audio)) else args.source
             log(f"Audio is selected from {audio_from_which_video}")
             add_audio_to_video(wfp, audio_from_which_video, wfp_with_audio)
             os.replace(wfp_with_audio, wfp)
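The same ternary is added in both audio branches (concat and final result). As an illustration only, not commit code, here it is lifted into a helper whose names mirror the diff's variables, with the precedence spelled out: driving audio wins when it exists and is preferred, or when the source has no audio at all.

def select_audio_source(source: str, driving: str,
                        source_has_audio: bool, driving_has_audio: bool,
                        audio_priority: str = 'driving') -> str:
    # Equivalent to the ternary added in this commit.
    if (driving_has_audio and audio_priority == 'driving') or not source_has_audio:
        return driving
    return source

# Resulting behavior:
assert select_audio_source('s.mp4', 'd.mp4', True, True, 'source') == 's.mp4'   # priority honored
assert select_audio_source('s.mp4', 'd.mp4', False, True, 'source') == 'd.mp4'  # source has no audio
assert select_audio_source('s.mp4', 'd.mp4', True, True, 'driving') == 'd.mp4'  # default priority

Note the surrounding if-guard already ensures at least one input has audio, so the helper never has to handle the neither-has-audio case.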
src/utils/cropper.py

@@ -135,6 +135,7 @@ class Cropper(object):

         return lmk

+    # TODO: support skipping frame with NO FACE
     def crop_source_video(self, source_rgb_lst, crop_cfg: CropConfig, **kwargs):
         """Tracking based landmarks/alignment and cropping"""
         trajectory = Trajectory()
@@ -157,8 +158,10 @@ class Cropper(object):
                 lmk = self.landmark_runner.run(frame_rgb, lmk)
                 trajectory.start, trajectory.end = idx, idx
             else:
+                # TODO: add IOU check for tracking
                 lmk = self.landmark_runner.run(frame_rgb, trajectory.lmk_lst[-1])
                 trajectory.end = idx
+
             trajectory.lmk_lst.append(lmk)

             # crop the face
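For context, this hunk touches a detect-then-track loop: the first frame gets a fresh detection, and every later frame refines the previous frame's landmarks. A simplified stand-in sketch (detect and refine are hypothetical callables standing in for the detector and self.landmark_runner.run; not repo code):

def track_landmarks(frames, detect, refine):
    # detect: frame -> landmarks (fresh detection)
    # refine: (frame, previous landmarks) -> landmarks (tracking step)
    lmk_lst = []
    for idx, frame in enumerate(frames):
        if idx == 0:
            lmk = detect(frame)  # first frame: run full detection
        else:
            # the commit's TODO: validate the track (e.g., an IOU check) before reuse
            lmk = refine(frame, lmk_lst[-1])
        lmk_lst.append(lmk)
    return lmk_lst

The TODO comments added here flag the two known gaps in this pattern: a frame with no detectable face, and a track that has drifted off the face.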