feat: precise portrait editing (#296)

* feat: edit

* chore: edit

* doc: update

* doc: update

* doc: update

* chore: update

---------

Co-authored-by: zhangdingyun <zhangdingyun@kuaishou.com>
Jianzhu Guo 2024-08-06 22:47:26 +08:00 committed by GitHub
parent f01202e9a9
commit 7fda929bf2
8 changed files with 279 additions and 52 deletions

app.py

@@ -55,6 +55,7 @@ if args.gradio_temp_dir not in (None, ''):
os.environ["GRADIO_TEMP_DIR"] = args.gradio_temp_dir
os.makedirs(args.gradio_temp_dir, exist_ok=True)
def gpu_wrapped_execute_video(*args, **kwargs):
return gradio_pipeline.execute_video(*args, **kwargs)
@@ -62,9 +63,15 @@ def gpu_wrapped_execute_video(*args, **kwargs):
def gpu_wrapped_execute_image_retargeting(*args, **kwargs):
return gradio_pipeline.execute_image_retargeting(*args, **kwargs)
def gpu_wrapped_execute_video_retargeting(*args, **kwargs):
return gradio_pipeline.execute_video_retargeting(*args, **kwargs)
def reset_sliders(*args, **kwargs):
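# Neutral defaults for the 15 editing sliders (mov_z back to 1.0), source scale back to 2.5,
# and both checkboxes back to True — same order as the `outputs` list wired to reset_button.click below.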
return 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.5, True, True
# assets
title_md = "assets/gradio/gradio_title.md"
example_portrait_dir = "assets/examples/source"
@@ -97,10 +104,24 @@ video_lip_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, labe
head_pitch_slider = gr.Slider(minimum=-15.0, maximum=15.0, value=0, step=1, label="relative pitch")
head_yaw_slider = gr.Slider(minimum=-25, maximum=25, value=0, step=1, label="relative yaw")
head_roll_slider = gr.Slider(minimum=-15.0, maximum=15.0, value=0, step=1, label="relative roll")
mov_x = gr.Slider(minimum=-0.19, maximum=0.19, value=0.0, step=0.01, label="x-axis movement")
mov_y = gr.Slider(minimum=-0.19, maximum=0.19, value=0.0, step=0.01, label="y-axis movement")
mov_z = gr.Slider(minimum=0.9, maximum=1.2, value=1.0, step=0.01, label="z-axis movement")
lip_variation_zero = gr.Slider(minimum=-0.09, maximum=0.09, value=0, step=0.01, label="pouting")
lip_variation_one = gr.Slider(minimum=-20.0, maximum=15.0, value=0, step=0.01, label="pursing 😐")
lip_variation_two = gr.Slider(minimum=0.0, maximum=15.0, value=0, step=0.01, label="grin 😁")
lip_variation_three = gr.Slider(minimum=-90.0, maximum=120.0, value=0, step=1.0, label="lip close <-> open")
smile = gr.Slider(minimum=-0.3, maximum=1.3, value=0, step=0.01, label="smile 😄")
wink = gr.Slider(minimum=0, maximum=39, value=0, step=0.01, label="wink 😉")
eyebrow = gr.Slider(minimum=-30, maximum=30, value=0, step=0.01, label="eyebrow 🤨")
eyeball_direction_x = gr.Slider(minimum=-30.0, maximum=30.0, value=0, step=0.01, label="eye gaze (horizontal) 👀")
eyeball_direction_y = gr.Slider(minimum=-63.0, maximum=63.0, value=0, step=0.01, label="eye gaze (vertical) 🙄")
retargeting_input_image = gr.Image(type="filepath")
retargeting_input_video = gr.Video()
output_image = gr.Image(type="numpy")
output_image_paste_back = gr.Image(type="numpy")
retargeting_output_image = gr.Image(type="numpy")
retargeting_output_image_paste_back = gr.Image(type="numpy")
output_video = gr.Video(autoplay=False)
output_video_paste_back = gr.Video(autoplay=False)
output_video_i2v = gr.Video(autoplay=False)
@@ -250,15 +271,46 @@ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta San
gr.Markdown(load_description("assets/gradio/gradio_description_retargeting.md"), visible=True)
with gr.Row(visible=True):
flag_do_crop_input_retargeting_image = gr.Checkbox(value=True, label="do crop (source)")
flag_stitching_retargeting_input = gr.Checkbox(value=True, label="stitching")
retargeting_source_scale.render()
eye_retargeting_slider.render()
lip_retargeting_slider.render()
with gr.Row(visible=True):
with gr.Column():
with gr.Accordion(open=True, label="Facial movement sliders"):
with gr.Row(visible=True):
head_pitch_slider.render()
head_yaw_slider.render()
head_roll_slider.render()
with gr.Row(visible=True):
process_button_retargeting = gr.Button("🚗 Retargeting Image", variant="primary")
mov_x.render()
mov_y.render()
mov_z.render()
with gr.Column():
with gr.Accordion(open=True, label="Facial expression sliders"):
with gr.Row(visible=True):
lip_variation_zero.render()
lip_variation_one.render()
lip_variation_two.render()
with gr.Row(visible=True):
lip_variation_three.render()
smile.render()
wink.render()
with gr.Row(visible=True):
eyebrow.render()
eyeball_direction_x.render()
eyeball_direction_y.render()
with gr.Row(visible=True):
reset_button = gr.Button("🔄 Reset")
reset_button.click(
fn=reset_sliders,
inputs=None,
outputs=[
head_pitch_slider, head_yaw_slider, head_roll_slider, mov_x, mov_y, mov_z,
lip_variation_zero, lip_variation_one, lip_variation_two, lip_variation_three, smile, wink, eyebrow, eyeball_direction_x, eyeball_direction_y,
retargeting_source_scale, flag_stitching_retargeting_input, flag_do_crop_input_retargeting_image
]
)
with gr.Row(visible=True):
with gr.Column():
with gr.Accordion(open=True, label="Retargeting Image Input"):
@@ -272,28 +324,24 @@ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta San
[osp.join(example_portrait_dir, "s7.jpg")],
[osp.join(example_portrait_dir, "s12.jpg")],
[osp.join(example_portrait_dir, "s22.jpg")],
[osp.join(example_portrait_dir, "s23.jpg")],
# [osp.join(example_portrait_dir, "s23.jpg")],
[osp.join(example_portrait_dir, "s42.jpg")],
],
inputs=[retargeting_input_image],
cache_examples=False,
)
with gr.Column():
with gr.Accordion(open=True, label="Retargeting Result"):
output_image.render()
retargeting_output_image.render()
with gr.Column():
with gr.Accordion(open=True, label="Paste-back Result"):
output_image_paste_back.render()
retargeting_output_image_paste_back.render()
with gr.Row(visible=True):
process_button_reset_retargeting = gr.ClearButton(
[
eye_retargeting_slider,
lip_retargeting_slider,
head_pitch_slider,
head_yaw_slider,
head_roll_slider,
retargeting_input_image,
output_image,
output_image_paste_back
retargeting_output_image,
retargeting_output_image_paste_back,
],
value="🧹 Clear"
)
@@ -306,7 +354,7 @@ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta San
video_lip_retargeting_slider.render()
driving_smooth_observation_variance_retargeting.render()
with gr.Row(visible=True):
process_button_retargeting_video = gr.Button("🍄 Retargeting Video", variant="primary")
process_button_retargeting_video = gr.Button("🚗 Retargeting Video", variant="primary")
with gr.Row(visible=True):
with gr.Column():
with gr.Accordion(open=True, label="Retargeting Video Input"):
@@ -369,16 +417,21 @@ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta San
retargeting_input_image.change(
fn=gradio_pipeline.init_retargeting_image,
inputs=[retargeting_source_scale, retargeting_input_image],
inputs=[retargeting_source_scale, eye_retargeting_slider, lip_retargeting_slider, retargeting_input_image],
outputs=[eye_retargeting_slider, lip_retargeting_slider]
)
process_button_retargeting.click(
# fn=gradio_pipeline.execute_image,
sliders = [eye_retargeting_slider, lip_retargeting_slider, head_pitch_slider, head_yaw_slider, head_roll_slider, mov_x, mov_y, mov_z, lip_variation_zero, lip_variation_one, lip_variation_two, lip_variation_three, smile, wink, eyebrow, eyeball_direction_x, eyeball_direction_y]
for slider in sliders:
# NOTE: gradio >= 4.0.0 may cause slow response
slider.change(
fn=gpu_wrapped_execute_image_retargeting,
inputs=[eye_retargeting_slider, lip_retargeting_slider, head_pitch_slider, head_yaw_slider, head_roll_slider, retargeting_input_image, retargeting_source_scale, flag_do_crop_input_retargeting_image],
outputs=[output_image, output_image_paste_back],
show_progress=True
inputs=[
eye_retargeting_slider, lip_retargeting_slider, head_pitch_slider, head_yaw_slider, head_roll_slider, mov_x, mov_y, mov_z,
lip_variation_zero, lip_variation_one, lip_variation_two, lip_variation_three, smile, wink, eyebrow, eyeball_direction_x, eyeball_direction_y,
retargeting_input_image, retargeting_source_scale, flag_stitching_retargeting_input, flag_do_crop_input_retargeting_image
],
outputs=[retargeting_output_image, retargeting_output_image_paste_back],
)
process_button_retargeting_video.click(

assets/docs/changelog/2024-08-06.md

@@ -0,0 +1,7 @@
## Precise Portrait Editing
<p align="center">
<img src="../editing-portrait-2024-08-06.jpg" alt="LivePortrait" width="960px">
<br>
Portrait Editing in the Gradio Interface
</p>

Binary file not shown (new image, 301 KiB).

Binary file not shown (new image, 88 KiB).

assets/gradio/gradio_description_retargeting.md

@@ -6,9 +6,8 @@
<div style="display: flex; justify-content: center; align-items: center; text-align: center; font-size: 1.2em;">
<div>
<h2>Retargeting Image</h2>
<p>Upload a Source Portrait as Retargeting Input, then drag the sliders and click the <strong>🚗 Retargeting Image</strong> button. You can try running it multiple times.
<br>
<h2>Retargeting and Editing Portraits</h2>
<p>Upload a source portrait, and the <code>eyes-open ratio</code> and <code>lip-open ratio</code> will be auto-calculated. Adjust the sliders to see instant edits. Feel free to experiment! 🎨<br>
<strong>😊 Set both target eyes-open and lip-open ratios to 0.8 to see what's going on!</strong></p>
</div>
</div>

assets/gradio/gradio_description_retargeting_video.md

@@ -2,7 +2,7 @@
<div style="display: flex; justify-content: center; align-items: center; text-align: center; font-size: 1.2em;">
<div>
<h2>Retargeting Video</h2>
<p>Upload a Source Video as Retargeting Input, then drag the sliders and click the <strong>🍄 Retargeting Video</strong> button. You can try running it multiple times.
<p>Upload a Source Video as Retargeting Input, then drag the sliders and click the <strong>🚗 Retargeting Video</strong> button. You can try running it multiple times.
<br>
<strong>🤐 Set target lip-open ratio to 0 to see what's going on!</strong></p>
</div>

readme.md

@@ -38,7 +38,8 @@
## 🔥 Updates
- **`2024/08/05`**: 📦 Windows users download the [one-click installer](https://huggingface.co/cleardusk/LivePortrait-Windows/blob/main/LivePortrait-Windows-v20240805.zip) for Humans mode and **Animals mode** now! For details, see [**here**](./assets/docs/changelog/2024-08-05.md).
- **`2024/08/06`**: 🎨 We support **precise portrait editing** in the Gradio interface, inspired by [ComfyUI-AdvancedLivePortrait](https://github.com/PowerHouseMan/ComfyUI-AdvancedLivePortrait). See [**here**](./assets/docs/changelog/2024-08-06.md).
- **`2024/08/05`**: 📦 Windows users can now download the [one-click installer](https://huggingface.co/cleardusk/LivePortrait-Windows/blob/main/LivePortrait-Windows-v20240805.zip) for Humans mode and **Animals mode**! For details, see [**here**](./assets/docs/changelog/2024-08-05.md).
- **`2024/08/02`**: 😸 We released a version of the **Animals model**, along with several other updates and improvements. Check out the details [**here**](./assets/docs/changelog/2024-08-02.md)!
- **`2024/07/25`**: 📦 Windows users can now download the package from [HuggingFace](https://huggingface.co/cleardusk/LivePortrait-Windows/tree/main) or [BaiduYun](https://pan.baidu.com/s/1FWsWqKe0eNfXrwjEhhCqlw?pwd=86q2). Simply unzip and double-click `run_windows.bat` to enjoy!
- **`2024/07/24`**: 🎨 We support pose editing for source portraits in the Gradio interface. We've also lowered the default detection threshold to increase recall. [Have fun](assets/docs/changelog/2024-07-24.md)!

View File

@@ -43,6 +43,104 @@ class GradioPipeline(LivePortraitPipeline):
# self.live_portrait_wrapper = self.live_portrait_wrapper
self.args = args
@torch.no_grad()
def update_delta_new_eyeball_direction(self, eyeball_direction_x, eyeball_direction_y, delta_new, **kwargs):
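# delta_new holds the per-keypoint expression offsets, shape (1, num_kp, 3) — num_kp is 21 in
# LivePortrait's default config; the hard-coded indices and gains below look empirically tuned
# per keypoint and axis (x, y, z).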
if eyeball_direction_x > 0:
delta_new[0, 11, 0] += eyeball_direction_x * 0.0007
delta_new[0, 15, 0] += eyeball_direction_x * 0.001
else:
delta_new[0, 11, 0] += eyeball_direction_x * 0.001
delta_new[0, 15, 0] += eyeball_direction_x * 0.0007
delta_new[0, 11, 1] += eyeball_direction_y * -0.001
delta_new[0, 15, 1] += eyeball_direction_y * -0.001
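# vertical gaze appears to drag the eyelids, so a compensating half-strength blink
# keeps the lids tracking the eyeball.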
blink = -eyeball_direction_y / 2.
delta_new[0, 11, 1] += blink * -0.001
delta_new[0, 13, 1] += blink * 0.0003
delta_new[0, 15, 1] += blink * -0.001
delta_new[0, 16, 1] += blink * 0.0003
return delta_new
@torch.no_grad()
def update_delta_new_smile(self, smile, delta_new, **kwargs):
delta_new[0, 20, 1] += smile * -0.01
delta_new[0, 14, 1] += smile * -0.02
delta_new[0, 17, 1] += smile * 0.0065
delta_new[0, 17, 2] += smile * 0.003
delta_new[0, 13, 1] += smile * -0.00275
delta_new[0, 16, 1] += smile * -0.00275
delta_new[0, 3, 1] += smile * -0.0035
delta_new[0, 7, 1] += smile * -0.0035
return delta_new
@torch.no_grad()
def update_delta_new_wink(self, wink, delta_new, **kwargs):
delta_new[0, 11, 1] += wink * 0.001
delta_new[0, 13, 1] += wink * -0.0003
delta_new[0, 17, 0] += wink * 0.0003
delta_new[0, 17, 1] += wink * 0.0003
delta_new[0, 3, 1] += wink * -0.0003
return delta_new
@torch.no_grad()
def update_delta_new_eyebrow(self, eyebrow, delta_new, **kwargs):
if eyebrow > 0:
delta_new[0, 1, 1] += eyebrow * 0.001
delta_new[0, 2, 1] += eyebrow * -0.001
else:
delta_new[0, 1, 0] += eyebrow * -0.001
delta_new[0, 2, 0] += eyebrow * 0.001
delta_new[0, 1, 1] += eyebrow * 0.0003
delta_new[0, 2, 1] += eyebrow * -0.0003
return delta_new
@torch.no_grad()
def update_delta_new_lip_variation_zero(self, lip_variation_zero, delta_new, **kwargs):
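# unlike the other edits, the raw slider value (range ±0.09) is added directly to
# keypoint 19's x — presumably the pout axis, matching the "pouting" slider label.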
delta_new[0, 19, 0] += lip_variation_zero
return delta_new
@torch.no_grad()
def update_delta_new_lip_variation_one(self, lip_variation_one, delta_new, **kwargs):
delta_new[0, 14, 1] += lip_variation_one * 0.001
delta_new[0, 3, 1] += lip_variation_one * -0.0005
delta_new[0, 7, 1] += lip_variation_one * -0.0005
delta_new[0, 17, 2] += lip_variation_one * -0.0005
return delta_new
@torch.no_grad()
def update_delta_new_lip_variation_two(self, lip_variation_two, delta_new, **kwargs):
delta_new[0, 20, 2] += lip_variation_two * -0.001
delta_new[0, 20, 1] += lip_variation_two * -0.001
delta_new[0, 14, 1] += lip_variation_two * -0.001
return delta_new
@torch.no_grad()
def update_delta_new_lip_variation_three(self, lip_variation_three, delta_new, **kwargs):
delta_new[0, 19, 1] += lip_variation_three * 0.001
delta_new[0, 19, 2] += lip_variation_three * 0.0001
delta_new[0, 17, 1] += lip_variation_three * -0.0001
return delta_new
@torch.no_grad()
def update_delta_new_mov_x(self, mov_x, delta_new, **kwargs):
delta_new[0, 5, 0] += mov_x
return delta_new
@torch.no_grad()
def update_delta_new_mov_y(self, mov_y, delta_new, **kwargs):
delta_new[0, 5, 1] += mov_y
return delta_new
@torch.no_grad()
def execute_video(
self,
@@ -112,14 +210,37 @@ class GradioPipeline(LivePortraitPipeline):
raise gr.Error("Please upload the source portrait or source video, and driving video 🤗🤗🤗", duration=5)
@torch.no_grad()
def execute_image_retargeting(self, input_eye_ratio: float, input_lip_ratio: float, input_head_pitch_variation: float, input_head_yaw_variation: float, input_head_roll_variation: float, input_image, retargeting_source_scale: float, flag_do_crop_input_retargeting_image=True):
def execute_image_retargeting(
self,
input_eye_ratio: float,
input_lip_ratio: float,
input_head_pitch_variation: float,
input_head_yaw_variation: float,
input_head_roll_variation: float,
mov_x: float,
mov_y: float,
mov_z: float,
lip_variation_zero: float,
lip_variation_one: float,
lip_variation_two: float,
lip_variation_three: float,
smile: float,
wink: float,
eyebrow: float,
eyeball_direction_x: float,
eyeball_direction_y: float,
input_image,
retargeting_source_scale: float,
flag_stitching_retargeting_input=True,
flag_do_crop_input_retargeting_image=True):
""" for single image retargeting
"""
if input_head_pitch_variation is None or input_head_yaw_variation is None or input_head_roll_variation is None:
raise gr.Error("Invalid relative pose input 💥!", duration=5)
# disposable feature
f_s_user, x_s_user, R_s_user, R_d_user, x_s_info, source_lmk_user, crop_M_c2o, mask_ori, img_rgb = \
self.prepare_retargeting_image(input_image, input_head_pitch_variation, input_head_yaw_variation, input_head_roll_variation, retargeting_source_scale, flag_do_crop=flag_do_crop_input_retargeting_image)
self.prepare_retargeting_image(
input_image, input_head_pitch_variation, input_head_yaw_variation, input_head_roll_variation, retargeting_source_scale, flag_do_crop=flag_do_crop_input_retargeting_image)
if input_eye_ratio is None or input_lip_ratio is None:
raise gr.Error("Invalid ratio input 💥!", duration=5)
@@ -130,6 +251,18 @@ class GradioPipeline(LivePortraitPipeline):
f_s_user = f_s_user.to(device)
R_s_user = R_s_user.to(device)
R_d_user = R_d_user.to(device)
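# promote the scalar slider values to tensors on the inference device so they can
# broadcast against the keypoint tensors below.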
mov_x = torch.tensor(mov_x).to(device)
mov_y = torch.tensor(mov_y).to(device)
mov_z = torch.tensor(mov_z).to(device)
eyeball_direction_x = torch.tensor(eyeball_direction_x).to(device)
eyeball_direction_y = torch.tensor(eyeball_direction_y).to(device)
smile = torch.tensor(smile).to(device)
wink = torch.tensor(wink).to(device)
eyebrow = torch.tensor(eyebrow).to(device)
lip_variation_zero = torch.tensor(lip_variation_zero).to(device)
lip_variation_one = torch.tensor(lip_variation_one).to(device)
lip_variation_two = torch.tensor(lip_variation_two).to(device)
lip_variation_three = torch.tensor(lip_variation_three).to(device)
x_c_s = x_s_info['kp'].to(device)
delta_new = x_s_info['exp'].to(device)
@@ -137,27 +270,56 @@ class GradioPipeline(LivePortraitPipeline):
t_new = x_s_info['t'].to(device)
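# (R_d @ R_s^T) @ R_s reduces algebraically to R_d for rotation matrices; the expanded
# form mirrors the paper's relative-pose composition.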
R_d_new = (R_d_user @ R_s_user.permute(0, 2, 1)) @ R_s_user
x_d_new = scale_new * (x_c_s @ R_d_new + delta_new) + t_new
# ∆_eyes,i = R_eyes(x_s; c_s,eyes, c_d,eyes,i)
if eyeball_direction_x != 0 or eyeball_direction_y != 0:
delta_new = self.update_delta_new_eyeball_direction(eyeball_direction_x, eyeball_direction_y, delta_new)
if smile != 0:
delta_new = self.update_delta_new_smile(smile, delta_new)
if wink != 0:
delta_new = self.update_delta_new_wink(wink, delta_new)
if eyebrow != 0:
delta_new = self.update_delta_new_eyebrow(eyebrow, delta_new)
if lip_variation_zero != 0:
delta_new = self.update_delta_new_lip_variation_zero(lip_variation_zero, delta_new)
if lip_variation_one != 0:
delta_new = self.update_delta_new_lip_variation_one(lip_variation_one, delta_new)
if lip_variation_two != 0:
delta_new = self.update_delta_new_lip_variation_two(lip_variation_two, delta_new)
if lip_variation_three != 0:
delta_new = self.update_delta_new_lip_variation_three(lip_variation_three, delta_new)
if mov_x != 0:
delta_new = self.update_delta_new_mov_x(-mov_x, delta_new)
if mov_y != 0:
delta_new = self.update_delta_new_mov_y(mov_y, delta_new)
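# recompute the driven keypoints with the edited delta_new (the x_d_new above is overwritten);
# mov_z scales the keypoint cloud, acting as a zoom, before the translation t_new is applied.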
x_d_new = mov_z * scale_new * (x_c_s @ R_d_new + delta_new) + t_new
eyes_delta, lip_delta = None, None
if input_eye_ratio != self.source_eye_ratio:
combined_eye_ratio_tensor = self.live_portrait_wrapper.calc_combined_eye_ratio([[float(input_eye_ratio)]], source_lmk_user)
eyes_delta = self.live_portrait_wrapper.retarget_eye(x_s_user, combined_eye_ratio_tensor)
# ∆_lip,i = R_lip(x_s; c_s,lip, c_d,lip,i)
if input_lip_ratio != self.source_lip_ratio:
combined_lip_ratio_tensor = self.live_portrait_wrapper.calc_combined_lip_ratio([[float(input_lip_ratio)]], source_lmk_user)
lip_delta = self.live_portrait_wrapper.retarget_lip(x_s_user, combined_lip_ratio_tensor)
x_d_new = x_d_new + eyes_delta + lip_delta
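# eyes_delta / lip_delta stay None when a slider equals the auto-computed source ratio,
# so substitute 0 to keep the sum well-defined.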
x_d_new = x_d_new + \
(eyes_delta if eyes_delta is not None else 0) + \
(lip_delta if lip_delta is not None else 0)
if flag_stitching_retargeting_input:
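# the stitching module nudges x_d_new so the generated crop stays consistent with the
# source outside the edited region, reducing seams in the paste-back below.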
x_d_new = self.live_portrait_wrapper.stitching(x_s_user, x_d_new)
# D(W(f_s; x_s, x_d))
out = self.live_portrait_wrapper.warp_decode(f_s_user, x_s_user, x_d_new)
out = self.live_portrait_wrapper.parse_output(out['out'])[0]
if flag_do_crop_input_retargeting_image:
out_to_ori_blend = paste_back(out, crop_M_c2o, img_rgb, mask_ori)
else:
out_to_ori_blend = out
gr.Info("Run successfully!", duration=2)
return out, out_to_ori_blend
@torch.no_grad()
def prepare_retargeting_image(self, input_image, input_head_pitch_variation, input_head_yaw_variation, input_head_roll_variation, retargeting_source_scale, flag_do_crop=True):
def prepare_retargeting_image(
self,
input_image,
input_head_pitch_variation, input_head_yaw_variation, input_head_roll_variation,
retargeting_source_scale,
flag_do_crop=True):
""" for single image retargeting
"""
if input_image is not None:
@@ -168,7 +330,6 @@ class GradioPipeline(LivePortraitPipeline):
inference_cfg = self.live_portrait_wrapper.inference_cfg
######## process source portrait ########
img_rgb = load_img_online(input_image, mode='rgb', max_dim=1280, n=2)
log(f"Load source image from {input_image}.")
if flag_do_crop:
crop_info = self.cropper.crop_source_image(img_rgb, self.cropper.crop_cfg)
I_s = self.live_portrait_wrapper.prepare_source(crop_info['img_crop_256x256'])
@@ -181,27 +342,27 @@ class GradioPipeline(LivePortraitPipeline):
crop_M_c2o = None
mask_ori = None
x_s_info = self.live_portrait_wrapper.get_kp_info(I_s)
x_s_info_user_pitch = x_s_info['pitch'] + input_head_pitch_variation
x_s_info_user_yaw = x_s_info['yaw'] + input_head_yaw_variation
x_s_info_user_roll = x_s_info['roll'] + input_head_roll_variation
x_d_info_user_pitch = x_s_info['pitch'] + input_head_pitch_variation
x_d_info_user_yaw = x_s_info['yaw'] + input_head_yaw_variation
x_d_info_user_roll = x_s_info['roll'] + input_head_roll_variation
R_s_user = get_rotation_matrix(x_s_info['pitch'], x_s_info['yaw'], x_s_info['roll'])
R_d_user = get_rotation_matrix(x_s_info_user_pitch, x_s_info_user_yaw, x_s_info_user_roll)
R_d_user = get_rotation_matrix(x_d_info_user_pitch, x_d_info_user_yaw, x_d_info_user_roll)
############################################
f_s_user = self.live_portrait_wrapper.extract_feature_3d(I_s)
x_s_user = self.live_portrait_wrapper.transform_keypoint(x_s_info)
return f_s_user, x_s_user, R_s_user, R_d_user, x_s_info, source_lmk_user, crop_M_c2o, mask_ori, img_rgb
else:
# reached when the Clear button is pressed
raise gr.Error("Please upload a source portrait as the retargeting input 🤗🤗🤗", duration=5)
def init_retargeting_image(self, retargeting_source_scale: float, input_image = None):
@torch.no_grad()
def init_retargeting_image(self, retargeting_source_scale: float, source_eye_ratio: float, source_lip_ratio: float, input_image=None):
""" initialize the retargeting slider
"""
if input_image is not None:
args_user = {'scale': retargeting_source_scale}
self.args = update_args(self.args, args_user)
self.cropper.update_config(self.args.__dict__)
inference_cfg = self.live_portrait_wrapper.inference_cfg
# inference_cfg = self.live_portrait_wrapper.inference_cfg
######## process source portrait ########
img_rgb = load_img_online(input_image, mode='rgb', max_dim=1280, n=16)
log(f"Load source image from {input_image}.")
@@ -210,9 +371,14 @@ class GradioPipeline(LivePortraitPipeline):
raise gr.Error("Source portrait NO face detected", duration=2)
source_eye_ratio = calc_eye_close_ratio(crop_info['lmk_crop'][None])
source_lip_ratio = calc_lip_close_ratio(crop_info['lmk_crop'][None])
return round(float(source_eye_ratio.mean()), 2), round(source_lip_ratio[0][0], 2)
return 0., 0.
self.source_eye_ratio = round(float(source_eye_ratio.mean()), 2)
self.source_lip_ratio = round(float(source_lip_ratio[0][0]), 2)
log("Calculating eyes-open and lip-open ratios successfully!")
return self.source_eye_ratio, self.source_lip_ratio
else:
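# no image uploaded (e.g., right after Clear): pass the incoming slider values through unchanged.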
return source_eye_ratio, source_lip_ratio
@torch.no_grad()
def execute_video_retargeting(self, input_lip_ratio: float, input_video, retargeting_source_scale: float, driving_smooth_observation_variance_retargeting: float, flag_do_crop_input_retargeting_video=True):
""" retargeting the lip-open ratio of each source frame
"""
@@ -277,6 +443,7 @@ class GradioPipeline(LivePortraitPipeline):
gr.Info("Run successfully!", duration=2)
return wfp_concat, wfp
@torch.no_grad()
def prepare_retargeting_video(self, input_video, retargeting_source_scale, device, input_lip_ratio, driving_smooth_observation_variance_retargeting, flag_do_crop=True):
""" for video retargeting
"""