diff --git a/app.py b/app.py
index c1e895a..b0d81dd 100644
--- a/app.py
+++ b/app.py
@@ -55,6 +55,7 @@ if args.gradio_temp_dir not in (None, ''):
     os.environ["GRADIO_TEMP_DIR"] = args.gradio_temp_dir
     os.makedirs(args.gradio_temp_dir, exist_ok=True)

+
 def gpu_wrapped_execute_video(*args, **kwargs):
     return gradio_pipeline.execute_video(*args, **kwargs)

@@ -62,9 +63,15 @@ def gpu_wrapped_execute_video(*args, **kwargs):
 def gpu_wrapped_execute_image_retargeting(*args, **kwargs):
     return gradio_pipeline.execute_image_retargeting(*args, **kwargs)

+
 def gpu_wrapped_execute_video_retargeting(*args, **kwargs):
     return gradio_pipeline.execute_video_retargeting(*args, **kwargs)

+
+def reset_sliders(*args, **kwargs):
+    return 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.5, True, True
+
+
 # assets
 title_md = "assets/gradio/gradio_title.md"
 example_portrait_dir = "assets/examples/source"
@@ -97,10 +104,24 @@ video_lip_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, labe
 head_pitch_slider = gr.Slider(minimum=-15.0, maximum=15.0, value=0, step=1, label="relative pitch")
 head_yaw_slider = gr.Slider(minimum=-25, maximum=25, value=0, step=1, label="relative yaw")
 head_roll_slider = gr.Slider(minimum=-15.0, maximum=15.0, value=0, step=1, label="relative roll")
+mov_x = gr.Slider(minimum=-0.19, maximum=0.19, value=0.0, step=0.01, label="x-axis movement")
+mov_y = gr.Slider(minimum=-0.19, maximum=0.19, value=0.0, step=0.01, label="y-axis movement")
+mov_z = gr.Slider(minimum=0.9, maximum=1.2, value=1.0, step=0.01, label="z-axis movement")
+lip_variation_zero = gr.Slider(minimum=-0.09, maximum=0.09, value=0, step=0.01, label="pouting")
+lip_variation_one = gr.Slider(minimum=-20.0, maximum=15.0, value=0, step=0.01, label="pursing 😐")
+lip_variation_two = gr.Slider(minimum=0.0, maximum=15.0, value=0, step=0.01, label="grin 😁")
+lip_variation_three = gr.Slider(minimum=-90.0, maximum=120.0, value=0, step=1.0, label="lip close <-> open")
+smile = gr.Slider(minimum=-0.3, maximum=1.3, value=0, step=0.01, label="smile 😄")
+wink = gr.Slider(minimum=0, maximum=39, value=0, step=0.01, label="wink 😉")
+eyebrow = gr.Slider(minimum=-30, maximum=30, value=0, step=0.01, label="eyebrow 🤨")
+eyeball_direction_x = gr.Slider(minimum=-30.0, maximum=30.0, value=0, step=0.01, label="eye gaze (horizontal) 👀")
+eyeball_direction_y = gr.Slider(minimum=-63.0, maximum=63.0, value=0, step=0.01, label="eye gaze (vertical) 🙄")
 retargeting_input_image = gr.Image(type="filepath")
 retargeting_input_video = gr.Video()
 output_image = gr.Image(type="numpy")
 output_image_paste_back = gr.Image(type="numpy")
+retargeting_output_image = gr.Image(type="numpy")
+retargeting_output_image_paste_back = gr.Image(type="numpy")
 output_video = gr.Video(autoplay=False)
 output_video_paste_back = gr.Video(autoplay=False)
 output_video_i2v = gr.Video(autoplay=False)
@@ -250,15 +271,46 @@ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta San
     gr.Markdown(load_description("assets/gradio/gradio_description_retargeting.md"), visible=True)
     with gr.Row(visible=True):
         flag_do_crop_input_retargeting_image = gr.Checkbox(value=True, label="do crop (source)")
+        flag_stitching_retargeting_input = gr.Checkbox(value=True, label="stitching")
         retargeting_source_scale.render()
         eye_retargeting_slider.render()
         lip_retargeting_slider.render()
     with gr.Row(visible=True):
-        head_pitch_slider.render()
-        head_yaw_slider.render()
-        head_roll_slider.render()
+        with gr.Column():
+            with gr.Accordion(open=True, label="Facial movement sliders"):
+                with gr.Row(visible=True):
+                    head_pitch_slider.render()
+                    head_yaw_slider.render()
+                    head_roll_slider.render()
+                with gr.Row(visible=True):
+                    mov_x.render()
+                    mov_y.render()
+                    mov_z.render()
+        with gr.Column():
+            with gr.Accordion(open=True, label="Facial expression sliders"):
+                with gr.Row(visible=True):
+                    lip_variation_zero.render()
+                    lip_variation_one.render()
+                    lip_variation_two.render()
+                with gr.Row(visible=True):
+                    lip_variation_three.render()
+                    smile.render()
+                    wink.render()
+                with gr.Row(visible=True):
+                    eyebrow.render()
+                    eyeball_direction_x.render()
+                    eyeball_direction_y.render()
     with gr.Row(visible=True):
-        process_button_retargeting = gr.Button("🚗 Retargeting Image", variant="primary")
+        reset_button = gr.Button("🔄 Reset")
+        reset_button.click(
+            fn=reset_sliders,
+            inputs=None,
+            outputs=[
+                head_pitch_slider, head_yaw_slider, head_roll_slider, mov_x, mov_y, mov_z,
+                lip_variation_zero, lip_variation_one, lip_variation_two, lip_variation_three, smile, wink, eyebrow, eyeball_direction_x, eyeball_direction_y,
+                retargeting_source_scale, flag_stitching_retargeting_input, flag_do_crop_input_retargeting_image
+            ]
+        )
     with gr.Row(visible=True):
         with gr.Column():
             with gr.Accordion(open=True, label="Retargeting Image Input"):
@@ -272,28 +324,24 @@ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta San
                     [osp.join(example_portrait_dir, "s7.jpg")],
                     [osp.join(example_portrait_dir, "s12.jpg")],
                     [osp.join(example_portrait_dir, "s22.jpg")],
-                    [osp.join(example_portrait_dir, "s23.jpg")],
+                    # [osp.join(example_portrait_dir, "s23.jpg")],
+                    [osp.join(example_portrait_dir, "s42.jpg")],
                 ],
                 inputs=[retargeting_input_image],
                 cache_examples=False,
             )
         with gr.Column():
             with gr.Accordion(open=True, label="Retargeting Result"):
-                output_image.render()
+                retargeting_output_image.render()
         with gr.Column():
             with gr.Accordion(open=True, label="Paste-back Result"):
-                output_image_paste_back.render()
+                retargeting_output_image_paste_back.render()
     with gr.Row(visible=True):
         process_button_reset_retargeting = gr.ClearButton(
             [
-                eye_retargeting_slider,
-                lip_retargeting_slider,
-                head_pitch_slider,
-                head_yaw_slider,
-                head_roll_slider,
                 retargeting_input_image,
-                output_image,
-                output_image_paste_back
+                retargeting_output_image,
+                retargeting_output_image_paste_back,
             ],
             value="🧹 Clear"
         )
@@ -306,7 +354,7 @@ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta San
         video_lip_retargeting_slider.render()
         driving_smooth_observation_variance_retargeting.render()
     with gr.Row(visible=True):
-        process_button_retargeting_video = gr.Button("🏄 Retargeting Video", variant="primary")
+        process_button_retargeting_video = gr.Button("🚗 Retargeting Video", variant="primary")
     with gr.Row(visible=True):
         with gr.Column():
             with gr.Accordion(open=True, label="Retargeting Video Input"):
@@ -369,17 +417,22 @@ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta San
     retargeting_input_image.change(
         fn=gradio_pipeline.init_retargeting_image,
-        inputs=[retargeting_source_scale, retargeting_input_image],
+        inputs=[retargeting_source_scale, eye_retargeting_slider, lip_retargeting_slider, retargeting_input_image],
         outputs=[eye_retargeting_slider, lip_retargeting_slider]
     )
-    process_button_retargeting.click(
-        # fn=gradio_pipeline.execute_image,
-        fn=gpu_wrapped_execute_image_retargeting,
-        inputs=[eye_retargeting_slider, lip_retargeting_slider, head_pitch_slider, head_yaw_slider, head_roll_slider, retargeting_input_image, retargeting_source_scale, flag_do_crop_input_retargeting_image],
-        outputs=[output_image, output_image_paste_back],
-        show_progress=True
-    )
+    sliders = [eye_retargeting_slider, lip_retargeting_slider, head_pitch_slider, head_yaw_slider, head_roll_slider, mov_x, mov_y, mov_z, lip_variation_zero, lip_variation_one, lip_variation_two, lip_variation_three, smile, wink, eyebrow, eyeball_direction_x, eyeball_direction_y]
+    for slider in sliders:
+        # NOTE: gradio >= 4.0.0 may cause slow response
+        slider.change(
+            fn=gpu_wrapped_execute_image_retargeting,
+            inputs=[
+                eye_retargeting_slider, lip_retargeting_slider, head_pitch_slider, head_yaw_slider, head_roll_slider, mov_x, mov_y, mov_z,
+                lip_variation_zero, lip_variation_one, lip_variation_two, lip_variation_three, smile, wink, eyebrow, eyeball_direction_x, eyeball_direction_y,
+                retargeting_input_image, retargeting_source_scale, flag_stitching_retargeting_input, flag_do_crop_input_retargeting_image
+            ],
+            outputs=[retargeting_output_image, retargeting_output_image_paste_back],
+        )

     process_button_retargeting_video.click(
         fn=gpu_wrapped_execute_video_retargeting,
diff --git a/assets/docs/changelog/2024-08-06.md b/assets/docs/changelog/2024-08-06.md
new file mode 100644
index 0000000..c96a11f
--- /dev/null
+++ b/assets/docs/changelog/2024-08-06.md
@@ -0,0 +1,7 @@
+## Precise Portrait Editing
+

+ LivePortrait +
+ Portrait Editing in the Gradio Interface +

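For orientation, the new editing controls work by adding small offsets to the implicit-keypoint expression deltas (`delta_new`) before decoding. The toy sketch below is illustrative only, a standalone example rather than the project's API; the indices and weights are borrowed from `update_delta_new_smile` in the `src/gradio_pipeline.py` hunk further down.

```python
import torch

def apply_smile(delta_new: torch.Tensor, smile: float) -> torch.Tensor:
    # delta_new: (1, num_keypoints, 3) expression offsets of the implicit keypoints.
    # Nudging a few keypoints along fixed axes yields the expression edit; the
    # indices/weights below mirror update_delta_new_smile in the diff below.
    delta_new = delta_new.clone()
    delta_new[0, 20, 1] += smile * -0.01
    delta_new[0, 14, 1] += smile * -0.02
    delta_new[0, 17, 1] += smile * 0.0065
    return delta_new

# Toy usage: 21 keypoints, zero expression, half-strength smile.
print(apply_smile(torch.zeros(1, 21, 3), 0.5)[0, 20])
```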
diff --git a/assets/docs/editing-portrait-2024-08-06.jpg b/assets/docs/editing-portrait-2024-08-06.jpg
new file mode 100644
index 0000000..0b09f18
Binary files /dev/null and b/assets/docs/editing-portrait-2024-08-06.jpg differ
diff --git a/assets/examples/source/s42.jpg b/assets/examples/source/s42.jpg
new file mode 100644
index 0000000..15b9deb
Binary files /dev/null and b/assets/examples/source/s42.jpg differ
diff --git a/assets/gradio/gradio_description_retargeting.md b/assets/gradio/gradio_description_retargeting.md
index 5978d08..e483ec6 100644
--- a/assets/gradio/gradio_description_retargeting.md
+++ b/assets/gradio/gradio_description_retargeting.md
@@ -6,9 +6,8 @@
-Retargeting Image
-
-Upload a Source Portrait as Retargeting Input, then drag the sliders and click the 🚗 Retargeting Image button. You can try running it multiple times.
+Retargeting and Editing Portraits
+
+Upload a source portrait, and the eyes-open ratio and lip-open ratio will be auto-calculated. Adjust the sliders to see instant edits. Feel free to experiment! 🎨
 
 😊 Set both target eyes-open and lip-open ratios to 0.8 to see what's going on!
 
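The "instant edits" described above come from registering the retargeting callback on every slider's change event instead of a single run button, as the `app.py` hunk earlier does with `slider.change(...)`. A minimal sketch of that pattern, assuming a placeholder `edit_portrait` callback and a reduced set of components rather than the project's real signature:

```python
import gradio as gr

def edit_portrait(image, smile, eye_gaze_x):
    # Placeholder: a real callback would re-run the retargeting pipeline
    # with the current slider values and return the edited frame.
    return image

with gr.Blocks() as demo:
    image = gr.Image(type="numpy", label="source portrait")
    smile = gr.Slider(minimum=-0.3, maximum=1.3, value=0.0, step=0.01, label="smile")
    eye_gaze_x = gr.Slider(minimum=-30.0, maximum=30.0, value=0.0, step=0.01, label="eye gaze (horizontal)")
    output = gr.Image(type="numpy", label="edited result")

    # Re-render on every slider movement so edits appear instantly.
    for slider in (smile, eye_gaze_x):
        slider.change(fn=edit_portrait, inputs=[image, smile, eye_gaze_x], outputs=[output])

if __name__ == "__main__":
    demo.launch()
```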
diff --git a/assets/gradio/gradio_description_retargeting_video.md b/assets/gradio/gradio_description_retargeting_video.md
index e54be9e..9d6bb8c 100644
--- a/assets/gradio/gradio_description_retargeting_video.md
+++ b/assets/gradio/gradio_description_retargeting_video.md
@@ -2,7 +2,7 @@

 Retargeting Video
 
-Upload a Source Video as Retargeting Input, then drag the sliders and click the 🏄 Retargeting Video button. You can try running it multiple times.
+Upload a Source Video as Retargeting Input, then drag the sliders and click the 🚗 Retargeting Video button. You can try running it multiple times.
 
 🤐 Set target lip-open ratio to 0 to see what's going on!
 
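For readers unsure what the target lip-open ratio measures: it is a normalized mouth-opening value, roughly the inner-lip gap over the mouth width, computed per frame. The sketch below only illustrates the idea with generic 68-point landmark indices; LivePortrait's own `calc_lip_close_ratio` uses its own landmark layout, so treat the indices here as assumptions.

```python
import numpy as np

def lip_open_ratio(lmk: np.ndarray) -> float:
    """Illustrative only: vertical inner-lip gap divided by mouth width.
    Indices follow the common 68-point convention (62/66 inner lips,
    48/54 mouth corners); the repo's calc_lip_close_ratio may differ."""
    gap = np.linalg.norm(lmk[62] - lmk[66])
    width = np.linalg.norm(lmk[48] - lmk[54]) + 1e-8
    return float(gap / width)

# A target ratio of 0 then asks the retargeting to keep the lips fully closed.
print(lip_open_ratio(np.random.rand(68, 2)))
```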
diff --git a/readme.md b/readme.md
index 8848e5b..b33f878 100644
--- a/readme.md
+++ b/readme.md
@@ -38,7 +38,8 @@
 ## 🔥 Updates
-- **`2024/08/05`**: 📦 Windows users download the [one-click installer](https://huggingface.co/cleardusk/LivePortrait-Windows/blob/main/LivePortrait-Windows-v20240805.zip) for Humans mode and **Animals mode** now! For details, see [**here**](./assets/docs/changelog/2024-08-05.md).
+- **`2024/08/06`**: 🎨 We support **precise portrait editing** in the Gradio interface, inspired by [ComfyUI-AdvancedLivePortrait](https://github.com/PowerHouseMan/ComfyUI-AdvancedLivePortrait). See [**here**](./assets/docs/changelog/2024-08-06.md).
+- **`2024/08/05`**: 📦 Windows users can now download the [one-click installer](https://huggingface.co/cleardusk/LivePortrait-Windows/blob/main/LivePortrait-Windows-v20240805.zip) for Humans mode and **Animals mode**! For details, see [**here**](./assets/docs/changelog/2024-08-05.md).
 - **`2024/08/02`**: 😸 We released a version of the **Animals model**, along with several other updates and improvements. Check out the details [**here**](./assets/docs/changelog/2024-08-02.md)!
 - **`2024/07/25`**: 📦 Windows users can now download the package from [HuggingFace](https://huggingface.co/cleardusk/LivePortrait-Windows/tree/main) or [BaiduYun](https://pan.baidu.com/s/1FWsWqKe0eNfXrwjEhhCqlw?pwd=86q2). Simply unzip and double-click `run_windows.bat` to enjoy!
 - **`2024/07/24`**: 🎨 We support pose editing for source portraits in the Gradio interface. We've also lowered the default detection threshold to increase recall. [Have fun](assets/docs/changelog/2024-07-24.md)!
diff --git a/src/gradio_pipeline.py b/src/gradio_pipeline.py
index 92e72dd..b3fd405 100644
--- a/src/gradio_pipeline.py
+++ b/src/gradio_pipeline.py
@@ -43,6 +43,104 @@ class GradioPipeline(LivePortraitPipeline):
         # self.live_portrait_wrapper = self.live_portrait_wrapper
         self.args = args

+    @torch.no_grad()
+    def update_delta_new_eyeball_direction(self, eyeball_direction_x, eyeball_direction_y, delta_new, **kwargs):
+        if eyeball_direction_x > 0:
+            delta_new[0, 11, 0] += eyeball_direction_x * 0.0007
+            delta_new[0, 15, 0] += eyeball_direction_x * 0.001
+        else:
+            delta_new[0, 11, 0] += eyeball_direction_x * 0.001
+            delta_new[0, 15, 0] += eyeball_direction_x * 0.0007
+
+        delta_new[0, 11, 1] += eyeball_direction_y * -0.001
+        delta_new[0, 15, 1] += eyeball_direction_y * -0.001
+        blink = -eyeball_direction_y / 2.
+
+        delta_new[0, 11, 1] += blink * -0.001
+        delta_new[0, 13, 1] += blink * 0.0003
+        delta_new[0, 15, 1] += blink * -0.001
+        delta_new[0, 16, 1] += blink * 0.0003
+
+        return delta_new
+
+    @torch.no_grad()
+    def update_delta_new_smile(self, smile, delta_new, **kwargs):
+        delta_new[0, 20, 1] += smile * -0.01
+        delta_new[0, 14, 1] += smile * -0.02
+        delta_new[0, 17, 1] += smile * 0.0065
+        delta_new[0, 17, 2] += smile * 0.003
+        delta_new[0, 13, 1] += smile * -0.00275
+        delta_new[0, 16, 1] += smile * -0.00275
+        delta_new[0, 3, 1] += smile * -0.0035
+        delta_new[0, 7, 1] += smile * -0.0035
+
+        return delta_new
+
+    @torch.no_grad()
+    def update_delta_new_wink(self, wink, delta_new, **kwargs):
+        delta_new[0, 11, 1] += wink * 0.001
+        delta_new[0, 13, 1] += wink * -0.0003
+        delta_new[0, 17, 0] += wink * 0.0003
+        delta_new[0, 17, 1] += wink * 0.0003
+        delta_new[0, 3, 1] += wink * -0.0003
+
+        return delta_new
+
+    @torch.no_grad()
+    def update_delta_new_eyebrow(self, eyebrow, delta_new, **kwargs):
+        if eyebrow > 0:
+            delta_new[0, 1, 1] += eyebrow * 0.001
+            delta_new[0, 2, 1] += eyebrow * -0.001
+        else:
+            delta_new[0, 1, 0] += eyebrow * -0.001
+            delta_new[0, 2, 0] += eyebrow * 0.001
+            delta_new[0, 1, 1] += eyebrow * 0.0003
+            delta_new[0, 2, 1] += eyebrow * -0.0003
+        return delta_new
+
+    @torch.no_grad()
+    def update_delta_new_lip_variation_zero(self, lip_variation_zero, delta_new, **kwargs):
+        delta_new[0, 19, 0] += lip_variation_zero
+
+        return delta_new
+
+    @torch.no_grad()
+    def update_delta_new_lip_variation_one(self, lip_variation_one, delta_new, **kwargs):
+        delta_new[0, 14, 1] += lip_variation_one * 0.001
+        delta_new[0, 3, 1] += lip_variation_one * -0.0005
+        delta_new[0, 7, 1] += lip_variation_one * -0.0005
+        delta_new[0, 17, 2] += lip_variation_one * -0.0005
+
+        return delta_new
+
+    @torch.no_grad()
+    def update_delta_new_lip_variation_two(self, lip_variation_two, delta_new, **kwargs):
+        delta_new[0, 20, 2] += lip_variation_two * -0.001
+        delta_new[0, 20, 1] += lip_variation_two * -0.001
+        delta_new[0, 14, 1] += lip_variation_two * -0.001
+
+        return delta_new
+
+    @torch.no_grad()
+    def update_delta_new_lip_variation_three(self, lip_variation_three, delta_new, **kwargs):
+        delta_new[0, 19, 1] += lip_variation_three * 0.001
+        delta_new[0, 19, 2] += lip_variation_three * 0.0001
+        delta_new[0, 17, 1] += lip_variation_three * -0.0001
+
+        return delta_new
+
+    @torch.no_grad()
+    def update_delta_new_mov_x(self, mov_x, delta_new, **kwargs):
+        delta_new[0, 5, 0] += mov_x
+
+        return delta_new
+
+    @torch.no_grad()
+    def update_delta_new_mov_y(self, mov_y, delta_new, **kwargs):
+        delta_new[0, 5, 1] += mov_y
+
+        return delta_new
+
     @torch.no_grad()
     def execute_video(
         self,
@@ -112,14 +210,37 @@ class GradioPipeline(LivePortraitPipeline):
             raise gr.Error("Please upload the source portrait or source video, and driving video 🤗🤗🤗", duration=5)

     @torch.no_grad()
-    def execute_image_retargeting(self, input_eye_ratio: float, input_lip_ratio: float, input_head_pitch_variation: float, input_head_yaw_variation: float, input_head_roll_variation: float, input_image, retargeting_source_scale: float, flag_do_crop_input_retargeting_image=True):
+    def execute_image_retargeting(
+        self,
+        input_eye_ratio: float,
+        input_lip_ratio: float,
+        input_head_pitch_variation: float,
+        input_head_yaw_variation: float,
+        input_head_roll_variation: float,
+        mov_x: float,
+        mov_y: float,
+        mov_z: float,
+        lip_variation_zero: float,
+        lip_variation_one: float,
+        lip_variation_two: float,
+        lip_variation_three: float,
+        smile: float,
+        wink: float,
+        eyebrow: float,
+        eyeball_direction_x: float,
+        eyeball_direction_y: float,
+        input_image,
+        retargeting_source_scale: float,
+        flag_stitching_retargeting_input=True,
+        flag_do_crop_input_retargeting_image=True):
         """ for single image retargeting """
         if input_head_pitch_variation is None or input_head_yaw_variation is None or input_head_roll_variation is None:
             raise gr.Error("Invalid relative pose input 💥!", duration=5)
         # disposable feature
         f_s_user, x_s_user, R_s_user, R_d_user, x_s_info, source_lmk_user, crop_M_c2o, mask_ori, img_rgb = \
-            self.prepare_retargeting_image(input_image, input_head_pitch_variation, input_head_yaw_variation, input_head_roll_variation, retargeting_source_scale, flag_do_crop=flag_do_crop_input_retargeting_image)
+            self.prepare_retargeting_image(
+                input_image, input_head_pitch_variation, input_head_yaw_variation, input_head_roll_variation, retargeting_source_scale, flag_do_crop=flag_do_crop_input_retargeting_image)
         if input_eye_ratio is None or input_lip_ratio is None:
             raise gr.Error("Invalid ratio input 💥!", duration=5)
@@ -130,6 +251,18 @@ class GradioPipeline(LivePortraitPipeline):
         f_s_user = f_s_user.to(device)
         R_s_user = R_s_user.to(device)
         R_d_user = R_d_user.to(device)
+        mov_x = torch.tensor(mov_x).to(device)
+        mov_y = torch.tensor(mov_y).to(device)
+        mov_z = torch.tensor(mov_z).to(device)
+        eyeball_direction_x = torch.tensor(eyeball_direction_x).to(device)
+        eyeball_direction_y = torch.tensor(eyeball_direction_y).to(device)
+        smile = torch.tensor(smile).to(device)
+        wink = torch.tensor(wink).to(device)
+        eyebrow = torch.tensor(eyebrow).to(device)
+        lip_variation_zero = torch.tensor(lip_variation_zero).to(device)
+        lip_variation_one = torch.tensor(lip_variation_one).to(device)
+        lip_variation_two = torch.tensor(lip_variation_two).to(device)
+        lip_variation_three = torch.tensor(lip_variation_three).to(device)
         x_c_s = x_s_info['kp'].to(device)
         delta_new = x_s_info['exp'].to(device)
@@ -137,27 +270,56 @@ class GradioPipeline(LivePortraitPipeline):
         t_new = x_s_info['t'].to(device)

         R_d_new = (R_d_user @ R_s_user.permute(0, 2, 1)) @ R_s_user
-        x_d_new = scale_new * (x_c_s @ R_d_new + delta_new) + t_new
-        # ∆_eyes,i = R_eyes(x_s; c_s,eyes, c_d,eyes,i)
-        combined_eye_ratio_tensor = self.live_portrait_wrapper.calc_combined_eye_ratio([[float(input_eye_ratio)]], source_lmk_user)
-        eyes_delta = self.live_portrait_wrapper.retarget_eye(x_s_user, combined_eye_ratio_tensor)
-        # ∆_lip,i = R_lip(x_s; c_s,lip, c_d,lip,i)
-        combined_lip_ratio_tensor = self.live_portrait_wrapper.calc_combined_lip_ratio([[float(input_lip_ratio)]], source_lmk_user)
-        lip_delta = self.live_portrait_wrapper.retarget_lip(x_s_user, combined_lip_ratio_tensor)
-        x_d_new = x_d_new + eyes_delta + lip_delta
-        x_d_new = self.live_portrait_wrapper.stitching(x_s_user, x_d_new)
-        # D(W(f_s; x_s, x′_d))
+        if eyeball_direction_x != 0 or eyeball_direction_y != 0:
+            delta_new = self.update_delta_new_eyeball_direction(eyeball_direction_x, eyeball_direction_y, delta_new)
+        if smile != 0:
+            delta_new = self.update_delta_new_smile(smile, delta_new)
+        if wink != 0:
+            delta_new = self.update_delta_new_wink(wink, delta_new)
+        if eyebrow != 0:
+            delta_new = self.update_delta_new_eyebrow(eyebrow, delta_new)
+        if lip_variation_zero != 0:
+            delta_new = self.update_delta_new_lip_variation_zero(lip_variation_zero, delta_new)
+        if lip_variation_one != 0:
+            delta_new = self.update_delta_new_lip_variation_one(lip_variation_one, delta_new)
+        if lip_variation_two != 0:
+            delta_new = self.update_delta_new_lip_variation_two(lip_variation_two, delta_new)
+        if lip_variation_three != 0:
+            delta_new = self.update_delta_new_lip_variation_three(lip_variation_three, delta_new)
+        if mov_x != 0:
+            delta_new = self.update_delta_new_mov_x(-mov_x, delta_new)
+        if mov_y != 0:
+            delta_new = self.update_delta_new_mov_y(mov_y, delta_new)
+
+        x_d_new = mov_z * scale_new * (x_c_s @ R_d_new + delta_new) + t_new
+        eyes_delta, lip_delta = None, None
+        if input_eye_ratio != self.source_eye_ratio:
+            combined_eye_ratio_tensor = self.live_portrait_wrapper.calc_combined_eye_ratio([[float(input_eye_ratio)]], source_lmk_user)
+            eyes_delta = self.live_portrait_wrapper.retarget_eye(x_s_user, combined_eye_ratio_tensor)
+        if input_lip_ratio != self.source_lip_ratio:
+            combined_lip_ratio_tensor = self.live_portrait_wrapper.calc_combined_lip_ratio([[float(input_lip_ratio)]], source_lmk_user)
+            lip_delta = self.live_portrait_wrapper.retarget_lip(x_s_user, combined_lip_ratio_tensor)
+        x_d_new = x_d_new + \
+            (eyes_delta if eyes_delta is not None else 0) + \
+            (lip_delta if lip_delta is not None else 0)
+
+        if flag_stitching_retargeting_input:
+            x_d_new = self.live_portrait_wrapper.stitching(x_s_user, x_d_new)
         out = self.live_portrait_wrapper.warp_decode(f_s_user, x_s_user, x_d_new)
         out = self.live_portrait_wrapper.parse_output(out['out'])[0]
         if flag_do_crop_input_retargeting_image:
             out_to_ori_blend = paste_back(out, crop_M_c2o, img_rgb, mask_ori)
         else:
             out_to_ori_blend = out
-        gr.Info("Run successfully!", duration=2)
         return out, out_to_ori_blend

     @torch.no_grad()
-    def prepare_retargeting_image(self, input_image, input_head_pitch_variation, input_head_yaw_variation, input_head_roll_variation, retargeting_source_scale, flag_do_crop=True):
+    def prepare_retargeting_image(
+        self,
+        input_image,
+        input_head_pitch_variation, input_head_yaw_variation, input_head_roll_variation,
+        retargeting_source_scale,
+        flag_do_crop=True):
         """ for single image retargeting """
         if input_image is not None:
@@ -168,7 +330,6 @@ class GradioPipeline(LivePortraitPipeline):
             inference_cfg = self.live_portrait_wrapper.inference_cfg
             ######## process source portrait ########
             img_rgb = load_img_online(input_image, mode='rgb', max_dim=1280, n=2)
-            log(f"Load source image from {input_image}.")
             if flag_do_crop:
                 crop_info = self.cropper.crop_source_image(img_rgb, self.cropper.crop_cfg)
                 I_s = self.live_portrait_wrapper.prepare_source(crop_info['img_crop_256x256'])
@@ -181,27 +342,27 @@ class GradioPipeline(LivePortraitPipeline):
                 crop_M_c2o = None
                 mask_ori = None
             x_s_info = self.live_portrait_wrapper.get_kp_info(I_s)
-            x_s_info_user_pitch = x_s_info['pitch'] + input_head_pitch_variation
-            x_s_info_user_yaw = x_s_info['yaw'] + input_head_yaw_variation
-            x_s_info_user_roll = x_s_info['roll'] + input_head_roll_variation
+            x_d_info_user_pitch = x_s_info['pitch'] + input_head_pitch_variation
+            x_d_info_user_yaw = x_s_info['yaw'] + input_head_yaw_variation
+            x_d_info_user_roll = x_s_info['roll'] + input_head_roll_variation
             R_s_user = get_rotation_matrix(x_s_info['pitch'], x_s_info['yaw'], x_s_info['roll'])
-            R_d_user = get_rotation_matrix(x_s_info_user_pitch, x_s_info_user_yaw, x_s_info_user_roll)
+            R_d_user = get_rotation_matrix(x_d_info_user_pitch, x_d_info_user_yaw, x_d_info_user_roll)
             ############################################
             f_s_user = self.live_portrait_wrapper.extract_feature_3d(I_s)
             x_s_user = self.live_portrait_wrapper.transform_keypoint(x_s_info)
             return f_s_user, x_s_user, R_s_user, R_d_user, x_s_info, source_lmk_user, crop_M_c2o, mask_ori, img_rgb
         else:
-            # when press the clear button, go here
             raise gr.Error("Please upload a source portrait as the retargeting input 🤗🤗🤗", duration=5)

-    def init_retargeting_image(self, retargeting_source_scale: float, input_image = None):
+    @torch.no_grad()
+    def init_retargeting_image(self, retargeting_source_scale: float, source_eye_ratio: float, source_lip_ratio: float, input_image = None):
         """ initialize the retargeting slider """
         if input_image != None:
             args_user = {'scale': retargeting_source_scale}
             self.args = update_args(self.args, args_user)
             self.cropper.update_config(self.args.__dict__)
-            inference_cfg = self.live_portrait_wrapper.inference_cfg
+            # inference_cfg = self.live_portrait_wrapper.inference_cfg
             ######## process source portrait ########
             img_rgb = load_img_online(input_image, mode='rgb', max_dim=1280, n=16)
             log(f"Load source image from {input_image}.")
@@ -210,9 +371,14 @@ class GradioPipeline(LivePortraitPipeline):
                 raise gr.Error("Source portrait NO face detected", duration=2)
             source_eye_ratio = calc_eye_close_ratio(crop_info['lmk_crop'][None])
             source_lip_ratio = calc_lip_close_ratio(crop_info['lmk_crop'][None])
-            return round(float(source_eye_ratio.mean()), 2), round(source_lip_ratio[0][0], 2)
-        return 0., 0.
+            self.source_eye_ratio = round(float(source_eye_ratio.mean()), 2)
+            self.source_lip_ratio = round(float(source_lip_ratio[0][0]), 2)
+            log("Calculating eyes-open and lip-open ratios successfully!")
+            return self.source_eye_ratio, self.source_lip_ratio
+        else:
+            return source_eye_ratio, source_lip_ratio
+
+    @torch.no_grad()
     def execute_video_retargeting(self, input_lip_ratio: float, input_video, retargeting_source_scale: float, driving_smooth_observation_variance_retargeting: float, flag_do_crop_input_retargeting_video=True):
         """ retargeting the lip-open ratio of each source frame """
@@ -277,6 +443,7 @@ class GradioPipeline(LivePortraitPipeline):
         gr.Info("Run successfully!", duration=2)
         return wfp_concat, wfp

+    @torch.no_grad()
     def prepare_retargeting_video(self, input_video, retargeting_source_scale, device, input_lip_ratio, driving_smooth_observation_variance_retargeting, flag_do_crop=True):
         """ for video retargeting """