From 470c58fe5a0d8325ec9f0e4f3cfd175ba0f8a18b Mon Sep 17 00:00:00 2001
From: longredzhong <774931894@qq.com>
Date: Thu, 11 Jul 2024 16:30:57 +0800
Subject: [PATCH] feat: update crop configuration parameters for clarity (#97)

The crop configuration parameters in `crop_config.py` have been updated.
The changes include:

- Adding `insightface_root` and `landmark_ckpt_path` as `CropConfig` fields
- Making `Cropper` read these model paths from the config instead of
  hardcoding them in `cropper.py`

These changes make the cropping model paths configurable and improve the
readability of the cropping code.
---
 src/config/crop_config.py |  17 +++---
 src/utils/cropper.py      | 121 ++++++++++++++++++++++++--------------
 2 files changed, 86 insertions(+), 52 deletions(-)

diff --git a/src/config/crop_config.py b/src/config/crop_config.py
index a4c8b12..f3b12ef 100644
--- a/src/config/crop_config.py
+++ b/src/config/crop_config.py
@@ -4,25 +4,26 @@
 parameters used for crop faces
 """

-import os.path as osp
 from dataclasses import dataclass
-from typing import Union, List
+
 from .base_config import PrintableConfig


 @dataclass(repr=False)  # use repr from PrintableConfig
 class CropConfig(PrintableConfig):
-    device_id: int = 0  # gpu device id
-    flag_force_cpu: bool = False  # force cpu inference, WIP
+    insightface_root: str = "../../pretrained_weights/insightface"
+    landmark_ckpt_path: str = "../../pretrained_weights/liveportrait/landmark.onnx"
+    device_id: int = 0  # gpu device id
+    flag_force_cpu: bool = False  # force cpu inference, WIP
     ########## source image cropping option ##########
     dsize: int = 512  # crop size
     scale: float = 2.5  # scale factor
     vx_ratio: float = 0  # vx ratio
     vy_ratio: float = -0.125  # vy ratio +up, -down
-    max_face_num: int = 0  # max face number, 0 mean no limit
+    max_face_num: int = 0  # max face number, 0 mean no limit
     ########## driving video auto cropping option ##########
-    scale_crop_video: float = 2.2  #2.0 # scale factor for cropping video
-    vx_ratio_crop_video: float = 0.  # adjust y offset
+    scale_crop_video: float = 2.2  # 2.0 # scale factor for cropping video
+    vx_ratio_crop_video: float = 0.0  # adjust y offset
     vy_ratio_crop_video: float = -0.1  # adjust x offset
-    direction: str = 'large-small'  # direction of cropping
+    direction: str = "large-small"  # direction of cropping
diff --git a/src/utils/cropper.py b/src/utils/cropper.py
index 81fe74d..f8847c3 100644
--- a/src/utils/cropper.py
+++ b/src/utils/cropper.py
@@ -1,17 +1,26 @@
 # coding: utf-8

-import numpy as np
 import os.path as osp
-from typing import List, Union, Tuple
 from dataclasses import dataclass, field
-import cv2; cv2.setNumThreads(0); cv2.ocl.setUseOpenCL(False)
+from typing import List, Tuple, Union
+
+import cv2
+import numpy as np
+
+cv2.setNumThreads(0)
+cv2.ocl.setUseOpenCL(False)

 from ..config.crop_config import CropConfig
-from .landmark_runner import LandmarkRunner
+from .crop import (
+    average_bbox_lst,
+    crop_image,
+    crop_image_by_bbox,
+    parse_bbox_from_landmark,
+)
 from .face_analysis_diy import FaceAnalysisDIY
-from .crop import crop_image, crop_image_by_bbox, parse_bbox_from_landmark, average_bbox_lst
-from .rprint import rlog as log
 from .io import contiguous
+from .landmark_runner import LandmarkRunner
+from .rprint import rlog as log


 def make_abs_path(fn):
@@ -25,40 +34,44 @@ class Trajectory:
     lmk_lst: Union[Tuple, List, np.ndarray] = field(default_factory=list)  # lmk list
     bbox_lst: Union[Tuple, List, np.ndarray] = field(default_factory=list)  # bbox list
-    frame_rgb_lst: Union[Tuple, List, np.ndarray] = field(default_factory=list)  # frame list
+    frame_rgb_lst: Union[Tuple, List, np.ndarray] = field(
+        default_factory=list
+    )  # frame list

-    lmk_crop_lst: Union[Tuple, List, np.ndarray] = field(default_factory=list)  # lmk list
-    frame_rgb_crop_lst: Union[Tuple, List, np.ndarray] = field(default_factory=list)  # frame crop list
+    lmk_crop_lst: Union[Tuple, List, np.ndarray] = field(
+        default_factory=list
+    )  # lmk list
+    frame_rgb_crop_lst: Union[Tuple, List, np.ndarray] = field(
+        default_factory=list
+    )  # frame crop list


 class Cropper(object):
     def __init__(self, **kwargs) -> None:
-        device_id = kwargs.get('device_id', 0)
-        flag_force_cpu = kwargs.get('flag_force_cpu', False)
+        self.crop_cfg: CropConfig = kwargs.get("crop_cfg", None)
+        device_id = kwargs.get("device_id", 0)
+        flag_force_cpu = kwargs.get("flag_force_cpu", False)
         if flag_force_cpu:
-            device = 'cpu'
-            face_analysis_wrapper_provicer = ['CPUExecutionProvider']
+            device = "cpu"
+            face_analysis_wrapper_provicer = ["CPUExecutionProvider"]
         else:
-            device = 'cuda'
+            device = "cuda"
             face_analysis_wrapper_provicer = ["CUDAExecutionProvider"]
         self.landmark_runner = LandmarkRunner(
-            ckpt_path=make_abs_path('../../pretrained_weights/liveportrait/landmark.onnx'),
+            ckpt_path=make_abs_path(self.crop_cfg.landmark_ckpt_path),
            onnx_provider=device,
-            device_id=device_id
+            device_id=device_id,
         )
         self.landmark_runner.warmup()
-
         self.face_analysis_wrapper = FaceAnalysisDIY(
-            name='buffalo_l',
-            root=make_abs_path('../../pretrained_weights/insightface'),
-            providers=face_analysis_wrapper_provicer
+            name="buffalo_l",
+            root=make_abs_path(self.crop_cfg.insightface_root),
+            providers=face_analysis_wrapper_provicer,
         )
         self.face_analysis_wrapper.prepare(ctx_id=device_id, det_size=(512, 512))
         self.face_analysis_wrapper.warmup()

-        self.crop_cfg: CropConfig = kwargs.get('crop_cfg', None)
-
     def update_config(self, user_args):
         for k, v in user_args.items():
             if hasattr(self.crop_cfg, k):
@@ -77,10 +90,12 @@ class Cropper(object):
         )

         if len(src_face) == 0:
-            log('No face detected in the source image.')
+            log("No face detected in the source image.")
             return None
         elif len(src_face) > 1:
-            log(f'More than one face detected in the image, only pick one face by rule {crop_cfg.direction}.')
+            log(
+                f"More than one face detected in the image, only pick one face by rule {crop_cfg.direction}."
+            )

         # NOTE: temporarily only pick the first face, to support multiple face in the future
         src_face = src_face[0]
@@ -97,30 +112,34 @@ class Cropper(object):
         )
         lmk = self.landmark_runner.run(img_rgb, lmk)
-        ret_dct['lmk_crop'] = lmk
+        ret_dct["lmk_crop"] = lmk

         # update a 256x256 version for network input
-        ret_dct['img_crop_256x256'] = cv2.resize(ret_dct['img_crop'], (256, 256), interpolation=cv2.INTER_AREA)
-        ret_dct['lmk_crop_256x256'] = ret_dct['lmk_crop'] * 256 / crop_cfg.dsize
+        ret_dct["img_crop_256x256"] = cv2.resize(
+            ret_dct["img_crop"], (256, 256), interpolation=cv2.INTER_AREA
+        )
+        ret_dct["lmk_crop_256x256"] = ret_dct["lmk_crop"] * 256 / crop_cfg.dsize

         return ret_dct

     def crop_driving_video(self, driving_rgb_lst, **kwargs):
         """Tracking based landmarks/alignment and cropping"""
         trajectory = Trajectory()
-        direction = kwargs.get('direction', 'large-small')
+        direction = kwargs.get("direction", "large-small")
         for idx, frame_rgb in enumerate(driving_rgb_lst):
             if idx == 0 or trajectory.start == -1:
                 src_face = self.face_analysis_wrapper.get(
                     contiguous(frame_rgb[..., ::-1]),
                     flag_do_landmark_2d_106=True,
-                    direction=direction
+                    direction=direction,
                 )
                 if len(src_face) == 0:
-                    log(f'No face detected in the frame #{idx}')
+                    log(f"No face detected in the frame #{idx}")
                     continue
                 elif len(src_face) > 1:
-                    log(f'More than one face detected in the driving frame_{idx}, only pick one face by rule {direction}.')
+                    log(
+                        f"More than one face detected in the driving frame_{idx}, only pick one face by rule {direction}."
+                    )
                 src_face = src_face[0]
                 lmk = src_face.landmark_2d_106
                 lmk = self.landmark_runner.run(frame_rgb, lmk)
@@ -130,47 +149,61 @@ class Cropper(object):
                 trajectory.end = idx

             trajectory.lmk_lst.append(lmk)
-            ret_bbox = parse_bbox_from_landmark(lmk, scale=self.crop_cfg.scale_crop_video, vx_ratio_crop_video=self.crop_cfg.vx_ratio_crop_video, vy_ratio=self.crop_cfg.vy_ratio_crop_video)['bbox']
-            bbox = [ret_bbox[0, 0], ret_bbox[0, 1], ret_bbox[2, 0], ret_bbox[2, 1]]  # 4,
+            ret_bbox = parse_bbox_from_landmark(
+                lmk,
+                scale=self.crop_cfg.scale_crop_video,
+                vx_ratio_crop_video=self.crop_cfg.vx_ratio_crop_video,
+                vy_ratio=self.crop_cfg.vy_ratio_crop_video,
+            )["bbox"]
+            bbox = [
+                ret_bbox[0, 0],
+                ret_bbox[0, 1],
+                ret_bbox[2, 0],
+                ret_bbox[2, 1],
+            ]  # 4,
             trajectory.bbox_lst.append(bbox)  # bbox
             trajectory.frame_rgb_lst.append(frame_rgb)

         global_bbox = average_bbox_lst(trajectory.bbox_lst)
-        for idx, (frame_rgb, lmk) in enumerate(zip(trajectory.frame_rgb_lst, trajectory.lmk_lst)):
+        for idx, (frame_rgb, lmk) in enumerate(
+            zip(trajectory.frame_rgb_lst, trajectory.lmk_lst)
+        ):
             ret_dct = crop_image_by_bbox(
                 frame_rgb,
                 global_bbox,
                 lmk=lmk,
-                dsize=kwargs.get('dsize', 512),
+                dsize=kwargs.get("dsize", 512),
                 flag_rot=False,
                 borderValue=(0, 0, 0),
             )
-            trajectory.frame_rgb_crop_lst.append(ret_dct['img_crop'])
-            trajectory.lmk_crop_lst.append(ret_dct['lmk_crop'])
+            trajectory.frame_rgb_crop_lst.append(ret_dct["img_crop"])
+            trajectory.lmk_crop_lst.append(ret_dct["lmk_crop"])

         return {
-            'frame_crop_lst': trajectory.frame_rgb_crop_lst,
-            'lmk_crop_lst': trajectory.lmk_crop_lst,
+            "frame_crop_lst": trajectory.frame_rgb_crop_lst,
+            "lmk_crop_lst": trajectory.lmk_crop_lst,
         }

     def calc_lmks_from_cropped_video(self, driving_rgb_crop_lst, **kwargs):
         """Tracking based landmarks/alignment"""
         trajectory = Trajectory()
-        direction = kwargs.get('direction', 'large-small')
+        direction = kwargs.get("direction", "large-small")
         for idx, frame_rgb_crop in enumerate(driving_rgb_crop_lst):
             if idx == 0 or trajectory.start == -1:
                 src_face = self.face_analysis_wrapper.get(
                     contiguous(frame_rgb_crop[..., ::-1]),  # convert to BGR
                     flag_do_landmark_2d_106=True,
-                    direction=direction
+                    direction=direction,
                 )
                 if len(src_face) == 0:
-                    log(f'No face detected in the frame #{idx}')
-                    raise Exception(f'No face detected in the frame #{idx}')
+                    log(f"No face detected in the frame #{idx}")
+                    raise Exception(f"No face detected in the frame #{idx}")
                 elif len(src_face) > 1:
-                    log(f'More than one face detected in the driving frame_{idx}, only pick one face by rule {direction}.')
+                    log(
+                        f"More than one face detected in the driving frame_{idx}, only pick one face by rule {direction}."
+                    )
                 src_face = src_face[0]
                 lmk = src_face.landmark_2d_106
                 lmk = self.landmark_runner.run(frame_rgb_crop, lmk)
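
For reference, a minimal usage sketch of the config-driven construction this
patch introduces. The `src.config`/`src.utils` import paths and the weight
locations are assumptions taken from the hunks above, and `make_abs_path` is
assumed to resolve relative paths against the module's own directory; none of
this is verified against the full repository.

# Hedged usage sketch for the reworked Cropper constructor.
# Assumptions: the package is importable as src.*, and the default
# pretrained_weights layout matches the CropConfig defaults above.
from src.config.crop_config import CropConfig
from src.utils.cropper import Cropper

# The model paths now live on CropConfig; override the defaults here if
# your pretrained weights are stored elsewhere.
crop_cfg = CropConfig(
    insightface_root="../../pretrained_weights/insightface",
    landmark_ckpt_path="../../pretrained_weights/liveportrait/landmark.onnx",
)

# Per __init__ above, device_id and flag_force_cpu are read from kwargs,
# not from crop_cfg, so pass them separately. Also note crop_cfg is now
# effectively required: self.crop_cfg.landmark_ckpt_path is dereferenced
# during construction, so leaving the None default raises AttributeError.
cropper = Cropper(
    crop_cfg=crop_cfg,
    device_id=crop_cfg.device_id,
    flag_force_cpu=crop_cfg.flag_force_cpu,
)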
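A similar sketch for overriding cropping parameters after construction via
update_config. Only the hasattr guard is visible in the hunk above; the
assumption that matching keys are then written onto crop_cfg with setattr is
inferred, not shown in the diff.

# Hedged sketch: adjust per-run cropping parameters on an existing Cropper.
# Assumption: update_config copies keys that match CropConfig fields onto
# self.crop_cfg and silently skips the rest.
cropper.update_config({
    "scale_crop_video": 2.0,    # e.g. a looser driving-video crop
    "direction": "large-small",
    "unknown_key": 123,         # no such CropConfig field, so it is ignored
})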