【AI 绘画】Q版人物定制生成

Arthur.AI 2024-09-12 13:01:07 阅读 64

AI 绘画-PuLID手办定制

1. 效果展示

本次测试主要结果展示如下:

牛仔风

结果1

古风

在这里插入图片描述

2. 基本原理

PuLID 是一种与 IP-Adapter 类似的人脸身份特征保持方法。它同时使用 InsightFace 嵌入和 CLIP 嵌入,这一点与 IP-Adapter FaceID Plus 模型的做法相似。不同之处在于:在把图像送入 CLIP 之前,还需要先用 facexlib 将人脸从背景中分离出来(屏蔽背景);并且 PuLID 使用的是 EVA-CLIP,而不是普通的 CLIP。在注意力(attn)层的改写上,PuLID 也比 IP-Adapter 做得更多:它会对张量进行零填充,并将结果加到隐藏状态的正交分量上。如果您有兴趣,可以阅读他们的论文,了解具体的处理方式。

在这里插入图片描述

3. 环境安装

环境安装及依赖

pip install opencv-python transformers accelerate insightface

git clone https://github.com/ToTheBeginning/PuLID.git

cd PuLID

conda create --name pulid python=3.10

conda activate pulid

pip install -r requirements.txt

依赖模型下载

from huggingface_hub import hf_hub_download

# Download the dependency checkpoints into ./checkpoints.
# NOTE(review): repo_id and filenames are kept exactly as published in the
# article; verify that they resolve on the Hugging Face Hub before relying on them.
hf_hub_download(repo_id="InstantX/PuLID", filename="ControlNetModel/config.json", local_dir="./checkpoints")

hf_hub_download(repo_id="InstantX/PuLID", filename="ControlNetModel/diffusion_pytorch_model.safetensors", local_dir="./checkpoints")

hf_hub_download(repo_id="InstantX/PuLID", filename="ip-adapter.bin", local_dir="./checkpoints")

4. 代码实现

主要测试代码:

核心部分需要调整的参数有:lora的权重、adapter的权重、prompt、negative_prompt以及输入图片的landmark信息

import sys

import cv2

import numpy as np

import torch

from pulid import attention_processor as attention

from pulid.pipeline import PuLIDPipeline

from pulid.utils import resize_numpy_image_long, seed_everything

# This script only runs inference, so autograd is switched off globally.
torch.set_grad_enabled(False)

class PulidInference:
    """Wrap a ``PuLIDPipeline`` with the "cute" LoRA adapter enabled.

    ``pulid_inference`` is the convenience entry point with fixed sampling
    settings; ``run`` is the lower-level call that takes every setting
    positionally.
    """

    def __init__(self):
        """Create the pipeline and activate the "cute" LoRA at weight 0.9."""
        self.pipeline = PuLIDPipeline()
        self.pipeline.pipe.load_lora_weights(
            "../data/data282269/", weight_name="cute.safetensors", adapter_name="cute"
        )
        self.pipeline.pipe.set_adapters(["cute"], adapter_weights=[0.9])

        # Negative prompt that is always applied; caller-supplied terms are
        # appended to it in ``pulid_inference``.
        self.DEFAULT_NEGATIVE_PROMPT = (
            'flaws in the eyes, flaws in the face, flaws, lowres, non-HDRi, low quality, worst quality,'
            'bad hands, bad fingers,artifacts noise, text, watermark, glitch, deformed, mutated, ugly, disfigured, '
            'low resolution, partially rendered objects, deformed or partially rendered eyes, '
            'deformed, deformed eyeballs, cross-eyed,blurry'
        )

    def run(self, *args):
        """Generate ``n_samples`` images.

        Positional arguments, in order:
            ``id_image``, three supplementary ID images (each may be ``None``),
            ``prompt``, ``neg_prompt``, ``scale``, ``n_samples``, ``seed``,
            ``steps``, ``H``, ``W``, ``id_scale``, ``mode``, ``id_mix``.

        Returns:
            ``(images, debug_images)`` where ``images`` is a list of numpy
            arrays and ``debug_images`` is ``self.pipeline.debug_img_list``.

        Raises:
            ValueError: if ``mode`` is neither ``'fidelity'`` nor
                ``'extremely style'``.
        """
        id_image = args[0]
        supp_images = args[1:4]
        prompt, neg_prompt, scale, n_samples, seed, steps, H, W, id_scale, mode, id_mix = args[4:]

        self.pipeline.debug_img_list = []

        # The mode is selected through module-level switches on the
        # attention processor.
        if mode == 'fidelity':
            attention.NUM_ZERO = 8
            attention.ORTHO = False
            attention.ORTHO_v2 = True
        elif mode == 'extremely style':
            attention.NUM_ZERO = 16
            attention.ORTHO = True
            attention.ORTHO_v2 = False
        else:
            raise ValueError(
                f"unsupported mode {mode!r}; expected 'fidelity' or 'extremely style'"
            )

        if id_image is not None:
            id_image = resize_numpy_image_long(id_image, 1024)
            id_embeddings = self.pipeline.get_id_embedding(id_image)
            for supp_id_image in supp_images:
                if supp_id_image is None:
                    continue
                supp_id_image = resize_numpy_image_long(supp_id_image, 1024)
                supp_id_embeddings = self.pipeline.get_id_embedding(supp_id_image)
                # Without id_mix only the first 5 entries along dim 1 of a
                # supplementary embedding are appended.
                id_embeddings = torch.cat(
                    (id_embeddings, supp_id_embeddings if id_mix else supp_id_embeddings[:, :5]), dim=1
                )
        else:
            id_embeddings = None

        # Seeded once, so the samples of one call differ from each other.
        seed_everything(seed)
        ims = []
        for _ in range(n_samples):
            img = self.pipeline.inference(prompt, (1, H, W), neg_prompt, id_embeddings, id_scale, scale, steps)[0]
            ims.append(np.array(img))

        return ims, self.pipeline.debug_img_list

    def pulid_inference(self, face_image, prompt_in, neg_prompt_in='', supp_images=None, seed=55):
        """Generate a portrait of ``face_image`` with fixed sampling settings.

        Args:
            face_image: numpy image of the identity to keep, or ``None``.
                NOTE(review): PuLID's ``get_id_embedding`` appears to expect
                RGB input; convert ``cv2.imread`` output first. Confirm
                against the PuLID sources.
            prompt_in: text appended to the built-in style prompt.
            neg_prompt_in: extra negative-prompt terms; may be empty.
            supp_images: optional sequence of up to three supplementary ID
                images; extra entries are ignored.
            seed: random seed passed to ``run``.

        Returns:
            List of generated images as numpy arrays.
        """
        scale = 1.2
        n_samples = 1
        steps = 4
        H, W = 1024, 768
        id_scale = 0.99
        mode = 'extremely style'  # or 'fidelity'
        id_mix = False

        prompt = 'MG_ip,pixar,chibi,portrait,cinematic, best quality,' + prompt_in

        # The default negative prompt has no trailing separator, so add one
        # to keep its last term from fusing with the caller's first term.
        neg_prompt = self.DEFAULT_NEGATIVE_PROMPT
        if neg_prompt_in:
            neg_prompt = f'{neg_prompt}, {neg_prompt_in}'

        # ``run`` expects exactly three supplementary slots.
        supp = [] if supp_images is None else list(supp_images)[:3]
        supp += [None] * (3 - len(supp))

        ims, _debug_img = self.run(
            face_image,
            *supp,
            prompt,
            neg_prompt,
            scale,
            n_samples,
            seed,
            steps,
            H,
            W,
            id_scale,
            mode,
            id_mix,
        )
        return ims

if __name__ == '__main__':
    pulid = PulidInference()

    input_path = './input3.jpg'
    face_image = cv2.imread(input_path)
    # cv2.imread returns None instead of raising when the file cannot be read.
    if face_image is None:
        raise FileNotFoundError(f'cannot read input image: {input_path}')
    # cv2.imread yields BGR; convert to RGB before handing the image to PuLID.
    # NOTE(review): PuLID's get_id_embedding appears to expect RGB - confirm.
    face_image = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)

    # The negative-prompt argument is passed explicitly (no extra terms).
    ims = pulid.pulid_inference(face_image, 'cinematic film, 1 girl', '')

    for i, im in enumerate(ims):
        # Swap channels back to BGR, the order cv2.imwrite expects.
        cv2.imwrite(f'plu-a-{i}.jpg', cv2.cvtColor(im, cv2.COLOR_RGB2BGR))

实验结果:

在这里插入图片描述

5. 总结及扩展

添加不同的lora可以产生不同的效果,比如我们添加一个龙的背景lora,来生成不同风格的写真

<code>

import sys

import cv2

import numpy as np

import torch

from pulid import attention_processor as attention

from pulid.pipeline import PuLIDPipeline

from pulid.utils import resize_numpy_image_long, seed_everything

# This script only runs inference, so autograd is switched off globally.
torch.set_grad_enabled(False)

class PulidInference:
    """Wrap a ``PuLIDPipeline`` with the "cute" and "Sacred" LoRA adapters.

    ``pulid_inference`` is the convenience entry point with fixed sampling
    settings; ``run`` is the lower-level call that takes every setting
    positionally.
    """

    def __init__(self):
        """Create the pipeline, load both LoRAs and activate them together."""
        self.pipeline = PuLIDPipeline()
        self.pipeline.pipe.load_lora_weights(
            "../data/data282269/", weight_name="cute.safetensors", adapter_name="cute"
        )
        self.pipeline.pipe.load_lora_weights(
            "../data/data283423/", weight_name="Sacred_beast_v2.3.safetensors", adapter_name="Sacred"
        )
        # Activate both adapters in ONE call: each set_adapters call replaces
        # the active set, so two separate calls would leave only "Sacred" on.
        self.pipeline.pipe.set_adapters(["cute", "Sacred"], adapter_weights=[0.9, 0.9])

        # Negative prompt that is always applied; caller-supplied terms are
        # appended to it in ``pulid_inference``.
        self.DEFAULT_NEGATIVE_PROMPT = (
            'flaws in the eyes, flaws in the face, flaws, lowres, non-HDRi, low quality, worst quality,'
            'bad hands, bad fingers,artifacts noise, text, watermark, glitch, deformed, mutated, ugly, disfigured, '
            'low resolution, partially rendered objects, deformed or partially rendered eyes, '
            'deformed, deformed eyeballs, cross-eyed,blurry'
        )

    def run(self, *args):
        """Generate ``n_samples`` images.

        Positional arguments, in order:
            ``id_image``, three supplementary ID images (each may be ``None``),
            ``prompt``, ``neg_prompt``, ``scale``, ``n_samples``, ``seed``,
            ``steps``, ``H``, ``W``, ``id_scale``, ``mode``, ``id_mix``.

        Returns:
            ``(images, debug_images)`` where ``images`` is a list of numpy
            arrays and ``debug_images`` is ``self.pipeline.debug_img_list``.

        Raises:
            ValueError: if ``mode`` is neither ``'fidelity'`` nor
                ``'extremely style'``.
        """
        id_image = args[0]
        supp_images = args[1:4]
        prompt, neg_prompt, scale, n_samples, seed, steps, H, W, id_scale, mode, id_mix = args[4:]

        self.pipeline.debug_img_list = []

        # The mode is selected through module-level switches on the
        # attention processor.
        if mode == 'fidelity':
            attention.NUM_ZERO = 8
            attention.ORTHO = False
            attention.ORTHO_v2 = True
        elif mode == 'extremely style':
            attention.NUM_ZERO = 16
            attention.ORTHO = True
            attention.ORTHO_v2 = False
        else:
            raise ValueError(
                f"unsupported mode {mode!r}; expected 'fidelity' or 'extremely style'"
            )

        if id_image is not None:
            id_image = resize_numpy_image_long(id_image, 1024)
            id_embeddings = self.pipeline.get_id_embedding(id_image)
            for supp_id_image in supp_images:
                if supp_id_image is None:
                    continue
                supp_id_image = resize_numpy_image_long(supp_id_image, 1024)
                supp_id_embeddings = self.pipeline.get_id_embedding(supp_id_image)
                # Without id_mix only the first 5 entries along dim 1 of a
                # supplementary embedding are appended.
                id_embeddings = torch.cat(
                    (id_embeddings, supp_id_embeddings if id_mix else supp_id_embeddings[:, :5]), dim=1
                )
        else:
            id_embeddings = None

        # Seeded once, so the samples of one call differ from each other.
        seed_everything(seed)
        ims = []
        for _ in range(n_samples):
            img = self.pipeline.inference(prompt, (1, H, W), neg_prompt, id_embeddings, id_scale, scale, steps)[0]
            ims.append(np.array(img))

        return ims, self.pipeline.debug_img_list

    def pulid_inference(self, face_image, prompt_in, neg_prompt_in='', supp_images=None, seed=55):
        """Generate eight portraits of ``face_image`` with fixed settings.

        Args:
            face_image: numpy image of the identity to keep, or ``None``.
                NOTE(review): PuLID's ``get_id_embedding`` appears to expect
                RGB input; convert ``cv2.imread`` output first. Confirm
                against the PuLID sources.
            prompt_in: text appended to the built-in style prompt.
            neg_prompt_in: extra negative-prompt terms; may be empty.
            supp_images: optional sequence of up to three supplementary ID
                images; extra entries are ignored.
            seed: random seed passed to ``run``.

        Returns:
            List of generated images as numpy arrays.
        """
        scale = 1.2
        n_samples = 8
        steps = 4
        H, W = 1024, 768
        id_scale = 0.99
        mode = 'extremely style'  # or 'fidelity'
        id_mix = False

        prompt = 'MG_ip,pixar,chibi,portrait,cinematic, best quality,' + prompt_in

        # The default negative prompt has no trailing separator, so add one
        # to keep its last term from fusing with the caller's first term.
        neg_prompt = self.DEFAULT_NEGATIVE_PROMPT
        if neg_prompt_in:
            neg_prompt = f'{neg_prompt}, {neg_prompt_in}'

        # ``run`` expects exactly three supplementary slots.
        supp = [] if supp_images is None else list(supp_images)[:3]
        supp += [None] * (3 - len(supp))

        ims, _debug_img = self.run(
            face_image,
            *supp,
            prompt,
            neg_prompt,
            scale,
            n_samples,
            seed,
            steps,
            H,
            W,
            id_scale,
            mode,
            id_mix,
        )
        return ims

if __name__ == '__main__':
    pulid = PulidInference()

    input_path = './input3.jpg'
    face_image = cv2.imread(input_path)
    # cv2.imread returns None instead of raising when the file cannot be read.
    if face_image is None:
        raise FileNotFoundError(f'cannot read input image: {input_path}')
    # cv2.imread yields BGR; convert to RGB before handing the image to PuLID.
    # NOTE(review): PuLID's get_id_embedding appears to expect RGB - confirm.
    face_image = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)

    # The negative-prompt argument is passed explicitly (no extra terms).
    ims = pulid.pulid_inference(face_image, 'BJ_Sacred_beast, 1 girl, white hanfu dress,', '')

    for i, im in enumerate(ims):
        # Swap channels back to BGR, the order cv2.imwrite expects.
        cv2.imwrite(f'plu-b-{i}.jpg', cv2.cvtColor(im, cv2.COLOR_RGB2BGR))

结果:

在这里插入图片描述

6. 资源链接

Online Demo

Colab: https://github.com/camenduru/PuLID-jupyter provided by camenduru
Replicate: https://replicate.com/zsxkib/pulid provided by zsxkib

ComfyUI

https://github.com/cubiq/PuLID_ComfyUI provided by cubiq, native ComfyUI implementation
https://github.com/ZHO-ZHO-ZHO/ComfyUI-PuLID-ZHO provided by ZHO, diffusers-based implementation

WebUI

Mikubill/sd-webui-controlnet#2838 provided by huchenlei



声明

本文内容仅代表作者观点,或转载于其他网站,本站不以此文作为商业用途
如有涉及侵权,请联系本站进行删除
转载本站原创文章,请注明来源及作者。