12 changes: 12 additions & 0 deletions projects/powerpaint/README.md
@@ -4,8 +4,20 @@

This README provides a step-by-step guide to download the repository, set up the required virtual environment named "PowerPaint" using conda, and run PowerPaint with or without ControlNet.

## News

**December 18, 2023**

*Enhanced PowerPaint Model*

- We are delighted to announce the release of more stable model weights. These refined weights can now be accessed on [Hugging Face](https://huggingface.co/JunhaoZhuang/PowerPaint-v1/tree/main). The `gradio_PowerPaint.py` file and [Online Demo](https://openxlab.org.cn/apps/detail/rangoliu/PowerPaint) have also been updated as part of this release.


## Next

**Stronger Model Weights Coming Soon!**

________________
<img src='https://github.com/open-mmlab/mmagic/assets/12782558/acd01391-c73f-4997-aafd-0869aebcc915'/>

## Getting Started
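The refreshed checkpoints announced in the News entry can also be fetched programmatically. A minimal sketch, assuming the `huggingface_hub` package is available and that the files should land in the `./models` directory that `gradio_PowerPaint.py` reads from:

```python
from huggingface_hub import snapshot_download

# Download the PowerPaint-v1 weights from the Hugging Face repo linked above.
# `local_dir` is an assumption mirroring the ./models layout the script loads.
snapshot_download(
    repo_id='JunhaoZhuang/PowerPaint-v1',
    local_dir='./models',
)
```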
150 changes: 77 additions & 73 deletions projects/powerpaint/gradio_PowerPaint.py
@@ -11,7 +11,6 @@
StableDiffusionInpaintPipeline as Pipeline
from pipeline.pipeline_PowerPaint_ControlNet import \
StableDiffusionControlNetInpaintPipeline as controlnetPipeline
from safetensors.torch import load_file
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
from utils.utils import TokenizerWrapper, add_tokens

@@ -21,8 +20,7 @@
global pipe
pipe = Pipeline.from_pretrained(
'runwayml/stable-diffusion-inpainting',
torch_dtype=weight_dtype,
safety_checker=None)
torch_dtype=weight_dtype)
pipe.tokenizer = TokenizerWrapper(
from_pretrained='runwayml/stable-diffusion-v1-5',
subfolder='tokenizer',
@@ -34,14 +32,13 @@
placeholder_tokens=['P_ctxt', 'P_shape', 'P_obj'],
initialize_tokens=['a', 'a', 'a'],
num_vectors_per_token=10)
pipe.unet.load_state_dict(
load_file(
'./models/unet/diffusion_pytorch_model.safetensors', device='cuda'),
strict=False)
pipe.text_encoder.load_state_dict(
torch.load('./models/text_encoder/pytorch_model.bin'), strict=False)

from safetensors.torch import load_model
load_model(pipe.unet, "./models/unet/diffusion_pytorch_model.safetensors")
pipe.text_encoder.load_state_dict(torch.load("./models/text_encoder/pytorch_model.bin"), strict=False)
pipe = pipe.to('cuda')


depth_estimator = DPTForDepthEstimation.from_pretrained(
'Intel/dpt-hybrid-midas').to('cuda')
feature_extractor = DPTFeatureExtractor.from_pretrained(
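This hunk replaces the manual `load_file` + `load_state_dict(strict=False)` pair with `safetensors.torch.load_model`, which loads the file into the module and checks keys in one call. A side-by-side sketch of the two styles, assuming a `safetensors` release recent enough to ship `load_model` and a placeholder `weights.safetensors` file:

```python
import torch
from safetensors.torch import load_file, load_model

unet = torch.nn.Linear(4, 4)  # stand-in module; the app uses pipe.unet

# Old style: read the tensors into a dict, then apply them manually.
state_dict = load_file('weights.safetensors')
unet.load_state_dict(state_dict, strict=False)

# New style: one call that loads the file into the module directly
# (raises on missing/unexpected keys unless strict=False is passed).
load_model(unet, 'weights.safetensors')
```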
@@ -51,7 +48,7 @@

global current_control
current_control = 'canny'
controlnet_conditioning_scale = 0.5
# controlnet_conditioning_scale = 0.8


def set_seed(seed):
@@ -94,8 +91,8 @@ def add_task(prompt, negative_prompt, control_type):
elif control_type == 'shape-guided':
promptA = prompt + ' P_shape'
promptB = prompt + ' P_ctxt'
negative_promptA = negative_prompt + ' P_shape'
negative_promptB = negative_prompt + ' P_ctxt'
negative_promptA = negative_prompt
negative_promptB = negative_prompt
elif control_type == 'image-outpainting':
promptA = prompt + ' P_ctxt'
promptB = prompt + ' P_ctxt'
@@ -104,18 +101,18 @@ def add_task(prompt, negative_prompt, control_type):
else:
promptA = prompt + ' P_obj'
promptB = prompt + ' P_obj'
negative_promptA = negative_prompt + ' P_obj'
negative_promptB = negative_prompt + ' P_obj'
negative_promptA = negative_prompt
negative_promptB = negative_prompt

return promptA, promptB, negative_promptA, negative_promptB
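After this change the learned task tokens (`P_ctxt`, `P_shape`, `P_obj`) are appended to the positive prompts only, while negative prompts pass through unchanged. As an illustration, a hypothetical call hitting the default (text-guided) branch:

```python
# Hypothetical inputs; shows the post-change behavior of add_task.
promptA, promptB, negA, negB = add_task(
    'a teddy bear on a bench', 'worst quality', 'text-guided')

assert promptA == promptB == 'a teddy bear on a bench P_obj'
assert negA == negB == 'worst quality'  # no task token appended anymore
```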



def predict(input_image, prompt, fitting_degree, ddim_steps, scale, seed,
negative_prompt, task, vertical_expansion_ratio,
horizontal_expansion_ratio):
negative_prompt, task,vertical_expansion_ratio,horizontal_expansion_ratio):
size1, size2 = input_image['image'].convert('RGB').size

if task != 'image-outpainting':
if task!='image-outpainting':
if size1 < size2:
input_image['image'] = input_image['image'].convert('RGB').resize(
(640, int(size2 / size1 * 640)))
@@ -130,44 +127,34 @@ def predict(input_image, prompt, fitting_degree, ddim_steps, scale, seed,
input_image['image'] = input_image['image'].convert('RGB').resize(
(int(size1 / size2 * 512), 512))

if (vertical_expansion_ratio is not None) and (horizontal_expansion_ratio
is not None): # noqa
o_W, o_H = input_image['image'].convert('RGB').size
c_W = int(horizontal_expansion_ratio * o_W)
c_H = int(vertical_expansion_ratio * o_H)
if vertical_expansion_ratio!=None and horizontal_expansion_ratio!=None:
o_W,o_H = input_image['image'].convert('RGB').size
c_W = int(horizontal_expansion_ratio*o_W)
c_H = int(vertical_expansion_ratio*o_H)

expand_img = np.ones((c_H, c_W, 3), dtype=np.uint8) * 127
expand_img = np.ones((c_H, c_W,3), dtype=np.uint8)*127
original_img = np.array(input_image['image'])
expand_img[int((c_H - o_H) / 2.0):int((c_H - o_H) / 2.0) + o_H,
int((c_W - o_W) / 2.0):int((c_W - o_W) / 2.0) +
o_W, :] = original_img
expand_img[int((c_H-o_H)/2.0):int((c_H-o_H)/2.0)+o_H,int((c_W-o_W)/2.0):int((c_W-o_W)/2.0)+o_W,:] = original_img

blurry_gap = 10

expand_mask = np.ones((c_H, c_W, 3), dtype=np.uint8) * 255
if vertical_expansion_ratio == 1 and horizontal_expansion_ratio != 1:
expand_mask[int((c_H - o_H) / 2.0):int((c_H - o_H) / 2.0) + o_H,
int((c_W - o_W) / 2.0) +
blurry_gap:int((c_W - o_W) / 2.0) + o_W -
blurry_gap, :] = 0
elif vertical_expansion_ratio != 1 and horizontal_expansion_ratio != 1:
expand_mask[int((c_H - o_H) / 2.0) +
blurry_gap:int((c_H - o_H) / 2.0) + o_H - blurry_gap,
int((c_W - o_W) / 2.0) +
blurry_gap:int((c_W - o_W) / 2.0) + o_W -
blurry_gap, :] = 0
elif vertical_expansion_ratio != 1 and horizontal_expansion_ratio == 1:
expand_mask[int((c_H - o_H) / 2.0) +
blurry_gap:int((c_H - o_H) / 2.0) + o_H - blurry_gap,
int((c_W - o_W) /
2.0):int((c_W - o_W) / 2.0) + o_W, :] = 0
expand_mask = np.ones((c_H, c_W,3), dtype=np.uint8)*255
if vertical_expansion_ratio == 1 and horizontal_expansion_ratio!=1:
expand_mask[int((c_H-o_H)/2.0):int((c_H-o_H)/2.0)+o_H,int((c_W-o_W)/2.0)+blurry_gap:int((c_W-o_W)/2.0)+o_W-blurry_gap,:] = 0 #noqa
elif vertical_expansion_ratio != 1 and horizontal_expansion_ratio!=1:
expand_mask[int((c_H-o_H)/2.0)+blurry_gap:int((c_H-o_H)/2.0)+o_H-blurry_gap,int((c_W-o_W)/2.0)+blurry_gap:int((c_W-o_W)/2.0)+o_W-blurry_gap,:] = 0 #noqa
elif vertical_expansion_ratio != 1 and horizontal_expansion_ratio==1:
expand_mask[int((c_H-o_H)/2.0)+blurry_gap:int((c_H-o_H)/2.0)+o_H-blurry_gap,int((c_W-o_W)/2.0):int((c_W-o_W)/2.0)+o_W,:] = 0 #noqa

input_image['image'] = Image.fromarray(expand_img)
input_image['mask'] = Image.fromarray(expand_mask)
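For outpainting, the block above centers the original image on a larger gray canvas and builds a mask that protects it, shrunk by a 10-pixel `blurry_gap` on each expanded edge so the seam gets repainted and blended. A small worked example of the placement arithmetic, with hypothetical sizes:

```python
# Hypothetical: 512x512 input, 2x horizontal expansion, no vertical change.
o_W, o_H = 512, 512
horizontal_expansion_ratio, vertical_expansion_ratio = 2.0, 1.0

c_W = int(horizontal_expansion_ratio * o_W)  # 1024: new canvas width
c_H = int(vertical_expansion_ratio * o_H)    # 512:  height unchanged

left = int((c_W - o_W) / 2.0)  # 256: original occupies columns 256..767
top = int((c_H - o_H) / 2.0)   # 0:   original occupies all rows

blurry_gap = 10
# Mask value 255 means "repaint"; the protected interior runs from
# column left + blurry_gap to left + o_W - blurry_gap over the full
# height, matching the horizontal-only branch above.
```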




promptA, promptB, negative_promptA, negative_promptB = add_task(
prompt, negative_prompt, task)
# print(promptA, promptB, negative_promptA, negative_promptB)
print(promptA, promptB, negative_promptA, negative_promptB)
img = np.array(input_image['image'].convert('RGB'))

W = int(np.shape(img)[0] - np.shape(img)[0] % 8)
@@ -191,8 +178,8 @@ def predict(input_image, prompt, fitting_degree, ddim_steps, scale, seed,
num_inference_steps=ddim_steps).images[0]
mask_np = np.array(input_image['mask'].convert('RGB'))
red = np.array(result).astype('float') * 1
red[:, :, 0] = 0
red[:, :, 2] = 180.0
red[:, :, 0] = 180.0
red[:, :, 2] = 0
red[:, :, 1] = 0
result_m = np.array(result)
result_m = Image.fromarray(
@@ -208,15 +195,17 @@

dict_res = [input_image['mask'].convert('RGB'), result_m]

return result_paste, dict_res
dict_out = [input_image['image'].convert('RGB'), result_paste]

return dict_out, dict_res


def predict_controlnet(input_image, input_control_image, control_type, prompt,
ddim_steps, scale, seed, negative_prompt):
ddim_steps, scale, seed, negative_prompt,controlnet_conditioning_scale):
promptA = prompt + ' P_obj'
promptB = prompt + ' P_obj'
negative_promptA = negative_prompt + ' P_obj'
negative_promptB = negative_prompt + ' P_obj'
negative_promptA = negative_prompt
negative_promptB = negative_prompt
size1, size2 = input_image['image'].convert('RGB').size

if size1 < size2:
@@ -289,6 +278,7 @@ def predict_controlnet(input_image, input_control_image, control_type, prompt,
width=H,
height=W,
guidance_scale=scale,
controlnet_conditioning_scale = controlnet_conditioning_scale,
num_inference_steps=ddim_steps).images[0]
red = np.array(result).astype('float') * 1
red[:, :, 0] = 180.0
@@ -307,29 +297,29 @@ def predict_controlnet(input_image, input_control_image, control_type, prompt,
ours_np = np.asarray(result) / 255.0
ours_np = ours_np * m_img + (1 - m_img) * img_np
result_paste = Image.fromarray(np.uint8(ours_np * 255))
return result_paste, [controlnet_image, result_m]
return [input_image['image'].convert('RGB'), result_paste], [controlnet_image, result_m]
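Both predictors finish with the same paste-back step: the mask is normalized to `[0, 1]` and the raw diffusion output is composited over the original so that only masked pixels change (the `ours_np * m_img + (1 - m_img) * img_np` line above). A self-contained sketch of that blend with dummy arrays:

```python
import numpy as np

# Dummy stand-ins: in the app these come from the pipeline and the UI.
img = np.random.rand(64, 64, 3)     # original image, scaled to [0, 1]
result = np.random.rand(64, 64, 3)  # diffusion output, scaled to [0, 1]
mask = np.zeros((64, 64, 3))
mask[16:48, 16:48] = 1.0            # 1 = repaint this region

# Generated pixels inside the mask, untouched original pixels outside it.
pasted = np.uint8((result * mask + (1 - mask) * img) * 255)
```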


def infer(input_image, text_guided_prompt, text_guided_negative_prompt,
shape_guided_prompt, shape_guided_negative_prompt, fitting_degree,
ddim_steps, scale, seed, task, enable_control, input_control_image,
control_type, vertical_expansion_ratio, horizontal_expansion_ratio,
outpaint_prompt, outpaint_negative_prompt):
control_type,vertical_expansion_ratio,horizontal_expansion_ratio,outpaint_prompt,
outpaint_negative_prompt,controlnet_conditioning_scale,removal_prompt,
removal_negative_prompt):
if task == 'text-guided':
prompt = text_guided_prompt
negative_prompt = text_guided_negative_prompt
elif task == 'shape-guided':
prompt = shape_guided_prompt
negative_prompt = shape_guided_negative_prompt
elif task == 'object-removal':
prompt = ''
negative_prompt = ''
prompt = removal_prompt
negative_prompt = removal_negative_prompt
elif task == 'image-outpainting':
prompt = outpaint_prompt
negative_prompt = outpaint_negative_prompt
return predict(input_image, prompt, fitting_degree, ddim_steps, scale,
seed, negative_prompt, task, vertical_expansion_ratio,
horizontal_expansion_ratio)
seed, negative_prompt, task,vertical_expansion_ratio,horizontal_expansion_ratio)
else:
task = 'text-guided'
prompt = text_guided_prompt
@@ -338,10 +328,10 @@ def infer(input_image, text_guided_prompt, text_guided_negative_prompt,
if enable_control and task == 'text-guided':
return predict_controlnet(input_image, input_control_image,
control_type, prompt, ddim_steps, scale,
seed, negative_prompt)
seed, negative_prompt,controlnet_conditioning_scale)
else:
return predict(input_image, prompt, fitting_degree, ddim_steps, scale,
seed, negative_prompt, task, None, None)
seed, negative_prompt, task,None,None)


def select_tab_text_guided():
@@ -351,7 +341,6 @@ def select_tab_text_guided():
def select_tab_object_removal():
return 'object-removal'


def select_tab_image_outpainting():
return 'image-outpainting'

@@ -371,16 +360,16 @@ def select_tab_shape_guided():
"<a href='https://arxiv.org/abs/2312.03594/'>Paper</a> &ensp;"
"<a href='https://github.com/open-mmlab/mmagic/tree/main/projects/powerpaint'>Code</a> </font></div>" # noqa
)

with gr.Row():
gr.Markdown(
"**Note:** Due to network-related factors, the page may experience occasional bugs! If the inpainting results deviate significantly from expectations, consider toggling between task options to refresh the content." # noqa
)
with gr.Row():
with gr.Column():
gr.Markdown('### Input image and draw mask')
input_image = gr.Image(source='upload', tool='sketch', type='pil')

task = gr.Radio([
'text-guided', 'object-removal', 'shape-guided',
'image-outpainting'
],
task = gr.Radio(['text-guided', 'object-removal', 'shape-guided', 'image-outpainting'],
show_label=False,
visible=False)

@@ -397,6 +386,13 @@ def select_tab_shape_guided():
enable_control = gr.Checkbox(
label='Enable controlnet',
info='Enable this if you want to use controlnet')
controlnet_conditioning_scale = gr.Slider(
label='controlnet conditioning scale',
minimum=0,
maximum=1,
step=0.05,
value=0.5,
)
control_type = gr.Radio(['canny', 'pose', 'depth', 'hed'],
label='Control type')
input_control_image = gr.Image(source='upload', type='pil')
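The new slider exposes `controlnet_conditioning_scale`, which `predict_controlnet` now forwards to the pipeline call (see the hunk above); it scales the ControlNet residuals before they are added to the UNet, so low values favor the text prompt and values near 1 enforce the control image. A sketch of the same knob on the stock diffusers pipeline, on the assumption that the custom PowerPaint pipeline treats it the same way:

```python
import torch
from diffusers import ControlNetModel, StableDiffusionControlNetInpaintPipeline
from PIL import Image

# Placeholder inputs; in the app these come from the Gradio widgets.
init_image = Image.new('RGB', (512, 512))
mask_image = Image.new('L', (512, 512), 255)   # white = repaint everything
control_image = Image.new('RGB', (512, 512))   # e.g. a canny edge map

controlnet = ControlNetModel.from_pretrained(
    'lllyasviel/sd-controlnet-canny', torch_dtype=torch.float16)
pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
    'runwayml/stable-diffusion-inpainting',
    controlnet=controlnet,
    torch_dtype=torch.float16,
).to('cuda')

image = pipe(
    'a red sofa',
    image=init_image,
    mask_image=mask_image,
    control_image=control_image,
    controlnet_conditioning_scale=0.5,  # the slider's value
    num_inference_steps=45,
).images[0]
```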
@@ -408,7 +404,13 @@ def select_tab_shape_guided():
enable_object_removal = gr.Checkbox(
label='Enable object removal inpainting',
value=True,
info='The recommended configuration for the Guidance Scale is 10 or higher. \
If undesired objects appear in the masked area, \
you can address this by specifically increasing the Guidance Scale.',
interactive=False)
removal_prompt = gr.Textbox(label='Prompt')
removal_negative_prompt = gr.Textbox(
label='negative_prompt')
tab_object_removal.select(
fn=select_tab_object_removal, inputs=None, outputs=task)

@@ -417,6 +419,9 @@ def select_tab_shape_guided():
enable_object_removal = gr.Checkbox(
label='Enable image outpainting',
value=True,
info='The recommended configuration for the Guidance Scale is 10 or higher. \
If unwanted random objects appear in the extended image region, \
you can enhance the cleanliness of the extension area by increasing the Guidance Scale.',
interactive=False)
outpaint_prompt = gr.Textbox(label='Outpainting_prompt')
outpaint_negative_prompt = gr.Textbox(
@@ -463,10 +468,7 @@ def select_tab_shape_guided():
label='Steps', minimum=1, maximum=50, value=45, step=1)
scale = gr.Slider(
label='Guidance Scale',
info='For object removal, \
it is recommended to set the value at 10 or above, \
while for image outpainting, \
it is advisable to set it at 18 or above.',
info='For object removal and image outpainting, it is recommended to set the value at 10 or above.', #noqa
minimum=0.1,
maximum=30.0,
value=7.5,
@@ -480,19 +482,21 @@
)
with gr.Column():
gr.Markdown('### Inpainting result')
inpaint_result = gr.Image()
inpaint_result = gr.Gallery(
label='Generated images', show_label=False, columns=2)
gr.Markdown('### Mask')
gallery = gr.Gallery(
label='Generated images', show_label=False, columns=2)
label='Generated masks', show_label=False, columns=2)

run_button.click(
fn=infer,
inputs=[
input_image, text_guided_prompt, text_guided_negative_prompt,
shape_guided_prompt, shape_guided_negative_prompt, fitting_degree,
ddim_steps, scale, seed, task, enable_control, input_control_image,
control_type, vertical_expansion_ratio, horizontal_expansion_ratio,
outpaint_prompt, outpaint_negative_prompt
control_type,vertical_expansion_ratio,horizontal_expansion_ratio,
outpaint_prompt,outpaint_negative_prompt,controlnet_conditioning_scale,
removal_prompt,removal_negative_prompt
],
outputs=[inpaint_result, gallery])
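Since `predict` and `predict_controlnet` now return `[input, result]` pairs, both output components are galleries; note also that Gradio matches the `inputs` list to `infer`'s parameters purely by position, so the two must stay in the same order. A minimal self-contained sketch of the pattern, using a hypothetical demo:

```python
import gradio as gr

def side_by_side(img):
    # Hypothetical stand-in for predict: return an [input, output] pair.
    return [img, img.rotate(180)]

with gr.Blocks() as demo:
    inp = gr.Image(type='pil')
    out = gr.Gallery(label='Before / after', columns=2)
    btn = gr.Button('Run')
    # Component values are passed to the function positionally.
    btn.click(fn=side_by_side, inputs=[inp], outputs=[out])

demo.launch()
```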
