diff --git a/projects/powerpaint/README.md b/projects/powerpaint/README.md
index 26cca91a0..4f74ba1e1 100644
--- a/projects/powerpaint/README.md
+++ b/projects/powerpaint/README.md
@@ -4,8 +4,20 @@
 
 This README provides a step-by-step guide to download the repository, set up the required virtual environment named "PowerPaint" using conda, and run PowerPaint with or without ControlNet.
 
+## News
+
+**December 18, 2023**
+
+*Enhanced PowerPaint Model*
+
+- We are delighted to announce the release of more stable model weights. These refined weights can now be accessed on [Hugging Face](https://huggingface.co/JunhaoZhuang/PowerPaint-v1/tree/main). The `gradio_PowerPaint.py` file and [Online Demo](https://openxlab.org.cn/apps/detail/rangoliu/PowerPaint) have also been updated as part of this release.
+
+
+## Next
+
 **Stronger Model Weights Coming Soon!**
 
+________________
 
 ## Getting Started
diff --git a/projects/powerpaint/gradio_PowerPaint.py b/projects/powerpaint/gradio_PowerPaint.py
index 11981560c..0a9c3c74c 100644
--- a/projects/powerpaint/gradio_PowerPaint.py
+++ b/projects/powerpaint/gradio_PowerPaint.py
@@ -11,7 +11,6 @@
     StableDiffusionInpaintPipeline as Pipeline
 from pipeline.pipeline_PowerPaint_ControlNet import \
     StableDiffusionControlNetInpaintPipeline as controlnetPipeline
-from safetensors.torch import load_file
 from transformers import DPTFeatureExtractor, DPTForDepthEstimation
 from utils.utils import TokenizerWrapper, add_tokens
 
@@ -21,8 +20,7 @@
 global pipe
 pipe = Pipeline.from_pretrained(
     'runwayml/stable-diffusion-inpainting',
-    torch_dtype=weight_dtype,
-    safety_checker=None)
+    torch_dtype=weight_dtype)
 pipe.tokenizer = TokenizerWrapper(
     from_pretrained='runwayml/stable-diffusion-v1-5',
     subfolder='tokenizer',
@@ -34,14 +32,13 @@
     placeholder_tokens=['P_ctxt', 'P_shape', 'P_obj'],
     initialize_tokens=['a', 'a', 'a'],
     num_vectors_per_token=10)
-pipe.unet.load_state_dict(
-    load_file(
-        './models/unet/diffusion_pytorch_model.safetensors', device='cuda'),
-    strict=False)
-pipe.text_encoder.load_state_dict(
-    torch.load('./models/text_encoder/pytorch_model.bin'), strict=False)
+
+from safetensors.torch import load_model
+load_model(pipe.unet, "./models/unet/diffusion_pytorch_model.safetensors")
+pipe.text_encoder.load_state_dict(torch.load("./models/text_encoder/pytorch_model.bin"), strict=False)
 pipe = pipe.to('cuda')
+
 depth_estimator = DPTForDepthEstimation.from_pretrained(
     'Intel/dpt-hybrid-midas').to('cuda')
 feature_extractor = DPTFeatureExtractor.from_pretrained(
@@ -51,7 +48,7 @@
 global current_control
 current_control = 'canny'
 
-controlnet_conditioning_scale = 0.5
+# controlnet_conditioning_scale = 0.8
 
 
 def set_seed(seed):
@@ -94,8 +91,8 @@ def add_task(prompt, negative_prompt, control_type):
     elif control_type == 'shape-guided':
         promptA = prompt + ' P_shape'
         promptB = prompt + ' P_ctxt'
-        negative_promptA = negative_prompt + ' P_shape'
-        negative_promptB = negative_prompt + ' P_ctxt'
+        negative_promptA = negative_prompt
+        negative_promptB = negative_prompt
     elif control_type == 'image-outpainting':
         promptA = prompt + ' P_ctxt'
         promptB = prompt + ' P_ctxt'
@@ -104,18 +101,18 @@ def add_task(prompt, negative_prompt, control_type):
     else:
         promptA = prompt + ' P_obj'
         promptB = prompt + ' P_obj'
-        negative_promptA = negative_prompt + ' P_obj'
-        negative_promptB = negative_prompt + ' P_obj'
+        negative_promptA = negative_prompt
+        negative_promptB = negative_prompt
     return promptA, promptB, negative_promptA, negative_promptB
 
 
+
 def predict(input_image, prompt, fitting_degree, ddim_steps, scale, seed,
-            negative_prompt, task, vertical_expansion_ratio,
-            horizontal_expansion_ratio):
+            negative_prompt, task,vertical_expansion_ratio,horizontal_expansion_ratio):
     size1, size2 = input_image['image'].convert('RGB').size
-    if task != 'image-outpainting':
+    if task!='image-outpainting':
         if size1 < size2:
             input_image['image'] = input_image['image'].convert('RGB').resize(
                 (640, int(size2 / size1 * 640)))
@@ -130,44 +127,34 @@ def predict(input_image, prompt, fitting_degree, ddim_steps, scale, seed,
             input_image['image'] = input_image['image'].convert('RGB').resize(
                 (int(size1 / size2 * 512), 512))
 
-    if (vertical_expansion_ratio is not None) and (horizontal_expansion_ratio
-                                                   is not None):  # noqa
-        o_W, o_H = input_image['image'].convert('RGB').size
-        c_W = int(horizontal_expansion_ratio * o_W)
-        c_H = int(vertical_expansion_ratio * o_H)
+    if vertical_expansion_ratio!=None and horizontal_expansion_ratio!=None:
+        o_W,o_H = input_image['image'].convert('RGB').size
+        c_W = int(horizontal_expansion_ratio*o_W)
+        c_H = int(vertical_expansion_ratio*o_H)
 
-        expand_img = np.ones((c_H, c_W, 3), dtype=np.uint8) * 127
+        expand_img = np.ones((c_H, c_W,3), dtype=np.uint8)*127
         original_img = np.array(input_image['image'])
-        expand_img[int((c_H - o_H) / 2.0):int((c_H - o_H) / 2.0) + o_H,
-                   int((c_W - o_W) / 2.0):int((c_W - o_W) / 2.0) +
-                   o_W, :] = original_img
+        expand_img[int((c_H-o_H)/2.0):int((c_H-o_H)/2.0)+o_H,int((c_W-o_W)/2.0):int((c_W-o_W)/2.0)+o_W,:] = original_img
 
         blurry_gap = 10
 
-        expand_mask = np.ones((c_H, c_W, 3), dtype=np.uint8) * 255
-        if vertical_expansion_ratio == 1 and horizontal_expansion_ratio != 1:
-            expand_mask[int((c_H - o_H) / 2.0):int((c_H - o_H) / 2.0) + o_H,
-                        int((c_W - o_W) / 2.0) +
-                        blurry_gap:int((c_W - o_W) / 2.0) + o_W -
-                        blurry_gap, :] = 0
-        elif vertical_expansion_ratio != 1 and horizontal_expansion_ratio != 1:
-            expand_mask[int((c_H - o_H) / 2.0) +
-                        blurry_gap:int((c_H - o_H) / 2.0) + o_H - blurry_gap,
-                        int((c_W - o_W) / 2.0) +
-                        blurry_gap:int((c_W - o_W) / 2.0) + o_W -
-                        blurry_gap, :] = 0
-        elif vertical_expansion_ratio != 1 and horizontal_expansion_ratio == 1:
-            expand_mask[int((c_H - o_H) / 2.0) +
-                        blurry_gap:int((c_H - o_H) / 2.0) + o_H - blurry_gap,
-                        int((c_W - o_W) /
-                            2.0):int((c_W - o_W) / 2.0) + o_W, :] = 0
+        expand_mask = np.ones((c_H, c_W,3), dtype=np.uint8)*255
+        if vertical_expansion_ratio == 1 and horizontal_expansion_ratio!=1:
+            expand_mask[int((c_H-o_H)/2.0):int((c_H-o_H)/2.0)+o_H,int((c_W-o_W)/2.0)+blurry_gap:int((c_W-o_W)/2.0)+o_W-blurry_gap,:] = 0 #noqa
+        elif vertical_expansion_ratio != 1 and horizontal_expansion_ratio!=1:
+            expand_mask[int((c_H-o_H)/2.0)+blurry_gap:int((c_H-o_H)/2.0)+o_H-blurry_gap,int((c_W-o_W)/2.0)+blurry_gap:int((c_W-o_W)/2.0)+o_W-blurry_gap,:] = 0 #noqa
+        elif vertical_expansion_ratio != 1 and horizontal_expansion_ratio==1:
+            expand_mask[int((c_H-o_H)/2.0)+blurry_gap:int((c_H-o_H)/2.0)+o_H-blurry_gap,int((c_W-o_W)/2.0):int((c_W-o_W)/2.0)+o_W,:] = 0 #noqa
 
         input_image['image'] = Image.fromarray(expand_img)
         input_image['mask'] = Image.fromarray(expand_mask)
 
+
+
+
     promptA, promptB, negative_promptA, negative_promptB = add_task(
         prompt, negative_prompt, task)
-    # print(promptA, promptB, negative_promptA, negative_promptB)
+    print(promptA, promptB, negative_promptA, negative_promptB)
     img = np.array(input_image['image'].convert('RGB'))
 
     W = int(np.shape(img)[0] - np.shape(img)[0] % 8)
@@ -191,8 +178,8 @@ def predict(input_image, prompt, fitting_degree, ddim_steps, scale, seed,
         num_inference_steps=ddim_steps).images[0]
     mask_np = np.array(input_image['mask'].convert('RGB'))
     red = np.array(result).astype('float') * 1
-    red[:, :, 0] = 0
-    red[:, :, 2] = 180.0
+    red[:, :, 0] = 180.0
+    red[:, :, 2] = 0
     red[:, :, 1] = 0
     result_m = np.array(result)
     result_m = Image.fromarray(
@@ -208,15 +195,17 @@ def predict(input_image, prompt, fitting_degree, ddim_steps, scale, seed,
 
     dict_res = [input_image['mask'].convert('RGB'), result_m]
 
-    return result_paste, dict_res
+    dict_out = [input_image['image'].convert('RGB'), result_paste]
+
+    return dict_out, dict_res
 
 
 def predict_controlnet(input_image, input_control_image, control_type, prompt,
-                       ddim_steps, scale, seed, negative_prompt):
+                       ddim_steps, scale, seed, negative_prompt,controlnet_conditioning_scale):
     promptA = prompt + ' P_obj'
     promptB = prompt + ' P_obj'
-    negative_promptA = negative_prompt + ' P_obj'
-    negative_promptB = negative_prompt + ' P_obj'
+    negative_promptA = negative_prompt
+    negative_promptB = negative_prompt
 
     size1, size2 = input_image['image'].convert('RGB').size
     if size1 < size2:
@@ -289,6 +278,7 @@ def predict_controlnet(input_image, input_control_image, control_type, prompt,
         width=H,
         height=W,
         guidance_scale=scale,
+        controlnet_conditioning_scale = controlnet_conditioning_scale,
         num_inference_steps=ddim_steps).images[0]
     red = np.array(result).astype('float') * 1
     red[:, :, 0] = 180.0
@@ -307,14 +297,15 @@ def predict_controlnet(input_image, input_control_image, control_type, prompt,
     ours_np = np.asarray(result) / 255.0
     ours_np = ours_np * m_img + (1 - m_img) * img_np
     result_paste = Image.fromarray(np.uint8(ours_np * 255))
-    return result_paste, [controlnet_image, result_m]
+    return [input_image['image'].convert('RGB'), result_paste], [controlnet_image, result_m]
 
 
 def infer(input_image, text_guided_prompt, text_guided_negative_prompt,
           shape_guided_prompt, shape_guided_negative_prompt, fitting_degree,
           ddim_steps, scale, seed, task, enable_control, input_control_image,
-          control_type, vertical_expansion_ratio, horizontal_expansion_ratio,
-          outpaint_prompt, outpaint_negative_prompt):
+          control_type,vertical_expansion_ratio,horizontal_expansion_ratio,outpaint_prompt,
+          outpaint_negative_prompt,controlnet_conditioning_scale,removal_prompt,
+          removal_negative_prompt):
     if task == 'text-guided':
         prompt = text_guided_prompt
         negative_prompt = text_guided_negative_prompt
@@ -322,14 +313,13 @@ def infer(input_image, text_guided_prompt, text_guided_negative_prompt,
         prompt = shape_guided_prompt
         negative_prompt = shape_guided_negative_prompt
     elif task == 'object-removal':
-        prompt = ''
-        negative_prompt = ''
+        prompt = removal_prompt
+        negative_prompt = removal_negative_prompt
     elif task == 'image-outpainting':
         prompt = outpaint_prompt
         negative_prompt = outpaint_negative_prompt
         return predict(input_image, prompt, fitting_degree, ddim_steps, scale,
-                       seed, negative_prompt, task, vertical_expansion_ratio,
-                       horizontal_expansion_ratio)
+                       seed, negative_prompt, task,vertical_expansion_ratio,horizontal_expansion_ratio)
     else:
         task = 'text-guided'
         prompt = text_guided_prompt
@@ -338,10 +328,10 @@ def infer(input_image, text_guided_prompt, text_guided_negative_prompt,
     if enable_control and task == 'text-guided':
         return predict_controlnet(input_image, input_control_image,
                                   control_type, prompt, ddim_steps, scale,
-                                  seed, negative_prompt)
+                                  seed, negative_prompt,controlnet_conditioning_scale)
     else:
         return predict(input_image, prompt, fitting_degree, ddim_steps, scale,
-                       seed, negative_prompt, task, None, None)
+                       seed, negative_prompt, task,None,None)
 
 
 def select_tab_text_guided():
@@ -351,7 +341,6 @@ def select_tab_text_guided():
 def select_tab_object_removal():
     return 'object-removal'
 
-
 def select_tab_image_outpainting():
     return 'image-outpainting'
 
@@ -371,16 +360,16 @@ def select_tab_shape_guided():
             "Paper  "
             "Code "  # noqa
         )
-
+    with gr.Row():
+        gr.Markdown(
+            "**Note:** Due to network-related factors, the page may experience occasional bugs! If the inpainting results deviate significantly from expectations, consider toggling between task options to refresh the content."  # noqa
+        )
     with gr.Row():
         with gr.Column():
             gr.Markdown('### Input image and draw mask')
             input_image = gr.Image(source='upload', tool='sketch', type='pil')
-            task = gr.Radio([
-                'text-guided', 'object-removal', 'shape-guided',
-                'image-outpainting'
-            ],
+            task = gr.Radio(['text-guided', 'object-removal', 'shape-guided', 'image-outpainting'],
                             show_label=False,
                             visible=False)
@@ -397,6 +386,13 @@
                 enable_control = gr.Checkbox(
                     label='Enable controlnet',
                     info='Enable this if you want to use controlnet')
+                controlnet_conditioning_scale = gr.Slider(
+                    label='controlnet conditioning scale',
+                    minimum=0,
+                    maximum=1,
+                    step=0.05,
+                    value=0.5,
+                )
                 control_type = gr.Radio(['canny', 'pose', 'depth', 'hed'],
                                         label='Control type')
                 input_control_image = gr.Image(source='upload', type='pil')
@@ -408,7 +404,13 @@
                 enable_object_removal = gr.Checkbox(
                     label='Enable object removal inpainting',
                     value=True,
+                    info='The recommended configuration for the Guidance Scale is 10 or higher. \
+                        If undesired objects appear in the masked area, \
+                        you can address this by specifically increasing the Guidance Scale.',
                     interactive=False)
+                removal_prompt = gr.Textbox(label='Prompt')
+                removal_negative_prompt = gr.Textbox(
+                    label='negative_prompt')
             tab_object_removal.select(
                 fn=select_tab_object_removal, inputs=None, outputs=task)
@@ -417,6 +419,9 @@
                 enable_object_removal = gr.Checkbox(
                     label='Enable image outpainting',
                     value=True,
+                    info='The recommended configuration for the Guidance Scale is 10 or higher. \
+                        If unwanted random objects appear in the extended image region, \
+                        you can enhance the cleanliness of the extension area by increasing the Guidance Scale.',
                     interactive=False)
                 outpaint_prompt = gr.Textbox(label='Outpainting_prompt')
                 outpaint_negative_prompt = gr.Textbox(
@@ -463,10 +468,7 @@
                     label='Steps', minimum=1, maximum=50, value=45, step=1)
                 scale = gr.Slider(
                     label='Guidance Scale',
-                    info='For object removal, \
-                        it is recommended to set the value at 10 or above, \
-                        while for image outpainting, \
-                        it is advisable to set it at 18 or above.',
+                    info='For object removal and image outpainting, it is recommended to set the value at 10 or above.',  #noqa
                     minimum=0.1,
                     maximum=30.0,
                     value=7.5,
@@ -480,10 +482,11 @@
                 )
         with gr.Column():
             gr.Markdown('### Inpainting result')
-            inpaint_result = gr.Image()
+            inpaint_result = gr.Gallery(
+                label='Generated images', show_label=False, columns=2)
             gr.Markdown('### Mask')
             gallery = gr.Gallery(
-                label='Generated images', show_label=False, columns=2)
+                label='Generated masks', show_label=False, columns=2)
 
     run_button.click(
         fn=infer,
@@ -491,8 +494,9 @@
             input_image, text_guided_prompt, text_guided_negative_prompt,
             shape_guided_prompt, shape_guided_negative_prompt, fitting_degree,
             ddim_steps, scale, seed, task, enable_control, input_control_image,
-            control_type, vertical_expansion_ratio, horizontal_expansion_ratio,
-            outpaint_prompt, outpaint_negative_prompt
+            control_type,vertical_expansion_ratio,horizontal_expansion_ratio,
+            outpaint_prompt,outpaint_negative_prompt,controlnet_conditioning_scale,
+            removal_prompt,removal_negative_prompt
         ],
         outputs=[inpaint_result, gallery])
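
Note on the weight-loading change in `gradio_PowerPaint.py` above: the UNet weights are now loaded with `safetensors.torch.load_model`, which loads the safetensors file directly into an existing module, replacing the previous `load_file(...)` plus `load_state_dict(..., strict=False)` pattern. Below is a minimal sketch of the new loading path, not a verbatim excerpt of the full script; it assumes the PowerPaint-v1 weights linked in the README have been downloaded into `./models` and that the installed `safetensors` release provides `load_model`.

```python
import torch
from safetensors.torch import load_model

from pipeline.pipeline_PowerPaint import \
    StableDiffusionInpaintPipeline as Pipeline

# weight_dtype is defined earlier in gradio_PowerPaint.py; float16 is shown
# here only for illustration.
weight_dtype = torch.float16

# Base Stable Diffusion inpainting pipeline (the safety_checker=None override
# was dropped in this patch).
pipe = Pipeline.from_pretrained(
    'runwayml/stable-diffusion-inpainting', torch_dtype=weight_dtype)

# New path: load_model writes the safetensors checkpoint straight into the
# UNet module, instead of load_file + load_state_dict(strict=False).
load_model(pipe.unet, './models/unet/diffusion_pytorch_model.safetensors')

# The text encoder still loads a plain PyTorch state dict with strict=False,
# as in the script.
pipe.text_encoder.load_state_dict(
    torch.load('./models/text_encoder/pytorch_model.bin'), strict=False)
pipe = pipe.to('cuda')
```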