38c38
< width, _ = draw.textsize(test_line, font=font)
---
> width, _ = draw.textlength(test_line, font=font)
113c113,115
< pad_image = pad_image.resize(images[0].size, Image.ANTIALIAS)
---
> pad_image = pad_image.resize(images[0].size, Image.Resampling.LANCZOS)

lvalics commented 2 months ago

Replacing `Image.ANTIALIAS` with `Image.Resampling.LANCZOS` should fix it. But it is also failing for me on `width, _ = draw.textsize(test_line, font=font)`.

lvalics commented 2 months ago

Possible fix, but not yet tested carefully

In def process_mulline_text

width = draw.textlength(test_line, font=font)

Note: drop the `, _` unpacking. `textlength` returns a single number, not a `(width, height)` tuple.

def add_caption(image, text, position="bottom-mid", font=None, text_color='black',
                bg_color=(255, 255, 255), bg_opacity=200):
    """Draw ``text`` as a caption along the bottom edge of ``image``.

    The text is split into lines by ``process_mulline_text`` (defined
    elsewhere in this file), a semi-transparent rectangle is composited
    behind the lines, and the lines are stacked upwards from the bottom edge.

    Args:
        image: PIL image to caption.
        text: Caption text. An empty string returns ``image`` untouched.
        position: One of ``'bottom-left'``, ``'bottom-mid'`` or
            ``'bottom-right'``.
        font: Font object forwarded to the ``ImageDraw`` text calls.
        text_color: Fill colour used for the text.
        bg_color: RGB sequence used for the background rectangle.
        bg_opacity: Alpha value appended to ``bg_color``.

    Returns:
        ``image`` itself when ``text`` is empty, otherwise a new RGB image
        with the caption drawn on it.

    Raises:
        ValueError: If ``position`` is not one of the supported values.
    """
    if text == "":
        return image

    # Fail early with a clear message instead of hitting an unbound
    # ``text_position`` halfway through the layout loop.
    if position not in ('bottom-left', 'bottom-mid', 'bottom-right'):
        raise ValueError(
            "position must be 'bottom-left', 'bottom-mid' or 'bottom-right', "
            f"got {position!r}"
        )

    image = image.convert("RGBA")
    draw = ImageDraw.Draw(image)
    width, height = image.size
    lines = process_mulline_text(draw, text, font, width)
    if not lines:
        # Nothing to draw; skip the rectangle, which would otherwise have
        # its bottom edge above its top edge.
        return image.convert('RGB')

    # Lay the lines out from the bottom up. Each line is measured and placed
    # in the same pass so its offset is computed from its own width and
    # height, and the last line of the caption ends up lowest.
    placed = []  # (x, y, text_width, line), bottom-most line first
    overall_text_height = 0
    for line in reversed(lines):
        left, top, right, bottom = draw.textbbox((0, 0), line, font=font)
        text_width = right - left
        text_height = bottom - top
        overall_text_height += text_height + 5  # extra spacing between lines

        if position == 'bottom-right':
            x = width - text_width - 10
        elif position == 'bottom-left':
            x = 10
        else:  # 'bottom-mid'
            x = (width - text_width) // 2

        placed.append((x, height - overall_text_height, text_width, line))

    # Background rectangle spanning the drawn lines, whatever the alignment.
    # For 'bottom-mid' this equals the centred rectangle of the widest line.
    rect_left = min(x for x, _, _, _ in placed) - 5
    rect_right = max(x + line_width for x, _, line_width, _ in placed) + 5
    rectangle_position = [rect_left, height - overall_text_height - 5,
                          rect_right, height - 10]

    # Draw the rectangle on a transparent layer so its alpha blends with the
    # picture instead of overwriting it.
    overlay = Image.new('RGBA', image.size)
    ImageDraw.Draw(overlay).rectangle(
        rectangle_position, fill=tuple(bg_color) + (bg_opacity,)
    )
    image = Image.alpha_composite(image, overlay)

    draw = ImageDraw.Draw(image)
    for x, y, _, line in placed:
        draw.text((x, y), line, fill=text_color, font=font)

    return image.convert('RGB')

Speedway1 commented 2 months ago

Possible fix, but not yet tested carefully

In def process_mulline_text

width = draw.textlength(test_line, font=font)

Note: drop the `, _` unpacking. `textlength` returns a single number, not a `(width, height)` tuple.

def add_caption(image, text, position="bottom-mid", font=None, text_color='black',
                bg_color=(255, 255, 255), bg_opacity=200):
    """Draw ``text`` as a caption along the bottom edge of ``image``.

    The text is split into lines by ``process_mulline_text`` (defined
    elsewhere in this file), a semi-transparent rectangle is composited
    behind the lines, and the lines are stacked upwards from the bottom edge.

    Args:
        image: PIL image to caption.
        text: Caption text. An empty string returns ``image`` untouched.
        position: One of ``'bottom-left'``, ``'bottom-mid'`` or
            ``'bottom-right'``.
        font: Font object forwarded to the ``ImageDraw`` text calls.
        text_color: Fill colour used for the text.
        bg_color: RGB sequence used for the background rectangle.
        bg_opacity: Alpha value appended to ``bg_color``.

    Returns:
        ``image`` itself when ``text`` is empty, otherwise a new RGB image
        with the caption drawn on it.

    Raises:
        ValueError: If ``position`` is not one of the supported values.
    """
    if text == "":
        return image

    # Fail early with a clear message instead of hitting an unbound
    # ``text_position`` halfway through the layout loop.
    if position not in ('bottom-left', 'bottom-mid', 'bottom-right'):
        raise ValueError(
            "position must be 'bottom-left', 'bottom-mid' or 'bottom-right', "
            f"got {position!r}"
        )

    image = image.convert("RGBA")
    draw = ImageDraw.Draw(image)
    width, height = image.size
    lines = process_mulline_text(draw, text, font, width)
    if not lines:
        # Nothing to draw; skip the rectangle, which would otherwise have
        # its bottom edge above its top edge.
        return image.convert('RGB')

    # Lay the lines out from the bottom up. Each line is measured and placed
    # in the same pass so its offset is computed from its own width and
    # height, and the last line of the caption ends up lowest.
    placed = []  # (x, y, text_width, line), bottom-most line first
    overall_text_height = 0
    for line in reversed(lines):
        left, top, right, bottom = draw.textbbox((0, 0), line, font=font)
        text_width = right - left
        text_height = bottom - top
        overall_text_height += text_height + 5  # extra spacing between lines

        if position == 'bottom-right':
            x = width - text_width - 10
        elif position == 'bottom-left':
            x = 10
        else:  # 'bottom-mid'
            x = (width - text_width) // 2

        placed.append((x, height - overall_text_height, text_width, line))

    # Background rectangle spanning the drawn lines, whatever the alignment.
    # For 'bottom-mid' this equals the centred rectangle of the widest line.
    rect_left = min(x for x, _, _, _ in placed) - 5
    rect_right = max(x + line_width for x, _, line_width, _ in placed) + 5
    rectangle_position = [rect_left, height - overall_text_height - 5,
                          rect_right, height - 10]

    # Draw the rectangle on a transparent layer so its alpha blends with the
    # picture instead of overwriting it.
    overlay = Image.new('RGBA', image.size)
    ImageDraw.Draw(overlay).rectangle(
        rectangle_position, fill=tuple(bg_color) + (bg_opacity,)
    )
    image = Image.alpha_composite(image, overlay)

    draw = ImageDraw.Draw(image)
    for x, y, _, line in placed:
        draw.text((x, y), line, fill=text_color, font=font)

    return image.convert('RGB')

There were many issues in the file, mainly incompatibilities with the latest version of PIL (Pillow) and the `textsize` calls, which should be replaced with `textbbox` to handle multiple lines in the caption.

The attached version of the file works well with the latest library, and also with multiple text lines and multiple captions. We've run quite a lot of tests and everything is working well.

Because of all the work done on the file, the diff shows a lot of changed lines, so I am attaching the file here: utils.zip

HVision-NKU / StoryDiffusion

Bug fixes in utils/utils.py #28

38c38 < width, _ = draw.textsize(test_line, font=font)

113c113,115 < pad_image = pad_image.resize(images[0].size, Image.ANTIALIAS)

Calculate positions for each line of text