[feature] optimize Browserstack by putting ALL samples on one big page and cutting the screenshot afterwards.

See #26, a previous suggestion where we'd request more Browserstack renderings in parallel to max out our Browserstack quota.

This one is a suggestion to improve on an optimization that @m4rc1e already implemented. We already have 3 samples (I think) on one page per font, but we still make a lot of Browserstack API calls.

I wonder if we could create one very huge page of all samples that we need for one job and request that with just one Browserstack API call per browser. The resulting big page can be cut in code automatically and then used accordingly.

I made a "cutting" test tool to demonstrate:

Developer tools CSS modification on gfregressions, we will cut at the black rectangles:

Screenshot from 2020-08-26 02-26-11

The resulting screenshot, also taken from the developer tools, not via browserstack (I cut this smaller on the right side, because it was huge with my screen resolution.):

35 238 63 0_compare_1254ee0c-7ebb-4a12-b948-63b968c420eb

The three resulting cut images:

… cut …

35 238 63 0_compare_1254ee0c-7ebb-4a12-b948-63b968c420eb_0

… cut …

35 238 63 0_compare_1254ee0c-7ebb-4a12-b948-63b968c420eb_1

… cut …

35 238 63 0_compare_1254ee0c-7ebb-4a12-b948-63b968c420eb_2

This is how the test tool is invoked:

(venv) ./image cutting proposal> ./cut.py 35.238.63.0_compare_1254ee0c-7ebb-4a12-b948-63b968c420eb.png 
[debug] ('[find_cut] Starting from 221.',)
[debug] ('[find_cut] Found 240 continue at 259.',)
[debug] ('[find_cut] Starting from 4489.',)
[debug] ('[find_cut] Found 4508 continue at 4527.',)
[debug] ('[find_cut] Starting from 8757.',)
[debug] ('[find_cut] Found 8776 continue at 8795.',)
Got cuts: [240, 4508, 8776]
Saved 240 to 4508 as 35.238.63.0_compare_1254ee0c-7ebb-4a12-b948-63b968c420eb_0.png.
Saved 4508 to 8776 as 35.238.63.0_compare_1254ee0c-7ebb-4a12-b948-63b968c420eb_1.png.
Saved 8776 to 13119 as 35.238.63.0_compare_1254ee0c-7ebb-4a12-b948-63b968c420eb_2.png.

And here's the test tool's code:

#!/usr/bin/env python

import os
from math import ceil
from PIL import Image

def debug(*args):
    """Print a debug line: the '[debug]' tag followed by the args tuple.

    The positional arguments are printed as one tuple (not unpacked), so
    a single message shows up as ``[debug] ('message',)``.
    """
    tag = '[debug]'
    print(tag, args)

def is_blackish(pixel, threshold=15):
    """Return True if the r, g and b values of *pixel* are all near black.

    pixel is a tuple of pixel color values from 0 to 255, (r, g, b) or
    (r, g, b, a). Only the first three values are examined; alpha is
    ignored. Each examined value must be strictly below *threshold*
    (default 15) for the pixel to count as blackish.

    This is not proper color handling but good enough for now. In fact,
    we could probably look at only pitch black (0, 0, 0, a) pixels, but
    this makes it a bit more flexible if colors are mangled by color
    management or compression.
    """
    # `all`, not `any`: a saturated color such as pure blue (0, 0, 255)
    # has two channels at zero but is certainly not black.
    return all(channel < threshold for channel in pixel[:3])

def analyze_triangle_cut(marked):
    """
    Find the peak of what is roughly a triangle.

    marked is a sequence of numbers, one per image row (find_cut passes
    the count of blackish pixels in each row). The values are expected to
    rise to a maximum, optionally stay there for a few rows, and then
    fall again, with about as many rising rows as falling rows.

    Returns a tuple (fail_message, index):
      - on success fail_message is None and index is the offset into
        marked at which to cut;
      - on failure fail_message describes why the shape was rejected and
        index is -1.

    Not very precise but good enough for now, to not have false
    positives or false negatives.
    """
    if not marked:
        # max() would raise ValueError on an empty sequence; report it
        # like any other non-match instead.
        return 'No lines to analyze.', -1

    pre_max = 0   # rows seen before the first maximum row
    all_max = 0   # rows at the maximum value (the plateau)
    post_max = 0  # rows seen after the plateau
    max_val = max(marked)

    for i, val in enumerate(marked):
        if all_max == 0:  # not found_max, ascending
            if val == max_val:
                all_max += 1
            elif i > 0 and val < marked[i - 1]:
                # i - 1 is a local max
                # Until a max was found no item may be lower than the
                # one before it.
                return (f'Descending at {i}, but we did not encounter '
                        f'max {max_val} yet.', -1)
            else:
                pre_max += 1
        elif post_max == 0:  # found_max, not descending
            # Max can be more than one line, i.e. it will be often two.
            if val == max_val:
                all_max += 1
            else:
                post_max += 1
        else:  # found_max, descending
            if val > marked[i - 1]:
                # i - 1 is a local min
                return (f'Ascending at {i}, but we did encounter '
                        f'max {max_val} already.', -1)
            post_max += 1
    # Just a self sanity check, ensure the above code registered all lines.
    assert len(marked) == pre_max + all_max + post_max

    # Some more checks to make sure we got roughly what is expected.
    # all_max is at least 1 here, so passing these two checks also
    # guarantees pre_max and post_max are non-zero for the division below.
    if pre_max < all_max:
        return (f'Less lines ascending ({pre_max}) '
                f'than at maximum {all_max}.', -1)
    if post_max < all_max:
        return (f'Less lines descending ({post_max}) '
                f'than at maximum {all_max}.', -1)

    # Let's use 1/6 as the relative max size difference between pre and
    # post, ideally it's 0.
    if 1/6 < 1 - min(pre_max, post_max) / max(pre_max, post_max):
        return (f'Lines ascending ({pre_max}) are not roughly the same '
                f'amount as lines descending ({post_max}).', -1)

    # Cut in the middle of max or one row after, if there's a uneven number
    # of max rows.
    return None, ceil(all_max / 2) + pre_max

def find_cut(im, initial_y):
    """Examine the run of marker rows starting at initial_y.

    im must provide .width, .height and .getpixel((x, y)) (main passes a
    PIL image). Starting at row initial_y, rows are consumed for as long
    as their pixel at x=0 is blackish; for each such row the number of
    blackish pixels across the whole row is recorded. The recorded counts
    are then handed to analyze_triangle_cut.

    The caller is expected to pass a row whose x=0 pixel is blackish, as
    get_cuts does, so that at least one row is consumed.

    Returns a tuple (cut_y, continue_y):
      - cut_y is the absolute row at which to cut, or -1 if the run did
        not look like a marker;
      - continue_y is the first row after the consumed run, i.e. where
        the caller should resume scanning.
    """
    debug(f'[find_cut] Starting from {initial_y}.')
    lines = []
    for y in range(initial_y, im.height):
        if not is_blackish(im.getpixel((0, y))):
            break
        # Count over every column, including the last one
        # (x == im.width - 1).
        blackish_xs = sum(
            1 for x in range(im.width) if is_blackish(im.getpixel((x, y)))
        )
        lines.append(blackish_xs)

    continue_y = initial_y + len(lines)
    fail_message, found_y = analyze_triangle_cut(lines)

    # A negative index without a message is treated as a failure too, so
    # that cut_y below is always derived from a valid offset.
    if fail_message is not None or found_y < 0:
        debug(f'[find_cut] Not a match between {initial_y} and {continue_y}: '
              f'{fail_message}')
        return -1, continue_y

    cut_y = found_y + initial_y
    debug(f'[find_cut] Found {cut_y} continue at {continue_y}.')
    return cut_y, continue_y

def get_cuts(im):
    """Return the list of rows at which im should be cut, top to bottom.

    im must provide .height and .getpixel((x, y)) (main passes a PIL
    image). The first pixel column (x=0) is scanned downwards; every time
    a blackish pixel is met, find_cut analyzes the run starting there.
    Runs that find_cut accepts contribute one cut row; rejected runs are
    skipped. Scanning resumes at the row find_cut reports as the end of
    the run.

    Returns an empty list if no cut was found (including for an image of
    height 0).
    """
    cuts = []
    y = 0
    # A single cursor that always moves forward guarantees termination,
    # also when a blackish run extends to the very last row of the image.
    while y < im.height:
        if not is_blackish(im.getpixel((0, y))):
            y += 1
            continue

        cut_y, continue_y = find_cut(im, y)
        if continue_y <= y:
            # Raised explicitly (rather than via `assert`) so the guard
            # against an endless loop also holds under `python -O`.
            raise AssertionError('Must advance.')
        if cut_y >= 0:
            cuts.append(cut_y)
        y = continue_y
    return cuts

def main(image_path):
    """Cut the image at image_path into one PNG file per detected cut.

    The cut rows are determined by get_cuts. Each piece spans from one
    cut row to the next one (the last piece runs to the bottom of the
    image) and is saved next to the input as '<basepath>_<i>.png', where
    basepath is image_path without its extension. Anything above the
    first cut is not saved. Progress is reported on stdout.
    """
    basepath, _ = os.path.splitext(image_path)
    # The context manager closes the underlying file when we are done.
    with Image.open(image_path) as im:
        cuts = get_cuts(im)
        print('Got cuts:', cuts)
        for i, y_low in enumerate(cuts):
            # The right and lower edges of a crop box are exclusive, so
            # use im.width / im.height to keep the last column and row.
            y_high = cuts[i + 1] if i + 1 < len(cuts) else im.height
            im_crop = im.crop((0, y_low, im.width, y_high))
            # Always save as .png
            cropped_path = f'{basepath}_{i}.png'
            im_crop.save(cropped_path)
            print(f'Saved {y_low} to {y_high} as {cropped_path}.')

if __name__ == '__main__':
    import sys
    # Exit with a usage hint instead of an IndexError traceback when the
    # image path argument is missing. Extra arguments are still ignored.
    if len(sys.argv) < 2:
        sys.exit(f'Usage: {sys.argv[0]} IMAGE_PATH')
    main(sys.argv[1])

googlefonts / diffbrowsers

[feature] optimize Browserstack by putting ALL samples on one big page and cutting the screenshot afterwards. #27