Run experiments to show differences between penalizers

jara001 commented 2 years ago

In 1.5.0 penalizers were added. However we do not know about their performance. It would be nice to run some long experiments and compare them (using #25).

jara001 commented 2 years ago

(This is also partially covered in #32.)

I have run experiments on both Torino and Ruudskogen maps. I have used 100 loops and 100 budget. Success is marked when a valid solution is found within the given budget.

Penalizer	Method	Ruudskogen (25 seg)	Torino (8 seg)	Torino (9 seg)
Count	-	29%	0%	100%
Centerl.	max	18%	0%	97%
Centerl.	sum	17%	0%	100%
Segment	max	21%	0%	99%
Segment	sum	26%	0%	100%

jara001 commented 2 years ago

Since the success rate on Torino 8 is 0%, I assume that it really depends on the number of workers.

These experiments were performed on ritchie (with 32 workers), whereas experiments in #32 were done locally with 4 workers.

jara001 commented 2 years ago

Edit: Torino 8 was done on optim with 26 workers, not ritchie.

jara001 commented 2 years ago

In addition, penalty was set to 300 for centerline and count(?), whereas it was 600 for segment penalizer.

jara001 commented 2 years ago

For showing the results I have used following script. Called python3 cumhist.py NAME_OF_FIGURE LOG_FILE [LOG_FILES].

def quantiles(data, n = 10, method = "inclusive"):
    """Ported from Python 3.10.

    https://github.com/python/cpython/blob/3.10/Lib/statistics.py

    Note: We are using 'inclusive' as default (similar to numpy),
    as it is what we want.

    Inclusive observes the data as min-max, exclusive expects that
    the extrema might be outside of the given data.
    """
    if n < 1:
        raise ValueError("n must be at least 1")
    data = sorted(data)
    ld = len(data)
    if ld < 2:
        raise ValueError("must have at least two data points")
    if method == "inclusive":
        m = ld - 1
        result = []
        for i in range(1, n):
            j, delta = divmod(i * m, n)
            interpolated = (data[j] * (n - delta) + data[j + 1] * delta) / n
            result.append(interpolated)
        return result
    if method == "exclusive":
        m = ld + 1
        result = []
        for i in range(1, n):
            j = i * m // n                               # rescale i to m/n
            j = 1 if j < 1 else ld-1 if j > ld-1 else j  # clamp to 1 .. ld-1
            delta = i*m - j*n                            # exact integer math
            interpolated = (data[j - 1] * (n - delta) + data[j] * delta) / n
            result.append(interpolated)
        return result
    raise ValueError("Unknown method: %s" % str(method))

import numpy

def compute_quantiles(data, n = 10):
    """Wrapper around numpy's quantile.

    See: https://numpy.org/doc/stable/reference/generated/numpy.quantile.html
    """
    result = []

    for i in range(1, n):
        result.append(
            numpy.quantile(
                data,
                i / 100.0
            )
        )

    return result

import sys
import os
import matplotlib.pyplot
import tqdm

data = []
num_files = 0
data_len = 0

for arg in tqdm.tqdm(sys.argv[2:], desc = "Loading"):
    if os.path.exists(arg):
        num_files += 1
        data.append(
            numpy.cumsum(
                numpy.asarray(
                    [
                        int(_d) for _d in os.popen(
                            "grep -e '^correct' -e '^penalty' %s | head -n -1 | cut -d: -f1 | sed 's/penalty/0/g' | sed 's/correct/1/g'" % arg
                        ).read().rstrip().split("\n")
                    ]
                )
            )
        )
        data_len = max(data_len, len(data[-1]))

f = matplotlib.pyplot.figure()
a = f.add_subplot(111)
a.set_title(sys.argv[1])
a.set_xlabel("Generation index [-]")
#per = lambda x: x; a.set_ylabel("Cumulated valid solutions [-]")
per = lambda x: (x / data_len) * 100.0; a.set_ylabel("Cumulated valid solutions [%]")
a.grid(color = "lightgray", dashes = (5, 5))

x = range(len(data[0]))

full_data = numpy.asarray(data)

#print(numpy.asarray(data))
#print(numpy.mean(data, axis=0))

# Mins / max
mins = []
maxs = []

# Quantiles
Q = []

if num_files > 1:
    for col in tqdm.tqdm(full_data.T, desc = "Processing"):
        Q.append(quantiles(col, n = 100))
        mins.append(min(col))
        maxs.append(max(col))

    Q = numpy.asarray(Q).T

    mins = numpy.asarray(mins).T
    maxs = numpy.asarray(maxs).T

    # Fill between (min/max background)
    matplotlib.pyplot.fill_between(x, per(mins), per(maxs), figure = f, color=(0.39, 0.78, 0.98), label = "Min/Max")

    # Fill between (Q background)
    matplotlib.pyplot.fill_between(x, per(Q[0, :]), per(Q[-1, :]), figure = f, color="lightskyblue", label = "P1/P99") # Q1, Q99
    matplotlib.pyplot.fill_between(x, per(Q[5-1, :]), per(Q[95-1, :]), figure = f, color="lightblue", label = "P5/P95") # Q5, Q95
    matplotlib.pyplot.fill_between(x, per(Q[10-1, :]), per(Q[90-1, :]), figure = f, color="paleturquoise", label = "P10/P90") # Q10, Q90
    matplotlib.pyplot.fill_between(x, per(Q[25-1, :]), per(Q[75-1, :]), figure = f, color="lightcyan", label = "P25/P75") # Q25, Q75

else:
    Q.append(quantiles(data[0], n = 100))

matplotlib.pyplot.plot(x, per(numpy.mean(data, axis=0)), figure = f, label = "Mean")

a.legend(loc = "upper left")
#a.legend(["Minmax", "1", "5", "10", "25", "mean"])

matplotlib.pyplot.savefig(sys.argv[1] + ".pdf", format = "pdf", dpi = 300)
matplotlib.pyplot.savefig(sys.argv[1] + ".png", format = "png", dpi = 300)

#matplotlib.pyplot.show()

jara001 commented 2 years ago

Torino 9 results are displayed like this:

Cumulative histogram shows, how many valid solutions were found.

centerline_max centerline_sum count segment_max segment_sum

jara001 commented 2 years ago

However, since it was not easy to compare these graphs, Tonda suggested different visualization "how long it took to find at least one solution".

Following script was used: python3 cumhist2.py NAME_OF_FIGURE LOG_FILE [LOG_FILES]

MAX_DATA = 200

def quantiles(data, n = 10, method = "inclusive"):
    """Ported from Python 3.10.

    https://github.com/python/cpython/blob/3.10/Lib/statistics.py

    Note: We are using 'inclusive' as default (similar to numpy),
    as it is what we want.

    Inclusive observes the data as min-max, exclusive expects that
    the extrema might be outside of the given data.
    """
    if n < 1:
        raise ValueError("n must be at least 1")
    data = sorted(data)
    ld = len(data)
    if ld < 2:
        raise ValueError("must have at least two data points")
    if method == "inclusive":
        m = ld - 1
        result = []
        for i in range(1, n):
            j, delta = divmod(i * m, n)
            interpolated = (data[j] * (n - delta) + data[j + 1] * delta) / n
            result.append(interpolated)
        return result
    if method == "exclusive":
        m = ld + 1
        result = []
        for i in range(1, n):
            j = i * m // n                               # rescale i to m/n
            j = 1 if j < 1 else ld-1 if j > ld-1 else j  # clamp to 1 .. ld-1
            delta = i*m - j*n                            # exact integer math
            interpolated = (data[j - 1] * (n - delta) + data[j] * delta) / n
            result.append(interpolated)
        return result
    raise ValueError("Unknown method: %s" % str(method))

import numpy

def compute_quantiles(data, n = 10):
    """Wrapper around numpy's quantile.

    See: https://numpy.org/doc/stable/reference/generated/numpy.quantile.html
    """
    result = []

    for i in range(1, n):
        result.append(
            numpy.quantile(
                data,
                i / 100.0
            )
        )

    return result

import sys
import os
import matplotlib.pyplot
import tqdm

data = []
num_files = 0
data_len = 0

for arg in tqdm.tqdm(sys.argv[2:], desc = "Loading"):
    if os.path.exists(arg):
        num_files += 1
        data.append(
            numpy.cumsum(
                numpy.asarray(
                    [
                        int(_d) for _d in os.popen(
                            "grep -e '^correct' -e '^penalty' %s | head -n -1 | cut -d: -f1 | sed 's/penalty/0/g' | sed 's/correct/1/g' | head -n %d" % (arg, MAX_DATA)
                        ).read().rstrip().split("\n")
                    ]
                )
            )
        )
        data[-1][data[-1] > 1] = 1
        data_len = max(data_len, len(data[-1]))

f = matplotlib.pyplot.figure()
a = f.add_subplot(111)
a.set_title(sys.argv[1])
a.set_xlabel("Budget spent [-]")
#per = lambda x: x; a.set_ylabel("Cumulated valid solutions [-]")
per = lambda x: x * 100.0; a.set_ylabel("Found solutions [%]")
a.grid(color = "lightgray", dashes = (5, 5))
a.set_ylim((-1, 101))

x = range(len(data[0]))

full_data = numpy.asarray(data)

#print(numpy.asarray(data))
#print(numpy.mean(data, axis=0))

# Mins / max
mins = []
maxs = []

# Quantiles
Q = []

if num_files > 1:
    for col in tqdm.tqdm(full_data.T, desc = "Processing"):
        Q.append(quantiles(col, n = 100))
        mins.append(min(col))
        maxs.append(max(col))

    Q = numpy.asarray(Q).T

    mins = numpy.asarray(mins).T
    maxs = numpy.asarray(maxs).T

    # Fill between (min/max background)
    #matplotlib.pyplot.fill_between(x, per(mins), per(maxs), figure = f, color=(0.39, 0.78, 0.98), label = "Min/Max")

    # Fill between (Q background)
    #matplotlib.pyplot.fill_between(x, per(Q[0, :]), per(Q[-1, :]), figure = f, color="lightskyblue", label = "P1/P99") # Q1, Q99
    #matplotlib.pyplot.fill_between(x, per(Q[5-1, :]), per(Q[95-1, :]), figure = f, color="lightblue", label = "P5/P95") # Q5, Q95
    #matplotlib.pyplot.fill_between(x, per(Q[10-1, :]), per(Q[90-1, :]), figure = f, color="paleturquoise", label = "P10/P90") # Q10, Q90
    #matplotlib.pyplot.fill_between(x, per(Q[25-1, :]), per(Q[75-1, :]), figure = f, color="lightcyan", label = "P25/P75") # Q25, Q75

else:
    Q.append(quantiles(data[0], n = 100))

matplotlib.pyplot.plot(x, per(numpy.mean(data, axis=0)), figure = f, label = "Mean")
matplotlib.pyplot.fill_between(x, [0 for _ in range(len(x))], per(numpy.mean(data, axis=0)), figure = f, color=(0.87, 1.0, 1.0, 0.77))

a.set_title("%s, AUC: %.3f" % (sys.argv[1], sum(numpy.mean(data, axis = 0)) / len(x)))

a.legend(loc = "upper left")
#a.legend(["Minmax", "1", "5", "10", "25", "mean"])

matplotlib.pyplot.savefig(sys.argv[1] + ".pdf", format = "pdf", dpi = 300)
matplotlib.pyplot.savefig(sys.argv[1] + ".png", format = "png", dpi = 300)

#matplotlib.pyplot.show()

jara001 commented 2 years ago

The results are then shown as a single (mean) line with a simple metric AUC (area under curve). Higher number = better. centerline_max2 centerline_sum2 count2 segment_max2 segment_sum2

Penalizer	Method	Torino (9 seg)
Count	-	0.726
Centerl.	max	0.737
Centerl.	sum	0.739
Segment	max	0.789
Segment	sum	0.832

jara001 commented 2 years ago

A small comparison of both tables.

100x100 runs

Successfully found solutions:

Penalizer	Method	Ruudskogen (25 seg)	Torino (8 seg)	Torino (9 seg)
Count	-	29%	0%	100%
Centerl.	max	18%	0%	97%
Centerl.	sum	17%	0%	100%
Segment	max	21%	0%	99%
Segment	sum	26%	0%	100%

AUC:

Penalizer	Method	Ruudskogen (25 seg)	Torino (9 seg)
Count	-	0.079	0.726
Centerl.	max	0.046	0.737
Centerl.	sum	0.045	0.739
Segment	max	0.063	0.789
Segment	sum	0.095	0.832

jara001 / ng_trajectory

Run experiments to show differences between penalizers #26