Tidy up typefaces - Githubissues

bbkr commented 1 year ago

There is a huge mess in character ranges:

MATHEMATICAL SCRIPT SMALL A 𝒶 MATHEMATICAL SCRIPT SMALL B 𝒷 MATHEMATICAL SCRIPT SMALL C 𝒸 MATHEMATICAL SCRIPT SMALL D 𝒹 SCRIPT SMALL E ℯ MATHEMATICAL SCRIPT SMALL F 𝒻 ...

All typefaces have to be regenerated and merged into consistent blocks.

bbkr commented 1 year ago

Oh yes, mathematical and letterlike spaces have extremely rare overlaps:

SCRIPT CAPITAL P MATHEMATICAL SCRIPT CAPITAL P

bbkr commented 1 year ago

Phew... quick-and-dirty helper script:

# Glyphs collected by the scan further down, organised as
# typeface name => ASCII character => array of matching Unicode glyphs.
my %typefaces;

# Maps the last word of a Unicode character name to the plain ASCII
# character it stands for: the letters 'A'..'Z' map to themselves and the
# digit names 'ZERO'..'NINE' map to the integers 0..9.
my %ascii = flat
    ( 'A' .. 'Z' ).map( -> $letter { $letter => $letter } ),
    <ZERO ONE TWO THREE FOUR FIVE SIX SEVEN EIGHT NINE>.kv.map(
        # .kv yields index, word; the index is the digit's numeric value
        -> $value, $word { $word => $value }
    );

# Name words that disqualify a character outright:
#   MODIFIER                     - typeface modifiers like MODIFIER LETTER SMALL SCRIPT G
#   LATIN                        - latin versions like LATIN SMALL LETTER SCRIPT G
#   BOX, DINGBAT, CIRCLED, ARROW - shapes like BALLOT BOX WITH BOLD SCRIPT X
#   OLD                          - old versions like OLD ITALIC NUMERAL ONE
#   DOTLESS                      - dotless forms like MATHEMATICAL ITALIC SMALL DOTLESS I
#   TURNED, REVERSED             - turned forms like TURNED SANS-SERIF Q
# NOTE(review): the LATIN rule also rejects names such as
# FULLWIDTH LATIN CAPITAL LETTER A - confirm that this is intended.
my @rejected-words = <MODIFIER LATIN BOX DINGBAT CIRCLED ARROW OLD DOTLESS TURNED REVERSED>;

# Name words that mark a typeface; a name must contain at least one of them.
my @typeface-words = <SCRIPT FULLWIDTH MONOSPACE BOLD ITALIC SANS-SERIF FRAKTUR DOUBLE-STRUCK BALLOT OBLIQUE HEAVY>;

# Walk every code point, keep the names that describe a typeface variant of
# an ASCII letter or digit, and file the glyph in %typefaces under
# typeface name => ASCII character.
for 1 .. 0x10FFFF -> $codepoint {

    my $name  = $codepoint.uniname;
    my @words = $name.split( ' ' );

    # the letter or digit name must be the last word
    next unless %ascii{ @words.tail }:exists;

    next if any( @words ) eq any( @rejected-words );
    next unless any( @words ) eq any( @typeface-words );

    # the last word is always the looked-up character
    my $char = %ascii{ @words.pop };

    # an optional case / digit marker directly precedes the character name
    my $marker = @words.tail;
    if $marker eq any( <CAPITAL UPPER LOWER SMALL DIGIT> ) {
        @words.pop;
        $char .= lc if $marker eq 'LOWER' | 'SMALL';
    }

    # merge mathematical variants into the same block as their plain namesakes
    @words.shift if @words.head eq 'MATHEMATICAL';

    # hyphens are stripped from each word because '-' is the word separator
    # in the resulting typeface name
    my $typeface = @words.map( *.subst( '-' ) ).join( '-' ).lc;

    # .push autovivifies the glyph array on first use
    %typefaces{ $typeface }{ $char }.push: $name.uniparse;

}

# Print one `our %<typeface> = ( ... );` declaration per collected typeface,
# in sorted typeface order. Each entry maps an ASCII character to the list
# of its glyphs, with every glyph quoted separately.
for %typefaces.keys.sort -> $path {

    my %values = %typefaces{ $path };

    say 'our %' ~ $path ~ ' = (';

    # fixed key order: digits, then lowercase, then uppercase letters
    for (0..9, 'a'..'z', 'A'..'Z').flat -> $char {

        next unless %values{ $char }:exists;

        # The hash value is an Array held in a scalar container. Assigning it
        # directly would produce a one-element array, and all glyphs would be
        # printed as a single space-joined string. .list exposes the elements.
        my @glyphs = %values{ $char }.list;

        say '    \'' ~ $char ~ '\' => [ ' ~ @glyphs.map( { '\'' ~ $_ ~ '\'' } ).join( ', ' ) ~ ' ],';
    }

    say ');';
}

# Print the `our @typeface = ( ... );` declaration that references every
# collected typeface hash by name, in sorted order, one per line.
.say for
    'our @typeface = (',
    |%typefaces.keys.sort.map( '%' ~ * ~ ',' ),
    ');';

bbkr / HomoGlypher

Tidy up typefaces #8