Pomax / lib-font

This library adds a new Font() object to the JavaScript toolbox, similar to new Image() for images
MIT License
728 stars 72 forks source link

Lookuptype 6 tests #105

Open RoelN opened 3 years ago

RoelN commented 3 years ago

I'm testing as follows:

import { Font } from "./Font.js";

// Repro script: create a Font, point it at a test font file, and dump the
// coverage table of lookup type 6 subtables found through the GSUB table.
const font = new Font("boink");
font.src = `./fonts/Recursive_VF_1.064.ttf`;
// font.src = `./fonts/MehrNastaliqWeb-Regular.ttf`;

// onload is presumably invoked once the file assigned to `src` has been
// loaded and parsed - confirm against the lib-font documentation.
font.onload = (evt) => {
  // The loaded font comes from the event detail; this local `font`
  // shadows the outer `font` constant.
  let font = evt.detail.font;

  // NOTE: cmap and name are destructured here, but only GSUB is used below.
  const { cmap, name, GSUB } = font.opentype.tables;

  let scripts = GSUB.getSupportedScripts();

  // Walk script -> language system -> feature -> lookup.
  scripts.forEach((script) => {
    let langsys = GSUB.getSupportedLangSys(script);

    langsys.forEach((lang) => {
      let langSysTable = GSUB.getLangSysTable(script, lang);
      let features = GSUB.getFeatures(langSysTable);
      // NOTE: featureCount is computed but not used in this snippet.
      let featureCount = features.length;

      features.forEach((feature) => {
        const lookupIDs = feature.lookupListIndices;

        lookupIDs.forEach((id) => {
          const lookup = GSUB.getLookup(id);

          // Only dump lookup type 6 for the "dflt" language system. The
          // script tag is not checked, so this matches "dflt" under every
          // script, not just DFLT.
          if (lookup.lookupType === 6 && lang === "dflt") {
            // Only the first subtable (index 0) of the lookup is inspected.
            const subtable = lookup.getSubTable(0);
            const coverage = subtable.getCoverageTable();

            console.log(coverage);
            // console.log(subtable);
          }
        });
      });
    });
  });
};

For Recursive, this outputs stuff like:

CoverageTable { coverageFormat: 768 }
CoverageTable { coverageFormat: 768 }
CoverageTable { coverageFormat: 768 }
CoverageTable { coverageFormat: 768 }
CoverageTable { coverageFormat: 45062 }
CoverageTable { coverageFormat: 3 }
CoverageTable { coverageFormat: 3 }
CoverageTable { coverageFormat: 3 }
CoverageTable { coverageFormat: 3 }

Which seems wrong.

If I understand https://github.com/Pomax/Font.js/blob/master/src/opentype/tables/advanced/shared/subtables/gsub.js#L301 correctly, there are only getters for substFormat 1 and 2, while most fonts I test with have a substFormat of 3.

Dumping the subtable appears to provide valid data, if these offsets are supposed to jump around this much (from 1 to 1410, to 1, to 20, etc.)

LookupType6 {
  substFormat: 3,
  coverageOffset: 1,
  backtrackGlyphCount: 164,
  backtrackCoverageOffsets: [
       1, 1410,   1,   20,    1,    0,   52,   2,   3, 530,  532,   0,
     536,  542,   3, 1257, 1257,   10,    3,   1, 122,   1, 1368,   1,
      20,    1,   0,   53,    2,    2,  533, 535,   0, 553,  558,   3,
       1,    0,   1,    8,    1, 1324,  743,   1,   0,   1,    8,   2,
      64,   11, 536,  530,  531,  532,  537, 538, 539, 540,  541, 542,
    1257,    1,   0,    1,    8,    2,   28,  11, 543, 544,  545, 546,
     547,  548, 549,  550,  551,  552, 1258,   2,   2, 520,  529,   0,
    1255, 1255,  10,    1,    0,    1,    8,   1,   6, 735,    1,   1,
     521,    1,   0,    1,
    ... 64 more items
  ],
  inputGlyphCount: 810,
  inputCoverageOffsets: [
    811, 812,  813,  814,  815,    2,   11,  560,  560,    0, 563, 564,
      1, 566,  571,    3,  585,  588,    9,  598,  598,   13, 600, 602,
     14, 606,  606,   17,  612,  613,   18,  617,  629,   20, 631, 635,
     33, 694,  698,   38,    4,    0,    1,    8,    1, 4056,   1,   8,
      4,  10,   18,   26,   32,  734,    3,  300,  946,  735,   3, 300,
    964, 732,    2,  946,  733,    2,  964,    1,    0,    1,   8,   1,
      6, 929,    2,    1,  240,  264,    0,    1,    0,    1,   8,   2,
     22,   8, 1195, 1196, 1197, 1198, 1199, 1200, 1201, 1195,   2,   2,
    301, 307,    0, 1062,
    ... 710 more items
  ],
  lookaheadGlyphCount: 12,
  lookaheadCoverageOffsets: [
      30, 48,    3,    1,
    4152,  3, 4152, 4152,
    4152,  0,    0,    3
  ],
  substitutionCount: 0,
  substLookupRecords: []
}

Getting backtrackCoverage, inputCoverage and lookaheadCoverage with a list of glyphs sounds like what I could use for Wakamai Fondue. Is there a way to get to that?

Pomax commented 3 years ago

substFormat 3 doesn't have any additional functions: it already parses everything in the constructor (as per https://github.com/Pomax/Font.js/blob/master/src/opentype/tables/advanced/shared/subtables/gsub.js#L281-L297), so you can directly consult lookup.substLookupRecords[...]

However, there is no coverage format 3. Only versions 1 and 2 exist, so that looks like a genuine bug.

RoelN commented 3 years ago

Ah, I was thinking this referred to the Chained Sequence Context Format

Pomax commented 3 years ago

Time to figure out why the CoverageTable format is so very wrong.

Step one: does getSubTable yield the correct data structure?

Pomax commented 3 years ago

Updating your code to include the lookup number and subtable format:

        ...
        lookupIDs.forEach((id) => {
          const lookup = GSUB.getLookup(id);

          // Only dump lookup type 6 for DFLT/dflt
          if (lookup.lookupType === 6 && lang === "dflt") {
            console.log(`--- lookup type 6 in dflt, lookup ${id}`);

            for (let i = 0; i < lookup.subTableCount; i++) {
              const subtable = lookup.getSubTable(i);
              console.log(`  - subtable ${subtable.substFormat}`)

              const coverage = subtable.getCoverageTable();
              console.log(`   `, coverage);
            }
          }
        });
      });
    });
  });
};
--- lookup type 6 in dflt, lookup 1
  - subtable 3
    CoverageTable { coverageFormat: 45062 }
  - subtable 3
    CoverageTable { coverageFormat: 3 }
  - subtable 3
    CoverageTable { coverageFormat: 1644 }
  ...

so let's look at whether or not https://github.com/Pomax/lib-font/blob/master/src/opentype/tables/advanced/shared/subtables/gsub.js#L281-L299 is doing the right thing:

    if (this.substFormat === 3) {
      this.backtrackGlyphCount = p.uint16;
      this.backtrackCoverageOffsets = [
        ...new Array(this.backtrackGlyphCount),
      ].map((_) => p.Offset16);
      this.inputGlyphCount = p.uint16;
      this.inputCoverageOffsets = [...new Array(this.inputGlyphCount)].map(
        (_) => p.Offset16
      );
      this.lookaheadGlyphCount = p.uint16;
      this.lookaheadCoverageOffsets = [
        ...new Array(this.lookaheadGlyphCount),
      ].map((_) => p.Offset16);
      this.substitutionCount = p.uint16;
      this.substLookupRecords = [...new Array(this.substitutionCount)].map(
        (_) => new SubstLookupRecord(p)
      );
    }

This should match https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#chseqctxt3:

Type Name Description
uint16 format Format identifier: format = 3
uint16 backtrackGlyphCount Number of glyphs in the backtrack sequence
Offset16 backtrackCoverageOffsets[backtrackGlyphCount] Array of offsets to coverage tables for the backtrack sequence
uint16 inputGlyphCount Number of glyphs in the input sequence
Offset16 inputCoverageOffsets[inputGlyphCount] Array of offsets to coverage tables for the input sequence
uint16 lookaheadGlyphCount Number of glyphs in the lookahead sequence
Offset16 lookaheadCoverageOffsets[lookaheadGlyphCount] Array of offsets to coverage tables for the lookahead sequence
uint16 seqLookupCount Number of SequenceLookupRecords
SequenceLookupRecord seqLookupRecords[seqLookupCount] Array of SequenceLookupRecords

The format field comes from the LookupType superclass:

/**
 * Superclass of the lookup subtable classes: reads the two leading fields
 * (format and coverage offset) and gives access to the coverage table that
 * the offset points at.
 */
class LookupType extends ParsedData {
  /**
   * @param p the parser to read from; presumably positioned at the start of
   *          the subtable (the parser and ParsedData are defined outside
   *          this snippet - confirm there).
   */
  constructor(p) {
    super(p);
    // Unconditionally read a uint16 format followed by an Offset16.
    // NOTE: not every subtable format has a coverage offset directly after
    // the format field (chained sequence context format 3 continues with
    // backtrackGlyphCount instead), so for those formats this consumes two
    // bytes too many and the subclass has to compensate.
    this.substFormat = p.uint16;
    this.coverageOffset = p.Offset16;
  }

  /**
   * Parse the coverage table located at `coverageOffset`, relative to
   * `this.start`.
   *
   * Side effect: moves the parser's currentPosition. `this.parser` and
   * `this.start` are not set in this class; presumably ParsedData provides
   * them - confirm.
   *
   * @returns a new CoverageTable read from that position.
   */
  getCoverageTable() {
    let p = this.parser;
    p.currentPosition = this.start + this.coverageOffset;
    return new CoverageTable(p);
  }
}

But we actually see this reading too much data. For substFormat 3, the near-universal coverageOffset field is not used!

LookupType 5, subtable format 3 is the only other data structure that does this, and has code in place to correct for reading ahead by two bytes, but LookupType 6 subtable format 3 does not: let's fix that.

Adding the same correction:

      p.currentPosition -= 2;
      delete this.coverageOffset;

now yields:

--- lookup type 6 in dflt, lookup 1
  - subtable 3
    CoverageTable { coverageFormat: 1, glyphCount: 1, glyphArray: [ 1 ] }
  - subtable 3
    CoverageTable { coverageFormat: 1, glyphCount: 1, glyphArray: [ 1 ] }
  - subtable 3
    CoverageTable { coverageFormat: 1, glyphCount: 1, glyphArray: [ 1 ] }
  - subtable 3
    CoverageTable { coverageFormat: 1, glyphCount: 1, glyphArray: [ 1 ] }
  ...

So that looks pretty good @RoelN

RoelN commented 3 years ago

This seems to report coverageFormat: 1, glyphCount: 1, glyphArray: [ 1 ] for every font I throw at it. That doesn't seem right?

Pomax commented 3 years ago

Further updating: the 6.3 code was not complete, so I've updated that in https://github.com/Pomax/lib-font/commit/2ae18d9b0fb12e1d77da073bf0d535db5a364e4b and I've updated my test run code to:

import { Font } from "./lib-font.js";

// Updated test run: for every lookup type 6 subtable reached through a
// "dflt" language system, dump the backtrack and lookahead coverage tables
// and the sequence lookup records.
const font = new Font("boink");
font.src = `./fonts/Recursive_VF_1.064.ttf`;
// font.src = `./fonts/MehrNastaliqWeb-Regular.ttf`;

// onload is presumably invoked once the file assigned to `src` has been
// loaded and parsed - confirm against the lib-font documentation.
font.onload = (evt) => {
  // The loaded font comes from the event detail; this local `font`
  // shadows the outer `font` constant.
  let font = evt.detail.font;

  const { GSUB } = font.opentype.tables;

  let scripts = GSUB.getSupportedScripts();

  // Walk script -> language system -> feature -> lookup -> subtable.
  scripts.forEach((script) => {
    let langsys = GSUB.getSupportedLangSys(script);

    langsys.forEach((lang) => {
      let langSysTable = GSUB.getLangSysTable(script, lang);
      let features = GSUB.getFeatures(langSysTable);

      features.forEach((feature) => {
        const lookupIDs = feature.lookupListIndices;

        lookupIDs.forEach((id) => {
          const lookup = GSUB.getLookup(id);

          // Only dump lookup type 6 for the "dflt" language system. The
          // script tag is not checked, so this matches "dflt" under every
          // script, not just DFLT.
          if (lookup.lookupType === 6 && lang === "dflt") {
            // Visit every subtable of the lookup, not just the first one.
            for(let i=0; i<lookup.subTableCount; i++) {
              let subtable = lookup.getSubTable(i);

              console.log(`=====================================================`);
              console.log(`lookup type 6 in dflt, lookup ${id}, subtable ${i}`);
              console.log(`=====================================================`);

              // The brace-less `if` guards only the forEach statement that
              // follows it. Inside the callback, `id` is the array index and
              // shadows the lookup id; it is logged 1-based.
              if (subtable.backtrackGlyphCount > 0)
              subtable.backtrackCoverageOffsets.forEach((offset, id) => {
                let coverage = subtable.getCoverageFromOffset(offset);
                console.log(`backtrack coverage ${id+1}:`, coverage);
              });

              // Same pattern for the lookahead sequence. The input coverage
              // offsets are not dumped by this script.
              if (subtable.lookaheadGlyphCount > 0)
              subtable.lookaheadCoverageOffsets.forEach((offset, id) => {
                let coverage = subtable.getCoverageFromOffset(offset);
                console.log(`lookahead coverage ${id+1}:`, coverage);
              });

              // Finally, log each sequence lookup record of the subtable.
              subtable.seqLookupRecords.forEach(slRecord => {
                console.log(`sequence lookup record:`, slRecord);
              });
            }
          }

        });
      });
    });
  });
};

Which yields an insane amount of data, but of the form:

=====================================================
lookup type 6 in dflt, lookup 1, subtable 0
=====================================================
backtrack coverage 1: CoverageTable {
  coverageFormat: 2,
  rangeCount: 2,
  rangeRecords: [
    CoverageRangeRecord {
      startGlyphID: 520,
      endGlyphID: 529,
      startCoverageIndex: 0
    },
    CoverageRangeRecord {
      startGlyphID: 1255,
      endGlyphID: 1255,
      startCoverageIndex: 10
    }
  ]
}
backtrack coverage 2: CoverageTable {
  coverageFormat: 2,
  rangeCount: 2,
  rangeRecords: [
    CoverageRangeRecord {
      startGlyphID: 520,
      endGlyphID: 529,
      startCoverageIndex: 0
    },
    CoverageRangeRecord {
      startGlyphID: 1255,
      endGlyphID: 1255,
      startCoverageIndex: 10
    }
  ]
}
backtrack coverage 3: CoverageTable {
  coverageFormat: 2,
  rangeCount: 2,
  rangeRecords: [
    CoverageRangeRecord {
      startGlyphID: 520,
      endGlyphID: 529,
      startCoverageIndex: 0
    },
    CoverageRangeRecord {
      startGlyphID: 1255,
      endGlyphID: 1255,
      startCoverageIndex: 10
    }
  ]
}
backtrack coverage 4: CoverageTable {
  coverageFormat: 2,
  rangeCount: 2,
  rangeRecords: [
    CoverageRangeRecord {
      startGlyphID: 520,
      endGlyphID: 529,
      startCoverageIndex: 0
    },
    CoverageRangeRecord {
      startGlyphID: 1255,
      endGlyphID: 1255,
      startCoverageIndex: 10
    }
  ]
}
backtrack coverage 5: CoverageTable {
  coverageFormat: 2,
  rangeCount: 2,
  rangeRecords: [
    CoverageRangeRecord {
      startGlyphID: 520,
      endGlyphID: 529,
      startCoverageIndex: 0
    },
    CoverageRangeRecord {
      startGlyphID: 1255,
      endGlyphID: 1255,
      startCoverageIndex: 10
    }
  ]
}
backtrack coverage 6: CoverageTable {
  coverageFormat: 2,
  rangeCount: 2,
  rangeRecords: [
    CoverageRangeRecord {
      startGlyphID: 520,
      endGlyphID: 529,
      startCoverageIndex: 0
    },
    CoverageRangeRecord {
      startGlyphID: 1255,
      endGlyphID: 1255,
      startCoverageIndex: 10
    }
  ]
}
backtrack coverage 7: CoverageTable {
  coverageFormat: 2,
  rangeCount: 2,
  rangeRecords: [
    CoverageRangeRecord {
      startGlyphID: 520,
      endGlyphID: 529,
      startCoverageIndex: 0
    },
    CoverageRangeRecord {
      startGlyphID: 1255,
      endGlyphID: 1255,
      startCoverageIndex: 10
    }
  ]
}
backtrack coverage 8: CoverageTable {
  coverageFormat: 2,
  rangeCount: 2,
  rangeRecords: [
    CoverageRangeRecord {
      startGlyphID: 520,
      endGlyphID: 529,
      startCoverageIndex: 0
    },
    CoverageRangeRecord {
      startGlyphID: 1255,
      endGlyphID: 1255,
      startCoverageIndex: 10
    }
  ]
}
backtrack coverage 9: CoverageTable {
  coverageFormat: 2,
  rangeCount: 2,
  rangeRecords: [
    CoverageRangeRecord {
      startGlyphID: 520,
      endGlyphID: 529,
      startCoverageIndex: 0
    },
    CoverageRangeRecord {
      startGlyphID: 1255,
      endGlyphID: 1255,
      startCoverageIndex: 10
    }
  ]
}
backtrack coverage 10: CoverageTable {
  coverageFormat: 2,
  rangeCount: 2,
  rangeRecords: [
    CoverageRangeRecord {
      startGlyphID: 520,
      endGlyphID: 529,
      startCoverageIndex: 0
    },
    CoverageRangeRecord {
      startGlyphID: 1255,
      endGlyphID: 1255,
      startCoverageIndex: 10
    }
  ]
}
backtrack coverage 11: CoverageTable { coverageFormat: 1, glyphCount: 1, glyphArray: [ 601 ] }
sequence lookup record: SequenceLookupRecord { sequenceIndex: 3, lookupListIndex: 0 }
=====================================================
lookup type 6 in dflt, lookup 1, subtable 1
=====================================================
lookahead coverage 1: CoverageTable {
  coverageFormat: 2,
  rangeCount: 2,
  rangeRecords: [
    CoverageRangeRecord {
      startGlyphID: 520,
      endGlyphID: 529,
      startCoverageIndex: 0
    },
    CoverageRangeRecord {
      startGlyphID: 1255,
      endGlyphID: 1255,
      startCoverageIndex: 10
    }
  ]
}
lookahead coverage 2: CoverageTable {
  coverageFormat: 2,
  rangeCount: 2,
  rangeRecords: [
    CoverageRangeRecord {
      startGlyphID: 520,
      endGlyphID: 529,
      startCoverageIndex: 0
    },
    CoverageRangeRecord {
      startGlyphID: 1255,
      endGlyphID: 1255,
      startCoverageIndex: 10
    }
  ]
}
lookahead coverage 3: CoverageTable {
  coverageFormat: 2,
  rangeCount: 2,
  rangeRecords: [
    CoverageRangeRecord {
      startGlyphID: 520,
      endGlyphID: 529,
      startCoverageIndex: 0
    },
    CoverageRangeRecord {
      startGlyphID: 1255,
      endGlyphID: 1255,
      startCoverageIndex: 10
    }
  ]
}
lookahead coverage 4: CoverageTable {
  coverageFormat: 2,
  rangeCount: 2,
  rangeRecords: [
    CoverageRangeRecord {
      startGlyphID: 520,
      endGlyphID: 529,
      startCoverageIndex: 0
    },
    CoverageRangeRecord {
      startGlyphID: 1255,
      endGlyphID: 1255,
      startCoverageIndex: 10
    }
  ]
}
lookahead coverage 5: CoverageTable {
  coverageFormat: 2,
  rangeCount: 2,
  rangeRecords: [
    CoverageRangeRecord {
      startGlyphID: 520,
      endGlyphID: 529,
      startCoverageIndex: 0
    },
    CoverageRangeRecord {
      startGlyphID: 1255,
      endGlyphID: 1255,
      startCoverageIndex: 10
    }
  ]
}
lookahead coverage 6: CoverageTable {
  coverageFormat: 2,
  rangeCount: 2,
  rangeRecords: [
    CoverageRangeRecord {
      startGlyphID: 520,
      endGlyphID: 529,
      startCoverageIndex: 0
    },
    CoverageRangeRecord {
      startGlyphID: 1255,
      endGlyphID: 1255,
      startCoverageIndex: 10
    }
  ]
}
lookahead coverage 7: CoverageTable {
  coverageFormat: 2,
  rangeCount: 2,
  rangeRecords: [
    CoverageRangeRecord {
      startGlyphID: 520,
      endGlyphID: 529,
      startCoverageIndex: 0
    },
    CoverageRangeRecord {
      startGlyphID: 1255,
      endGlyphID: 1255,
      startCoverageIndex: 10
    }
  ]
}
lookahead coverage 8: CoverageTable {
  coverageFormat: 2,
  rangeCount: 2,
  rangeRecords: [
    CoverageRangeRecord {
      startGlyphID: 520,
      endGlyphID: 529,
      startCoverageIndex: 0
    },
    CoverageRangeRecord {
      startGlyphID: 1255,
      endGlyphID: 1255,
      startCoverageIndex: 10
    }
  ]
}
lookahead coverage 9: CoverageTable {
  coverageFormat: 2,
  rangeCount: 2,
  rangeRecords: [
    CoverageRangeRecord {
      startGlyphID: 520,
      endGlyphID: 529,
      startCoverageIndex: 0
    },
    CoverageRangeRecord {
      startGlyphID: 1255,
      endGlyphID: 1255,
      startCoverageIndex: 10
    }
  ]
}
lookahead coverage 10: CoverageTable {
  coverageFormat: 2,
  rangeCount: 2,
  rangeRecords: [
    CoverageRangeRecord {
      startGlyphID: 520,
      endGlyphID: 529,
      startCoverageIndex: 0
    },
    CoverageRangeRecord {
      startGlyphID: 1255,
      endGlyphID: 1255,
      startCoverageIndex: 10
    }
  ]
}
lookahead coverage 11: CoverageTable { coverageFormat: 1, glyphCount: 1, glyphArray: [ 601 ] }
sequence lookup record: SequenceLookupRecord { sequenceIndex: 3, lookupListIndex: 10 }

Which looks more in line with what the data should look like.

the first backtrack rule maps to:

backtrack coverage 1: CoverageTable {
  coverageFormat: 2,
  rangeCount: 2,
  rangeRecords: [
    CoverageRangeRecord {
      startGlyphID: 520,  === 0 ("zero")
      endGlyphID: 529, === 9 ("nine")
      startCoverageIndex: 0
    },
    CoverageRangeRecord {
      startGlyphID: 1255, === 0 ("zero.sans")
      endGlyphID: 1255,
      startCoverageIndex: 10
    }
  ]
}

Which anecdotally feels like things make more sense now =)

Confirming with TTX shows:

      <Lookup index="1">
        <LookupType value="6"/>
        <LookupFlag value="0"/>
        <!-- SubTableCount=23 -->
        <ChainContextSubst index="0" Format="3">
          <!-- BacktrackGlyphCount=11 -->
          <BacktrackCoverage index="0" Format="2">
            <Glyph value="zero"/>
            <Glyph value="one"/>
            <Glyph value="two"/>
            <Glyph value="three"/>
            <Glyph value="four"/>
            <Glyph value="five"/>
            <Glyph value="six"/>
            <Glyph value="seven"/>
            <Glyph value="eight"/>
            <Glyph value="nine"/>
            <Glyph value="zero.sans"/>
          </BacktrackCoverage>
          <BacktrackCoverage index="1" Format="2">
            <Glyph value="zero"/>
            <Glyph value="one"/>
            <Glyph value="two"/>
            <Glyph value="three"/>
            <Glyph value="four"/>
            <Glyph value="five"/>
            <Glyph value="six"/>
            <Glyph value="seven"/>
            <Glyph value="eight"/>
            <Glyph value="nine"/>
            <Glyph value="zero.sans"/>
          </BacktrackCoverage>
RoelN commented 3 years ago

This is fantastic! Thanks a lot, @Pomax!

For the record, I'm also logging the input coverage, so you have a complete picture of what goes "in" the substitution:

              if (subtable.inputGlyphCount > 0)
              subtable.inputCoverageOffsets.forEach((offset, id) => {
                let coverage = subtable.getCoverageFromOffset(offset);
                console.log(`input coverage ${id+1}:`, coverage);
                console.log("\n");
              });