Closed Pomax closed 3 years ago
Looking at the woff2.js
file, the lazy load function currently shows:
/**
 * Register one lazily-built table accessor per WOFF2 directory entry.
 *
 * `woff2.tables` is replaced by a fresh object. For every directory entry a
 * lazy property, named after the entry's trimmed tag, is registered on it;
 * the table itself is only constructed when that lazy callback runs.
 *
 * @param {object} woff2 - object exposing a `directory` array; gets a new `tables` object.
 * @param {Uint8Array} decoded - the decompressed font data.
 * @param {Function} createTable - invoked as `createTable(tables, dict, dataview)`.
 */
function buildWoff2LazyLookups(woff2, decoded, createTable) {
  woff2.tables = {};

  for (const entry of woff2.directory) {
    const buildTable = () => {
      const from = entry.offset;
      // A falsy transformLength (absent or 0) makes origLength the byte count.
      const byteCount = entry.transformLength || entry.origLength;
      const tableBytes = decoded.slice(from, from + byteCount);

      // The slice becomes the table's whole data block, hence offset 0.
      const dict = { tag: entry.tag, offset: 0, length: entry.origLength };
      return createTable(woff2.tables, dict, new DataView(tableBytes.buffer));
    };

    lazy(woff2.tables, entry.tag.trim(), buildTable);
  }
}
Updated code to be more explicit about what's happening:
/**
 * Debug-instrumented variant of the lazy WOFF2 table lookup builder.
 *
 * Behaves like the plain version, but logs each step and reports (rather than
 * propagates) any error thrown while wrapping the data or creating the table.
 * When `createTable` throws, the lazy callback yields `undefined`.
 *
 * @param {object} woff2 - object exposing a `directory` array; gets a new `tables` object.
 * @param {Uint8Array} decoded - the decompressed font data.
 * @param {Function} createTable - invoked as `createTable(tables, dict, dataview)`.
 */
function buildWoff2LazyLookups(woff2, decoded, createTable) {
  woff2.tables = {};

  woff2.directory.forEach((entry) => {
    const tableName = entry.tag.trim();

    lazy(woff2.tables, tableName, () => {
      const first = entry.offset;
      // A falsy transformLength (absent or 0) makes origLength the byte count.
      const byteCount = entry.transformLength || entry.origLength;

      console.log(`packing data`);
      let data = decoded.slice(first, first + byteCount);
      try {
        data = new DataView(data.buffer);
      } catch (err) {
        console.error(err);
      }
      console.log(`packed data:`, data);

      console.log(`creating table...`);
      let table;
      try {
        const dict = { tag: entry.tag, offset: 0, length: entry.origLength };
        table = createTable(woff2.tables, dict, data);
      } catch (err) {
        // Report the failure; the lazy value is then left undefined.
        console.error(err);
      }
      return table;
    });
  });
}
This reveals the following error:
packing data
packed data: DataView {
byteLength: 447,
byteOffset: 0,
buffer: ArrayBuffer {
[Uint8Contents]: <...>,
byteLength: 447
}
}
creating table...
dict: { tag: 'name', offset: 0, length: 447 }
parser getUint16 2 Parser {
name: 'name',
length: 447,
start: 0,
offset: 448,
data: DataView {
byteLength: 447,
byteOffset: 0,
buffer: ArrayBuffer {
[Uint8Contents]: <...>,
byteLength: 447
}
}
}
parser 0 448
RangeError: Offset is outside the bounds of the DataView
at DataView.getUint16 (<anonymous>)
at Parser.getValue (file:///C:/Users/Mike/Documents/Git/released/lib-font/src/parser.js:60:29)
at Parser.get (file:///C:/Users/Mike/Documents/Git/released/lib-font/src/parser.js:38:25)
at new NameRecord (file:///C:/Users/Mike/Documents/Git/released/lib-font/src/opentype/tables/simple/name.js:64:21)
at file:///C:/Users/Mike/Documents/Git/released/lib-font/src/opentype/tables/simple/name.js:20:14
at Array.map (<anonymous>)
at new name (file:///C:/Users/Mike/Documents/Git/released/lib-font/src/opentype/tables/simple/name.js:19:51)
at createTable (file:///C:/Users/Mike/Documents/Git/released/lib-font/src/opentype/tables/createTable.js:89:20)
at file:///C:/Users/Mike/Documents/Git/released/lib-font/src/opentype/woff2.js:138:16
at Object.get (file:///C:/Users/Mike/Documents/Git/released/lib-font/src/lazy.js:15:13)
Investigating name
table class code:
class name extends SimpleTable {
constructor(dict, dataview) {
const { p } = super(dict, dataview);
console.log(`CONSTRUCTING NAME TABLE`);
this.format = p.uint16;
this.count = p.uint16;
this.stringOffset = p.Offset16; // relative to start of table
console.log(`parsing name records`);
// name records
this.nameRecords = [...new Array(this.count)].map(
(_) => new NameRecord(p, this)
);
// lang-tag records, if applicable
if (this.format === 1) {
this.langTagCount = p.uint16;
this.langTagRecords = [...new Array(this.langTagCount)].map(
(_) => new LangTagRecord(p.uint16, p.Offset16)
);
}
console.log(`caching global string start offset`);
// cache these values for use in `.get(nameID)`
this.stringStart = this.tableStart + this.stringOffset;
}
This shows things go wrong during record parsing (we never get to the global string start offset)
Adding debug prints to the NameRecord constructor:
/**
 * A single record of the `name` table, with debug output of its raw fields.
 *
 * Six values are consumed from the parser, in order: platformID, encodingID,
 * languageID, nameID, length (all via `p.uint16`) and offset (via
 * `p.Offset16`). The decoded text is exposed through a lazy `string` property.
 */
class NameRecord {
  /**
   * @param {object} p - parser positioned at the start of this record.
   * @param {object} nameTable - owning table; its `stringStart` is read only
   *   when the lazy `string` callback runs.
   */
  constructor(p, nameTable) {
    const uint16Fields = [
      `platformID`,
      `encodingID`,
      `languageID`,
      `nameID`,
      `length`,
    ];
    for (const field of uint16Fields) {
      this[field] = p.uint16;
    }
    this.offset = p.Offset16;

    console.log(
      this.platformID,
      this.encodingID,
      this.languageID,
      this.nameID,
      this.length,
      this.offset
    );

    const readString = () => {
      // Move the parser to this record's string data before decoding.
      p.currentPosition = nameTable.stringStart + this.offset;
      return decodeString(p, this);
    };
    lazy(this, `string`, readString);
  }
}
Shows initially correct, but predominantly wrong, data. The fourth record is already quite clearly wrong, suggesting we've started parsing way too late in the name record section of the table (although miraculously, aligned to an actual record start!):
3 1 1033 4 16 168
3 1 1033 5 32 195
3 1 1033 6 16 246
0 0 17920 27648 24832 29696
26880 25344 28416 28160 70 27745
29801 25455 28160 82 101 103
117 108 97 114 0 21093
26485 27745 29184 70 111 110
116 70 111 114 103 101
32 50 46 48 32 58
32 70 108 97 116 105
99 111 110 32 58 32
50 49 45 49 50 45
50 48 49 57 0 18031
28276 18031 29287 25888 12846 12320
14880 18028 24948 26979 28526 8250
8242 12589 12594 11570 12337 14592
70 108 97 116 105 99
111 110 0 18028 24948 26979
28526 0 22016 25856 29184 29440
26880 28416 28160 8192 12288 12288
12544 11776 12288 12288 12288 8192
86 25970 29545 28526 8240 12337
11824 12336 8192 70 108 97
116 105 99 111 110 0
18028 24948 26979 28526 0 512
0 0 255 49152 6400 0
0 0 0 0 0 0
0 0 0 16640 0 256
512 769 513 769 1025 1281
1537 1793 2049 2305 2561 2817
3073 3329 3585 3841 4097 4353
4609 4865 5121 5377 5633 5889
6145 6401 6657 6913 7169 7425
7681 7937 8193 8449 8705 8961
9217 9473 9729 9985 10241 10497
name
table according to TTX:
<name>
<namerecord nameID="0" platformID="1" platEncID="0" langID="0x0" unicode="True">
</namerecord>
<namerecord nameID="1" platformID="1" platEncID="0" langID="0x0" unicode="True">
Flaticon
</namerecord>
<namerecord nameID="2" platformID="1" platEncID="0" langID="0x0" unicode="True">
Regular
</namerecord>
<namerecord nameID="3" platformID="1" platEncID="0" langID="0x0" unicode="True">
FontForge 2.0 : Flaticon : 21-12-2019
</namerecord>
<namerecord nameID="4" platformID="1" platEncID="0" langID="0x0" unicode="True">
Flaticon
</namerecord>
<namerecord nameID="5" platformID="1" platEncID="0" langID="0x0" unicode="True">
Version 001.000
</namerecord>
<namerecord nameID="6" platformID="1" platEncID="0" langID="0x0" unicode="True">
Flaticon
</namerecord>
<namerecord nameID="0" platformID="3" platEncID="1" langID="0x409">
</namerecord>
<namerecord nameID="1" platformID="3" platEncID="1" langID="0x409">
Flaticon
</namerecord>
<namerecord nameID="2" platformID="3" platEncID="1" langID="0x409">
Regular
</namerecord>
<namerecord nameID="3" platformID="3" platEncID="1" langID="0x409">
FontForge 2.0 : Flaticon : 21-12-2019
</namerecord>
<namerecord nameID="4" platformID="3" platEncID="1" langID="0x409">
Flaticon
</namerecord>
<namerecord nameID="5" platformID="3" platEncID="1" langID="0x409">
Version 001.000
</namerecord>
<namerecord nameID="6" platformID="3" platEncID="1" langID="0x409">
Flaticon
</namerecord>
</name>
Data inspection:
// Debugging excerpt: decompress the font data, hex-dump a window of the
// decoded bytes, then build the lazy table lookups as usual.
let decoded;
// Everything from dictOffset onward is handed to the brotli decoder.
let buffer = dataview.buffer.slice(dictOffset);
if (brotliDecode) {
decoded = brotliDecode(new Uint8Array(buffer));
} else if (nativeBrotliDecode) {
decoded = new Uint8Array(nativeBrotliDecode(buffer));
} else {
// No decoder at all: notify the font's error handler (if any), then throw.
const msg = `no brotli decoder available to decode WOFF2 font`;
if (font.onerror) font.onerror(msg);
throw new Error(msg);
}
// Map each decoded byte to a one-character string so the data can be sliced
// and chunked with string methods.
const asText = Array.from(decoded).map(v => String.fromCharCode(v)).join(``);
// NOTE(review): 17760 and 447 are presumably the offset and length recorded
// for the `name` table in the directory - confirm against the directory dump.
// The regex splits the window into rows of at most 16 characters; each
// character is then printed as a two-digit uppercase hex value.
const lines = asText.slice(17760, 17760 + 447).match(/([\w\W]{1,16})/g).map(line => {
return line.split(``).map(v => `${v.charCodeAt(0).toString(16).toUpperCase()}`.padStart(2, `0`)).join(` | `);
});
console.log(lines);
buildWoff2LazyLookups(this, decoded, createTable);
Yields:
[
'00 | 03 | 00 | 4A | 00 | 36 | 00 | 03 | 00 | 01 | 04 | 09 | 00 | 04 | 00 | 10',
'00 | A8 | 00 | 03 | 00 | 01 | 04 | 09 | 00 | 05 | 00 | 20 | 00 | C3 | 00 | 03',
'00 | 01 | 04 | 09 | 00 | 06 | 00 | 10 | 00 | F6 | 00 | 00 | 00 | 00 | 46 | 00',
'6C | 00 | 61 | 00 | 74 | 00 | 69 | 00 | 63 | 00 | 6F | 00 | 6E | 00 | 00 | 46',
'6C | 61 | 74 | 69 | 63 | 6F | 6E | 00 | 00 | 52 | 00 | 65 | 00 | 67 | 00 | 75',
'00 | 6C | 00 | 61 | 00 | 72 | 00 | 00 | 52 | 65 | 67 | 75 | 6C | 61 | 72 | 00',
'00 | 46 | 00 | 6F | 00 | 6E | 00 | 74 | 00 | 46 | 00 | 6F | 00 | 72 | 00 | 67',
'00 | 65 | 00 | 20 | 00 | 32 | 00 | 2E | 00 | 30 | 00 | 20 | 00 | 3A | 00 | 20',
'00 | 46 | 00 | 6C | 00 | 61 | 00 | 74 | 00 | 69 | 00 | 63 | 00 | 6F | 00 | 6E',
'00 | 20 | 00 | 3A | 00 | 20 | 00 | 32 | 00 | 31 | 00 | 2D | 00 | 31 | 00 | 32',
'00 | 2D | 00 | 32 | 00 | 30 | 00 | 31 | 00 | 39 | 00 | 00 | 46 | 6F | 6E | 74',
'46 | 6F | 72 | 67 | 65 | 20 | 32 | 2E | 30 | 20 | 3A | 20 | 46 | 6C | 61 | 74',
'69 | 63 | 6F | 6E | 20 | 3A | 20 | 32 | 31 | 2D | 31 | 32 | 2D | 32 | 30 | 31',
'39 | 00 | 00 | 46 | 00 | 6C | 00 | 61 | 00 | 74 | 00 | 69 | 00 | 63 | 00 | 6F',
'00 | 6E | 00 | 00 | 46 | 6C | 61 | 74 | 69 | 63 | 6F | 6E | 00 | 00 | 56 | 00',
'65 | 00 | 72 | 00 | 73 | 00 | 69 | 00 | 6F | 00 | 6E | 00 | 20 | 00 | 30 | 00',
'30 | 00 | 31 | 00 | 2E | 00 | 30 | 00 | 30 | 00 | 30 | 00 | 20 | 00 | 00 | 56',
'65 | 72 | 73 | 69 | 6F | 6E | 20 | 30 | 30 | 31 | 2E | 30 | 30 | 30 | 20 | 00',
'00 | 46 | 00 | 6C | 00 | 61 | 00 | 74 | 00 | 69 | 00 | 63 | 00 | 6F | 00 | 6E',
'00 | 00 | 46 | 6C | 61 | 74 | 69 | 63 | 6F | 6E | 00 | 00 | 02 | 00 | 00 | 00',
'00 | 00 | 00 | FF | C0 | 00 | 19 | 00 | 00 | 00 | 00 | 00 | 00 | 00 | 00 | 00',
'00 | 00 | 00 | 00 | 00 | 00 | 00 | 00 | 00 | 00 | 00 | 00 | 41 | 00 | 00 | 00',
'01 | 00 | 02 | 00 | 03 | 01 | 02 | 01 | 03 | 01 | 04 | 01 | 05 | 01 | 06 | 01',
'07 | 01 | 08 | 01 | 09 | 01 | 0A | 01 | 0B | 01 | 0C | 01 | 0D | 01 | 0E | 01',
'0F | 01 | 10 | 01 | 11 | 01 | 12 | 01 | 13 | 01 | 14 | 01 | 15 | 01 | 16 | 01',
'17 | 01 | 18 | 01 | 19 | 01 | 1A | 01 | 1B | 01 | 1C | 01 | 1D | 01 | 1E | 01',
'1F | 01 | 20 | 01 | 21 | 01 | 22 | 01 | 23 | 01 | 24 | 01 | 25 | 01 | 26 | 01',
'27 | 01 | 28 | 01 | 29 | 01 | 2A | 01 | 2B | 01 | 2C | 01 | 2D | 01 | 2E'
]
Which cannot be the name table: that has to start with either 00 00 or 00 01.
Let's do some byte hunting...
Skipping back 16 rows and highlighting the 3/1/1033 records:
Working back through the 1/0/0 records:
Working back through the header...
storage offset (0xAE = 174):
count (0x0E = 14):
version (0x00 = 0):
So we know the table starts 132 bytes earlier than its dictionary entry says it starts. Where does that 132 come from?
It comes from this record:
Woff2TableDirectoryEntry {
flags: 11,
tagNumber: 11,
tag: 'loca',
transformVersion: 0,
origLength: 132,
transformLength: 0,
offset: 17366
},
loca is one of the three tables that may be additionally transformed, so looking at its flags
we see it's decimal 11, which means binary 0b00001011, that is: bits [0...5] are the value 11 (for tag=loca
), and bits [6,7] are 0. Quoting from the spec:
Bits 6 and 7 indicate the preprocessing transformation version number (0-3) that was applied to each table. For all tables in a font, except for 'glyf' and 'loca' tables, transformation version 0 indicates the null transform where the original table data is passed directly to the Brotli compressor for inclusion in the compressed data stream. For 'glyf' and 'loca' tables, transformation version 3 indicates the null transform where the original table data was passed directly to the Brotli compressor without applying any pre-processing defined in subclause 5.1 and subclause 5.3. The transformed table formats and their associated transformation version numbers are described in details in clause 5 of this specification.
Further quoting:
the value of the origLength field of transformed table should be treated only as a reference and should not be relied upon in making memory allocation decisions when the WOFF2 data is decoded.
Looking at our code for offset computation, we see:
// parse the dictionary
this.directory = [...new Array(this.numTables)].map(
(_) => new Woff2TableDirectoryEntry(p)
);
let dictOffset = p.currentPosition; // = start of CompressedFontData block
// compute table byte offsets in the decompressed data
// Each entry's offset is the previous entry's offset plus the previous
// entry's length, starting from 0 for the first entry.
this.directory[0].offset = 0;
this.directory.forEach((e, i) => {
let next = this.directory[i + 1];
if (next) {
// NOTE(review): this ternary tests truthiness, so a transformLength of 0
// falls through to origLength instead of contributing 0 bytes.
next.offset =
e.offset + (e.transformLength ? e.transformLength : e.origLength);
}
});
and there's our bug: e.transformLength
is zero for the loca
table, as per the WOFF2 spec, but zero is also a falsey value, and so the ternary does not do what we want: instead of adding 0 to the running offset tally, we incorrectly add the original length value (in this case, 132).
The fix is (of course) trivial:
// Running-offset computation: each entry's offset is the previous entry's
// offset plus the previous entry's length.
this.directory.forEach((e, i) => {
let next = this.directory[i + 1];
if (next) {
// Use transformLength whenever it is defined, even when it is 0; only an
// undefined transformLength falls back to origLength.
next.offset =
e.offset + (e.transformLength !== undefined ? e.transformLength : e.origLength);
}
});
That is, we use transformLength
if it exists at all, not "if it's truthy".
With this update, running the following code:
/**
 * Manual test helper: print the font's table directory, then its `name` table.
 *
 * @param {object} font - object exposing `opentype.directory` and `opentype.tables`.
 */
function testFont(font) {
  const opentype = font.opentype;
  const directory = opentype.directory;
  const tables = opentype.tables;

  console.log(directory);
  // The name table is only looked up after the directory has been printed.
  console.log(tables.name);
}
Yields:
name {
format: 0,
count: 14,
stringOffset: 174,
nameRecords: [
NameRecord {
platformID: 1,
encodingID: 0,
languageID: 0,
nameID: 0,
length: 0,
offset: 2
},
NameRecord {
platformID: 1,
encodingID: 0,
languageID: 0,
nameID: 1,
length: 8,
offset: 21
},
NameRecord {
platformID: 1,
encodingID: 0,
languageID: 0,
nameID: 2,
length: 7,
offset: 46
},
NameRecord {
platformID: 1,
encodingID: 0,
languageID: 0,
nameID: 3,
length: 37,
offset: 130
},
NameRecord {
platformID: 1,
encodingID: 0,
languageID: 0,
nameID: 4,
length: 8,
offset: 186
},
NameRecord {
platformID: 1,
encodingID: 0,
languageID: 0,
nameID: 5,
length: 16,
offset: 229
},
NameRecord {
platformID: 1,
encodingID: 0,
languageID: 0,
nameID: 6,
length: 8,
offset: 264
},
NameRecord {
platformID: 3,
encodingID: 1,
languageID: 1033,
nameID: 0,
length: 0,
offset: 0
},
NameRecord {
platformID: 3,
encodingID: 1,
languageID: 1033,
nameID: 1,
length: 16,
offset: 3
},
NameRecord {
platformID: 3,
encodingID: 1,
languageID: 1033,
nameID: 2,
length: 14,
offset: 30
},
NameRecord {
platformID: 3,
encodingID: 1,
languageID: 1033,
nameID: 3,
length: 74,
offset: 54
},
NameRecord {
platformID: 3,
encodingID: 1,
languageID: 1033,
nameID: 4,
length: 16,
offset: 168
},
NameRecord {
platformID: 3,
encodingID: 1,
languageID: 1033,
nameID: 5,
length: 32,
offset: 195
},
NameRecord {
platformID: 3,
encodingID: 1,
languageID: 1033,
nameID: 6,
length: 16,
offset: 246
}
],
stringStart: 174
}
Testing string extraction reveals one more parsing error:
/**
 * Manual test helper: print the table directory, then try to read and print
 * the `string` of every name record. A failure on one record is reported via
 * `console.error` and does not stop the remaining records from being read.
 *
 * @param {object} font - object exposing `opentype.directory` and `opentype.tables`.
 */
function testFont(font) {
  const opentype = font.opentype;
  const directory = opentype.directory;
  const tables = opentype.tables;
  console.log(directory);

  const printRecordString = (record) => {
    try {
      console.log(record.string);
    } catch (err) {
      console.error(err);
    }
  };

  const nameTable = tables.name;
  nameTable.nameRecords.forEach(printRecordString);
}
Yields:
parser getUint8 1 Parser {
name: 'name',
length: 447,
start: 176,
offset: 272,
data: DataView {
byteLength: 447,
byteOffset: 0,
buffer: ArrayBuffer {
[Uint8Contents]: <...>,
byteLength: 447
}
}
}
parser 176 272
RangeError: Offset is outside the bounds of the DataView
at DataView.getUint8 (<anonymous>)
at Parser.getValue (file:///C:/Users/Mike/Documents/Git/released/lib-font/src/parser.js:60:29)
at Parser.get (file:///C:/Users/Mike/Documents/Git/released/lib-font/src/parser.js:38:25)
at Parser.readBytes (file:///C:/Users/Mike/Documents/Git/released/lib-font/src/parser.js:163:32)
at decodeString (file:///C:/Users/Mike/Documents/Git/released/lib-font/src/opentype/tables/simple/name.js:90:19)
at file:///C:/Users/Mike/Documents/Git/released/lib-font/src/opentype/tables/simple/name.js:69:14
at NameRecord.get (file:///C:/Users/Mike/Documents/Git/released/lib-font/src/lazy.js:15:13)
at file:///C:/Users/Mike/Documents/Git/released/lib-font/testing/manual/custom/flaticon-parsing.js:13:26
at Array.forEach (<anonymous>)
at testFont (file:///C:/Users/Mike/Documents/Git/released/lib-font/testing/manual/custom/flaticon-parsing.js:11:20)
Flaticon
Regular
FontForge 2.0 : Flaticon : 21-12-2019
Flaticon
Version 001.000
Flaticon
Flaticon
Regular
FontForge 2.0 : Flaticon : 21-12-2019
Flaticon
Version 001.000
Flaticon
Looking at decodeString
with a console log added for the record values:
/**
 * Decode the string belonging to a name record, logging the record's key
 * values first (debug aid).
 *
 * Records for platform 0 or 3 are read as a sequence of 16-bit code units via
 * `p.uint16`; all other platforms are read as plain bytes via `p.readBytes`.
 *
 * @param {object} p - parser already positioned at the start of the string data.
 * @param {object} record - provides `nameID`, `platformID`, `encodingID`, `length`.
 * @returns {string} the decoded text.
 */
function decodeString(p, record) {
  console.log(
    record.nameID,
    record.platformID,
    record.encodingID,
    record.length
  );

  const byteLength = record.length;
  const isUtf16Platform = record.platformID === 0 || record.platformID === 3;
  let text = ``;

  if (!isUtf16Platform) {
    // Everything else, we treat as plain bytes.
    p.readBytes(byteLength).forEach((byte) => {
      text += String.fromCharCode(byte);
    });
    return text;
  }

  // We decode strings for the Unicode/Microsoft platforms as UTF-16:
  // one 16-bit read for every (started) pair of bytes.
  for (let unit = 0; unit * 2 < byteLength; unit++) {
    text += String.fromCharCode(p.uint16);
  }
  return text;
  // TODO: if someone wants to finesse this/implement all the other string encodings, have at it!
}
Shows:
0 1 0 0
parser getUint8 1 Parser {
name: 'name',
length: 447,
start: 176,
offset: 272,
data: DataView {
byteLength: 447,
byteOffset: 0,
buffer: ArrayBuffer {
[Uint8Contents]: <...>,
byteLength: 447
}
}
}
parser 176 272
RangeError: Offset is outside the bounds of the DataView
at DataView.getUint8 (<anonymous>)
at Parser.getValue (file:///C:/Users/Mike/Documents/Git/released/lib-font/src/parser.js:60:29)
at Parser.get (file:///C:/Users/Mike/Documents/Git/released/lib-font/src/parser.js:38:25)
at Parser.readBytes (file:///C:/Users/Mike/Documents/Git/released/lib-font/src/parser.js:163:32)
at decodeString (file:///C:/Users/Mike/Documents/Git/released/lib-font/src/opentype/tables/simple/name.js:92:19)
at file:///C:/Users/Mike/Documents/Git/released/lib-font/src/opentype/tables/simple/name.js:69:14
at NameRecord.get (file:///C:/Users/Mike/Documents/Git/released/lib-font/src/lazy.js:15:13)
at file:///C:/Users/Mike/Documents/Git/released/lib-font/testing/manual/custom/flaticon-parsing.js:13:26
at Array.forEach (<anonymous>)
at testFont (file:///C:/Users/Mike/Documents/Git/released/lib-font/testing/manual/custom/flaticon-parsing.js:11:20)
1 1 0 8
Flaticon
2 1 0 7
Regular
3 1 0 37
FontForge 2.0 : Flaticon : 21-12-2019
4 1 0 8
Flaticon
5 1 0 16
Version 001.000
6 1 0 8
Flaticon
0 3 1 0
1 3 1 16
Flaticon
2 3 1 14
Regular
3 3 1 74
FontForge 2.0 : Flaticon : 21-12-2019
4 3 1 16
Flaticon
5 3 1 32
Version 001.000
6 3 1 16
Flaticon
Looking at that first record's encoded values, we see a length of zero bytes, which means we should return an empty string immediately, instead of actually running the code we're currently running.
Again, a trivial fix:
function decodeString(p, record) {
const { platformID, length } = record;
if (length === 0) return ``;
...rest of function here...
}
With these two fixes:
/**
 * Manual test helper: read and print the `string` of every name record.
 * A failure on one record is reported via `console.error` and does not stop
 * the remaining records from being read.
 *
 * @param {object} font - object exposing `opentype.tables.name.nameRecords`.
 */
function testFont(font) {
  const nameTable = font.opentype.tables.name;

  const printRecordString = (record) => {
    try {
      console.log(record.string);
    } catch (err) {
      console.error(err);
    }
  };

  nameTable.nameRecords.forEach(printRecordString);
}
yields:
Flaticon
Regular
FontForge 2.0 : Flaticon : 21-12-2019
Flaticon
Version 001.000
Flaticon
Flaticon
Regular
FontForge 2.0 : Flaticon : 21-12-2019
Flaticon
Version 001.000
Flaticon
And finally we have a correct result. PR incoming.
https://github.com/Pomax/lib-font/pull/121 filed (including tests) and merged in.
v2.3.0 published to npm
Font
https://www.panoramicinfotech.com/wp-content/themes/engitech/fonts/Flaticon.woff2
Problem
table entries seem to resolve incorrectly.
Code
Results
Directory:
Directory entry for
name
table:
Error when destructuring the
name
table itself: