opentypejs / opentype.js

Read and write OpenType fonts using JavaScript.
https://opentype.js.org/
MIT License
4.37k stars 467 forks source link

Read ligature from a font #384

Open andreinitescu opened 5 years ago

andreinitescu commented 5 years ago

Is it possible to read the "ligatures" from a font?

What I mean is, get a dictionary (an array) where the key is the substitution and the value is the Glyph object (or just the unicode integer value), something like:

var font = ... ; // a Font object
var ligatures = font.getLigatures();  // an array where key is the ligature, value is the Glyph(or just the unicode integer will do it too)
foreach(var l in ligatures) {
    console.log(l, ligatures[l]); // 
}

For example, Google's Material Icons font has the "face" ligature, see: https://google.github.io/material-design-icons/#using-the-icons-in-html

vinuel commented 5 years ago

To get a list of ligatures would really be helpful!

Recently a FontDrop! user sent me a script that he used to get ligatures. He wrote:

Below is the JavaScript I used to grab the ligatures. It's missing the first character for each ligature, but that was good enough for me.

Here is the code


// Standalone demo: prints the component characters of every ligature found in
// one hard-coded GSUB LookupType 4 (ligature substitution) lookup.
//
// NOTE(review): `components` lists only the glyphs that follow the first glyph
// of a ligature, so every printed name lacks its leading character. The first
// glyph presumably has to be derived from the subtable's `coverage` table.
(function(){
  // Dump of the font's cmap: Unicode code point (string key) -> glyph index.
  var glyphIndexMap = {"0":1,"1":3,"32":4,"95":5,"97":6,"98":7,"99":8,"100":9,"101":10,"102":11,"103":12,"104":13,"105":14,"107":15,"108":16,"109":17,"110":18,"111":19,"112":20,"114":21,"115":22,"116":23,"117":24,"118":25,"119":26,"121":27,"9724":28,"57549":29,"57569":30,"57573":31,"58131":32,"58133":33,"58134":34,"58821":35,"58829":36,"58834":37,"58840":38,"59389":39,"59448":40,"59449":41,"59450":42,"59517":43,"59518":44,"59530":45,"59572":46,"59574":47,"59595":48,"59596":49,"59611":50,"59612":51,"59648":52,"59649":53,"59650":54,"59651":55,"59652":56,"59653":57,"59654":58,"59655":59,"59656":60,"59657":61,"59658":62,"59659":63};

  // Inverted map: glyph index -> Unicode code point (still a string).
  var reverseGlyphIndexMap = {};
  Object.keys(glyphIndexMap).forEach(function(key){
    var value = glyphIndexMap[key];
    reverseGlyphIndexMap[value] = key;
  });

  // Dump of a single GSUB lookup of type 4 (ligature substitution).
  var lookups = {"lookupType":4,"lookupFlag":0,"subtables":[{"substFormat":1,"coverage":{"format":2,"ranges":[{"start":6,"end":8,"index":0},{"start":10,"end":14,"index":3},{"start":17,"end":18,"index":8},{"start":20,"end":24,"index":10},{"start":26,"end":27,"index":15}]},"ligatureSets":[[{"ligGlyph":38,"components":[21,21,19,26,5,24,20,26,6,21,9]},{"ligGlyph":35,"components":[18,12,16,10,5,9,19,26,18]}],[{"ligGlyph":48,"components":[6,22,15,10,23]},{"ligGlyph":57,"components":[16,19,12]}],[{"ligGlyph":33,"components":[13,10,25,21,19,18,5,21,14,12,13,23]},{"ligGlyph":32,"components":[13,10,25,21,19,18,5,9,19,26,18]},{"ligGlyph":34,"components":[13,10,25,21,19,18,5,24,20]},{"ligGlyph":36,"components":[16,19,22,10]},{"ligGlyph":49,"components":[6,21,23]}],[{"ligGlyph":30,"components":[18,25,10,16,19,20,10]}],[{"ligGlyph":61,"components":[6,8,10,7,19,19,15]},{"ligGlyph":54,"components":[16,14,8,15,21]}],[{"ligGlyph":53,"components":[19,19,12,16,10]}],[{"ligGlyph":44,"components":[10,6,21,23,5,7,19,21,9,10,21]},{"ligGlyph":43,"components":[10,6,21,23]},{"ligGlyph":45,"components":[19,17,10]}],[{"ligGlyph":59,"components":[18,22,23,6,12,21,6,17]}],[{"ligGlyph":46,"components":[6,21,15,10,21]},{"ligGlyph":37,"components":[10,18,24]}],[{"ligGlyph":52,"components":[10,26,22]}],[{"ligGlyph":58,"components":[14,18,23,10,21,10,22,23]},{"ligGlyph":29,"components":[13,19,18,10]}],[{"ligGlyph":31,"components":[22,22]}],[{"ligGlyph":42,"components":[23,6,21,5,7,19,21,9,10,21]},{"ligGlyph":41,"components":[23,6,21,5,13,6,16,11]},{"ligGlyph":47,"components":[10,6,21,8,13]},{"ligGlyph":40,"components":[23,6,21]}],[{"ligGlyph":50,"components":[13,24,17,7,5,9,19,26,18]},{"ligGlyph":51,"components":[13,24,17,7,5,24,20]},{"ligGlyph":63,"components":[13,10,15,18,19,26]},{"ligGlyph":60,"components":[26,14,23,23,10,21]}],[{"ligGlyph":39,"components":[22,10,21]}],[{"ligGlyph":62,"components":[10,9,9,14,18,12,26,14,21,10]}],[{"ligGlyph":55,"components":[19,24,23,24,7,10]},{"ligGlyph":56,"components":[10,16,20]}]]}]};

  var ligatureSets = lookups.subtables[0].ligatureSets;
  ligatureSets.forEach(function(set){
    set.forEach(function(ligature){
      // Translate glyph indices to characters in a fresh array so the lookup
      // data above keeps its numeric glyph indices.
      var chars = ligature.components.map(function(component){
        // Map keys are decimal strings; pass the radix explicitly.
        var codePoint = parseInt(reverseGlyphIndexMap[component], 10);
        return String.fromCharCode(codePoint);
      });
      console.log(chars.join(''));
    });
  });
})();

I'm not a JavaScript developer, but maybe it is a starting point ...

andreinitescu commented 5 years ago

@vinuel Thanks for your input. OpenType.js already has support for ligatures, but I don't know how ligatures technically work.

andreinitescu commented 5 years ago

Thanks for your code, I think I made some progress! I will share maybe it helps others as well: I refactored your method to use the Font object:

    /**
     * Logs every ligature found in the first subtable of the font's first
     * GSUB lookup: the component characters, the ligature glyph index and the
     * Unicode code point mapped to that glyph.
     *
     * The font's parsed tables are only read, never modified, so the function
     * can be called repeatedly on the same Font object.
     *
     * NOTE(review): `components` holds only the glyphs that follow the first
     * glyph of a ligature, so the printed name lacks its leading character
     * (e.g. "ace" instead of "face").
     *
     * @param {Object} font - object exposing `tables.cmap.glyphIndexMap` and
     *     `tables.gsub.lookups`.
     */
    function parseLigatures(font) {
        var glyphIndexMap = font.tables.cmap.glyphIndexMap;

        // Invert the cmap: glyph index -> code point (kept as the string key).
        var reverseGlyphIndexMap = {};
        Object.keys(glyphIndexMap).forEach(function (codePoint) {
            reverseGlyphIndexMap[glyphIndexMap[codePoint]] = codePoint;
        });

        var ligatureSets = font.tables.gsub.lookups[0].subtables[0].ligatureSets;
        ligatureSets.forEach(function (set) {
            set.forEach(function (ligature) {
                // Build the characters in a local array; overwriting
                // `ligature.components` would corrupt the font's GSUB data.
                var chars = ligature.components.map(function (glyphIndex) {
                    return String.fromCharCode(parseInt(reverseGlyphIndexMap[glyphIndex], 10));
                });
                console.log(chars.join(''), ligature.ligGlyph, reverseGlyphIndexMap[ligature.ligGlyph]);
            });
        });
    }

The issue is, as you said above, it doesn't include the first character of the ligature (it prints "ace" instead of "face")

I'd appreciate some input

andreinitescu commented 5 years ago

For some reason, each set in lookups.subtables[0].ligatureSets has ligature.components missing first character. Is this a bug in the library?

andreinitescu commented 5 years ago

By the way I am using this great library (OpenType.js) in an open source project: https://andreinitescu.github.io/IconFont2Code/

vinuel commented 5 years ago

The missing first glyph might be in lookups.subtables[0].coverage but it is a bit complicated, because there are different formats: https://docs.microsoft.com/en-us/typography/opentype/otspec170/gsub#lookuptype-4:-ligature-substitution-subtable

vinuel commented 5 years ago

Also, I tested the code with my developer. Fonts can contain countless lookups, and ligatures are not only in the first lookup. So we made it search for all LookupType 4 (Ligatures) first, and then get all ligatureSets:

    // Gathers the first subtable of every GSUB lookup that has `ligatureSets`
    // and logs, per entry, the component characters, the ligature glyph index
    // and the code point found for that glyph in the inverted cmap.
    //
    // font: object exposing `tables.cmap.glyphIndexMap` and `tables.gsub.lookups`.
    function parseLigatures(font) {
        var glyphIndexMap = font.tables.cmap.glyphIndexMap;
        var reverseGlyphIndexMap = {};
        // Invert the map: each value of glyphIndexMap becomes a key that
        // points back to its original key.
        Object.keys(glyphIndexMap).forEach(function (key) {
            var value = glyphIndexMap[key];
            reverseGlyphIndexMap[value] = key;
        });

        var lookups = font.tables.gsub.lookups;
    var subtables = [];
        // Keep only lookups whose first subtable carries `ligatureSets`;
        // further subtables of a lookup are not inspected.
        Object.keys(lookups).forEach(function(index){
            if (lookups[index].subtables[0].ligatureSets){
                subtables.push(lookups[index].subtables[0]);
            }else{
                return;
            }
        });
        // NOTE(review): `ligatureSets` is never read below, and this line
        // reads `subtables` directly off `lookups` (indexed per lookup above),
        // which looks like it throws when `lookups` is an array — confirm and
        // consider removing.
        var ligatureSets = lookups.subtables[0].ligatureSets;
        subtables.forEach(function (subtable) {
            // NOTE(review): each element of `ligatureSets` is treated as a
            // single ligature here; if the elements are arrays of ligatures,
            // `components` would be undefined — confirm.
            subtable.ligatureSets.forEach(function (ligature) {
                // Overwrites `components` in place with one character per
                // component, looked up through the inverted map.
                ligature.components = ligature.components.map(function (component) {
                    component = reverseGlyphIndexMap[component];
                    component = parseInt(component);
                    return String.fromCharCode(component);
                });
                 console.log(ligature.components.join(''), ligature.ligGlyph, reverseGlyphIndexMap[ligature.ligGlyph]);
            });
        });
    }
andreinitescu commented 5 years ago

> Fonts can contain countless lookups, not only in the first lookup

I see what you mean, thanks, it makes sense

This line:

var ligatureSets = lookups.subtables[0].ligatureSets;

throws error, there's no "subtables" property on lookups (which makes sense because lookups is an array)

vinuel commented 5 years ago

I think you do not need this line (we use it in Ember JS so I had to reorder some code, might be a mistake)

andreinitescu commented 5 years ago

I removed the line but I got another error. I think you might have code which does more than what you pasted here.

Anyway, I tried to refactor so it goes through all lookups, subtables:

    /**
     * Walks every subtable of every GSUB lookup and logs each ligature found:
     * the component characters, the ligature glyph index and the Unicode code
     * point mapped to that glyph.
     *
     * Subtables without `ligatureSets` (i.e. lookups that are not ligature
     * substitutions) are skipped instead of raising a TypeError. The font's
     * parsed tables are only read, never modified.
     *
     * NOTE(review): `components` holds only the glyphs that follow the first
     * glyph of a ligature, so the printed name lacks its leading character;
     * the first glyph appears to be described by the subtable's `coverage`
     * table.
     *
     * @param {Object} font - object exposing `tables.cmap.glyphIndexMap` and
     *     `tables.gsub.lookups`.
     */
    function parseLigatures3(font) {
        var glyphIndexMap = font.tables.cmap.glyphIndexMap;

        // Invert the cmap: glyph index -> code point (kept as the string key).
        var reverseGlyphIndexMap = {};
        Object.keys(glyphIndexMap).forEach(function (codePoint) {
            reverseGlyphIndexMap[glyphIndexMap[codePoint]] = codePoint;
        });

        font.tables.gsub.lookups.forEach(function (lookup) {
            lookup.subtables.forEach(function (subtable) {
                // Only ligature-substitution subtables carry `ligatureSets`.
                if (!subtable.ligatureSets) {
                    return;
                }
                subtable.ligatureSets.forEach(function (set) {
                    set.forEach(function (ligature) {
                        // Build the characters in a local array; overwriting
                        // `ligature.components` would corrupt the GSUB data.
                        var chars = ligature.components.map(function (glyphIndex) {
                            return String.fromCharCode(parseInt(reverseGlyphIndexMap[glyphIndex], 10));
                        });
                        console.log(chars.join(''), ligature.ligGlyph, reverseGlyphIndexMap[ligature.ligGlyph]);
                    });
                });
            });
        });
    }

But it still doesn't print the first character... 🤦‍♂️ This is because of my lack of understanding of how ligatures technically work...

vinuel commented 5 years ago

Look, here is a first idea for the firstGlyph, just a start, it might help:

[...]
                if (subtable.coverage.format == 1) {
                    var firstGlyph = subtable.coverage.glyphs;
                }else{
                    var firstGlyph = 'Format 2';            
                }
                console.log(firstGlyph, components.join(''), i[0].ligGlyph);
            });
        });
andreinitescu commented 5 years ago

Hmm, the else case where it sets to Format 2 doesn't make sense...

Here's some of the console output:

Format 2 d_rotation 738 59469 otfFont.js:97 
Format 2 irline_seat_individual_suite 676 58930 otfFont.js:97 
Format 2 irline_seat_legroom_reduced 679 58933 otfFont.js:97 
Format 2 irline_seat_legroom_normal 678 58932 otfFont.js:97 
Format 2 irline_seat_recline_normal 681 58935

So it looks like "Format 2" string is used as first letter...

In the meantime, I also created a JS fiddle so it's easier for someone to test:

https://jsfiddle.net/1v6ugj9c/2/

 // Fetch the Material Icons font and dump its ligatures to the console.
 var url = 'https://rawcdn.githack.com/google/material-design-icons/224895a86501195e7a7ff3dde18e39f00b8e3d5a/iconfont/MaterialIcons-Regular.ttf';
 opentype.load(url, function(err, font) {
   if (err) {
     // Report the load failure itself; otherwise it would only surface as an
     // unrelated TypeError when parseLigatures3 dereferences the missing font.
     console.error('Could not load font: ' + err);
     return;
   }
   parseLigatures3(font);
 });

 /**
  * Walks every subtable of every GSUB lookup and logs each ligature found:
  * the component characters, the ligature glyph index and the Unicode code
  * point mapped to that glyph.
  *
  * Subtables without `ligatureSets` (i.e. lookups that are not ligature
  * substitutions) are skipped instead of raising a TypeError. The font's
  * parsed tables are only read, never modified.
  *
  * NOTE(review): `components` holds only the glyphs that follow the first
  * glyph of a ligature, so the printed name lacks its leading character;
  * the first glyph appears to be described by the subtable's `coverage` table.
  *
  * @param {Object} font - object exposing `tables.cmap.glyphIndexMap` and
  *     `tables.gsub.lookups`.
  */
 function parseLigatures3(font) {
   var glyphIndexMap = font.tables.cmap.glyphIndexMap;

   // Invert the cmap: glyph index -> code point (kept as the string key).
   var reverseGlyphIndexMap = {};
   Object.keys(glyphIndexMap).forEach(function(codePoint) {
     reverseGlyphIndexMap[glyphIndexMap[codePoint]] = codePoint;
   });

   font.tables.gsub.lookups.forEach(function(lookup) {
     lookup.subtables.forEach(function(subtable) {
       // Only ligature-substitution subtables carry `ligatureSets`.
       if (!subtable.ligatureSets) {
         return;
       }
       subtable.ligatureSets.forEach(function(set) {
         set.forEach(function(ligature) {
           // Build the characters in a local array; overwriting
           // `ligature.components` would corrupt the font's GSUB data.
           var chars = ligature.components.map(function(glyphIndex) {
             return String.fromCharCode(parseInt(reverseGlyphIndexMap[glyphIndex], 10));
           });
           console.log(chars.join(''), ligature.ligGlyph, reverseGlyphIndexMap[ligature.ligGlyph]);
         });
       });
     });
   });
 }

Check the console output

andreinitescu commented 5 years ago

As you can see, the last line printed to the console is

oom_in 912 59647

but it should really be:

zoom_in 912 59647

The issue is first letter is omitted.

vinuel commented 5 years ago

So far the firstGlyph thing was just a concept, have a look in the subtable.coverage property, there is Format 1 and Format 2. Format 1 can be an array of glyphs, Format 2 is different ... probably we need to check for the first glyph at first, and then build the components ... (I hope I can check this with my developer later.. )

andreinitescu commented 5 years ago

I'm not sure but I think the implementation to do all this is already there, it's just not surfaced: https://github.com/opentypejs/opentype.js/blob/master/src/features/featureQuery.js#L353

vinuel commented 5 years ago

Here is code that shows the first letter for ligatures with subtable.coverage Format 1: https://jsfiddle.net/4o1mqu0w/ (It doesn't work with the font in the fiddle, but I tested it with other fonts.) Ligatures with subtable.coverage Format 2 needs to be done ...

vinuel commented 5 years ago

Found this, what also might help: https://github.com/foliojs/fontkit/issues/187

vinuel commented 5 years ago

This works for Format 1 and Format 2: https://jsfiddle.net/nvbajtmo/

andreinitescu commented 5 years ago

@vinuel so cool, thanks! Does it work for any font, or it won't work for some scenarios/features in some fonts?

vinuel commented 5 years ago

This will detect all Ligatures. But at this point we just search through the lookups, it would be better to check the features first and then we can connect the ligature with the features (Standard Ligatures – liga, Discretionary Ligatures – dlig, Historical Ligatures – hlig, etc..)

Also, the next challenge is to display the results. Not all components of a ligature might have a unicode to display them (e.g. small caps letters and small caps ligatures.) Ligatures and components could be rendered with opentype.js, but the font might not contain the component letters (e.g. the material icons font, for the cloud-ligature you need the letters c l o u d but the font does not have glyphs for these letters, just empty code points.)

If you just want to show the ligatures, without components and features it works.

andreinitescu commented 5 years ago

Thanks again. I'm not familiar with the font internals, I'm not sure how "feature" and "ligature" and "lookups" work exactly. The purpose of my tool (iconfont2code) is to show all glyphs in a font with icons (with the help of openType.js) and based on your selected glyphs in a list, it generates a simple C# code with the unicode constants. Using constants in apps helps with maintenance (you could just use hardcoded unicode value instead).

Recently I learned about ligatures (strings which the font replaces with glyphs), so I realized that if I could get the ligatures from the font, I could use the ligatures as names for the generated C# constants, otherwise the developer needs to come up with distinctive names for the constants himself.

petermikitsh commented 3 years ago

I made an NPM module for getting an array of ligature names from a font. It's published to npm as ligatures. https://github.com/petermikitsh/ligatures

yne commented 2 years ago

@andreinitescu, did @petermikitsh's ligatures module help you? Are you still looking?