Aadhaar ID is not detected as a semantic type

I am currently testing Aadhaar detection with the locale "en-IN". I put several possible Aadhaar value formats into the dataset (given below) to try them with FTA.

According to this reference, an Aadhaar number follows these rules:

12 digits long
Can have a space after every 4 digits
Can't start with 0 or 1
Can't contain any letters

All columns in the dataset follow these Aadhaar rules. The columns are:

Aadhaar ID: With dashes "-" in between, a general case
Aadhar ID No Space: Without spaces, one continuous 12-digit number
Aadhar ID Wd Space: With a space after every 4 digits
Aadhaar Python Faker: Created with Python faker (except above)
Aadhaar Python Faker Replaced: The Python Faker values with the leading "1" replaced

Surprisingly, Python Faker creates Aadhaar IDs with "1" as the first digit, so I had to check both scenarios (with and without a leading 1). In the end, none of the columns were detected as IDENTITY.AADHAR_IN.

Aadhaar Python Faker was the only column detected as a semantic type, but it was detected as CHECKDIGIT.LUHN rather than IDENTITY.AADHAR_IN.

The log (I gave every column the same header, "Aadhaar", to avoid any potential mis-detection):

{
  "fieldName" : "Aadhaar",
  "totalCount" : -1,
  "sampleCount" : 100,
  "matchCount" : 100,
  "nullCount" : 0,
  "blankCount" : 0,
  "distinctCount" : 100,
  "regExp" : "\\d{4}-\\d{4}-\\d{4}",
  "confidence" : 1.0,
  "type" : "String",
  "isSemanticType" : false,
  "min" : "2010-0105-9680",
  "max" : "9986-8228-9793",
  "minLength" : 14,
  "maxLength" : 14,
  "topK" : [ "9986-8228-9793", "9899-6008-5332", "9851-2041-4147", "9621-6511-5606", "9615-7817-7604", "9584-9201-9782", "9533-6945-2700", "9376-9305-1251", "9233-7113-7861", "9120-2241-1404" ],
  "bottomK" : [ "2010-0105-9680", "2010-1889-7684", "2058-6244-7535", "2075-8515-6125", "2127-9469-1096", "2161-6729-3627", "2358-9575-3996", "2407-2148-7453", "2498-2728-6494", "2625-2331-7140" ],
  "cardinality" : 100,
  "outlierCardinality" : 0,
  "invalidCardinality" : 0,
  "shapesCardinality" : 1,
  "leadingWhiteSpace" : false,
  "trailingWhiteSpace" : false,
  "multiline" : false,
  "keyConfidence" : 0.0,
  "uniqueness" : 1.0,
  "detectionLocale" : "en-IN",
  "ftaVersion" : "12.8.3",
  "structureSignature" : "+g5Mjerw5mF0MtWUTFjDAU48zt8=",
  "dataSignature" : "E5jFbXSoDR/Fnna9+WK2ZRB6Ffc="
}
{
  "fieldName" : "Aadhaar",
  "totalCount" : -1,
  "sampleCount" : 100,
  "matchCount" : 100,
  "nullCount" : 0,
  "blankCount" : 0,
  "distinctCount" : 100,
  "regExp" : "\\d{12}",
  "confidence" : 1.0,
  "type" : "Long",
  "isSemanticType" : false,
  "min" : "201001059680",
  "max" : "998682289793",
  "minLength" : 12,
  "maxLength" : 12,
  "mean" : 5.627418333923204E11,
  "standardDeviation" : 2.3030520296122556E11,
  "topK" : [ "998682289793", "989960085332", "985120414147", "962165115606", "961578177604", "958492019782", "953369452700", "937693051251", "923371137861", "912022411404" ],
  "bottomK" : [ "201001059680", "201018897684", "205862447535", "207585156125", "212794691096", "216167293627", "235895753996", "240721487453", "249827286494", "262523317140" ],
  "leadingZeroCount" : 0,
  "cardinality" : 100,
  "outlierCardinality" : 0,
  "invalidCardinality" : 0,
  "shapesCardinality" : 1,
  "percentiles" : [ "201001059680", "201001059680", "201018897684", "205862447535", "207585156125", "212794691096", "216167293627", "235895753996", "240721487453", "249827286494", "262523317140", "262769827928", "285123103815", "291356457241", "293221368267", "311920636381", "313392460129", "314955440800", "317207551928", "319143601350", "319947789389", "329938857430", "330009318388", "345438111054", "349256840720", "356246452629", "359771252585", "362941367657", "368469896402", "369734064364", "398496628526", "407473720991", "409283901243", "410254540982", "419361110819", "421914417663", "432204464154", "439135606883", "440991090985", "450459480441", "454223140475", "457392793235", "465869661990", "477131768289", "505749806731", "510770116413", "525221078328", "533513219289", "537985232076", "538391671172", "546563205020", "555994263916", "557307883571", "569921640640", "572944463058", "583084972427", "587018017853", "589373782799", "617653633205", "621336314249", "631651914857", "639476363284", "657713420041", "662223509284", "679002179540", "683564060520", "683768777756", "684015507777", "689690820602", "690799263070", "693040907834", "698946009333", "707376677567", "710852374515", "748061529092", "748079329633", "752989213332", "758317661567", "766921825423", "775483541074", "777844115944", "778059439354", "795014596537", "812813967951", "833517866340", "838427959970", "856385853725", "858030382541", "858962825268", "873050833227", "876383040084", "912022411404", "923371137861", "937693051251", "953369452700", "958492019782", "961578177604", "962165115606", "985120414147", "989960085332", "998682289793" ],
  "histogram" : [ 11, 15, 11, 8, 12, 7, 13, 8, 6, 9 ],
  "leadingWhiteSpace" : false,
  "trailingWhiteSpace" : false,
  "multiline" : false,
  "keyConfidence" : 0.0,
  "uniqueness" : 1.0,
  "detectionLocale" : "en-IN",
  "ftaVersion" : "12.8.3",
  "structureSignature" : "C5vhekfmFo1T9wk1aL7/XAVFQS8=",
  "dataSignature" : "Pjp3x9CWVJ6pZMMQqKwBacAf9xc="
}
{
  "fieldName" : "Aadhaar",
  "totalCount" : -1,
  "sampleCount" : 100,
  "matchCount" : 100,
  "nullCount" : 0,
  "blankCount" : 0,
  "distinctCount" : 100,
  "regExp" : "\\d{4} \\d{4} \\d{4}",
  "confidence" : 1.0,
  "type" : "String",
  "isSemanticType" : false,
  "min" : "2010 0105 9680",
  "max" : "9986 8228 9793",
  "minLength" : 14,
  "maxLength" : 14,
  "topK" : [ "9986 8228 9793", "9899 6008 5332", "9851 2041 4147", "9621 6511 5606", "9615 7817 7604", "9584 9201 9782", "9533 6945 2700", "9376 9305 1251", "9233 7113 7861", "9120 2241 1404" ],
  "bottomK" : [ "2010 0105 9680", "2010 1889 7684", "2058 6244 7535", "2075 8515 6125", "2127 9469 1096", "2161 6729 3627", "2358 9575 3996", "2407 2148 7453", "2498 2728 6494", "2625 2331 7140" ],
  "cardinality" : 100,
  "outlierCardinality" : 0,
  "invalidCardinality" : 0,
  "shapesCardinality" : 1,
  "leadingWhiteSpace" : false,
  "trailingWhiteSpace" : false,
  "multiline" : false,
  "keyConfidence" : 0.0,
  "uniqueness" : 1.0,
  "detectionLocale" : "en-IN",
  "ftaVersion" : "12.8.3",
  "structureSignature" : "hFPiutvc8eNGBEqwPa86Kx2Bx+4=",
  "dataSignature" : "ZBBcBKUzoVeGyLspI6gjJJYRq90="
}
{
  "fieldName" : "Aadhaar",
  "totalCount" : -1,
  "sampleCount" : 100,
  "matchCount" : 100,
  "nullCount" : 0,
  "blankCount" : 0,
  "distinctCount" : 100,
  "regExp" : "\\d{12}",
  "confidence" : 1.0,
  "type" : "Long",
  "isSemanticType" : true,
  "semanticType" : "CHECKDIGIT.LUHN",
  "min" : "100251817266",
  "max" : "977401778690",
  "minLength" : 12,
  "maxLength" : 12,
  "mean" : 5.2713633128787006E11,
  "standardDeviation" : 2.4094264377784946E11,
  "topK" : [ "977401778690", "971715135994", "962357472886", "922902779151", "910347441230", "908994944043", "880303662444", "873281504530", "867190843021", "865716762774" ],
  "bottomK" : [ "100251817266", "143592253957", "158005205139", "160241532375", "161192806578", "163790311134", "175411761158", "183204333912", "183218701492", "204826799411" ],
  "leadingZeroCount" : 0,
  "cardinality" : 100,
  "outlierCardinality" : 0,
  "invalidCardinality" : 0,
  "shapesCardinality" : 1,
  "percentiles" : [ "100251817266", "100251817266", "143592253957", "158005205139", "160241532375", "161192806578", "163790311134", "175411761158", "183204333912", "183218701492", "204826799411", "212462175887", "219606172514", "221002302109", "223064285057", "232887584687", "255832167035", "265830369012", "276833393701", "287412290846", "292829876658", "297210822674", "299216325231", "302314311549", "303164030528", "306128529790", "329364480957", "330464039362", "338489938336", "342048191023", "342583470022", "368531005677", "370176345681", "373003246700", "376963844570", "400868377922", "401394923882", "410336418117", "411928392207", "414538124044", "418140862834", "425021412043", "432222436094", "443837166414", "450602634971", "460537042935", "463127721173", "472458224780", "487878663649", "491853129188", "507229945682", "512462630769", "515938808919", "518049186092", "525098366251", "559937514011", "578986301146", "579475488063", "591422919819", "593311948578", "618075606334", "628533157598", "631315724072", "633363432053", "634263582229", "642257992981", "642617014088", "669153864220", "675074000778", "681169134852", "684065593056", "696681160482", "697157893200", "702594987498", "709130746650", "718308132148", "718622293535", "725752854572", "731016245987", "767311160208", "785805378995", "788774874275", "806230508368", "808698795065", "818362368305", "840668878399", "844007851679", "844653208836", "847242337057", "854376999637", "860321665256", "865716762774", "867190843021", "873281504530", "880303662444", "908994944043", "910347441230", "922902779151", "962357472886", "971715135994", "977401778690" ],
  "histogram" : [ 9, 8, 13, 14, 10, 6, 14, 7, 13, 6 ],
  "leadingWhiteSpace" : false,
  "trailingWhiteSpace" : false,
  "multiline" : false,
  "keyConfidence" : 0.0,
  "uniqueness" : 1.0,
  "detectionLocale" : "en-IN",
  "ftaVersion" : "12.8.3",
  "structureSignature" : "ySEbGrpJCO/yoMiVxyK5vi9sBtU=",
  "dataSignature" : "Nk8taKWlfWomlW+7m0ekep4Z79w="
}
{
  "fieldName" : "Aadhaar",
  "totalCount" : -1,
  "sampleCount" : 100,
  "matchCount" : 100,
  "nullCount" : 0,
  "blankCount" : 0,
  "distinctCount" : 100,
  "regExp" : "\\d{12}",
  "confidence" : 1.0,
  "type" : "Long",
  "isSemanticType" : false,
  "min" : "200252827266",
  "max" : "977402778690",
  "minLength" : 12,
  "maxLength" : 12,
  "mean" : 5.376487561888701E11,
  "standardDeviation" : 2.285527641596243E11,
  "topK" : [ "977402778690", "972725235994", "962357472886", "922902779252", "920347442230", "908994944043", "880303662444", "873282504530", "867290843022", "865726762774" ],
  "bottomK" : [ "200252827266", "204826799422", "222002302209", "222462275887", "223064285057", "229606272524", "232887584687", "243592253957", "255832267035", "258005205239" ],
  "leadingZeroCount" : 0,
  "cardinality" : 100,
  "outlierCardinality" : 0,
  "invalidCardinality" : 0,
  "shapesCardinality" : 1,
  "percentiles" : [ "200252827266", "200252827266", "204826799422", "222002302209", "222462275887", "223064285057", "229606272524", "232887584687", "243592253957", "255832267035", "258005205239", "260242532375", "262292806578", "263790322234", "265830369022", "275422762258", "276833393702", "283204333922", "283228702492", "287422290846", "292829876658", "297220822674", "299226325232", "302324322549", "303264030528", "306228529790", "329364480957", "330464039362", "338489938336", "342048292023", "342583470022", "368532005677", "370276345682", "373003246700", "376963844570", "400868377922", "402394923882", "420336428227", "422928392207", "424538224044", "425022422043", "428240862834", "432222436094", "443837266424", "450602634972", "460537042935", "463227722273", "472458224780", "487878663649", "492853229288", "507229945682", "522462630769", "525098366252", "525938808929", "528049286092", "559937524022", "578986302246", "579475488063", "592422929829", "593322948578", "628075606334", "628533257598", "632325724072", "633363432053", "634263582229", "642257992982", "642627024088", "669253864220", "675074000778", "682269234852", "684065593056", "696682260482", "697257893200", "702594987498", "709230746650", "725752854572", "728308232248", "728622293535", "732026245987", "767322260208", "785805378995", "788774874275", "806230508368", "808698795065", "828362368305", "840668878399", "844007852679", "844653208836", "847242337057", "854376999637", "860322665256", "865726762774", "867290843022", "873282504530", "880303662444", "908994944043", "920347442230", "922902779252", "962357472886", "972725235994", "977402778690" ],
  "histogram" : [ 16, 14, 12, 8, 7, 9, 12, 5, 11, 6 ],
  "leadingWhiteSpace" : false,
  "trailingWhiteSpace" : false,
  "multiline" : false,
  "keyConfidence" : 0.0,
  "uniqueness" : 1.0,
  "detectionLocale" : "en-IN",
  "ftaVersion" : "12.8.3",
  "structureSignature" : "C5vhekfmFo1T9wk1aL7/XAVFQS8=",
  "dataSignature" : "+QFiv3mTZ6Z6NluMZ37biDPbK2k="
}

---
Total records: 100
----
DETECT WINDOW: 20
THRESHOLD OF PLUGINS: 60
THRESHOLD: 60

Thank you, Tim!

I am using this dataset: Just Aadhaar SMALL100.csv

tsegall / fta

Aadhaar ID is not detected as a semantic type #26