Closed dcamper closed 5 years ago
64 should probably be a default and be configurable? (Side note: 64 worked well in the past as it was "small" and would include US States)
Agreed.
FWIW here is my "baby" implementation:
flatToDataset(lf) := FUNCTIONMACRO
RETURN DATASET(lf, RECORDOF(lf, LOOKUP), FLAT);
ENDMACRO;
lowCardinalityFields(inFile, maxCardinality = 64) := FUNCTIONMACRO
LOADXML('<xml/>');
#EXPORTXML(inFileFields, RECORDOF(inFile));
#DECLARE(recLevel);
#DECLARE(needsDelim);
#SET(recLevel, 0)
#FOR(inFileFields)
#FOR(field)
#IF(%{@isRecord}% = 1 OR %{@isDataset}% = 1)
#SET(recLevel, %recLevel% + 1)
#ELSEIF(%{@isEnd}% = 1)
#SET(recLevel, %recLevel% - 1)
#ELSEIF(%recLevel% = 0)
#EXPAND(%'@name'% + '_table') := CHOOSEN(TABLE(inFile, {STRING value := (STRING)#EXPAND(%'@name'%), UNSIGNED INTEGER4 rowcount := COUNT(GROUP)}, #EXPAND(%'@name'%), FEW), maxCardinality + 1);
#END
#END
#END
ValueRowCount := RECORD
STRING value;
UNSIGNED INTEGER4 rowcount;
END;
FieldValues := RECORD
STRING fieldID;
UNSIGNED INTEGER4 valueCount;
DATASET(ValueRowCount) values;
END;
retVal := DATASET([
#SET(recLevel, 0)
#SET(needsDelim, 0)
#FOR(inFileFields)
#FOR(field)
#IF(%{@isRecord}% = 1 OR %{@isDataset}% = 1)
#SET(recLevel, %recLevel% + 1)
#ELSEIF(%{@isEnd}% = 1)
#SET(recLevel, %recLevel% - 1)
#ELSEIF(%recLevel% = 0)
#IF(%needsDelim% = 1) , #END
{
%'@name'%,
COUNT(#EXPAND(%'@name'% + '_table')),
#EXPAND(%'@name'% + '_table')
}
#SET(needsDelim, 1)
#END
#END
#END
], FieldValues);
RETURN retVal(valueCount <= maxCardinality);
ENDMACRO;
inFile := flatToDataset('~progguide::exampledata::accounts');
y := lowCardinalityFields(inFile);
OUTPUT(y, NAMED('accounts'));
Close via PR 6f69c3ed44c6cefba20261932089f31478ee8fe9
For fields where cardinality <=64 values, show those values along with record counts.