zinggAI / zingg

Scalable identity resolution, entity resolution, data mastering and deduplication using ML
GNU Affero General Public License v3.0
950 stars 120 forks source link

TestZFrame validation methods should be moved to common methods #887

Open sonalgoyal opened 1 month ago

sonalgoyal commented 1 month ago

TestZFrame contains duplicated validation code that should be extracted into common helper methods, for example:

// Positional comparison: the element at index idx of sampleData is checked
// against the row at the same index of rows, field by field.
// NOTE(review): rows and fields are declared outside this excerpt; presumably
// rows was collected from zFrameSortedDesc and fields are the reflected fields
// of the sample class - confirm against the enclosing test.
for (int idx = 0; idx < sampleData.size(); idx++) {
            // Only indices below sampleData.size() are visited, so extra entries in
            // rows are never inspected by this loop.
            R row = rows.get(idx);
            for (Field column : fields) {
                String columnName  = column.getName();
                // Choose the typed ZFrame getter from the field's declared type.
                // NOTE(review): if Field is java.lang.reflect.Field, a primitive field
                // (e.g. int) reports int.class, not Integer.class, and would fall
                // through to the final else branch - confirm the sample class uses
                // boxed types.
                if (column.getType() == String.class) {
                    assertEquals(column.get(sampleData.get(idx)), zFrameSortedDesc.getAsString(row, columnName),
                            "value in ZFrame and sample input is not same");
                } else if (column.getType() == Integer.class) {
                    assertEquals(column.get(sampleData.get(idx)), zFrameSortedDesc.getAsInt(row, columnName),
                            "value in ZFrame and sample input is not same");
                } else if (column.getType() == Double.class) {
                    assertEquals(column.get(sampleData.get(idx)), zFrameSortedDesc.getAsDouble(row, columnName),
                            "value in ZFrame and sample input is not same");
                } else if (column.getType() == Long.class) {
                    assertEquals(column.get(sampleData.get(idx)), zFrameSortedDesc.getAsLong(row, columnName),
                            "value in ZFrame and sample input is not same");
                } else {
                    // Any field type other than String, Integer, Double or Long aborts
                    // the check with an exception.
                    throw new Exception("Not a valid data type");
                }
            }
        }

and also:

// Order-independent comparison: for every expected Person, scan rows for the
// first row whose columns all equal the string form of the Person's field
// values, and count at most one match per Person.
// NOTE(review): rows, fields and matchedCount are declared outside this
// excerpt - confirm matchedCount starts at 0 in the enclosing test.
for (Person schema : sampleDataWithDistinctSurnameAndPostCode) {
            for (R row : rows) {
                boolean rowMatched = true;
                for (Field column : fields) {
                    String columnName = column.getName();
                    // Every column is compared as a string, whatever the field's type.
                    // A null field value would raise NullPointerException at toString().
                    if (!column.get(schema).toString().
                            equals(zFrame.getAsString(row, columnName))) {
                        rowMatched = false;
                        break;
                    }
                }
                if (rowMatched) {
                    // Stop at the first matching row so each Person adds at most one
                    // to matchedCount. The matched row is not removed from rows, so it
                    // can also satisfy a later identical Person.
                    matchedCount++;
                    break;
                }
            }
        }

        // The match count must equal both the number of rows and the number of
        // expected sample entries.
        assertEquals(rows.size(), matchedCount,
                "rows count is not as expected");
        assertEquals(sampleDataWithDistinctSurnameAndPostCode.size(), matchedCount,
                "rows count is not as expected");