linkml / schemasheets

Structure your data in a FAIR way using Google Sheets or TSVs. These are then converted to LinkML, and from there to other formats.
https://linkml.io/schemasheets/
41 stars 5 forks source link

Three tests failing on Mark's laptop but not in GH Actions or on several other people's computers #64

Closed turbomam closed 1 year ago

turbomam commented 1 year ago
FAILED                               [ 30%]
test_schema_exporter.py:174 (test_types)
self = SchemaMaker(schema=SchemaDefinition(name='TEMP', id_prefixes=[], definition_uri=None, local_names={}, conforms_to=None...), element_map=None, metamodel=None, cardinality_vocabulary=None, default_name=None, unique_slots=None, gsheet_id=None)
file_name = '/Users/MAM/Documents/gitrepos/schemasheets/tests/output/mini.tsv'
delimiter = '\t'

    def merge_sheet(self, file_name: str, delimiter='\t') -> None:
        """
        Merge information from the given schema sheet into the current schema

        :param file_name: schema sheet
        :param delimiter: default is tab
        :return:
        """
        logging.info(f'READING {file_name} D={delimiter}')
        #with self.ensure_file(file_name) as tsv_file:
        #    reader = csv.DictReader(tsv_file, delimiter=delimiter)
        with self.ensure_csvreader(file_name, delimiter=delimiter) as reader:
            schemasheet = SchemaSheet.from_dictreader(reader)
            line_num = schemasheet.start_line_number
            # TODO: check why this doesn't work
            #while rows and all(x for x in rows[-1] if not x):
            #    print(f'TRIMMING: {rows[-1]}')
            #    rows.pop()
            logging.info(f'ROWS={len(schemasheet.rows)}')
            for row in schemasheet.rows:
                try:
>                   self.add_row(row, schemasheet.table_config)

../schemasheets/schemamaker.py:105: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = SchemaMaker(schema=SchemaDefinition(name='TEMP', id_prefixes=[], definition_uri=None, local_names={}, conforms_to=None...), element_map=None, metamodel=None, cardinality_vocabulary=None, default_name=None, unique_slots=None, gsheet_id=None)
row = {'Desc': 'my string', 'Extends': 'string', 'Type': '', 'base': '', ...}
table_config = TableConfig(name=None, columns={'Type': ColumnConfig(name='Type', maps_to='type', settings=ColumnSettings(curie_prefix...], all_of=[]), is_element_type=None)}, column_by_element_type={'type': 'Type'}, metatype_column=None, name_column=None)

    def add_row(self, row: Dict[str, Any], table_config: TableConfig):
>       for element in self.row_focal_element(row, table_config):

../schemasheets/schemamaker.py:111: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = SchemaMaker(schema=SchemaDefinition(name='TEMP', id_prefixes=[], definition_uri=None, local_names={}, conforms_to=None...), element_map=None, metamodel=None, cardinality_vocabulary=None, default_name=None, unique_slots=None, gsheet_id=None)
row = {'Desc': 'my string', 'Extends': 'string', 'Type': '', 'base': '', ...}
table_config = TableConfig(name=None, columns={'Type': ColumnConfig(name='Type', maps_to='type', settings=ColumnSettings(curie_prefix...], all_of=[]), is_element_type=None)}, column_by_element_type={'type': 'Type'}, metatype_column=None, name_column=None)
column = None

    def row_focal_element(self, row: Dict[str, Any], table_config: TableConfig,
                          column: COL_NAME = None) -> Generator[None, Element, None]:
        """
        Each row must have a single focal element, i.e the row is about a class, a slot, an enum, ...

        :param row:
        :param table_config:
        :return:
        """
        vmap = {}
        main_elt = None
        if table_config.metatype_column:
            tc = table_config.metatype_column
            if tc in row:
                typ = self.normalize_value(row[tc], table_config.columns[tc])
                if not table_config.name_column:
                    raise ValueError(f'name column must be set when type column ({tc}) is set; row={row}')
                name_val = row[table_config.name_column]
                if not name_val:
                    raise ValueError(f'name column must be set when type column ({tc}) is set')
                if typ == 'class':
                    vmap[T_CLASS] = [self.get_current_element(ClassDefinition(name_val))]
                elif typ == 'slot':
                    vmap[T_SLOT] = [self.get_current_element(SlotDefinition(name_val))]
                else:
                    raise ValueError(f'Unknown metatype: {typ}')
        if table_config.column_by_element_type is None:
            raise ValueError(f'No table_config.column_by_element_type')
        for k, elt_cls in tmap.items():
            if k in table_config.column_by_element_type:
                col = table_config.column_by_element_type[k]
                if col in row:
                    v = self.normalize_value(row[col])
                    if v:
                        if '|' in v:
                            vs = v.split('|')
                        else:
                            vs = [v]
                        if elt_cls == Prefix:
                            if len(vs) != 1:
                                raise ValueError(f'Cardinality of prefix col must be 1; got: {vs}')
                            pfx = Prefix(vs[0], 'TODO')
                            self.schema.prefixes[pfx.prefix_prefix] = pfx
                            vmap[k] = [pfx]
                        elif elt_cls == SchemaDefinition:
                            if len(vs) != 1:
                                raise ValueError(f'Cardinality of schema col must be 1; got: {vs}')
                            self.schema.name = vs[0]
                            vmap[k] = [self.schema]
                        else:
                            vmap[k] = [self.get_current_element(elt_cls(v)) for v in vs]
        def check_excess(descriptors):
            diff = set(vmap.keys()) - set(descriptors + [T_SCHEMA])
            if len(diff) > 0:
                raise ValueError(f'Excess slots: {diff}')
        if column:
            cc = table_config.columns[column]
            if cc.settings.applies_to_class:
                if T_CLASS in vmap and vmap[T_CLASS]:
                    raise ValueError(f'Cannot use applies_to_class in class-focused row')
                else:
                    cls = self.get_current_element(ClassDefinition(cc.settings.applies_to_class))
                    vmap[T_CLASS] = [cls]
        if T_SLOT in vmap:
            check_excess([T_SLOT, T_CLASS])
            if len(vmap[T_SLOT]) != 1:
                raise ValueError(f'Cardinality of slot field must be 1; got {vmap[T_SLOT]}')
            main_elt = vmap[T_SLOT][0]
            if T_CLASS in vmap:
                # TODO: attributes
                c: ClassDefinition
                for c in vmap[T_CLASS]:
                    #c: ClassDefinition = vmap[T_CLASS]
                    if main_elt.name not in c.slots:
                        c.slots.append(main_elt.name)
                    if self.unique_slots:
                        yield main_elt
                    else:
                        c.slot_usage[main_elt.name] = SlotDefinition(main_elt.name)
                        main_elt = c.slot_usage[main_elt.name]
                        yield main_elt
            else:
                yield main_elt
        elif T_CLASS in vmap:
            check_excess([T_CLASS])
            for main_elt in vmap[T_CLASS]:
                yield main_elt
        elif T_ENUM in vmap:
            check_excess([T_ENUM, T_PV])
            if len(vmap[T_ENUM]) != 1:
                raise ValueError(f'Cardinality of enum field must be 1; got {vmap[T_ENUM]}')
            this_enum: EnumDefinition = vmap[T_ENUM][0]
            if T_PV in vmap:
                for pv in vmap[T_PV]:
                    #pv = PermissibleValue(text=v)
                    this_enum.permissible_values[pv.text] = pv
                    yield pv
            else:
                yield this_enum
        elif T_PREFIX in vmap:
            for main_elt in vmap[T_PREFIX]:
                yield main_elt
        elif T_TYPE in vmap:
            for main_elt in vmap[T_TYPE]:
                yield main_elt
        elif T_SUBSET in vmap:
            for main_elt in vmap[T_SUBSET]:
                yield main_elt
        elif T_SCHEMA in vmap:
            for main_elt in vmap[T_SCHEMA]:
                yield main_elt
        else:
>           raise ValueError(f'Could not find a focal element for {row}')
E           ValueError: Could not find a focal element for {'Type': '', 'base': '', 'uri': '', 'Desc': 'my string', 'Extends': 'string'}

../schemasheets/schemamaker.py:318: ValueError

The above exception was the direct cause of the following exception:

    def test_types():
        """
        tests a specification that is dedicated to types
        """
        sb = SchemaBuilder()
        schema = sb.schema
        # TODO: add this functionality to SchemaBuilder
        t = TypeDefinition('MyString', description='my string', typeof='string')
        schema.types[t.name] = t
>       _roundtrip(schema, TYPES_SPEC)

test_schema_exporter.py:184: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
test_schema_exporter.py:94: in _roundtrip
    schema2 = sm.create_schema(MINISHEET)
../schemasheets/schemamaker.py:61: in create_schema
    self.merge_sheet(f, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = SchemaMaker(schema=SchemaDefinition(name='TEMP', id_prefixes=[], definition_uri=None, local_names={}, conforms_to=None...), element_map=None, metamodel=None, cardinality_vocabulary=None, default_name=None, unique_slots=None, gsheet_id=None)
file_name = '/Users/MAM/Documents/gitrepos/schemasheets/tests/output/mini.tsv'
delimiter = '\t'

    def merge_sheet(self, file_name: str, delimiter='\t') -> None:
        """
        Merge information from the given schema sheet into the current schema

        :param file_name: schema sheet
        :param delimiter: default is tab
        :return:
        """
        logging.info(f'READING {file_name} D={delimiter}')
        #with self.ensure_file(file_name) as tsv_file:
        #    reader = csv.DictReader(tsv_file, delimiter=delimiter)
        with self.ensure_csvreader(file_name, delimiter=delimiter) as reader:
            schemasheet = SchemaSheet.from_dictreader(reader)
            line_num = schemasheet.start_line_number
            # TODO: check why this doesn't work
            #while rows and all(x for x in rows[-1] if not x):
            #    print(f'TRIMMING: {rows[-1]}')
            #    rows.pop()
            logging.info(f'ROWS={len(schemasheet.rows)}')
            for row in schemasheet.rows:
                try:
                    self.add_row(row, schemasheet.table_config)
                    line_num += 1
                except ValueError as e:
>                   raise SchemaSheetRowException(f'Error in line {line_num}, row={row}') from e
E                   schemasheets.schemamaker.SchemaSheetRowException: Error in line 2, row={'Type': '', 'base': '', 'uri': '', 'Desc': 'my string', 'Extends': 'string'}

../schemasheets/schemamaker.py:108: SchemaSheetRowException
turbomam commented 1 year ago

I was also getting the same errors on my Ubuntu machine.

I deleted the repo, deleted the PyCharm interpreter, re-cloned, re-ran `poetry install`, created a new PyCharm interpreter, and then all tests passed.

Trying on LBL MacBook now.

turbomam commented 1 year ago

The fresh start described above also worked on the LBL MacBook.