datajoint / datajoint-python

Relational data pipelines for the science lab
https://datajoint.com/docs
GNU Lesser General Public License v2.1
169 stars 84 forks source link

Filepath: Restricted delete fails #687

Open guzman-raphael opened 5 years ago

guzman-raphael commented 5 years ago

(Position & 'pos_id=0').delete()

yields:

AssertionError                            Traceback (most recent call last)
<ipython-input-26-867a7b0fa805> in <module>
      1 #Only deletes record not S3 object
----> 2 (Position & 'pos_id=0').delete()

~/.local/lib/python3.7/site-packages/datajoint/table.py in delete(self, verbose)
    407                         delete_list[r[0]].proj(**{a: b for a, b in r[1]['attr_map'].items()})
    408                         if isinstance(r, _rename_map) else r)
--> 409                     for r in restrictions[name]])
    410         if safe:
    411             print('About to delete:')

~/.local/lib/python3.7/site-packages/datajoint/expression.py in restrict(self, restriction)
    349         string, or an AndList.
    350         """
--> 351         assert is_true(restriction) or not self.heading.expressions or isinstance(self, GroupBy), \
    352             "Cannot restrict a projection with renamed attributes in place."
    353         self.restriction.append(restriction)

~/.local/lib/python3.7/site-packages/datajoint/table.py in heading(self)
     53             else:
     54                 self._heading.init_from_database(
---> 55                     self.connection, self.database, self.table_name, self.declaration_context)
     56         return self._heading
     57 

~/.local/lib/python3.7/site-packages/datajoint/heading.py in init_from_database(self, conn, database, table_name, context)
    228             # process adapted attribute types
    229             if special and TYPE_PATTERN['ADAPTED'].match(attr['type']):
--> 230                 assert context is not None, 'Declaration context is not set'
    231                 adapter_name = special['type']
    232                 try:

AssertionError: Declaration context is not set
dimitri-yatsenko commented 5 years ago

Need more details to reproduce the error. How was Position obtained?

guzman-raphael commented 3 years ago

@dimitri-yatsenko Good point, let me take some time to reproduce this properly and I will post the steps. I will also cross-reference it against a seemingly related issue posted on the DataJoint Slack here.

guzman-raphael commented 3 years ago

After further review, I have managed to properly reproduce the issue. It appears to be associated with using filepath in conjunction with adapted_type. Below is a simplified example that illustrates this. Also, as previously thought, this issue does appear to be related to the one at the Slack link referenced above. I have not pinpointed the root cause yet, but at least the issue is now fully specified.

Source:

from minio import Minio
from os import makedirs
import datajoint as dj
from datajoint import errors
from json import loads, dumps
from pathlib import Path

# Create the 'datajoint.registry' bucket on the local Minio server (same name as the store's `bucket` below)
client = Minio(
    'fakeservices.datajoint.io',
    access_key='datajoint',
    secret_key='datajoint',
    region="my-region",
)
client.make_bucket('datajoint.registry')

# Create the local staging directory that the 'remote' store is given as its `stage` below
makedirs("/home/dja/contacts")

# Set up DataJoint: feature toggles, version printout, and the 'remote' S3-protocol store
errors._switch_adapted_types(True)  # private toggle; presumably enables adapted attribute types
errors._switch_filepath_types(True)  # private toggle; presumably enables filepath attribute types
print(dj.__version__)  # shows up as 0.12.8 in the Result section
dj.config['stores'] = {'remote': dict(
        endpoint='fakeservices.datajoint.io',
        access_key='datajoint',
        secret_key='datajoint',
        bucket='datajoint.registry',
        stage='/home/dja/contacts',
        protocol='s3',
        location='contacts',
    )}

# Schema object used to decorate the table class declared further down
schema = dj.schema('registry')

class Json(dj.AttributeAdapter):  # adapter: contact dict <-> JSON file in the stage directory
    attribute_type = 'filepath@remote'

    @staticmethod
    def put(obj):
        """Write non-name fields as JSON in the stage directory; return the file path."""
        path = str(Path(dj.config['stores']['remote']['stage'],
                        f"{obj['last_name']}, {obj['first_name']}.json"))
        with open(path, 'w') as out:
            out.write(dumps({k: v for k, v in obj.items() if 'name' not in k}))
        return path

    @staticmethod
    def get(path):
        """Read the JSON file at `path` and add the names parsed from its file name."""
        last_name, first_name = Path(path).name[:-5].split(', ')
        with open(path, 'r') as src:  # context manager closes the handle instead of leaking it
            return dict(loads(src.read()), first_name=first_name, last_name=last_name)
json = Json()  # presumably resolved by name from `<json>` in the table definition

@schema
class CompanyResource(dj.Manual):  # table whose contact_details attribute uses the <json> adapted type
    definition = """
    employee_id     : serial
    ---
    position        : varchar(30)
    focus           : enum('tech', 'mgmt')
    contact_details : <json>
    """

# Insert two records (employee_id is not supplied) and print the table to verify
CompanyResource.insert([dict(position='Software Engineer',
                             focus='tech',
                             contact_details=dict(first_name='Raphael',
                                                  last_name='Guzman',
                                                  address='123 Fake St.',
                                                  phone='123-456-7890')),
                        dict(position='CEO',
                             focus='mgmt',
                             contact_details=dict(first_name='John',
                                                  last_name='Doe',
                                                  address='456 Mystery Ave.',
                                                  phone='098-765-4321'))])
print(CompanyResource())

# Attempt a restricted delete; this call raises the AssertionError shown under Result
(CompanyResource & 'employee_id=1').delete()

Result:

0.12.8
Connecting root@fakeservices.datajoint.io:3306
*employee_id   position       focus     contact_de
+------------+ +------------+ +-------+ +--------+
1              Software Engin tech      =BLOB=    
2              CEO            mgmt      =BLOB=    
 (Total: 2)
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-1-7bc680f46f69> in <module>
     81 
     82 # Attempt to delete a record
---> 83 (CompanyResource & 'employee_id=1').delete()

~/.local/lib/python3.8/site-packages/datajoint/table.py in delete(self, verbose)
    441         for name, table in delete_list.items():
    442             if not name.isdigit() and restrictions[name]:  # do not restrict by an empty list
--> 443                 table.restrict([
    444                     r.proj() if isinstance(r, FreeTable) else (
    445                         delete_list[r[0]].proj(**{a: b for a, b in r[1]['attr_map'].items()})

~/.local/lib/python3.8/site-packages/datajoint/expression.py in restrict(self, restriction)
    349         string, or an AndList.
    350         """
--> 351         assert is_true(restriction) or not self.heading.expressions or isinstance(self, GroupBy), \
    352             "Cannot restrict a projection with renamed attributes in place."
    353         self.restriction.append(restriction)

~/.local/lib/python3.8/site-packages/datajoint/table.py in heading(self)
     46             self._heading = Heading()  # instance-level heading
     47         if not self._heading and self.connection is not None:  # lazy loading of heading
---> 48             self._heading.init_from_database(
     49                 self.connection, self.database, self.table_name, self.declaration_context)
     50         return self._heading

~/.local/lib/python3.8/site-packages/datajoint/heading.py in init_from_database(self, conn, database, table_name, context)
    228             # process adapted attribute types
    229             if special and TYPE_PATTERN['ADAPTED'].match(attr['type']):
--> 230                 assert context is not None, 'Declaration context is not set'
    231                 adapter_name = special['type']
    232                 try:

AssertionError: Declaration context is not set
chrisroat commented 3 years ago

I have encountered this error using adapted types, as recommended in #947. I have a piece of code that examines a table for how much work is left (so it can spin up dask workers, if needed). It's bailing during evaluation of key_source.

The code is a bit complex, but I may be able to devise a simple example.

starmap/pipeline/execute.py:64: in populate
    work = table()._jobs_to_do(restrict) - table() - reserved
../../.local/share/virtualenvs/starmap-T47byR32/lib/python3.8/site-packages/datajoint/autopopulate.py:80: in _jobs_to_do
    todo = self.key_source
starmap/pipeline/pipeline.py:63: in key_source
    return super().key_source & (Pipeline.Processing * Pipeline.Specification)
../../.local/share/virtualenvs/starmap-T47byR32/lib/python3.8/site-packages/datajoint/autopopulate.py:46: in key_source
    self._key_source *= _rename_attributes(*q)
../../.local/share/virtualenvs/starmap-T47byR32/lib/python3.8/site-packages/datajoint/expression.py:235: in __mul__
    return self.join(other)
../../.local/share/virtualenvs/starmap-T47byR32/lib/python3.8/site-packages/datajoint/expression.py:261: in join
    assert_join_compatibility(self, other)
../../.local/share/virtualenvs/starmap-T47byR32/lib/python3.8/site-packages/datajoint/condition.py:68: in assert_join_compatibility
    expr2.heading.secondary_attributes)))
../../.local/share/virtualenvs/starmap-T47byR32/lib/python3.8/site-packages/datajoint/heading.py:102: in secondary_attributes
    return [k for k, v in self.attributes.items() if not v.in_key]
../../.local/share/virtualenvs/starmap-T47byR32/lib/python3.8/site-packages/datajoint/heading.py:89: in attributes
    self._init_from_database()   # lazy loading from database
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <[AssertionError('Declaration context is not set') raised in repr()] Heading object at 0x7f597e530340>

    def _init_from_database(self):
        """ initialize heading from an existing database table. """
        conn, database, table_name, context = (
            self.table_info[k] for k in ('conn', 'database', 'table_name', 'context'))
        info = conn.query('SHOW TABLE STATUS FROM `{database}` WHERE name="{table_name}"'.format(
            table_name=table_name, database=database), as_dict=True).fetchone()
        if info is None:
            if table_name == '~log':
                logger.warning('Could not create the ~log table')
                return
            raise DataJointError('The table `{database}`.`{table_name}` is not defined.'.format(
                table_name=table_name, database=database))
        self._table_status = {k.lower(): v for k, v in info.items()}
        cur = conn.query(
            'SHOW FULL COLUMNS FROM `{table_name}` IN `{database}`'.format(
                table_name=table_name, database=database), as_dict=True)

        attributes = cur.fetchall()

        rename_map = {
            'Field': 'name',
            'Type': 'type',
            'Null': 'nullable',
            'Default': 'default',
            'Key': 'in_key',
            'Comment': 'comment'}

        fields_to_drop = ('Privileges', 'Collation')

        # rename and drop attributes
        attributes = [{rename_map[k] if k in rename_map else k: v
                       for k, v in x.items() if k not in fields_to_drop}
                      for x in attributes]
        numeric_types = {
            ('float', False): np.float64,
            ('float', True): np.float64,
            ('double', False): np.float64,
            ('double', True): np.float64,
            ('tinyint', False): np.int64,
            ('tinyint', True): np.int64,
            ('smallint', False): np.int64,
            ('smallint', True): np.int64,
            ('mediumint', False): np.int64,
            ('mediumint', True): np.int64,
            ('int', False): np.int64,
            ('int', True): np.int64,
            ('bigint', False): np.int64,
            ('bigint', True): np.uint64}

        sql_literals = ['CURRENT_TIMESTAMP']

        # additional attribute properties
        for attr in attributes:

            attr.update(
                in_key=(attr['in_key'] == 'PRI'),
                database=database,
                nullable=attr['nullable'] == 'YES',
                autoincrement=bool(re.search(r'auto_increment', attr['Extra'], flags=re.I)),
                numeric=any(TYPE_PATTERN[t].match(attr['type']) for t in ('DECIMAL', 'INTEGER', 'FLOAT')),
                string=any(TYPE_PATTERN[t].match(attr['type']) for t in ('ENUM', 'TEMPORAL', 'STRING')),
                is_blob=bool(TYPE_PATTERN['INTERNAL_BLOB'].match(attr['type'])),
                uuid=False, is_attachment=False, is_filepath=False, adapter=None,
                store=None, is_external=False, attribute_expression=None)

            if any(TYPE_PATTERN[t].match(attr['type']) for t in ('INTEGER', 'FLOAT')):
                attr['type'] = re.sub(r'\(\d+\)', '', attr['type'], count=1)  # strip size off integers and floats
            attr['unsupported'] = not any((attr['is_blob'], attr['numeric'], attr['numeric']))
            attr.pop('Extra')

            # process custom DataJoint types
            special = re.match(r':(?P<type>[^:]+):(?P<comment>.*)', attr['comment'])
            if special:
                special = special.groupdict()
                attr.update(special)
            # process adapted attribute types
            if special and TYPE_PATTERN['ADAPTED'].match(attr['type']):
>               assert context is not None, 'Declaration context is not set'
E               AssertionError: Declaration context is not set

../../.local/share/virtualenvs/starmap-T47byR32/lib/python3.8/site-packages/datajoint/heading.py:239: AssertionError