googleapis / python-datastore

Apache License 2.0
78 stars 44 forks source link

Bug querying with offset #547

Open jacobg opened 4 days ago

jacobg commented 4 days ago

Sorry I don't have a good repro for you because I'm only repro-ing it on my data, but here is a code that seems to clearly demonstrate a bug:

import time

from google.cloud import datastore

def _create_query(ds_client: datastore.Client) -> datastore.Query:
    query = ds_client.query(kind='my-kind')
    query.add_filter(filter=datastore.query.PropertyFilter('active', '=', True))
    query.add_filter(filter=datastore.query.PropertyFilter('account', '=', 111))
    query.add_filter(filter=datastore.query.PropertyFilter('roles', '=', 'w'))
    query.add_filter(filter=datastore.query.PropertyFilter('labels', '=', 222))
    return query

def run():
    ds_client = datastore.Client(project='my-project')

    start = time.time()
    count = 0
    for entity in _create_query(ds_client).fetch():
        count += 1
    end = time.time()
    print(f'Fetched all {count} entities in {end - start} seconds')
    real_count = count

    for offset in range(0, real_count + 2):
        start = time.time()
        count = 0
        for entity in _create_query(ds_client).fetch(offset=offset):
            count += 1
        end = time.time()
        print(f'Fetched at offset={offset} {count} entities in {end - start} seconds')

if __name__ == '__main__':
    run()

In addition, I added two print statements in google/cloud/datastore/query.py's _next_page while loop:

        while (
            response_pb.batch.more_results == _NOT_FINISHED
            and response_pb.batch.skipped_results < query_pb.offset
        ):
            # We haven't finished processing. A likely reason is we haven't
            # skipped all of the results yet. Don't return any results.
            # Instead, rerun query, adjusting offsets. Datastore doesn't process
            # more than 1000 skipped results in a query.
            old_query_pb = query_pb
            query_pb = query_pb2.Query()
            query_pb._pb.CopyFrom(old_query_pb._pb)  # copy for testability
            query_pb.start_cursor = response_pb.batch.skipped_cursor
            query_pb.offset -= response_pb.batch.skipped_results
            print(f'response_pb.batch.skipped_cursor = {response_pb.batch.skipped_cursor}')
            print(f'response_pb.batch.skipped_results = {response_pb.batch.skipped_results}')

            request = {
                "project_id": self._query.project,
                "partition_id": partition_id,
                "read_options": read_options,
                "query": query_pb,
            }
            helpers.set_database_id_to_request(request, self.client.database)

            response_pb = self.client._datastore_api.run_query(
                request=request,
                **kwargs,
            )

This is the output running the script:

Fetched all 16 entities in 1.7923529148101807 seconds
Fetched at offset=0 16 entities in 1.6426568031311035 seconds
Fetched at offset=1 15 entities in 1.7998042106628418 seconds
Fetched at offset=2 14 entities in 1.525015115737915 seconds
Fetched at offset=3 13 entities in 1.4684479236602783 seconds
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xdb\x81\x83\x8c\xb5\x91\xe4\x03\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
Fetched at offset=4 12 entities in 1.6105940341949463 seconds
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xdb\x81\x83\x8c\xb5\x91\xe4\x03\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
Fetched at offset=5 11 entities in 1.4919788837432861 seconds
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xdb\x81\x83\x8c\xb5\x91\xe4\x03\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
Fetched at offset=6 10 entities in 1.6434409618377686 seconds
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xdb\x81\x83\x8c\xb5\x91\xe4\x03\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xcd\x9e\x8d\xc1\xf7\xb4\xcf\x06\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
Fetched at offset=7 9 entities in 1.6705551147460938 seconds
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xdb\x81\x83\x8c\xb5\x91\xe4\x03\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xcd\x9e\x8d\xc1\xf7\xb4\xcf\x06\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
Fetched at offset=8 8 entities in 1.490036964416504 seconds
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xdb\x81\x83\x8c\xb5\x91\xe4\x03\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xcd\x9e\x8d\xc1\xf7\xb4\xcf\x06\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
Fetched at offset=9 7 entities in 1.263127088546753 seconds
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xdb\x81\x83\x8c\xb5\x91\xe4\x03\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xcd\x9e\x8d\xc1\xf7\xb4\xcf\x06\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\x80\x80\xf0\xde\xf1\xc5\x9c\x08\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
Fetched at offset=10 6 entities in 1.1788661479949951 seconds
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xdb\x81\x83\x8c\xb5\x91\xe4\x03\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xcd\x9e\x8d\xc1\xf7\xb4\xcf\x06\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\x80\x80\xf0\xde\xf1\xc5\x9c\x08\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xda\xc7\xf2\xf9\xab\xbe\xab\x08\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 1
Fetched at offset=11 5 entities in 1.3047678470611572 seconds
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xdb\x81\x83\x8c\xb5\x91\xe4\x03\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xcd\x9e\x8d\xc1\xf7\xb4\xcf\x06\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\x80\x80\xf0\xde\xf1\xc5\x9c\x08\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xda\xc7\xf2\xf9\xab\xbe\xab\x08\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 1
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\x80\x80\xf0\xde\xf1\xc5\xdc\x08\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 1
Fetched at offset=12 4 entities in 1.4790778160095215 seconds
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xdb\x81\x83\x8c\xb5\x91\xe4\x03\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xcd\x9e\x8d\xc1\xf7\xb4\xcf\x06\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\x80\x80\xf0\xde\xf1\xc5\x9c\x08\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xda\xc7\xf2\xf9\xab\xbe\xab\x08\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 1
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\x80\x80\xf0\xde\xf1\xc5\xdc\x08\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 1
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xb4\xa6\x81\xd3\x84\xc7\x88\t\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 1
response_pb.batch.skipped_cursor = b''
response_pb.batch.skipped_results = 0
Fetched at offset=13 15 entities in 2.0598793029785156 seconds
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xdb\x81\x83\x8c\xb5\x91\xe4\x03\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xcd\x9e\x8d\xc1\xf7\xb4\xcf\x06\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\x80\x80\xf0\xde\xf1\xc5\x9c\x08\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xda\xc7\xf2\xf9\xab\xbe\xab\x08\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 1
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\x80\x80\xf0\xde\xf1\xc5\xdc\x08\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 1
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xb4\xa6\x81\xd3\x84\xc7\x88\t\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 1
response_pb.batch.skipped_cursor = b''
response_pb.batch.skipped_results = 0
Fetched at offset=14 14 entities in 2.045603036880493 seconds
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xdb\x81\x83\x8c\xb5\x91\xe4\x03\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xcd\x9e\x8d\xc1\xf7\xb4\xcf\x06\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\x80\x80\xf0\xde\xf1\xc5\x9c\x08\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xda\xc7\xf2\xf9\xab\xbe\xab\x08\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 1
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\x80\x80\xf0\xde\xf1\xc5\xdc\x08\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 1
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xb4\xa6\x81\xd3\x84\xc7\x88\t\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 1
response_pb.batch.skipped_cursor = b''
response_pb.batch.skipped_results = 0
Fetched at offset=15 13 entities in 2.3071401119232178 seconds
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xdb\x81\x83\x8c\xb5\x91\xe4\x03\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xcd\x9e\x8d\xc1\xf7\xb4\xcf\x06\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\x80\x80\xf0\xde\xf1\xc5\x9c\x08\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xda\xc7\xf2\xf9\xab\xbe\xab\x08\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 1
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\x80\x80\xf0\xde\xf1\xc5\xdc\x08\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 1
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xb4\xa6\x81\xd3\x84\xc7\x88\t\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 1
response_pb.batch.skipped_cursor = b''
response_pb.batch.skipped_results = 0
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xdb\x81\x83\x8c\xb5\x91\xe4\x03\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
Fetched at offset=16 12 entities in 2.1022279262542725 seconds
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xdb\x81\x83\x8c\xb5\x91\xe4\x03\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xcd\x9e\x8d\xc1\xf7\xb4\xcf\x06\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\x80\x80\xf0\xde\xf1\xc5\x9c\x08\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xda\xc7\xf2\xf9\xab\xbe\xab\x08\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 1
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\x80\x80\xf0\xde\xf1\xc5\xdc\x08\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 1
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xb4\xa6\x81\xd3\x84\xc7\x88\t\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 1
response_pb.batch.skipped_cursor = b''
response_pb.batch.skipped_results = 0
response_pb.batch.skipped_cursor = b'\n7\x121j\x0cs~my-projectr!\x0b\x12\x14my_kind\x18\xdb\x81\x83\x8c\xb5\x91\xe4\x03\x0c\x18\x00 \x00'
response_pb.batch.skipped_results = 3
Fetched at offset=17 11 entities in 1.9469151496887207 seconds

It seems what happens is that if the while loop that skips results based on offset reaches the end of the actual resultset, then the response returns empty cursor. In that case, what the code should do is realize there are no results and immediately return. However, what the code does instead is move on to run the full query again for one more iteration, then exit the while loop, and fetch and return all the remaining results.

Am I understanding that correctly?

jacobg commented 4 days ago

Actually, maybe the backend is returning no cursor and 0 skipped results before it got through all the results it needs to offset.

It seems to repro when querying on multiple properties, with at least one of them being a list property. It seems to work correctly, if not querying any list property, or if only querying on one list property and nothing else.

Could that mean a backend bug, possibly when querying a list with an offset using zig-zag merge?