manzt / zarrita.js

A JavaScript toolkit for working with chunked, compressed, n-dimensional arrays
https://zarrita.dev
MIT License
38 stars 5 forks source link

Indexing with get "Cannot read properties of undefined (reading 'TypedArray')" #191

Closed jamesm131 closed 2 weeks ago

jamesm131 commented 3 weeks ago

I recently ran an update on my node modules and ran into this new error. (I'm having a hard time nailing down exactly which version introduced it). I get the error "Cannot read properties of undefined (reading 'TypedArray')" when using get to access zarr data, but not when using getChunk().

I've had a poke around in the code and found that in the indexing package, the context variable is always undefined because _internal_get_array_context cannot view the zarrita context symbol on the array object.

I've not had much experience in ts/js package development, so the best I could do on my own was to edit the @zarrita/indexing/dist/src/get.js file with the below changes to get things working.

This is working as expected, though I'm aware that this is quite hacky and likely has some consequences I haven't thought of, but I couldn't get the arr object to recognise the CONTEXT_MARKER symbol in the hierarchy file, nor could I properly export the CONTEXT_MARKER symbol and import it alongside the array getter (a consequence of the crude ad-hoc editing of node-modules).

I apologise if this is a consequence of a user error. I am wrapping the AsyncReadable class with aws4fetch to use with an S3 bucket, but I don't believe that this would be the cause of the issue. (code attached below just in case there is something obvious).

I'd be happy to be of more help if you can point me in the right direction, but like I said, I've not got much experience in developing js/ts packages.

// @zarrita/indexing/dist/src/get.js

// updated imports
import { KeyError } from "@zarrita/core";
// import { _internal_get_array_context, KeyError } from "@zarrita/core";
import { BasicIndexer } from "./indexer.js";
import { create_queue } from "./util.js";
// Read element `idx` from either a Map-like container (anything exposing a
// `get` method) or a plain indexable array. Used below to pull a scalar out
// of `out.data` when the selection collapses to zero dimensions.
function unwrap(arr, idx) {
    if ("get" in arr) {
        return arr.get(idx);
    }
    return arr[idx];
}
// Workaround context getter: zarrita stashes its per-array context under a
// module-private symbol, so fetch the first own symbol found on the object
// and read the value stored there.
// NOTE(review): this assumes the context symbol is the only (or first) own
// symbol on `obj`; any other library attaching a symbol first would break it.
export function _local_get_context(obj) {
    const [context_symbol] = Object.getOwnPropertySymbols(obj);
    return obj[context_symbol];
}

// Read `selection` from the zarr array `arr` into a freshly allocated,
// contiguous output buffer, fetching every intersecting chunk (concurrently,
// via a task queue) and copying each chunk's slice into place.
// Returns a scalar when the selection collapses to zero dimensions,
// otherwise the prepared output object from `setter.prepare`.
export async function get(arr, selection, opts, setter) {
    // subbed in the local context getter
    const context = _local_get_context(arr);
    // const context = _internal_get_array_context(arr);
    // Plan which chunks intersect the selection and how each maps into `out`.
    const indexer = new BasicIndexer({
        selection,
        shape: arr.shape,
        chunk_shape: arr.chunks,
    });
    // Allocate the flat output (length = product of the selected shape) with
    // the array's element type, then wrap it with shape/stride metadata.
    // NOTE(review): `opts` is dereferenced unconditionally here and below —
    // presumably callers always pass it; confirm before relying on this.
    const out = setter.prepare(new context.TypedArray(indexer.shape.reduce((a, b) => a * b, 1)), indexer.shape, context.get_strides(indexer.shape, opts.order));
    const queue = opts.create_queue?.() ?? create_queue();
    for (const { chunk_coords, mapping } of indexer) {
        // Schedule each chunk read; on success copy its slice into `out`.
        queue.add(() => arr.getChunk(chunk_coords, opts.opts)
            .then(({ data, shape, stride }) => {
            const chunk = setter.prepare(data, shape, stride);
            setter.set_from_chunk(out, chunk, mapping);
        })
            .catch((err) => {
            // re-throw error if not a missing chunk
            if (!(err instanceof KeyError))
                throw err;
            // KeyError, we need to fill the corresponding array
            // (a falsy fill_value is skipped — the TypedArray is already zeroed)
            if (context.fill_value) {
                setter.set_scalar(out, mapping
                    .map((m) => m.to)
                    .filter((s) => s !== null), context.fill_value);
            }
        }));
    }
    // Wait for every pending chunk read/copy before returning.
    await queue.onIdle();
    // If the final out shape is empty, we just return a scalar.
    return indexer.shape.length === 0 ? unwrap(out.data, 0) : out;
}

My AsyncReadable wrapper code for reference:

//s3-fetch-store.ts
import type { AwsClient } from 'aws4fetch';
import type { AbsolutePath, AsyncReadable, RangeQuery } from "@zarrita/storage";

/**
 * Perform an HTTP fetch, optionally restricted to a byte range.
 *
 * When both `offset` and `length` are provided, a `Range` header requesting
 * `length` bytes starting at `offset` is merged into a copy of `opts`
 * (the caller's options object is never mutated).
 */
export function fetch_range(
    url: string | URL,
    offset?: number,
    length?: number,
    opts: RequestInit = {},
) {
    const ranged = offset !== undefined && length !== undefined;
    if (ranged) {
        opts = {
            ...opts,
            headers: {
                ...opts.headers,
                Range: `bytes=${offset}-${offset + length - 1}`,
            },
        };
    }
    return fetch(url, opts);
}

/**
 * Combine store-level and per-request fetch options.
 *
 * Top-level request fields win over store defaults; `headers` are merged
 * shallowly, with request headers winning on key collisions.
 */
export function merge_init(
    storeOverrides: RequestInit,
    requestOverrides: RequestInit,
) {
    // Request overrides take precedence over storeOverrides.
    const headers = {
        ...storeOverrides.headers,
        ...requestOverrides.headers,
    };
    return { ...storeOverrides, ...requestOverrides, headers };
}

/**
 * Resolve an absolute zarr key (e.g. "/group/arr/.zarray") against the
 * store's root URL, treating the root as a directory.
 *
 * @param root - the store root; a string or a URL. Never mutated.
 * @param path - absolute key beginning with "/".
 * @returns resolved URL, carrying over the root's search params (which plain
 *   relative-URL resolution would otherwise drop).
 */
function resolve(root: string | URL, path: AbsolutePath): URL {
    // Always clone: the original code mutated a caller-owned URL object in
    // place when appending the trailing slash. `new URL` accepts both forms.
    const base = new URL(root);
    if (!base.pathname.endsWith("/")) {
        // ensure trailing slash so that base is resolved as _directory_
        base.pathname += "/";
    }
    const resolved = new URL(path.slice(1), base);
    // copy search params to new URL
    resolved.search = base.search;
    return resolved;
}

/**
 * Map an HTTP response to the bytes of a store value.
 *
 * 404 and 403 are both treated as "key not found" and yield `undefined`
 * (presumably because S3 can answer 403 for absent keys without list
 * permission — confirm); 200 (full) and 206 (partial content) yield the
 * body bytes; any other status throws.
 */
async function handle_response(
    response: Response,
): Promise<Uint8Array | undefined> {
    switch (response.status) {
        case 403:
        case 404:
            return undefined;
        case 200:
        case 206:
            return new Uint8Array(await response.arrayBuffer());
        default:
            throw new Error(
                `Unexpected response status ${response.status} ${response.statusText}`,
            );
    }
}

/**
 * Request the final `suffix_length` bytes of an object by sending a signed
 * suffix `Range` header (`bytes=-N`) through the aws4fetch client.
 */
async function fetch_suffix(
    url: URL,
    suffix_length: number,
    init: RequestInit,
    aws: AwsClient,
): Promise<Response> {
    const headers = { ...init.headers, Range: `bytes=-${suffix_length}` };
    return aws.fetch(url.href, { ...init, headers });
}

/**
 * Readonly store based on AWS S3 using the aws4fetch library.
 *
 * Implements zarrita's `AsyncReadable` interface: `get` fetches a whole
 * object, `getRange` fetches a byte range (offset/length or suffix).
 * Per-call `RequestInit` options are merged over the store-level
 * `overrides` (per-call values win; headers merge shallowly).
 */
export class S3FetchStore implements AsyncReadable<RequestInit> {
    #overrides: RequestInit;
    #aws: AwsClient;

    /**
     * @param url - bucket/prefix root; keys are resolved beneath it.
     * @param aws - configured aws4fetch client used to sign requests.
     * @param options.overrides - default `RequestInit` merged into every request.
     */
    constructor(
        public url: string | URL,
        aws: AwsClient,
        options: { overrides?: RequestInit } = {},
    ) {
        this.#overrides = options.overrides ?? {};
        this.#aws = aws;
    }

    #merge_init(overrides: RequestInit) {
        return merge_init(this.#overrides, overrides);
    }

    /** Fetch the entire object at `key`; `undefined` when the key is absent. */
    async get(
        key: AbsolutePath,
        options: RequestInit = {},
    ): Promise<Uint8Array | undefined> {
        // (removed leftover `console.log("key", key)` debug statement)
        const href = resolve(this.url, key).href;
        const response = await this.#aws.fetch(href, this.#merge_init(options));
        return handle_response(response);
    }

    /**
     * Fetch a byte range of the object at `key`.
     * Supports `{ suffixLength }` (last N bytes) and `{ offset, length }`
     * queries; resolves to `undefined` when the key is absent.
     */
    async getRange(
        key: AbsolutePath,
        range: RangeQuery,
        options: RequestInit = {},
    ): Promise<Uint8Array | undefined> {
        const url = resolve(this.url, key);
        const init = this.#merge_init(options);
        let response: Response;
        if ("suffixLength" in range) {
            // Suffix ranges are signed and sent through aws4fetch directly.
            response = await fetch_suffix(
                url,
                range.suffixLength,
                init,
                this.#aws,
            );
        } else {
            // Sign first, then issue the ranged request with plain fetch, so
            // the Range header is added after signing.
            // NOTE(review): if S3 requires the Range header to be covered by
            // the signature, adding it post-sign may be rejected — confirm
            // against aws4fetch's signing behavior.
            const awsInit = await this.#aws.sign(url.href, init);
            response = await fetch_range(url, range.offset, range.length, awsInit);
        }
        return handle_response(response);
    }
}
manzt commented 3 weeks ago

Thanks for opening the issue. This makes me think you might have slightly different versions of zarrita's different packages. Can you share your package.json?

Btw, this is a clever hack! Not something I want end users to rely on, so I'm sure we'll find a fix!

jamesm131 commented 2 weeks ago

Ah thank you, it was a versioning issue! I thought I was using the most recent version of all packages, but pnpm update didn't update the zarrita packages (I'm not sure if this is because of their next flag, some distribution method, or some weird config on my machine).

Sorry for raising this as an issue. I had a feeling it was a version problem as it occurred after I ran a pnpm update of @zarrita/core but I was having trouble reproducing. Thank you again!

For completeness sake, the versions that were not working, and the updated versions below. // old "@zarrita/core": "0.1.0-next.8", "@zarrita/indexing": "0.1.0-next.14", "@zarrita/ndarray": "0.1.0-next.10", "@zarrita/storage": "0.1.0-next.4", // new "@zarrita/core": "0.1.0-next.12", "@zarrita/indexing": "0.1.0-next.14", "@zarrita/ndarray": "0.1.0-next.14", "@zarrita/storage": "0.1.0-next.5",