aws-amplify / amplify-js

A declarative JavaScript library for application development using cloud services.
https://docs.amplify.aws/lib/q/platform/js
Apache License 2.0
9.38k stars 2.1k forks source link

Amplify Gen 2 - Best Way to Query Phone Number Array vs DynamoDB Table #13365

Closed ChristopherGabba closed 4 weeks ago

ChristopherGabba commented 1 month ago

Before opening, please confirm:

JavaScript Framework

React Native

Amplify APIs

GraphQL API

Amplify Version

v6

Amplify Categories

api

Backend

Amplify Gen 2 (Preview)

Environment information

``` System: OS: macOS 14.4.1 CPU: (10) arm64 Apple M2 Pro Memory: 279.44 MB / 16.00 GB Shell: 5.9 - /bin/zsh Binaries: Node: 20.7.0 - /opt/homebrew/bin/node Yarn: 1.22.22 - /opt/homebrew/bin/yarn npm: 10.1.0 - /opt/homebrew/bin/npm Watchman: 2023.09.04.00 - /opt/homebrew/bin/watchman Browsers: Safari: 17.4.1 npmPackages: %name%: 0.1.0 @aws-amplify/backend: ^1.0.1 => 1.0.1 @aws-amplify/backend-cli: ^1.0.2 => 1.0.2 @aws-amplify/react-native: ^1.1.0 => 1.1.0 @aws-amplify/ui-react-native: ^2.2.0 => 2.2.0 @babel/core: ^7.20.0 => 7.24.5 @babel/plugin-proposal-export-namespace-from: ^7.18.9 => 7.18.9 @babel/plugin-proposal-optional-chaining: ^7.0.0 => 7.21.0 @babel/plugin-transform-arrow-functions: ^7.0.0 => 7.24.1 @babel/plugin-transform-nullish-coalescing-operator: ^7.0.0 => 7.24.1 @babel/plugin-transform-shorthand-properties: ^7.0.0 => 7.24.1 @babel/plugin-transform-template-literals: ^7.0.0 => 7.24.1 @babel/preset-env: ^7.20.0 => 7.24.5 @babel/runtime: ^7.20.0 => 7.24.5 @config-plugins/ffmpeg-kit-react-native: ^8.0.0 => 8.0.0 @expo-google-fonts/m-plus-1p: ^0.2.3 => 0.2.3 @expo-google-fonts/montserrat: ^0.2.3 => 0.2.3 @expo/config-plugins: ~8.0.0 => 8.0.4 (7.9.2) @expo/metro-runtime: ~3.1.3 => 3.1.3 @gorhom/bottom-sheet: ^4.6.1 => 4.6.1 @react-native-async-storage/async-storage: ^1.23.1 => 1.23.1 @react-native-community/netinfo: ^11.3.1 => 11.3.1 @react-navigation/bottom-tabs: ^6.5.20 => 6.5.20 @react-navigation/native: ^6.0.2 => 6.1.17 @react-navigation/native-stack: ^6.0.2 => 6.9.26 @sentry/react-native: ~5.22.0 => 5.22.2 @shopify/flash-list: 1.6.4 => 1.6.4 @types/i18n-js: 3.8.2 => 3.8.2 @types/jest: ^29.2.1 => 29.5.12 @types/lodash.filter: ^4.6.9 => 4.6.9 @types/react: ~18.2.14 => 18.2.79 @types/react-test-renderer: ^18.0.0 => 18.3.0 @typescript-eslint/eslint-plugin: ^5.59.0 => 5.62.0 @typescript-eslint/parser: ^5.59.0 => 5.62.0 ContextAPIMixpanel: 0.0.1 HelloWorld: 0.0.1 MixpanelDemo: 0.0.1 SimpleMixpanel: 0.0.1 apisauce: 3.0.1 => 3.0.1 aws-amplify: ^6.3.0 => 6.3.0 
aws-amplify/adapter-core: undefined () aws-amplify/analytics: undefined () aws-amplify/analytics/kinesis: undefined () aws-amplify/analytics/kinesis-firehose: undefined () aws-amplify/analytics/personalize: undefined () aws-amplify/analytics/pinpoint: undefined () aws-amplify/api: undefined () aws-amplify/api/server: undefined () aws-amplify/auth: undefined () aws-amplify/auth/cognito: undefined () aws-amplify/auth/cognito/server: undefined () aws-amplify/auth/enable-oauth-listener: undefined () aws-amplify/auth/server: undefined () aws-amplify/data: undefined () aws-amplify/data/server: undefined () aws-amplify/datastore: undefined () aws-amplify/in-app-messaging: undefined () aws-amplify/in-app-messaging/pinpoint: undefined () aws-amplify/push-notifications: undefined () aws-amplify/push-notifications/pinpoint: undefined () aws-amplify/storage: undefined () aws-amplify/storage/s3: undefined () aws-amplify/storage/s3/server: undefined () aws-amplify/storage/server: undefined () aws-amplify/utils: undefined () aws-cdk: ^2.141.0 => 2.141.0 aws-cdk-lib: ^2.141.0 => 2.141.0 babel-jest: ^29.2.1 => 29.7.0 cheerio: ^1.0.0-rc.12 => 1.0.0-rc.12 constructs: ^10.3.0 => 10.3.0 date-fns: ^2.30.0 => 2.30.0 esbuild: ^0.21.1 => 0.21.1 (0.20.2) eslint: 8.17.0 => 8.17.0 eslint-config-prettier: 8.5.0 => 8.5.0 eslint-config-standard: 17.0.0 => 17.0.0 eslint-plugin-import: 2.26.0 => 2.26.0 eslint-plugin-n: ^15.0.0 => 15.7.0 eslint-plugin-promise: 6.0.0 => 6.0.0 eslint-plugin-react: 7.30.0 => 7.30.0 eslint-plugin-react-native: 4.0.0 => 4.0.0 eslint-plugin-reactotron: ^0.1.2 => 0.1.4 ex: ^0.1.4 => 0.1.4 expo: ^51.0.2 => 51.0.2 expo-application: ~5.9.1 => 5.9.1 expo-av: ~14.0.4 => 14.0.4 expo-blur: ~13.0.2 => 13.0.2 expo-build-properties: ^0.12.1 => 0.12.1 expo-clipboard: ~6.0.3 => 6.0.3 expo-constants: ^16.0.1 => 16.0.1 expo-contacts: ~13.0.3 => 13.0.3 expo-dev-client: ~4.0.13 => 4.0.13 expo-device: ~6.0.2 => 6.0.2 expo-font: ~12.0.4 => 12.0.4 expo-haptics: ~13.0.1 => 13.0.1 
expo-image-picker: ~15.0.4 => 15.0.4 expo-linear-gradient: ~13.0.2 => 13.0.2 expo-linking: ~6.3.1 => 6.3.1 expo-localization: ~15.0.3 => 15.0.3 expo-modules-autolinking: ^1.11.1 => 1.11.1 expo-notifications: ^0.28.1 => 0.28.1 expo-secure-store: ~13.0.1 => 13.0.1 expo-share-intent: ^2.0.0 => 2.0.0 expo-splash-screen: ^0.27.4 => 0.27.4 expo-status-bar: ~1.12.1 => 1.12.1 expo-store-review: ~7.0.2 => 7.0.2 expo-updates: ^0.25.11 => 0.25.11 expo-video-thumbnails: ~8.0.0 => 8.0.0 ffmpeg-kit-react-native: ^6.0.2 => 6.0.2 i18n-js: 3.9.2 => 3.9.2 jest: ^29.2.1 => 29.7.0 jest-expo: ~51.0.1 => 51.0.1 libphonenumber-js: ^1.11.1 => 1.11.1 (1.9.47) libphonenumber-js-core: undefined (1.0.0) libphonenumber-js-max: undefined (1.0.0) libphonenumber-js-min: undefined (1.0.0) libphonenumber-js-mobile: undefined (1.0.0) libphonenumber-js/build: undefined () libphonenumber-js/core: undefined () libphonenumber-js/max: undefined () libphonenumber-js/max/metadata: undefined () libphonenumber-js/min: undefined () libphonenumber-js/min/metadata: undefined () libphonenumber-js/mobile: undefined () libphonenumber-js/mobile/examples: undefined () libphonenumber-js/mobile/metadata: undefined () lodash: ^4.17.21 => 4.17.21 lodash.filter: ^4.6.0 => 4.6.0 lottie-react-native: 6.7.0 => 6.7.0 mixpanel-react-native: ^3.0.2 => 3.0.3 mixpanelexpo: 1.0.0 mobx: 6.10.2 => 6.10.2 mobx-react-lite: 4.0.5 => 4.0.5 mobx-state-tree: 5.3.0 => 5.3.0 patch-package: 6.4.7 => 6.4.7 postinstall-prepare: 1.0.1 => 1.0.1 prettier: 2.8.8 => 2.8.8 (2.3.2, 1.19.1) react: 18.2.0 => 18.2.0 react-dom: 18.2.0 => 18.2.0 react-native: 0.74.1 => 0.74.1 react-native-blurhash: ^2.0.2 => 2.0.2 react-native-compressor: ^1.8.24 => 1.8.24 react-native-context-menu-view: ^1.16.0 => 1.16.0 react-native-device-info: ^10.13.2 => 10.13.2 react-native-fs: ^2.20.0 => 2.20.0 react-native-gesture-handler: ~2.16.1 => 2.16.2 react-native-get-random-values: ^1.11.0 => 1.11.0 react-native-mime-types: ^2.5.0 => 2.5.0 react-native-mmkv: ^2.12.2 => 
2.12.2 react-native-reanimated: ~3.10.1 => 3.10.1 react-native-safe-area-context: ^4.10.1 => 4.10.1 react-native-screens: 3.31.1 => 3.31.1 react-native-static-safe-area-insets: ^2.2.0 => 2.2.0 react-native-touchable-scale: ^2.2.0 => 2.2.0 react-native-url-polyfill: ^2.0.0 => 2.0.0 react-native-vision-camera: ^4.0.3 => 4.0.3 react-native-volume-manager: ^1.10.0 => 1.10.0 react-native-web: ~0.19.6 => 0.19.11 react-native-webview: 13.8.6 => 13.8.6 react-native-youtube-iframe: ^2.3.0 => 2.3.0 react-test-renderer: 18.2.0 => 18.2.0 reactotron-core-client: ^2.8.13 => 2.9.3 reactotron-mst: ^3.1.7 => 3.1.9 reactotron-react-js: ^3.3.11 => 3.3.14 reactotron-react-native: ^5.0.5 => 5.1.6 ts-jest: ^29.1.1 => 29.1.2 ts-node: ^10.9.2 => 10.9.2 tsx: ^4.9.4 => 4.10.0 typescript: ^5.4.5 => 5.4.5 (4.4.4, 4.9.5) uuid: ^9.0.1 => 9.0.1 (8.3.2, 3.3.2, 7.0.3) npmGlobalPackages: @aws-amplify/cli-internal: 12.12.0 @aws-amplify/cli: 12.11.0 @react-native-community/netinfo: 9.4.1 eas-cli: 9.0.3 expo-cli: 6.3.10 firebase-tools: 11.24.1 n: 9.1.0 node-gyp: 10.0.1 node: 20.6.0 npm: 10.7.0 pod-install: 0.2.0 react-native-spinkit: 1.5.1 typescript: 5.4.5 yarn: 1.22.22 ```

Describe the bug

Not so much a bug, but a question I can't seem to find much data on.

Here is my sequence:

  1. Fetch user's contacts from phone
  2. Flatmap user's contacts into just phone number strings (result is an array of maybe 500 phone number strings)
  3. Check my dynamo DB user table to see if they exist as a user yet

The problem is that secondary index queries only allow me to do a database query for a single phone number:

  User: a
    .model({
      id: a.id().required(),
      birthdate: a.string().required(),
      firstName: a.string().required(),
      lastName: a.string().required(),
      username: a.string().required(),
      phoneNumber: a.phone().required(),
    })
    .secondaryIndexes((index) => [
      index("phoneNumber").queryField("listUsersByPhoneNumber"),
    ])
    .authorization((allow) => [allow.owner(), allow.publicApiKey().to(["read"])]),

My function:

   arrayOfActiveUsersFromContacts = []
   for (const phoneNumber of phoneNumbers){
      const response = await client.models.User.listUsersByPhoneNumber({
        phoneNumber
      })
    if(response.data.length) {
      arrayOfActiveUsersFromContacts.push(response.data)
    }
}

This would mean that I would need to have a for loop that queries 500 times to the database to check if there is a user with that phone number. At scale, this seems super expensive. I'm guessing there is a better way... Is there a way to make a custom query that takes the whole array of phone numbers?

     const response = await client.models.User.getAllUsersFromPhoneNumbers({
        phoneNumbers: myBigArrayOfPhoneNumbers
      })

What is the best way to accomplish this?

Expected behavior

N/A

Reproduction steps

N/A

Code Snippet

See above.

Log output

``` // Put your logs below this line ```

aws-exports.js

No response

Manual configuration

No response

Additional configuration

No response

Mobile Device

iPhone 12 Physical

Mobile Operating System

iOS 17

Mobile Browser

No response

Mobile Browser Version

No response

Additional information and screenshots

No response

chrisbonifacio commented 1 month ago

@ChristopherGabba thanks for opening this issue! As you mentioned, this is a use case I don't think is efficiently achieved by the auto-generated queries. I think your best bet will be to look into writing a custom resolver that performs a BatchGetItem to look up multiple users by phone number.

Here's our docs on writing custom queries and mutations:

https://docs.amplify.aws/react/build-a-backend/data/custom-business-logic/

And here's an example of what an AppSync JS resolver performing BatchGetItem would look like:

https://docs.amplify.aws/react/build-a-backend/data/connect-to-existing-data-sources/connect-external-ddb-table/#batchgetitem

A limitation to keep in mind with this approach is that, with BatchGetItem, you can only retrieve up to 100 items at a time. So you will have to split up a user's contacts into batches of 100 and perform the query for each batch. Requesting more than 100 items in a single call fails with a ValidationException. If the response would exceed the 16 MB size limit, or more than 1 MB per partition is requested, the response will be partial and include an UnprocessedKeys value which you can use in a subsequent query to get the rest of the items.

For more details on BatchGetItem, please refer to the DynamoDB docs: https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchGetItem.html#:~:text=The%20BatchGetItem%20operation%20returns%20the,as%20many%20as%20100%20items.

ChristopherGabba commented 1 month ago

@chrisbonifacio Awesome, I think this is what I'm looking for. I just read through your options and put together a start:

Per the first link, define a custom query:

  UserBatchResponse: a.customType({
    activeUsers: a.ref("User").array(),
  }),

  checkBatchOfPhoneNumbersForActiveUsers: a
    .query()
    .arguments({
      phoneNumbers: a.string().array(),
    })
    .returns(a.ref("UserBatchResponse"))
    .handler(a.handler.function(phoneBatchHandler))
    .authorization((allow) => [allow.authenticated()]),

Per the second link, configuring the custom BatchGetItem:

const phoneBatchHandler = defineFunction({
  entry: "./phoneBatch-handler/handler.ts",
})

//phoneBatch-handler/handler.ts
import type { Schema } from '../resource'
import { util } from '@aws-appsync/utils'; //throwing error here as I do not have this as a dependendency

export const handler: Schema["User"]["type"][] = async (ctx) => {
  const { phoneNumbers } = ctx.args;
  return {
    operation: 'BatchGetItem',
    tables: {
        users: [util.dynamodb.toMapValues({ phoneNumbers })]
    }
  };
};

I'm guessing I'll need to add @aws-appsync/utils as a dependency to my app? Also I've never defined a custom function handler but in this case this seems too simple.

chrisbonifacio commented 1 month ago

@ChristopherGabba yes, you have to install @aws-appsync/utils in your project to use the dynamodb utility functions

Looks like the right idea in your resolver, let me know how it goes!

chrisbonifacio commented 1 month ago

Hey @ChristopherGabba , just noticed that your resolver is a function but the utils and syntax are for AppSync JS resolvers. So you need to refactor a bit, for example your custom query's schema should look like this:

 checkBatchOfPhoneNumbersForActiveUsers: a
    .query()
    .arguments({
      phoneNumbers: a.string().array(),
    })
    .returns(a.ref("UserBatchResponse"))
    .handler(
      a.handler.custom({
        dataSource: a.ref("User"),
        entry: "./phoneBatchHandler.ts",
      })
    )
    .authorization((allow) => [allow.authenticated()]),

and the resolver logic should look more like this, without the type annotation (which is for lambda resolvers):

import { Context, util } from "@aws-appsync/utils";

export const request = (ctx: Context) => {
  const { phoneNumbers } = ctx.args;

  return {
    operation: "BatchGetItem",
    tables: {
      users: [util.dynamodb.toMapValues({ phoneNumbers })],
    },
  };
};

export const response = (ctx: Context) => {
  return ctx.result;
};
ChristopherGabba commented 1 month ago

I am now getting this error:

Cannot use `.ref()` to refer a model from a `custom type`. Field `activeUsers` of `UserBatchResponse` refers to model `User`
Caused By: Cannot use `.ref()` to refer a model from a `custom type`. Field `activeUsers` of `UserBatchResponse` refers to model `User`

According to the docs you can only use a.ref("") to reference enums and customTypes, so my User:

User: a
    .model({
      id: a.id().required(),
      birthdate: a.string().required(),
      firstName: a.string().required(),
      lastName: a.string().required(),
      username: a.string().required(),
      phoneNumber: a.phone().required(),
      pushToken: a.string(),
      profileImage: a.url(),
      profileImageBlurhash: a.string(),
      searchTerm: a.string().required(),
      sentFriendships: a.hasMany("Friendship", "senderId"),
      receivedFriendships: a.hasMany("Friendship", "receiverId"),
    })
    .secondaryIndexes((index) => [
      index("phoneNumber").queryField("listUsersByPhoneNumber"),
      index("searchTerm").queryField("listUsersBySearchTerm").sortKeys(["id"]),
    ])
    .authorization((allow) => [allow.owner(), allow.publicApiKey().to(["read"])]),

Can't be referenced through a ref? How can I reference my User model without having to make a whole copy as a customType?

chrisbonifacio commented 1 month ago

I think the issue is that the model is nested in a custom type. You should use the model directly instead of the custom type.

try replacing .returns(a.ref("UserBatchResponse")) with:

.returns(a.ref('User').array())
ChristopherGabba commented 1 month ago

Okay, you nailed it. That fixed it and made it past the initial deployment, then it threw this error later on in the sandbox compilation:

amplify-reelfeel-christophergabba-sandbox-1be123c1b8-data7552DF31-NTFVC5Q3JJ65 | 4:17:19 PM | CREATE_FAILED        | AWS::AppSync::FunctionConfiguration | data/Fn_Query_checkBatchOfPhoneNumbersForActiveUsers_1 (FnQuerycheckBatchOfPhoneNumbersForActiveUsers1) Resource handler returned message: "The code contains one or more errors. (Service: AppSync, Status Code: 400, Request ID: edc62a28-1b9b-4e96-bda1-ba9c087c5509)" (RequestToken: 3767e388-24d1-ecca-30fc-6331112e69ab, HandlerErrorCode: GeneralServiceException)
amplify-reelfeel-christophergabba-sandbox-1be123c1b8 | 4:17:29 PM | UPDATE_FAILED        | AWS::CloudFormation::Stack | data.NestedStack/data.NestedStackResource (data7552DF31) Embedded stack arn:aws:cloudformation:us-east-1:440383253519:stack/amplify-reelfeel-christophergabba-sandbox-1be123c1b8-data7552DF31-NTFVC5Q3JJ65/fd79c3c0-0fa9-11ef-aba0-0e7d764f0719 was not successfully updated. Currently in UPDATE_ROLLBACK_IN_PROGRESS with reason: The following resource(s) failed to create: [FnQuerycheckBatchOfPhoneNumbersForActiveUsers1]. 

The CloudFormation deployment has failed.
Caused By: ❌ Deployment failed: Error: The stack named amplify-reelfeel-christophergabba-sandbox-1be123c1b8 failed to deploy: UPDATE_ROLLBACK_COMPLETE: Resource handler returned message: "The code contains one or more errors. (Service: AppSync, Status Code: 400, Request ID: edc62a28-1b9b-4e96-bda1-ba9c087c5509)" (RequestToken: 3767e388-24d1-ecca-30fc-6331112e69ab, HandlerErrorCode: GeneralServiceException), Embedded stack arn:aws:cloudformation:us-east-1:440383253519:stack/amplify-reelfeel-christophergabba-sandbox-1be123c1b8-data7552DF31-NTFVC5Q3JJ65/fd79c3c0-0fa9-11ef-aba0-0e7d764f0719 was not successfully updated. Currently in UPDATE_ROLLBACK_IN_PROGRESS with reason: The following resource(s) failed to create: [FnQuerycheckBatchOfPhoneNumbersForActiveUsers1]. 

Function:

  checkBatchOfPhoneNumbersForActiveUsers: a
    .query()
    .arguments({
      phoneNumbers: a.string().array(),
    })
    .returns(a.ref("User").array())
    .handler(a.handler.custom({
      dataSource: a.ref("User"),
      entry: "./phoneBatchHandler.ts",
    }))
    .authorization((allow) => [allow.authenticated()]),

phoneBatchHandler.ts

import { Context, util } from "@aws-appsync/utils";

export const request = (ctx: Context) => {
  const { phoneNumbers } = ctx.args;

  return {
    operation: "BatchGetItem",
    tables: {
      users: [util.dynamodb.toMapValues({ phoneNumbers })],
    },
  };
};

export const response = (ctx: Context) => {
  return ctx.result;
};

When I read this page: https://docs.amplify.aws/react/build-a-backend/data/custom-business-logic/ , it uses this syntax:

export const handler: Schema["echo"]["functionHandler"] =

In this case, I just have a request and a response instead of a handler function, could that be why? It seems like it didn't like the phoneBatchHandler.ts function.

chrisbonifacio commented 1 month ago

Your resolver has the right logic, should have a request and response function for JS resolvers.

Handler functions are for Lambdas.

If you were to use a Lambda you would have to change the logic in your file from using AppSync's utils (which are built into AppSync JS resolvers) and instead use a DynamoDB client to perform the operation.

Try changing it to a js file rather than ts, removing the Context type import. I forgot AppSync JS resolvers only support JS, for TypeScript to work it has to be transpiled to JS

ChristopherGabba commented 1 month ago

@chrisbonifacio Brilliant! That fixed the problem and it compiled. I would've spent 7 years and 400 ChatGPT attempts to figure that out, I'll try it out this afternoon on my lunch break and let you know if the query pulls data correctly.

ChristopherGabba commented 1 month ago

@chrisbonifacio Okay I just tried out the query. I first created some fake users then I fed an array of random phone numbers where a few of the phone numbers were included.

The query returned null each time with the following error:

        const testNumbers = ["+1111111111", "+12345678910", "+678910111213"]
        const result = await client.queries.checkBatchOfPhoneNumbersForActiveUsers({
          phoneNumbers: testNumbers,
        })
        console.log(result)

      //logs: {"data": null, "errors": [{"data": null, "errorInfo": null, "errorType": "Code", "locations": [Array], "message": "Expected JSON object for '$[tables][users]' but got a 'ARRAY' instead.", "path": [Array]}]}
chrisbonifacio commented 1 month ago

That's interesting, the error suggests there's a mismatch between the return type and what was returned from the resolver. The error says an ARRAY was returned and the custom query expects to return an array of User items.

I would console log the result from the resolver and enable logging in your API to see what data is being returned and if there's anything that needs to be adjusted about the return type of the custom query.

chrisbonifacio commented 1 month ago

Oh! Actually, it might just be referring to this line in the resolver:

users: [util.dynamodb.toMapValues({ phoneNumbers })]

Try removing the array brackets so that it passes the map instead.

users: util.dynamodb.toMapValues({ phoneNumbers })
chrisbonifacio commented 1 month ago

If that doesn't work, try converting the array to JSON like so:

users: JSON.stringify([util.dynamodb.toMapValues({ phoneNumbers })])
ChristopherGabba commented 1 month ago

@chrisbonifacio This method without the array produced this error

   users: util.dynamodb.toMapValues({ phoneNumbers }) 
//Error message
    {"data": null, "errors": [{"data": null, "errorInfo": null, "errorType": "Code", "locations": [Array], 
      "message": "Unsupported element '$[tables][users][phoneNumbers]'.", "path": [Array]}]}

The stringify message produced:


   users: JSON.stringify([util.dynamodb.toMapValues({ phoneNumbers })])
//Error message
{"data": null, "errors": [{"data": null, "errorInfo": null, "errorType": "Code", "locations": [Array], "message": "Expected JSON object for '$[tables][users]' but got a 'STRING' instead.", "path": [Array]}]
chrisbonifacio commented 1 month ago

@ChristopherGabba So, while I figured out the correct syntax for the BatchGetItem operation from a JS resolver (our and AppSync's documentation needs updating), I learned that BatchGetItem only supports searching by primary keys. In this case, phoneNumber is not the primary key for a user so this approach won't work for this use case unfortunately.

For what it's worth, this is the proper syntax for a BatchGetItem JS resolver:

import { util } from "@aws-appsync/utils";

/**
 * Builds the AppSync JS resolver request for a DynamoDB `BatchGetItem` call.
 *
 * Every entry of `ctx.args.userIds` becomes one key of the form
 * `{ id: <userId> }`, encoded with `util.dynamodb.toMapValues`.
 *
 * @param {object} ctx - Resolver context; reads `ctx.args.userIds` (array of
 *   ids) and `ctx.env.USER_TABLE` (name of the table to read from).
 * @returns {object} A `BatchGetItem` request whose `tables` map has a single
 *   entry, keyed by `ctx.env.USER_TABLE`, holding the encoded `keys`.
 */
export const request = (ctx) => {
  // Build the key list in one expression so the array that is filled is
  // guaranteed to be the same array that is returned as `keys` below.
  const keys = ctx.args.userIds.map((userId) =>
    util.dynamodb.toMapValues({ id: userId }),
  );

  return {
    operation: "BatchGetItem",
    tables: {
      // The table name is not hard-coded; it is read from the resolver's
      // environment (`ctx.env`).
      [ctx.env.USER_TABLE]: { keys },
    },
  };
};

/**
 * Unwraps the `BatchGetItem` result for the table named by `ctx.env.USER_TABLE`.
 *
 * @param {object} ctx - Resolver context; reads `ctx.result.data` and
 *   `ctx.env.USER_TABLE`.
 * @returns {*} Whatever `ctx.result.data` holds under that table name
 *   (`undefined` when the table name is not a key of `ctx.result.data`).
 */
export const response = (ctx) => {
  const itemsByTable = ctx.result.data;
  const tableName = ctx.env.USER_TABLE;
  return itemsByTable[tableName];
};

If I plug in phone numbers instead of the primary key, I just get an array of null values and unprocessed keys:

image

If I pass a list of primary keys, id, as arguments, I get data:

image

Now, there might still be a better way to do this, but as far as I am aware and with the current schema, I can only think of filtering for users with certain phone numbers using an OR filter expression. There is a limit to how large a filter expression can be though, the maximum length of an expression string being 4KB.

https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/ServiceQuotas.html#limits-expression-parameters-lengths

I was able to pass in around 95 phone numbers + two test numbers in one query:

/**
 * Demo lookup: lists users whose `phoneNumber` equals any of 97 candidate
 * values (two test numbers plus 95 placeholders), using a single `or` filter
 * with the `apiKey` auth mode. The returned `data` is logged and handed to
 * `setUsers`.
 *
 * @returns {Promise<void>} Resolves once the list call has completed and
 *   `setUsers` has been called with its `data`.
 */
const checkActivePhoneNumbers = async () => {
  // 95 placeholder values ("0" … "94"), used only to pad out the filter.
  const placeholders = Array.from({ length: 95 }, (_, i) => `${i}`);
  const candidates = ["718-706-5432", "718-706-4327"].concat(placeholders);

  // One `eq` clause per candidate, all combined under a single `or`.
  const orClauses = [];
  for (const candidate of candidates) {
    orClauses.push({ phoneNumber: { eq: candidate } });
  }

  const result = await client.models.User.list({
    filter: { or: orClauses },
    authMode: "apiKey",
  });
  const matchedUsers = result.data;

  console.log(matchedUsers);

  setUsers(matchedUsers);
};
image
ChristopherGabba commented 1 month ago

@chrisbonifacio thank you for investigating and coming to a consensus! Let me ask you this: do you think if I made the primary key the phone number, this method would work? I noticed you can also define multiple identifiers so perhaps listing ID and phoneNumber or both?

Also the problem I see with your filter approach is if you have say 100,000 users and you are now using a scan operation. So you would only scan up to whatever the initial limit setting in the query is. You may need 50 next tokens to fully check all the users for a matching phone number, or you need to set the limit to 100000000 (large number). Without searching all users for a matching phone number, you risk telling the client that there is no user with that phone number which could create some confusion.

chrisbonifacio commented 1 month ago

Yeah, while you could potentially paginate the filtered query, the approach is still inefficient and could still result in no existing users being found.

There are privacy concerns working with phone numbers as primary keys so that's up to you. One option is to hash them which would require some processing during entry and querying to compare the hash values while obfuscating the real values.

Also, a user's phone number can change so that would be something to consider as well.

In any case, if you can store phone numbers as the primary key that would allow you to use BatchGetItem.

Using the phone number to make a composite key is also a valid approach, just need to consider that you would need to know both a user's id and phone number ahead of time. This is a problem because a user's contacts only provides a phone number and you'd have to do the extra work of finding the id that matches the phone number anyway.

chrisbonifacio commented 1 month ago

Another approach might be to create a separate table for storing phone numbers as the primary key and perhaps other user profile info. This table can have a hasOne relationship from User to Profile/ContactInfo/etc and this new table can have a belongsTo relationship to a User record.

Again, not sure what the best approach is here but these are options to consider.

I mention this approach because it's not uncommon for customers to want to use a user's Cognito sub as the value for an id primary key.

ChristopherGabba commented 1 month ago

Thank you @chrisbonifacio, I like your second approach so I tried it out:

//schema
User: a
    .model({
      id: a.id().required(),
      birthdate: a.string().required(),
      firstName: a.string().required(),
      lastName: a.string().required(),
      username: a.string().required(),
      phoneNumber: a.hasOne("PhoneNumber", "userId"),
      pushToken: a.string(),
      profileImage: a.url(),
      profileImageBlurhash: a.string(),
      searchTerm: a.string().required(),
      sentFriendships: a.hasMany("Friendship", "senderId"),
      receivedFriendships: a.hasMany("Friendship", "receiverId"),
    })
    .secondaryIndexes((index) => [
      index("searchTerm").queryField("listUsersBySearchTerm").sortKeys(["id"]),
    ])
    .authorization((allow) => [allow.publicApiKey()]),

  PhoneNumber: a
    .model({
      phoneNumber: a.string().required(),
      userId: a.string().required(),
      user: a.belongsTo("User", "userId"),
    })
    .identifier(["phoneNumber"])
    .authorization((allow) => [allow.publicApiKey()]),

  checkBatchOfPhoneNumbersForActiveUsers: a
    .query()
    .arguments({
      phoneNumbers: a.string().array(),
    })
    .returns(a.ref("PhoneNumber").array())
    .handler(
      a.handler.custom({
        dataSource: a.ref("PhoneNumber"),
        entry: "./phoneBatchHandler.js",
      }),
    )
    .authorization((allow) => [allow.publicApiKey()]),

// phoneBatchHandler.js
import { util } from "@aws-appsync/utils";

export const request = (ctx) => {
  const phoneNumbers = [];

  ctx.args.phoneNumbers.forEach((phoneNumber) => {
    phoneNumbers.push(util.dynamodb.toMapValues({ id: phoneNumber }));
  });

  return {
    operation: "BatchGetItem",
    tables: {
      [ctx.env.PHONENUMBER_TABLE]: {
        keys: phoneNumbers,
      },
    },
  };
};

export const response = (ctx) => {
  return ctx.result.data[ctx.env.PHONENUMBER_TABLE];
};

It compiled but now I'm getting this error:

          const client = generateClient<Schema>()
          const testNumbers = ["+14323496006", "+19722367519", "+678910111213"]
          const result = await client.queries.checkBatchOfPhoneNumbersForActiveUsers({
            phoneNumbers: testNumbers,
          })
          console.log(result)
//logs {"data": null, "errors": [{"data": null, "errorInfo": null, "errorType": "Code", "locations": [Array], "message": "Runtime Error", "path": [Array]}]}

So there appears to be some sort of error in my logic. Because typescript doesn't infer anything in the phoneBatchHandler function, I'm not positive if the env.PHONENUMBER_TABLE is correct.

Also because I want to be able to deep reference the user like so results.data[0].user.firstName, how can you define a selectionSet for the custom query so that all user data returns in the phone number object?

chrisbonifacio commented 1 month ago

This line should just be { phoneNumber }) because it's the primary key rather than id: phoneNumber

ctx.args.phoneNumbers.forEach((phoneNumber) => {
    phoneNumbers.push(util.dynamodb.toMapValues({ phoneNumber }));
  })

As for the environment variable, how are you setting it in your backend.ts file?

ChristopherGabba commented 1 month ago

This is all I have in the backend.ts


const schema = a.schema({
  User: a
    .model({
      id: a.id().required(),
      birthdate: a.string().required(),
      firstName: a.string().required(),
      lastName: a.string().required(),
      username: a.string().required(),
      phoneNumber: a.hasOne("PhoneNumber", "userId"),
      pushToken: a.string(),
      profileImage: a.url(),
      profileImageBlurhash: a.string(),
      searchTerm: a.string().required(),
    })
    .secondaryIndexes((index) => [
      index("searchTerm").queryField("listUsersBySearchTerm").sortKeys(["id"]),
    ])
    .authorization((allow) => [allow.publicApiKey()]),

  PhoneNumber: a
    .model({
      phoneNumber: a.string().required(),
      userId: a.string().required(),
      user: a.belongsTo("User", "userId"),
    })
    .identifier(["phoneNumber"])
    .authorization((allow) => [allow.publicApiKey()]),

  checkBatchOfPhoneNumbersForActiveUsers: a
    .query()
    .arguments({
      phoneNumbers: a.string().array(),
    })
    .returns(a.ref("PhoneNumber").array())
    .handler(
      a.handler.custom({
        dataSource: a.ref("PhoneNumber"),
        entry: "./phoneBatchHandler.js",
      }),
    )
    .authorization((allow) => [allow.publicApiKey()]),
})

export type Schema = ClientSchema<typeof schema>

export const data = defineData({
  schema,
  authorizationModes: {
    defaultAuthorizationMode: "apiKey",
    apiKeyAuthorizationMode: { expiresInDays: 30 },
  },
})

I don't think I specify anything specific as far as environment variables go.

chrisbonifacio commented 1 month ago

Ah, that might be the issue then. You can set the environment variable similar to this in the backend.ts file:

backend.data.resources.cfnResources.cfnGraphqlApi.environmentVariables = {
  PHONENUMBER_TABLE: "<insert-table-name>",
};

The environment variables set here will be available to all AppSync resolvers via the context's env object, which is why the resolver logic uses ctx.env rather than process.env.

We append some stuff like the amplify appId to the table name so you'll probably have to find the correct table name by searching "PhoneNumber" in the DynamoDB console

chrisbonifacio commented 1 month ago

just tested it and I also got the runtime error if I try to access an env var that isn't set on the API.

Setting it resulted in a working batch query:

image
ChristopherGabba commented 1 month ago

@chrisbonifacio Here is my code: backend.data.resources.cfnResources.cfnGraphqlApi.environmentVariables = { PHONENUMBER_TABLE: "PhoneNumber-6ds57ytuajcg7gq76fizgkx2pe-NONE", };

Here is my number:

Screenshot 2024-05-17 at 12 27 40 PM

It seems to no longer be throwing an error! But now I'm getting:

{"data": {"0": null, "user": [Function anonymous]}, "extensions": undefined}

When I lazy load the user it returns null. I'm not sure why there is even a user in the result like that, given that it should be a phoneNumber array.

For some reason it keeps making duplicate DynamoDB tables, so I'm going to try deleting my deployment and these tables and then try again.

chrisbonifacio commented 1 month ago

Yeah, there's currently a bug with the way data is deserialized that we caught yesterday in a different scenario. The data should be in the form of an array, not an object. And there should be a user function for each PhoneNumber record in the array to lazy load the relationships.

I also tried looking into the selectionSet but it seems that custom queries/mutations don't currently support custom selection sets. So, since lazy loading would otherwise have been the way to get the related data, you'll unfortunately have to drop down to the client.graphql API as a workaround.

To generate the GraphQL statements you can use the command:

npx ampx generate graphql-client-code --out src/graphql

In the graphql/queries.ts file you should see the custom query with a selection set that includes the user:

# Query for the custom batch lookup: passes the phone number list through and
# selects each returned PhoneNumber's fields together with its nested user.
query CheckBatchOfPhoneNumbersForActiveUsers($phoneNumbers: [String]) {
  checkBatchOfPhoneNumbersForActiveUsers(phoneNumbers: $phoneNumbers) {
    createdAt
    phoneNumber
    updatedAt
    user {
      birthdate
      createdAt
      firstName
      id
      lastName
      profileImage
      profileImageBlurhash
      pushToken
      searchTerm
      updatedAt
      username
      __typename
    }
    userId
    __typename
  }
}
chrisbonifacio commented 1 month ago

Here's an example I tested of what the client.graphql query would look like:

//...
import { checkBatchOfPhoneNumbersForActiveUsers } from "@/src/graphql/queries";

//...

  /**
   * Example: runs the custom batch query through the raw `client.graphql` API
   * and logs the returned PhoneNumber records.
   */
  const checkActivePhoneNumbersWithGraphQL = async () => {
    // 95 placeholder entries ("0" through "94") padding out the request.
    const fakePhoneNumbers = Array.from(
      { length: 95 },
      (_, position) => `${position}`,
    );

    const phoneNumbersToCheckFor = ["718-706-5432", "718-706-4327"].concat(
      fakePhoneNumbers,
    );

    const graphqlResponse = await client.graphql({
      query: checkBatchOfPhoneNumbersForActiveUsers,
      variables: { phoneNumbers: phoneNumbersToCheckFor },
    });
    const { checkBatchOfPhoneNumbersForActiveUsers: phoneNumbers } =
      graphqlResponse.data;

    console.log(phoneNumbers);
  };

result (in an array as expected with User data nested):

image
ChristopherGabba commented 4 weeks ago

To those who stumble across this thread this is the exact solution I implemented and finally got the deployment working thanks to @chrisbonifacio:

  1. Split your User schema into a User schema with a PhoneNumber schema relationship to expose phoneNumber as a primary key:
  // User model. The phone number lives in a separate PhoneNumber model that is
  // linked through that model's userId field.
  User: a
    .model({
      id: a.id().required(),
      birthdate: a.string().required(),
      firstName: a.string().required(),
      lastName: a.string().required(),
      username: a.string().required(),
      // One-to-one relationship to PhoneNumber, joined on PhoneNumber.userId.
      phoneNumber: a.hasOne("PhoneNumber", "userId"),
      pushToken: a.string(),
      profileImage: a.url(),
      profileImageBlurhash: a.string(),
      searchTerm: a.string().required(),
    })
    // Secondary index on searchTerm (sort key: id), exposed as the
    // listUsersBySearchTerm query field.
    .secondaryIndexes((index) => [
      index("searchTerm").queryField("listUsersBySearchTerm").sortKeys(["id"]),
    ])
    .authorization((allow) => [allow.publicApiKey()]),

  // PhoneNumber model: maps a phone number to the user that owns it.
  PhoneNumber: a
    .model({
      phoneNumber: a.string().required(),
      // Reference field used by the belongsTo relationship below.
      userId: a.string().required(),
      user: a.belongsTo("User", "userId"),
    })
    // phoneNumber is the model's identifier, so records are keyed by phone number.
    .identifier(["phoneNumber"])
    .authorization((allow) => [allow.publicApiKey()]),

  // Custom query: takes a list of phone number strings and returns a list of
  // PhoneNumber records.
  checkBatchOfPhoneNumbersForActiveUsers: a
    .query()
    .arguments({
      phoneNumbers: a.string().array(),
    })
    .returns(a.ref("PhoneNumber").array())
    .handler(
      // Resolved by a custom handler file that uses the PhoneNumber model as its
      // data source.
      a.handler.custom({
        dataSource: a.ref("PhoneNumber"),
        entry: "./phoneBatchHandler.js",
      }),
    )
    .authorization((allow) => [allow.publicApiKey()]),
  2. In your backend.ts file, expose your PhoneNumber table name as an environment variable:
    
    // Assemble the backend from the auth, data and storage resources.
    const backend = defineBackend({
    auth,
    data,
    storage,
    })
    // Expose the PhoneNumber table name to the batch query handler, which reads it
    // as ctx.env.PHONENUMBER_TABLE.
    // NOTE(review): the table name is hard-coded; it must match the deployed table
    // (look it up in the DynamoDB console) and presumably differs per environment.
    backend.data.resources.cfnResources.cfnGraphqlApi.environmentVariables = {
    PHONENUMBER_TABLE: "PhoneNumber-abcdefghijklmnop-NONE",
    };

3. Here is the `phoneBatchHandler.js` file (MAKE SURE THIS FILE IS NOT `ts` and is a JavaScript `js` file):

```javascript
import { util } from "@aws-appsync/utils";

/**
 * Builds the DynamoDB BatchGetItem request for the phone number lookup.
 *
 * Null entries are dropped and duplicates are removed before the keys are
 * built: BatchGetItem rejects a request whose key list contains the same key
 * twice, and a null phone number is not a valid key value.
 *
 * NOTE(review): BatchGetItem accepts at most 100 keys per call; this handler
 * does not chunk, so callers are expected to stay under that limit.
 *
 * @param {object} ctx - Resolver context; reads `ctx.args.phoneNumbers` and
 *   `ctx.env.PHONENUMBER_TABLE`.
 * @returns {object} BatchGetItem request keyed by the PhoneNumber table name.
 */
export const request = (ctx) => {
  const requested = ctx.args.phoneNumbers;

  // indexOf-based de-duplication keeps first occurrences in their original
  // order; the list is small (bounded by the batch limit), so this is cheap.
  const uniquePhoneNumbers = requested.filter(
    (phoneNumber, index) =>
      phoneNumber != null && requested.indexOf(phoneNumber) === index,
  );

  return {
    operation: "BatchGetItem",
    tables: {
      [ctx.env.PHONENUMBER_TABLE]: {
        keys: uniquePhoneNumbers.map((phoneNumber) =>
          util.dynamodb.toMapValues({ phoneNumber }),
        ),
      },
    },
  };
};

/**
 * Returns the items that BatchGetItem found in the PhoneNumber table.
 *
 * A data-source error is raised explicitly so that a failed batch shows up as
 * a GraphQL error instead of an opaque failure while reading the result.
 *
 * @param {object} ctx - Resolver context; reads `ctx.error`, `ctx.result.data`
 *   and `ctx.env.PHONENUMBER_TABLE`.
 * @returns {Array<object>} Items returned for the PhoneNumber table.
 */
export const response = (ctx) => {
  if (ctx.error) {
    util.error(ctx.error.message, ctx.error.type);
  }

  return ctx.result.data[ctx.env.PHONENUMBER_TABLE];
};

```

Finally, as of right now, to expose the nested user object in the PhoneNumber batch query results, use the `client.graphql` approach from @chrisbonifacio's comments above (generate the query with the `npx ampx generate graphql-client-code` CLI command). I submitted a feature request to allow defining selection sets for custom queries, which would remove this requirement.

Closing issue (finally)! Thanks again @chrisbonifacio