farcasterxyz / hub-monorepo

Implementation of the Farcaster Hub specification and supporting libraries for building applications on Farcaster
https://www.thehubble.xyz
MIT License
700 stars 392 forks source link

Backfilling Casts stops the application sync #1825

Closed chetankashetti closed 3 months ago

chetankashetti commented 6 months ago

What is the bug? Backfill casts flow stops the sync, and it will be stuck forever without processing any event. We have removed foreign key constraints.

How can it be reproduced? (optional)

[20:12:21.743] ERROR (1): Job failed {"jobName":"ProcessHubEvent","jobId":"47817423","reason":"deadlock detected","errorName":"error","errorMessage":"deadlock detected","errorStack":"error: deadlock detected\n    at /home/node/app/node_modules/pg/lib/client.js:526:17\n    at process.processTicksAndRejections (node:internal/process/task_queues:95:5)\n    at async PostgresConnection.executeQuery (/home/node/app/node_modules/kysely/dist/cjs/dialect/postgres/postgres-driver.js:72:28)\n    at async /home/node/app/node_modules/kysely/dist/cjs/query-executor/query-executor-base.js:37:28\n    at async #run (/home/node/app/node_modules/kysely/dist/cjs/driver/single-connection-provider.js:27:16)\n    at PostgresConnection.executeQuery (/home/node/app/node_modules/kysely/dist/cjs/dialect/postgres/postgres-driver.js:91:69)\n    at process.processTicksAndRejections (node:internal/process/task_queues:95:5)\n    at async /home/node/app/node_modules/kysely/dist/cjs/query-executor/query-executor-base.js:37:28\n    at async #run (/home/node/app/node_modules/kysely/dist/cjs/driver/single-connection-provider.js:27:16)\n    at execute (/home/node/app/apps/replicator/build/db.js:129:50)\n    at process.processTicksAndRejections (node:internal/process/task_queues:95:5)\n    at async storeMessage (/home/node/app/apps/replicator/build/processors/index.js:279:5)\n    at async processMessage (/home/node/app/apps/replicator/build/processors/index.js:71:5)\n    at async deleteMessage (/home/node/app/apps/replicator/build/processors/index.js:56:5)\n    at async /home/node/app/apps/replicator/build/processors/index.js:373:13\n    at async /home/node/app/apps/replicator/build/db.js:156:20\n    at async /home/node/app/node_modules/kysely/dist/cjs/kysely.js:418:32\n    at async DefaultConnectionProvider.provideConnection (/home/node/app/node_modules/kysely/dist/cjs/driver/default-connection-provider.js:12:20)\n    at async executeTx (/home/node/app/apps/replicator/build/db.js:155:16)\ninsert into\n  \"messages\" (\n    \"created_at\",\n    \"updated_at\",\n    \"fid\",\n    \"type\",\n    \"timestamp\",\n    \"hash\",\n    \"hash_scheme\",\n    \"signature\",\n    \"signature_scheme\",\n    \"signer\",\n    \"raw\",\n    \"deleted_at\",\n    \"pruned_at\",\n    \"revoked_at\",\n    \"body\"\n  )\nvalues\n  (\n    '2024-03-15T20:12:20.728Z',\n    '2024-03-15T20:12:20.728Z',\n    305236,\n    3,\n    '2024-03-02T20:15:23.000Z',\n    '\x6b9a76ecf78180aac0f9ea8cb1851ac615e99254',\n    1,\n    '\x18306e2f905fe440484a947b0628ee6164a0558cb74a7a536be1efe4cd3b00e4ff472ae996fa671c3dbdeb1bbb8ca94113c36890b440b4ed7e5bbef689d78704',\n    1,\n    '\x5e7be1b520ce0bdf2c5f4260ce35c8584655237a3da9b6dfca54381b114b5866',\n    '\x0af401080310d4d01218dbc5d42f20012ac50112098dd204c4d304d6f1011a1a08edd10412146e37d5ffe2382fca598b2c1b68f2e931a3c016e6225e536571756920757420766f6c75707461732070726f766964656e74207265637573616e64616520696e2074656e6574757220766f6c7570746174756d2064656c6563747573206d61696f7265732074656d706f7265206e6f737472756d2e2a03002f5e321c121a08e5aa0112141085b785312f000faf75d069ae1f620c35f9312732190a17687474703a2f2f7761727065642d616c7461722e636f6d3a1d0801121908e5311214e95b5c8d60a43ba170acddb401bda42570562f8912146b9a76ecf78180aac0f9ea8cb1851ac615e992541801224018306e2f905fe440484a947b0628ee6164a0558cb74a7a536be1efe4cd3b00e4ff472ae996fa671c3dbdeb1bbb8ca94113c36890b440b4ed7e5bbef689d78704280132205e7be1b520ce0bdf2c5f4260ce35c8584655237a3da9b6dfca54381b114b5866',\n    '2024-03-15T20:12:20.728Z',\n    null,\n    null,\n    '{\"type\":1,\"targetCastId\":{\"fid\":6373,\"hash\":\"0xe95b5c8d60a43ba170acddb401bda42570562f89\"}}'\n  )\non conflict (\"hash\") do\nupdate\nset\n  \"updated_at\" = '2024-03-15T20:12:20.728Z',\n  \"signature\" = \"excluded\".\"signature\",\n  \"signature_scheme\" = \"excluded\".\"signature_scheme\",\n  \"signer\" = \"excluded\".\"signer\",\n  \"deleted_at\" = '2024-03-15T20:12:20.728Z',\n  \"pruned_at\" = null,\n  \"revoked_at\" = null\nwhere\n  (\n    \"excluded\".\"signature\" != \"messages\".\"signature\"\n    or \"excluded\".\"signature_scheme\" != \"messages\".\"signature_scheme\"\n    or \"excluded\".\"signer\" != \"messages\".\"signer\"\n    or \"excluded\".\"deleted_at\" is distinct from \"messages\".\"deleted_at\"\n    or \"excluded\".\"pruned_at\" is distinct from \"messages\".\"pruned_at\"\n    or \"excluded\".\"revoked_at\" is distinct from \"messages\".\"revoked_at\"\n  )\nreturning\n  \"hash\",\n  \"updated_at\",\n  \"created_at\"\n    at executeTx (/home/node/app/apps/replicator/build/db.js:161:50)\n    at process.processTicksAndRejections (node:internal/process/task_queues:95:5)\n    at async processMergeMessageHubEvent (/home/node/app/apps/replicator/build/processors/index.js:370:5)\n    at async processHubEvent (/home/node/app/apps/replicator/build/processors/index.js:329:13)\n    at async Object.run (/home/node/app/apps/replicator/build/jobs/processHubEvent.js:11:9)\n    at async /home/node/app/node_modules/bullmq/dist/cjs/classes/child-processor.js:69:33"}

Additional context (optional) We are using latest version of hub and replicator.

dawsbot commented 5 months ago

@chetankashetti is this still an issue on the latest hub? If this is solved for you please close the issue 🙏

Also, can you format your error in a multiline comment? It will be easier to read: https://github.com/jspsych/jsPsych/discussions/1113

chetankashetti commented 5 months ago

This is an issue on replicator, we relaxed foreign key constraints, after that we didn't see this issue. hub will cause this issue?

chetankashetti commented 5 months ago

also we see unicode errors while backfilling casts, not if its related to this. i can create separate issue if its not related

[03:11:30.961] ERROR (1): Job failed {"jobName":"BackfillFidCasts","jobId":"173530432","reason":"invalid byte sequence for encoding \"UTF8\": 0x00","errorName":"error","errorMessage":"invalid byte sequence for encoding \"UTF8\": 0x00","errorStack":"error: invalid byte sequence for encoding \"UTF8\": 0x00\n    at /home/node/app/node_modules/pg/lib/client.js:526:17\n    at process.processTicksAndRejections (node:internal/process/task_queues:95:5)\n    at async PostgresConnection.executeQuery (/home/node/app/node_modules/kysely/dist/cjs/dialect/postgres/postgres-driver.js:72:28)\n    at async /home/node/app/node_modules/kysely/dist/cjs/query-executor/query-executor-base.js:37:28\n    at async #run (/home/node/app/node_modules/kysely/dist/cjs/driver/single-connection-provider.js:27:16)\n    at PostgresConnection.executeQuery (/home/node/app/node_modules/kysely/dist/cjs/dialect/postgres/postgres-driver.js:91:69)\n    at process.processTicksAndRejections (node:internal/process/task_queues:95:5)\n    at async /home/node/app/node_modules/kysely/dist/cjs/query-executor/query-executor-base.js:37:28\n    at async #run (/home/node/app/node_modules/kysely/dist/cjs/driver/single-connection-provider.js:27:16)\n    at executeTakeFirstOrThrow (/home/node/app/apps/replicator/build/db.js:149:50)\n    at process.processTicksAndRejections (node:internal/process/task_queues:95:5)\n    at async Object.mergeDerivedRow (/home/node/app/apps/replicator/build/processors/cast.js:76:16)\n    at async /home/node/app/apps/replicator/build/eventProcessor.js:36:31\n    at async processMessage (/home/node/app/apps/replicator/build/processors/index.js:80:17)\n    at async mergeMessage (/home/node/app/apps/replicator/build/processors/index.js:52:5)\n    at async /home/node/app/apps/replicator/build/jobs/backfillFidCasts.js:15:21\n    at async /home/node/app/apps/replicator/build/db.js:156:20\n    at async /home/node/app/node_modules/kysely/dist/cjs/kysely.js:418:32\n    at async DefaultConnectionProvider.provideConnection (/home/node/app/node_modules/kysely/dist/cjs/driver/default-connection-provider.js:12:20)\ninsert into\n  \"casts\" (\n    \"timestamp\",\n    \"deleted_at\",\n    \"fid\",\n    \"parent_fid\",\n    \"hash\",\n    \"root_parent_hash\",\n    \"parent_hash\",\n    \"root_parent_url\",\n    \"text\",\n    \"embeds\",\n    \"mentions\",\n    \"mentions_positions\"\n  )\nvalues\n  (\n    '2024-02-28T08:27:08.000Z',\n    null,\n    308972,\n    308972,\n    '\x4666d85a24cc503d8c29a2d579268fa72daf41f7',\n    '\x99417ac94367cdcf765bf9f5b9e13838f4cc1223',\n    '\xd55f54ada9edff811b30a475a17a97d9bcc06b35',\n    null,\n    '🗳️ Your verifiable poll frame has been created using farcaster.vote & \n\nNow copy & paste the URL of the frame into a Cast to share your poll with others!\n\n👉 http://localhost:8888/c�{鏀�o4\"�\u0004C�}j>��44Nj\u0012\u0014�\u0010\u0000\u0000\u0000\u0000\u0003',\n    '[]',\n    '[7548]',\n    '[75]'\n  )\non conflict (\"hash\") do\nupdate\nset\n  \"deleted_at\" = null\nreturning\n  *\n    at executeTx (/home/node/app/apps/replicator/build/db.js:161:50)\n    at process.processTicksAndRejections (node:internal/process/task_queues:95:5)\n    at async Object.run (/home/node/app/apps/replicator/build/jobs/backfillFidCasts.js:14:17)\n    at async /home/node/app/node_modules/bullmq/dist/cjs/classes/child-processor.js:69:33"}
[03:11:30.959] ERROR (38): Error processing message 0x4666d85a24cc503d8c29a2d579268fa72daf41f7 (fid 308972): error: invalid byte sequence for encoding "UTF8": 0x00 {"context":"worker","wid":"38:0","messageHash":"0x4666d85a24cc503d8c29a2d579268fa72daf41f7","fid":308972,"rawMessage":{"type":"Buffer","data":[10,159,2,8,1,16,236,237,18,24,220,144,194,47,32,1,42,143,2,18,2,252,58,26,26,8,236,237,18,18,20,213,95,84,173,169,237,255,129,27,48,164,117,161,122,151,217,188,192,107,53,34,233,1,240,159,151,179,239,184,143,32,89,111,117,114,32,118,101,114,105,102,105,97,98,108,101,32,112,111,108,108,32,102,114,97,109,101,32,104,97,115,32,98,101,101,110,32,99,114,101,97,116,101,100,32,117,115,105,110,103,32,102,97,114,99,97,115,116,101,114,46,118,111,116,101,32,38,32,10,10,78,111,119,32,99,111,112,121,32,38,32,112,97,115,116,101,32,116,104,101,32,85,82,76,32,111,102,32,116,104,101,32,102,114,97,109,101,32,105,110,116,111,32,97,32,67,97,115,116,32,116,111,32,115,104,97,114,101,32,121,111,117,114,32,112,111,108,108,32,119,105,116,104,32,111,116,104,101,114,115,33,10,10,240,159,145,137,32,104,116,116,112,58,47,47,108,111,99,97,108,104,111,115,116,58,56,56,56,56,47,99,239,191,189,123,233,143,128,239,191,189,111,52,34,239,191,189,4,67,239,191,189,125,106,62,239,191,189,239,191,189,52,52,78,106,18,20,239,191,189,16,0,0,0,0,3,42,1,75,18,20,70,102,216,90,36,204,80,61,140,41,162,213,121,38,143,167,45,175,65,247,24,1,34,64,232,35,251,242,104,40,147,12,249,126,230,214,80,93,49,161,61,78,6,174,3,17,133,0,232,63,27,11,49,145,203,123,158,220,71,53,217,181,238,94,40,252,66,34,178,7,77,132,69,249,255,225,19,177,189,227,26,148,158,118,249,77,220,1,40,1,50,32,198,36,35,214,47,105,222,85,224,93,35,170,31,21,82,202,187,3,9,109,33,141,176,224,93,158,65,246,131,134,112,81]},"operation":"merge"}
dawsbot commented 5 months ago

also we see unicode errors while backfilling casts, not if its related to this. i can create separate issue if its not related

[03:11:30.961] ERROR (1): Job failed {"jobName":"BackfillFidCasts","jobId":"173530432","reason":"invalid byte sequence for encoding \"UTF8\": 0x00","errorName":"error","errorMessage":"invalid byte sequence for encoding \"UTF8\": 0x00","errorStack":"error: invalid byte sequence for encoding \"UTF8\": 0x00\n    at /home/node/app/node_modules/pg/lib/client.js:526:17\n    at process.processTicksAndRejections (node:internal/process/task_queues:95:5)\n    at async PostgresConnection.executeQuery (/home/node/app/node_modules/kysely/dist/cjs/dialect/postgres/postgres-driver.js:72:28)\n    at async /home/node/app/node_modules/kysely/dist/cjs/query-executor/query-executor-base.js:37:28\n    at async #run (/home/node/app/node_modules/kysely/dist/cjs/driver/single-connection-provider.js:27:16)\n    at PostgresConnection.executeQuery (/home/node/app/node_modules/kysely/dist/cjs/dialect/postgres/postgres-driver.js:91:69)\n    at process.processTicksAndRejections (node:internal/process/task_queues:95:5)\n    at async /home/node/app/node_modules/kysely/dist/cjs/query-executor/query-executor-base.js:37:28\n    at async #run (/home/node/app/node_modules/kysely/dist/cjs/driver/single-connection-provider.js:27:16)\n    at executeTakeFirstOrThrow (/home/node/app/apps/replicator/build/db.js:149:50)\n    at process.processTicksAndRejections (node:internal/process/task_queues:95:5)\n    at async Object.mergeDerivedRow (/home/node/app/apps/replicator/build/processors/cast.js:76:16)\n    at async /home/node/app/apps/replicator/build/eventProcessor.js:36:31\n    at async processMessage (/home/node/app/apps/replicator/build/processors/index.js:80:17)\n    at async mergeMessage (/home/node/app/apps/replicator/build/processors/index.js:52:5)\n    at async /home/node/app/apps/replicator/build/jobs/backfillFidCasts.js:15:21\n    at async /home/node/app/apps/replicator/build/db.js:156:20\n    at async /home/node/app/node_modules/kysely/dist/cjs/kysely.js:418:32\n    at async DefaultConnectionProvider.provideConnection (/home/node/app/node_modules/kysely/dist/cjs/driver/default-connection-provider.js:12:20)\ninsert into\n  \"casts\" (\n    \"timestamp\",\n    \"deleted_at\",\n    \"fid\",\n    \"parent_fid\",\n    \"hash\",\n    \"root_parent_hash\",\n    \"parent_hash\",\n    \"root_parent_url\",\n    \"text\",\n    \"embeds\",\n    \"mentions\",\n    \"mentions_positions\"\n  )\nvalues\n  (\n    '2024-02-28T08:27:08.000Z',\n    null,\n    308972,\n    308972,\n    '\x4666d85a24cc503d8c29a2d579268fa72daf41f7',\n    '\x99417ac94367cdcf765bf9f5b9e13838f4cc1223',\n    '\xd55f54ada9edff811b30a475a17a97d9bcc06b35',\n    null,\n    '🗳️ Your verifiable poll frame has been created using farcaster.vote & \n\nNow copy & paste the URL of the frame into a Cast to share your poll with others!\n\n👉 http://localhost:8888/c�{鏀�o4\"�\u0004C�}j>��44Nj\u0012\u0014�\u0010\u0000\u0000\u0000\u0000\u0003',\n    '[]',\n    '[7548]',\n    '[75]'\n  )\non conflict (\"hash\") do\nupdate\nset\n  \"deleted_at\" = null\nreturning\n  *\n    at executeTx (/home/node/app/apps/replicator/build/db.js:161:50)\n    at process.processTicksAndRejections (node:internal/process/task_queues:95:5)\n    at async Object.run (/home/node/app/apps/replicator/build/jobs/backfillFidCasts.js:14:17)\n    at async /home/node/app/node_modules/bullmq/dist/cjs/classes/child-processor.js:69:33"}
[03:11:30.959] ERROR (38): Error processing message 0x4666d85a24cc503d8c29a2d579268fa72daf41f7 (fid 308972): error: invalid byte sequence for encoding "UTF8": 0x00 {"context":"worker","wid":"38:0","messageHash":"0x4666d85a24cc503d8c29a2d579268fa72daf41f7","fid":308972,"rawMessage":{"type":"Buffer","data":[10,159,2,8,1,16,236,237,18,24,220,144,194,47,32,1,42,143,2,18,2,252,58,26,26,8,236,237,18,18,20,213,95,84,173,169,237,255,129,27,48,164,117,161,122,151,217,188,192,107,53,34,233,1,240,159,151,179,239,184,143,32,89,111,117,114,32,118,101,114,105,102,105,97,98,108,101,32,112,111,108,108,32,102,114,97,109,101,32,104,97,115,32,98,101,101,110,32,99,114,101,97,116,101,100,32,117,115,105,110,103,32,102,97,114,99,97,115,116,101,114,46,118,111,116,101,32,38,32,10,10,78,111,119,32,99,111,112,121,32,38,32,112,97,115,116,101,32,116,104,101,32,85,82,76,32,111,102,32,116,104,101,32,102,114,97,109,101,32,105,110,116,111,32,97,32,67,97,115,116,32,116,111,32,115,104,97,114,101,32,121,111,117,114,32,112,111,108,108,32,119,105,116,104,32,111,116,104,101,114,115,33,10,10,240,159,145,137,32,104,116,116,112,58,47,47,108,111,99,97,108,104,111,115,116,58,56,56,56,56,47,99,239,191,189,123,233,143,128,239,191,189,111,52,34,239,191,189,4,67,239,191,189,125,106,62,239,191,189,239,191,189,52,52,78,106,18,20,239,191,189,16,0,0,0,0,3,42,1,75,18,20,70,102,216,90,36,204,80,61,140,41,162,213,121,38,143,167,45,175,65,247,24,1,34,64,232,35,251,242,104,40,147,12,249,126,230,214,80,93,49,161,61,78,6,174,3,17,133,0,232,63,27,11,49,145,203,123,158,220,71,53,217,181,238,94,40,252,66,34,178,7,77,132,69,249,255,225,19,177,189,227,26,148,158,118,249,77,220,1,40,1,50,32,198,36,35,214,47,105,222,85,224,93,35,170,31,21,82,202,187,3,9,109,33,141,176,224,93,158,65,246,131,134,112,81]},"operation":"merge"}

The byte sequence error is already addressed here: https://github.com/farcasterxyz/hub-monorepo/issues/1823

I'm not sure if the new shuttle fixes this @sanjayprabhu probably has a good take

sds commented 3 months ago

Hey all, we've officially deprecated the Replicator due to issues such as this one. We recommend you have a look at the Shuttle package if you want to build a service that ingests messages from hubs into another data source. Thanks!