We are conducting a screening of ~60,000 records in colandrapp, but are finding that the 'unscreened' list often does not load. In addition, sometimes the include / exclude requests start failing (timeout / socket hangups), and we have to wait 10 - 30 minutes for the system to recover the ability to submit decisions.
A typical error when loading the unscreened page is below:
{
"message": "500 - {\"errors\":\"(psycopg2.errors.OutOfMemory) could not resize shared memory segment \\\"/PostgreSQL.39801251\\\" to 8388608 bytes: Cannot allocate memory\\n [SQL: \\\"SELECT studies.id AS studies_id, studies.created_at AS studies_created_at, studies.last_updated AS studies_last_updated, studies.user_id AS studies_user_id, studies.review_id AS studies_review_id, studies.tags AS studies_tags, studies.data_source_id AS studies_data_source_id, studies.dedupe_status AS studies_dedupe_status, studies.citation_status AS studies_citation_status, studies.fulltext_status AS studies_fulltext_status, studies.data_extraction_status AS studies_data_extraction_status, dedupes_1.id AS dedupes_1_id, dedupes_1.created_at AS dedupes_1_created_at, dedupes_1.review_id AS dedupes_1_review_id, dedupes_1.duplicate_of AS dedupes_1_duplicate_of, dedupes_1.duplicate_score AS dedupes_1_duplicate_score, citations_1.id AS citations_1_id, citations_1.created_at AS citations_1_created_at, citations_1.last_updated AS citations_1_last_updated, citations_1.review_id AS citations_1_review_id, citations_1.type_of_work AS citations_1_type_of_work, citations_1.title AS citations_1_title, citations_1.secondary_title AS citations_1_secondary_title, citations_1.abstract AS citations_1_abstract, citations_1.pub_year AS citations_1_pub_year, citations_1.pub_month AS citations_1_pub_month, citations_1.authors AS citations_1_authors, citations_1.keywords AS citations_1_keywords, citations_1.type_of_reference AS citations_1_type_of_reference, citations_1.journal_name AS citations_1_journal_name, citations_1.volume AS citations_1_volume, citations_1.issue_number AS citations_1_issue_number, citations_1.doi AS citations_1_doi, citations_1.issn AS citations_1_issn, citations_1.publisher AS citations_1_publisher, citations_1.language AS citations_1_language, citations_1.other_fields AS citations_1_other_fields, citations_1.text_content_vector_rep AS citations_1_text_content_vector_rep, fulltexts_1.id AS fulltexts_1_id, fulltexts_1.created_at AS fulltexts_1_created_at, fulltexts_1.last_updated AS fulltexts_1_last_updated, fulltexts_1.review_id AS fulltexts_1_review_id, fulltexts_1.filename AS fulltexts_1_filename, fulltexts_1.original_filename AS fulltexts_1_original_filename, fulltexts_1.text_content AS fulltexts_1_text_content, fulltexts_1.text_content_vector_rep AS fulltexts_1_text_content_vector_rep, data_extractions_1.id AS data_extractions_1_id, data_extractions_1.created_at AS data_extractions_1_created_at, data_extractions_1.last_updated AS data_extractions_1_last_updated, data_extractions_1.review_id AS data_extractions_1_review_id, data_extractions_1.extracted_items AS data_extractions_1_extracted_items \\\\nFROM studies JOIN citations ON citations.id = studies.id LEFT OUTER JOIN dedupes AS dedupes_1 ON studies.id = dedupes_1.id LEFT OUTER JOIN citations AS citations_1 ON studies.id = citations_1.id LEFT OUTER JOIN fulltexts AS fulltexts_1 ON studies.id = fulltexts_1.id LEFT OUTER JOIN data_extractions AS data_extractions_1 ON studies.id = data_extractions_1.id \\\\nWHERE %(param_1)s = studies.review_id AND studies.id IN (\\\\n SELECT t.id\\\\n FROM (SELECT\\\\n studies.id,\\\\n studies.dedupe_status,\\\\n studies.citation_status,\\\\n screenings.user_ids\\\\n FROM studies\\\\n LEFT JOIN (SELECT citation_id, ARRAY_AGG(user_id) AS user_ids\\\\n FROM citation_screenings\\\\n GROUP BY citation_id\\\\n ) AS screenings\\\\n ON studies.id = screenings.citation_id\\\\n ) AS t\\\\n WHERE\\\\n t.dedupe_status = 'not_duplicate' -- this is necessary!\\\\n AND t.citation_status NOT IN ('excluded', 'included', 'conflict')\\\\n AND (t.citation_status = 'not_screened' OR NOT 6672 = ANY(t.user_ids))\\\\n ) ORDER BY random() \\\\n LIMIT %(param_2)s\\\"] [parameters: {'param_1': [removed for github], 'param_2': 1000}] (Background on this error at: http://sqlalche.me/e/e3q8)\"}",
"error": {
"name": "StatusCodeError",
"statusCode": 500,
"message": "500 - {\"errors\":\"(psycopg2.errors.OutOfMemory) could not resize shared memory segment \\\"/PostgreSQL.39801251\\\" to 8388608 bytes: Cannot allocate memory\\n [SQL: \\\"SELECT studies.id AS studies_id, studies.created_at AS studies_created_at, studies.last_updated AS studies_last_updated, studies.user_id AS studies_user_id, studies.review_id AS studies_review_id, studies.tags AS studies_tags, studies.data_source_id AS studies_data_source_id, studies.dedupe_status AS studies_dedupe_status, studies.citation_status AS studies_citation_status, studies.fulltext_status AS studies_fulltext_status, studies.data_extraction_status AS studies_data_extraction_status, dedupes_1.id AS dedupes_1_id, dedupes_1.created_at AS dedupes_1_created_at, dedupes_1.review_id AS dedupes_1_review_id, dedupes_1.duplicate_of AS dedupes_1_duplicate_of, dedupes_1.duplicate_score AS dedupes_1_duplicate_score, citations_1.id AS citations_1_id, citations_1.created_at AS citations_1_created_at, citations_1.last_updated AS citations_1_last_updated, citations_1.review_id AS citations_1_review_id, citations_1.type_of_work AS citations_1_type_of_work, citations_1.title AS citations_1_title, citations_1.secondary_title AS citations_1_secondary_title, citations_1.abstract AS citations_1_abstract, citations_1.pub_year AS citations_1_pub_year, citations_1.pub_month AS citations_1_pub_month, citations_1.authors AS citations_1_authors, citations_1.keywords AS citations_1_keywords, citations_1.type_of_reference AS citations_1_type_of_reference, citations_1.journal_name AS citations_1_journal_name, citations_1.volume AS citations_1_volume, citations_1.issue_number AS citations_1_issue_number, citations_1.doi AS citations_1_doi, citations_1.issn AS citations_1_issn, citations_1.publisher AS citations_1_publisher, citations_1.language AS citations_1_language, citations_1.other_fields AS citations_1_other_fields, citations_1.text_content_vector_rep AS citations_1_text_content_vector_rep, fulltexts_1.id AS fulltexts_1_id, fulltexts_1.created_at AS fulltexts_1_created_at, fulltexts_1.last_updated AS fulltexts_1_last_updated, fulltexts_1.review_id AS fulltexts_1_review_id, fulltexts_1.filename AS fulltexts_1_filename, fulltexts_1.original_filename AS fulltexts_1_original_filename, fulltexts_1.text_content AS fulltexts_1_text_content, fulltexts_1.text_content_vector_rep AS fulltexts_1_text_content_vector_rep, data_extractions_1.id AS data_extractions_1_id, data_extractions_1.created_at AS data_extractions_1_created_at, data_extractions_1.last_updated AS data_extractions_1_last_updated, data_extractions_1.review_id AS data_extractions_1_review_id, data_extractions_1.extracted_items AS data_extractions_1_extracted_items \\\\nFROM studies JOIN citations ON citations.id = studies.id LEFT OUTER JOIN dedupes AS dedupes_1 ON studies.id = dedupes_1.id LEFT OUTER JOIN citations AS citations_1 ON studies.id = citations_1.id LEFT OUTER JOIN fulltexts AS fulltexts_1 ON studies.id = fulltexts_1.id LEFT OUTER JOIN data_extractions AS data_extractions_1 ON studies.id = data_extractions_1.id \\\\nWHERE %(param_1)s = studies.review_id AND studies.id IN (\\\\n SELECT t.id\\\\n FROM (SELECT\\\\n studies.id,\\\\n studies.dedupe_status,\\\\n studies.citation_status,\\\\n screenings.user_ids\\\\n FROM studies\\\\n LEFT JOIN (SELECT citation_id, ARRAY_AGG(user_id) AS user_ids\\\\n FROM citation_screenings\\\\n GROUP BY citation_id\\\\n ) AS screenings\\\\n ON studies.id = screenings.citation_id\\\\n ) AS t\\\\n WHERE\\\\n t.dedupe_status = 'not_duplicate' -- this is necessary!\\\\n AND t.citation_status NOT IN ('excluded', 'included', 'conflict')\\\\n AND (t.citation_status = 'not_screened' OR NOT 6672 = ANY(t.user_ids))\\\\n ) ORDER BY random() \\\\n LIMIT %(param_2)s\\\"] [parameters: {'param_1': [removed for github], 'param_2': 1000}] (Background on this error at: http://sqlalche.me/e/e3q8)\"}",
"error": {
"errors": "(psycopg2.errors.OutOfMemory) could not resize shared memory segment \"/PostgreSQL.39801251\" to 8388608 bytes: Cannot allocate memory\n [SQL: \"SELECT studies.id AS studies_id, studies.created_at AS studies_created_at, studies.last_updated AS studies_last_updated, studies.user_id AS studies_user_id, studies.review_id AS studies_review_id, studies.tags AS studies_tags, studies.data_source_id AS studies_data_source_id, studies.dedupe_status AS studies_dedupe_status, studies.citation_status AS studies_citation_status, studies.fulltext_status AS studies_fulltext_status, studies.data_extraction_status AS studies_data_extraction_status, dedupes_1.id AS dedupes_1_id, dedupes_1.created_at AS dedupes_1_created_at, dedupes_1.review_id AS dedupes_1_review_id, dedupes_1.duplicate_of AS dedupes_1_duplicate_of, dedupes_1.duplicate_score AS dedupes_1_duplicate_score, citations_1.id AS citations_1_id, citations_1.created_at AS citations_1_created_at, citations_1.last_updated AS citations_1_last_updated, citations_1.review_id AS citations_1_review_id, citations_1.type_of_work AS citations_1_type_of_work, citations_1.title AS citations_1_title, citations_1.secondary_title AS citations_1_secondary_title, citations_1.abstract AS citations_1_abstract, citations_1.pub_year AS citations_1_pub_year, citations_1.pub_month AS citations_1_pub_month, citations_1.authors AS citations_1_authors, citations_1.keywords AS citations_1_keywords, citations_1.type_of_reference AS citations_1_type_of_reference, citations_1.journal_name AS citations_1_journal_name, citations_1.volume AS citations_1_volume, citations_1.issue_number AS citations_1_issue_number, citations_1.doi AS citations_1_doi, citations_1.issn AS citations_1_issn, citations_1.publisher AS citations_1_publisher, citations_1.language AS citations_1_language, citations_1.other_fields AS citations_1_other_fields, citations_1.text_content_vector_rep AS citations_1_text_content_vector_rep, fulltexts_1.id AS fulltexts_1_id, fulltexts_1.created_at AS fulltexts_1_created_at, fulltexts_1.last_updated AS fulltexts_1_last_updated, fulltexts_1.review_id AS fulltexts_1_review_id, fulltexts_1.filename AS fulltexts_1_filename, fulltexts_1.original_filename AS fulltexts_1_original_filename, fulltexts_1.text_content AS fulltexts_1_text_content, fulltexts_1.text_content_vector_rep AS fulltexts_1_text_content_vector_rep, data_extractions_1.id AS data_extractions_1_id, data_extractions_1.created_at AS data_extractions_1_created_at, data_extractions_1.last_updated AS data_extractions_1_last_updated, data_extractions_1.review_id AS data_extractions_1_review_id, data_extractions_1.extracted_items AS data_extractions_1_extracted_items \\nFROM studies JOIN citations ON citations.id = studies.id LEFT OUTER JOIN dedupes AS dedupes_1 ON studies.id = dedupes_1.id LEFT OUTER JOIN citations AS citations_1 ON studies.id = citations_1.id LEFT OUTER JOIN fulltexts AS fulltexts_1 ON studies.id = fulltexts_1.id LEFT OUTER JOIN data_extractions AS data_extractions_1 ON studies.id = data_extractions_1.id \\nWHERE %(param_1)s = studies.review_id AND studies.id IN (\\n SELECT t.id\\n FROM (SELECT\\n studies.id,\\n studies.dedupe_status,\\n studies.citation_status,\\n screenings.user_ids\\n FROM studies\\n LEFT JOIN (SELECT citation_id, ARRAY_AGG(user_id) AS user_ids\\n FROM citation_screenings\\n GROUP BY citation_id\\n ) AS screenings\\n ON studies.id = screenings.citation_id\\n ) AS t\\n WHERE\\n t.dedupe_status = 'not_duplicate' -- this is necessary!\\n AND t.citation_status NOT IN ('excluded', 'included', 'conflict')\\n AND (t.citation_status = 'not_screened' OR NOT 6672 = ANY(t.user_ids))\\n ) ORDER BY random() \\n LIMIT %(param_2)s\"] [parameters: {'param_1': [removed for github], 'param_2': 1000}] (Background on this error at: http://sqlalche.me/e/e3q8)"
},
"options": {
"uri": "http://localhost:5000/api/studies",
"json": true,
"auth": {
"user": "[removed for github]"
},
"qs": {
"review_id": "[removed for github]",
"fields": "id,citation.title,citation.authors,citation.journal_name,citation.pub_year,citation.abstract,citation.keywords,citation.screenings,citation_status,tags",
"citation_status": "pending",
"order_by": "relevance",
"page": 0,
"per_page": 100
},
"simple": true,
"resolveWithFullResponse": false,
"transform2xxOnly": false
},
"response": {
"statusCode": 500,
"body": {
"errors": "(psycopg2.errors.OutOfMemory) could not resize shared memory segment \"/PostgreSQL.39801251\" to 8388608 bytes: Cannot allocate memory\n [SQL: \"SELECT studies.id AS studies_id, studies.created_at AS studies_created_at, studies.last_updated AS studies_last_updated, studies.user_id AS studies_user_id, studies.review_id AS studies_review_id, studies.tags AS studies_tags, studies.data_source_id AS studies_data_source_id, studies.dedupe_status AS studies_dedupe_status, studies.citation_status AS studies_citation_status, studies.fulltext_status AS studies_fulltext_status, studies.data_extraction_status AS studies_data_extraction_status, dedupes_1.id AS dedupes_1_id, dedupes_1.created_at AS dedupes_1_created_at, dedupes_1.review_id AS dedupes_1_review_id, dedupes_1.duplicate_of AS dedupes_1_duplicate_of, dedupes_1.duplicate_score AS dedupes_1_duplicate_score, citations_1.id AS citations_1_id, citations_1.created_at AS citations_1_created_at, citations_1.last_updated AS citations_1_last_updated, citations_1.review_id AS citations_1_review_id, citations_1.type_of_work AS citations_1_type_of_work, citations_1.title AS citations_1_title, citations_1.secondary_title AS citations_1_secondary_title, citations_1.abstract AS citations_1_abstract, citations_1.pub_year AS citations_1_pub_year, citations_1.pub_month AS citations_1_pub_month, citations_1.authors AS citations_1_authors, citations_1.keywords AS citations_1_keywords, citations_1.type_of_reference AS citations_1_type_of_reference, citations_1.journal_name AS citations_1_journal_name, citations_1.volume AS citations_1_volume, citations_1.issue_number AS citations_1_issue_number, citations_1.doi AS citations_1_doi, citations_1.issn AS citations_1_issn, citations_1.publisher AS citations_1_publisher, citations_1.language AS citations_1_language, citations_1.other_fields AS citations_1_other_fields, citations_1.text_content_vector_rep AS citations_1_text_content_vector_rep, fulltexts_1.id AS fulltexts_1_id, fulltexts_1.created_at AS fulltexts_1_created_at, fulltexts_1.last_updated AS fulltexts_1_last_updated, fulltexts_1.review_id AS fulltexts_1_review_id, fulltexts_1.filename AS fulltexts_1_filename, fulltexts_1.original_filename AS fulltexts_1_original_filename, fulltexts_1.text_content AS fulltexts_1_text_content, fulltexts_1.text_content_vector_rep AS fulltexts_1_text_content_vector_rep, data_extractions_1.id AS data_extractions_1_id, data_extractions_1.created_at AS data_extractions_1_created_at, data_extractions_1.last_updated AS data_extractions_1_last_updated, data_extractions_1.review_id AS data_extractions_1_review_id, data_extractions_1.extracted_items AS data_extractions_1_extracted_items \\nFROM studies JOIN citations ON citations.id = studies.id LEFT OUTER JOIN dedupes AS dedupes_1 ON studies.id = dedupes_1.id LEFT OUTER JOIN citations AS citations_1 ON studies.id = citations_1.id LEFT OUTER JOIN fulltexts AS fulltexts_1 ON studies.id = fulltexts_1.id LEFT OUTER JOIN data_extractions AS data_extractions_1 ON studies.id = data_extractions_1.id \\nWHERE %(param_1)s = studies.review_id AND studies.id IN (\\n SELECT t.id\\n FROM (SELECT\\n studies.id,\\n studies.dedupe_status,\\n studies.citation_status,\\n screenings.user_ids\\n FROM studies\\n LEFT JOIN (SELECT citation_id, ARRAY_AGG(user_id) AS user_ids\\n FROM citation_screenings\\n GROUP BY citation_id\\n ) AS screenings\\n ON studies.id = screenings.citation_id\\n ) AS t\\n WHERE\\n t.dedupe_status = 'not_duplicate' -- this is necessary!\\n AND t.citation_status NOT IN ('excluded', 'included', 'conflict')\\n AND (t.citation_status = 'not_screened' OR NOT 6672 = ANY(t.user_ids))\\n ) ORDER BY random() \\n LIMIT %(param_2)s\"] [parameters: {'param_1': [removed for github], 'param_2': 1000}] (Background on this error at: http://sqlalche.me/e/e3q8)"
},
"headers": {
"server": "gunicorn/19.6.0",
"date": "Tue, 08 Mar 2022 11:08:35 GMT",
"connection": "close",
"content-type": "application/json",
"content-length": "4292"
},
"request": {
"uri": {
"protocol": "http:",
"slashes": true,
"auth": null,
"host": "localhost:5000",
"port": "5000",
"hostname": "localhost",
"hash": null,
"search": "?review_id=[removed for github]&fields=id%2Ccitation.title%2Ccitation.authors%2Ccitation.journal_name%2Ccitation.pub_year%2Ccitation.abstract%2Ccitation.keywords%2Ccitation.screenings%2Ccitation_status%2Ctags&citation_status=pending&order_by=relevance&page=0&per_page=100",
"query": "review_id=[removed for github]&fields=id%2Ccitation.title%2Ccitation.authors%2Ccitation.journal_name%2Ccitation.pub_year%2Ccitation.abstract%2Ccitation.keywords%2Ccitation.screenings%2Ccitation_status%2Ctags&citation_status=pending&order_by=relevance&page=0&per_page=100",
"pathname": "/api/studies",
"path": "/api/studies?review_id=[removed for github]&fields=id%2Ccitation.title%2Ccitation.authors%2Ccitation.journal_name%2Ccitation.pub_year%2Ccitation.abstract%2Ccitation.keywords%2Ccitation.screenings%2Ccitation_status%2Ctags&citation_status=pending&order_by=relevance&page=0&per_page=100",
"href": "http://localhost:5000/api/studies?review_id=[removed for github]&fields=id%2Ccitation.title%2Ccitation.authors%2Ccitation.journal_name%2Ccitation.pub_year%2Ccitation.abstract%2Ccitation.keywords%2Ccitation.screenings%2Ccitation_status%2Ctags&citation_status=pending&order_by=relevance&page=0&per_page=100"
},
"method": "GET",
"headers": {
"authorization": "{I've removed this for security reasons}",
"accept": "application/json"
}
}
}
}
}
We are conducting a screening of ~60,000 records in colandrapp, but are finding that the 'unscreened' list often does not load. In addition, sometimes the include / exclude requests start failing (timeout / socket hangups), and we have to wait 10 - 30 minutes for the system to recover the ability to submit decisions.
A typical error when loading the unscreened page is below:
Thanks for any help with these errors.