NRGI / rgi-assessment-tool

MEAN build of RGI 2015 assessment tool
MIT License
5 stars 1 forks source link

Corrupted files without source #540

Open alexander-elgin opened 7 years ago

alexander-elgin commented 7 years ago

The following files are corrupted, and no source is provided for them:

s3_url: https://s3.amazonaws.com/rgi-documents/0e249d24c4e8dc7e3a236a9a7a5a17aa453c5661.pdf answers: [AU-2016-MI-079, AU-2016-MI-080, AU-2016-MI-081, AU-2016-MI-144, AU-2016-MI-145, AU-2016-MI-129, AU-2016-MI-130]

s3_url: https://s3.amazonaws.com/rgi-documents/43b06b88fa560b7892a81d4310a42ab581351410.pdf answers: [MY-2016-HY-096, MY-2016-HY-042]

s3_url: https://s3.amazonaws.com/rgi-documents/ca919c2160e33b7c8c12151e14bdd7451168218c.pdf answers: [AO-2016-HY-015]

s3_url: https://s3.amazonaws.com/rgi-documents/ff58025f4d762db1445e7df7dc7c44a332393709.pdf answers: [GN-2016-MI-137]

s3_url: https://s3.amazonaws.com/rgi-documents/8594716c9154a56bf49d8e7a55deb20bf913447a.pdf answers: [VE-2016-HY-098, VE-2016-HY-063]

s3_url: https://s3.amazonaws.com/rgi-documents/b92c98f5c05604a38cf7301d70c457172ccc65ef.pdf answers: [MA-2016-MI-099]

s3_url: https://s3.amazonaws.com/rgi-documents/fe04aa096c068822f5e565d370368bb41743b2b5.pdf answers: [TD-2016-HY-142, TD-2016-HY-165]

s3_url: https://s3.amazonaws.com/rgi-documents/8744d5b34fa2ca5e099ffe4ddb03ebb8630a9e86.pdf answers: [AO-2016-HY-001]

s3_url: https://s3.amazonaws.com/rgi-documents/9a189719d8be4cc7055f3a007a95a5137491a942.pdf answers: [ET-2016-MI-073, ET-2016-MI-075]

s3_url: https://s3.amazonaws.com/rgi-documents/30de7a4e5657969454c0874acdc3ceecfe35fda7.pdf answers: [ET-2016-MI-074, ET-2016-MI-076]

s3_url: https://s3.amazonaws.com/rgi-documents/867bebe4a1ef7004ae931d80fa114c891c9edb44.pdf answers: [SS-2016-HY-015, SS-2016-HY-022, SS-2016-HY-067, SS-2016-HY-048, SS-2016-HY-024]

s3_url: https://s3.amazonaws.com/rgi-documents/00c30d0206f4588e5a63d127d0681f489005f298.pdf answers: [PG-2016-MI-016, PG-2016-MI-028, PG-2016-MI-034, PG-2016-MI-044, PG-2016-MI-031, PG-2016-MI-076, PG-2016-MI-133, PG-2016-MI-029]

s3_url: https://s3.amazonaws.com/rgi-documents/a054c977f80fd2c6738246389b05de4fde2fbbc1.pdf answers: [BW-2016-MI-101]

s3_url: https://s3.amazonaws.com/rgi-documents/b5a90857d3f17e36b8a672c225b68a4fcc78ac03.pdf answers: [MY-2016-HY-079, MY-2016-HY-048, MY-2016-HY-028, MY-2016-HY-132, MY-2016-HY-030, MY-2016-HY-032, MY-2016-HY-107, MY-2016-HY-134]

s3_url: https://s3.amazonaws.com/rgi-documents/dbfa93c2d74f30635bd28ffd6ac78b7ccb505fa0.pdf answers: [LY-2016-HY-151, LY-2016-HY-153]

s3_url: https://s3.amazonaws.com/rgi-documents/4a85b0a881f70694e53cec390aee415ccbf6cf94.pdf answers: [ZW-2016-MI-059, ZW-2016-MI-061]

s3_url: https://s3.amazonaws.com/rgi-documents/7acc685f39744c5131a6eaa043fc84912f970eab.pdf answers: [TD-2016-HY-050, TD-2016-HY-063]

s3_url: https://s3.amazonaws.com/rgi-documents/34163cca602b7ee71e2b8dcc4469dedd0d70d2f5.docx answers: [VN-2016-HY-056, VN-2016-HY-057, VN-2016-HY-061, VN-2016-HY-107]

s3_url: https://s3.amazonaws.com/rgi-documents/5a5d9ce3bcc6b308e97e00923e45d492a87924ee.pdf answers: [VN-2016-HY-056]

s3_url: https://s3.amazonaws.com/rgi-documents/d3be37b05a01dd827409d9a8dfc35eda5bca6faa.doc answers: [GN-2016-MI-104, GN-2016-MI-105, GN-2016-MI-155, GN-2016-MI-132]

s3_url: https://s3.amazonaws.com/rgi-documents/66027620352fb931b91538cdaef9378094a275f7.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/31583c0839604e1fa4794c11a614674625e3e969.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/77cca9148af68895e6cc696f8de4a68e87fcae70.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/c81031dc51539562c235befc02b1e8ebd405ac45.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/1cac5be4edc518177a91c1d538983b8790329a56.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/f568490dabbb7607c76af7b3f39996f6be1d14cc.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/8b6791bda80bfadcb789d1f695a33e06b0420132.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/c19c170f550eb3062251588813c20fd46584ab6c.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/1e98741672742c3d3f5652977ff5cd9a756ff0da.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/4422568fed82d36e92b69a6cdcc9a8bbb3bb48e8.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/4b12e0208d7bdf206cfeac1198b10e585daa46e3.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/f7cb01b80d3f7f2520f7a575a012592b16aca5d1.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/427b0ac7aeb3fc477f61a3a0fa7900ab17d6221c.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/cfe1b80447db2157b19612fce6d7ce083b776877.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/7af99af1bca0494f4dd0bc6654b1e3122c3972d9.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/7b2a5a4fdf671e7d9a0375544484580c2a75b2ba.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/9dd8b24a1fd00a3b34eaf6801164f60d3d4a580b.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/a233a31fb5766ba849ff28bc3e0c1fcb2dc2743e.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/b60bb14e2af321e3ebddd192d25469715d68629a.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/ae56e56fd0a2572232e62308f8ef6acf4bbe108c.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/dc9df2f5c1d8e5978cb2140db39c7afe997bb081.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/ed34b65d23091aefe231d26d0de0e99ea96d619f.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/28b0d21a4e9aa29e7c5d8ff74f6ea5f4441f1beb.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/39b978180090353187ab4e6ce414432e242a83b2.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/ece8ad1489310e4dcb139960cd3875634b06c8a2.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/9af02daaec89bfbf94d49905a3495e3dcd76626b.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/1c2b45a88c8b7578653040d98040d8f1446105e7.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/4f4a3f9bba5b8d58399a77cfb20a8a54fb6e4bd0.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/504a40c60046af98d3bd642af6a820766d1fced2.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/e92f23f1546fc235f6e31064f444f77ffe81ebcb.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/535f507366002b22ec3c04b9ea1afbe7f893283d.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/eeecdc6862db0f35a96a0128f790d390189011dc.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/120e3fcd1c063aaa037cfdb1091827308db51c92.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/9a009aa06cd4b2694533abb6bd4d534519cc6c6f.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/be5233e5c0730da9c3c25a1b84ee905d7e3d2658.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/8114b50f8ca4d8d819143912f1c68a112e2ed00a.png?dl=0 answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/1b13ccfda99f3e7ce6c33de01ad7bd7f340daf65.pdf?openelement answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/c0aa0198fac04d8ad7344f5d90bd1250d0c87671.pdf?openelement answers: []

alexander-elgin commented 7 years ago

s3_url: https://s3.amazonaws.com/rgi-documents/cfcd58323aaa4659a815da701e84e99f08266308.pdf answers: []

s3_url: https://s3.amazonaws.com/rgi-documents/4abd5fa142f8e93351c6a921999b82914f3fe9a0.pdf answers: []

iprunache commented 7 years ago

Both files were imported as remote files, and both are truncated. No user upload was involved here, since the app itself tries to download the files from the provided URL.

However, it seems that the user was notified that the import failed, because both files were immediately re-imported and that second attempt succeeded; see the successful files below and a log sample that shows the re-import. Perhaps we should delete the initial import if we can detect that it failed.

https://s3.amazonaws.com/rgi-documents/cfcd58323aaa4659a815da701e84e99f08266308.pdf

https://s3.amazonaws.com/rgi-documents/4abd5fa142f8e93351c6a921999b82914f3fe9a0.pdf

Log sample:

{ _id: ObjectId("5809052a241fac001843ee28"), name: "docs", level: NumberLong("30"), msg: "UPLOAD A REMOTE FILE http://www.petroamazonas.gob.ec/wp-content/uploads/downloads/2015/03/INFORME-PAM-2014-para-web.pdf", time: ISODate("2016-10-20T17:55:54.993Z"), __v: NumberLong("0") }
{ _id: ObjectId("58090538241fac001843ee2a"), name: "docs", level: NumberLong("30"), msg: "copy the remote file http://www.petroamazonas.gob.ec/wp-content/uploads/downloads/2015/03/INFORME-PAM-2014-para-web.pdf to /tmp/57f74627e63ce000107bcb10-1476986168704.pdf", time: ISODate("2016-10-20T17:56:08.704Z"), __v: NumberLong("0") }
{ _id: ObjectId("58090541241fac001843ee2b"), name: "docs", level: NumberLong("30"), msg: "the remote file http://www.petroamazonas.gob.ec/wp-content/uploads/downloads/2015/03/INFORME-PAM-2014-para-web.pdf has been successfully to /tmp/57f74627e63ce000107bcb10-1476986168704.pdf", time: ISODate("2016-10-20T17:56:17.297Z"), __v: NumberLong("0") }
{ _id: ObjectId("58090541241fac001843ee2c"), name: "docs", level: NumberLong("30"), msg: "attempt to read the local file /tmp/57f74627e63ce000107bcb10-1476986168704.pdf", time: ISODate("2016-10-20T17:56:17.731Z"), __v: NumberLong("0") }
{ _id: ObjectId("58090542241fac001843ee2d"), name: "docs", level: NumberLong("30"), msg: "read successfully the local file /tmp/57f74627e63ce000107bcb10-1476986168704.pdf", time: ISODate("2016-10-20T17:56:18.156Z"), __v: NumberLong("0") }
{ _id: ObjectId("58090542241fac001843ee2e"), name: "docs", level: NumberLong("30"), msg: "search a document by the hash cfcd58323aaa4659a815da701e84e99f08266308 generated using the local file /tmp/57f74627e63ce000107bcb10-1476986168704.pdf", time: ISODate("2016-10-20T17:56:18.206Z"), __v: NumberLong("0") }
{ _id: ObjectId("58090542241fac001843ee2f"), name: "docs", level: NumberLong("30"), msg: "the hash cfcd58323aaa4659a815da701e84e99f08266308 is not found. Transfer the file to S3", time: ISODate("2016-10-20T17:56:18.250Z"), __v: NumberLong("0") }
{ _id: ObjectId("58090543241fac001843ee30"), name: "docs", level: NumberLong("30"), msg: "the file cfcd58323aaa4659a815da701e84e99f08266308.pdf has been transferred successfully.", time: ISODate("2016-10-20T17:56:19.038Z"), __v: NumberLong("0") }
{ _id: ObjectId("58090551c375fa001a165994"), name: "docs", level: NumberLong("30"), msg: "UPLOAD A REMOTE FILE http://www.petroamazonas.gob.ec/wp-content/uploads/downloads/2015/03/INFORME-PAM-2014-para-web.pdf", time: ISODate("2016-10-20T17:56:33.217Z"), __v: NumberLong("0") }
{ _id: ObjectId("58090554c375fa001a165996"), name: "docs", level: NumberLong("30"), msg: "copy the remote file http://www.petroamazonas.gob.ec/wp-content/uploads/downloads/2015/03/INFORME-PAM-2014-para-web.pdf to /tmp/57f74627e63ce000107bcb10-1476986196425.pdf", time: ISODate("2016-10-20T17:56:36.425Z"), __v: NumberLong("0") }
{ _id: ObjectId("5809055dc375fa001a165997"), name: "docs", level: NumberLong("30"), msg: "the remote file http://www.petroamazonas.gob.ec/wp-content/uploads/downloads/2015/03/INFORME-PAM-2014-para-web.pdf has been successfully to /tmp/57f74627e63ce000107bcb10-1476986196425.pdf", time: ISODate("2016-10-20T17:56:45.059Z"), __v: NumberLong("0") }
{ _id: ObjectId("5809055dc375fa001a165998"), name: "docs", level: NumberLong("30"), msg: "attempt to read the local file /tmp/57f74627e63ce000107bcb10-1476986196425.pdf", time: ISODate("2016-10-20T17:56:45.677Z"), __v: NumberLong("0") }
{ _id: ObjectId("5809055ec375fa001a165999"), name: "docs", level: NumberLong("30"), msg: "read successfully the local file /tmp/57f74627e63ce000107bcb10-1476986196425.pdf", time: ISODate("2016-10-20T17:56:46.302Z"), __v: NumberLong("0") }
{ _id: ObjectId("5809055ec375fa001a16599a"), name: "docs", level: NumberLong("30"), msg: "search a document by the hash 83d3700ebdb503960b77d4e378a70b4d147b00b6 generated using the local file /tmp/57f74627e63ce000107bcb10-1476986196425.pdf", time: ISODate("2016-10-20T17:56:46.424Z"), __v: NumberLong("0") }
{ _id: ObjectId("5809055ec375fa001a16599b"), name: "docs", level: NumberLong("30"), msg: "the hash 83d3700ebdb503960b77d4e378a70b4d147b00b6 is not found. Transfer the file to S3", time: ISODate("2016-10-20T17:56:46.462Z"), __v: NumberLong("0") }
{ _id: ObjectId("5809055fc375fa001a16599c"), name: "docs", level: NumberLong("30"), msg: "the file 83d3700ebdb503960b77d4e378a70b4d147b00b6.pdf has been transferred successfully.", time: ISODate("2016-10-20T17:56:47.770Z"), __v: NumberLong("0") }