open-xml-templating / docxtemplater

Generate docx, pptx, and xlsx from templates (Word, Powerpoint and Excel documents), from Node.js or the browser. Demo: https://www.docxtemplater.com/demo. #docx #office #generator #templating #report #json #generate #generation #template #create #pptx #docx #xlsx #react #vuejs #angularjs #browser #typescript #image #html #table #chart
https://www.docxtemplater.com
Other
2.93k stars 341 forks source link

Doc is corrupted when run in Lambda #711

Closed pake-perez closed 1 year ago

pake-perez commented 1 year ago

Environment

How to reproduce my problem :

My template is the following : fotomulta.zip

With the following js files :

document.js


/**
 * Template loader.
 *
 * `q`, `path`, `fs`, `pizzip` and `docxHandler` are expected to be in scope
 * from the surrounding module (their imports are not part of this snippet).
 */
let Document = {
  /**
   * Reads a template from the folder named by `process.env.TEMPLATES_PATH`
   * (resolved relative to the current working directory) and wraps it in a
   * `docxHandler` instance.
   *
   * @param {string} docName - File name of the template inside the templates folder.
   * @returns {object} The `q` deferred's promise. It resolves with the
   *   `docxHandler` instance and rejects with the read error or with any
   *   error thrown while constructing the zip / document.
   */
  readTemplate: (docName) => {
    const defer = q.defer();
    const templatesFolderPath = path.resolve('./' + process.env.TEMPLATES_PATH);
    // Load the docx file as binary content
    fs.readFile(`${templatesFolderPath}/${docName}`, 'binary', (err, data) => {
      if (err) {
        defer.reject(err);
        return;
      }
      // Constructor errors thrown inside this callback would otherwise be
      // uncaught and leave the promise pending forever, so forward them.
      try {
        const zip = new pizzip(data);
        // Declared locally: assigning to an undeclared `doc` would create a
        // global shared by every call (or throw in strict mode).
        const doc = new docxHandler(zip, {
          paragraphLoop: true,
          linebreaks: true,
        });
        defer.resolve(doc);
      } catch (error) {
        defer.reject(error);
      }
    });
    return defer.promise;
  },
};
module.exports = Document;

Main function

const document = require('./document.js');
/**
 * Request handler: renders the `templateNames.fotoMulta` template with values
 * taken from the request body and sends the generated document as an
 * attachment. On any error it logs the error and responds with HTTP 500.
 *
 * @param {object} req - Request; `req.body.name`, `req.body.plate` and `req.body.model` are read.
 * @param {object} res - Response; `set`, `end`, `status` and `send` are called on it.
 */
const myFunc = async (req, res) => {
    let name = req.body.name;
    let plate = req.body.plate;
    let model = req.body.model;
    try {
      // Load the template, then fill in the placeholders.
      let doc = await document.readTemplate(templateNames.fotoMulta);
      doc.render({
        name: name,
        date: new Date().toDateString(),
        plate: plate,
        car_model: model,
      });
      // Serialize the rendered document to a Node.js Buffer.
      const buf = doc.getZip().generate({
        type: 'nodebuffer',
      });

      // NOTE(review): the file name has no extension and is interpolated
      // unquoted from request fields into the header below — confirm intended.
      let fileName = `${name}-${plate}-${model}-${new Date().getTime()}`;
      //Set headers to send a docx file
      res.set({
        'Content-Type': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'Content-Disposition': `attachment; filename=${fileName}`,
        'Content-Length': buf.length,
      });
      // NOTE(review): the body is sent as a raw Buffer; when this runs behind
      // API Gateway/Lambda, verify that the response content type is
      // configured as a binary media type.
      res.end(buf);
    } catch (error) {
      console.error(error);
      res.status(500).send('There was an internal error');
    }
},
}

I would expect it to :

Correct file image

Incorrect file image

there are more coincidences along the whole response.

I hope somebody can help, thank you in advance!

edi9999 commented 1 year ago

Hello, interesting.

I don't think I have ever seen this before.

Could you please send me the two generated docx for me to analyze the differences ?

pake-perez commented 1 year ago

Hi @edi9999 Sure thank you for the quick answer.

Here are both files; they were created with the exact same code: correct_file.zip, wrong_file.zip

edi9999 commented 1 year ago

Hi, I did a bit of research :

https://forum.serverless.com/t/error-handling-binary-files-with-an-express-app-deployed-to-aws-lambda/8336

Here's a copy of that site for future reference :

Hello,

I'm porting an old existing Express app to AWS Lambda. The app just contains 1 endpoint, and the operation is really simple:

  1. Receives a docx file via POST request
  2. Manipulates the content of the file
  3. Sends a modified copy of the docx file to the sender.

That's it. It works as a standalone app in a VPS. But it fails when deployed to lambda AWS using serverless. The received file is corrupt.

This is a simplified version of the app.js file

var express = require('serverless-express/express');
var formidable = require('formidable');
var fs = require('fs');
var JSZip = require('JSZip');
var Docxtemplater = require('docxtemplater');
var handler = require('serverless-express/handler');

// Instantiate express
var app = express();

/**
 * POST / — parses the uploaded form, reads the uploaded file from disk and
 * loads it into Docxtemplater. Any failure (form parsing, missing upload,
 * file read, zip parsing) is forwarded to Express via `next(err)` instead of
 * being ignored or thrown from inside an asynchronous callback.
 */
app.post('/', function (req, res, next) {

    var form = new formidable.IncomingForm();

    // Callback to answer incoming POST request
    form.parse(req, function (parseErr, fields, files) {
        if (parseErr) {
            return next(parseErr);
        }
        if (!files || !files.file) {
            return next(new Error('No file was uploaded in the "file" field'));
        }

        // File sent (declared locally so it does not leak as a global)
        var uploadedPath = files.file.path;

        fs.readFile(uploadedPath, function (readErr, data) {
            if (readErr) {
                return next(readErr);
            }

            try {
                // Create zip file with the data. It fails here in AWS Lambda!
                var zip = new JSZip(data);

                // Pass the zip file to Docxtemplater to do stuff...
                var doc = new Docxtemplater();
                doc.loadZip(zip);

                // More stuff here... whatever
            } catch (zipErr) {
                // A corrupted upload makes JSZip throw; report it rather than
                // letting the exception escape the callback.
                next(zipErr);
            }
        });
    });
});

module.exports.handler = handler(app);

And here the serverless.yml file

# Serverless configuration for the Express app shown above.
service: myservice

# Plugins loaded for this service.
plugins:
  - serverless-apigw-binary
  - serverless-offline
  - serverless-express

custom:
  # MIME types listed for the serverless-apigw-binary plugin.
  # NOTE(review): only the docx type is listed here; the reply quoted in this
  # thread reports that `application/x-www-form-urlencoded` was missing —
  # verify which content types must be treated as binary.
  apigwBinary:
    types:
      - 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'

provider:
  name: aws
  runtime: nodejs8.10
  stage: dev
  region: eu-west-3
  role: myrole
functions:
  # Single function `app`, handled by `app.handler`, wired to GET /, POST /
  # and the `ANY {proxy+}` route.
  app:
    handler: app.handler
    events:
      - http: GET /
      - http: POST /
      - http: 'ANY {proxy+}'

The app works just fine as standalone app, as said. The file sent via POST is properly read and passed to Docxtemplater library. But in AWS Lambda, the file is always corrupt. Not only with docx files. Any file sent is detected as corrupt.

This is the error message when calling new JSZip(data)

End of data reached (data length = 403921, asked index = 70664704). Corrupted zip ?

My guess: the file is not being sent as binary. But I don't know how to configure serverless to tell lambda to do it.

Response 1 :

Hi, have you checked whether serverless-apigw-binary added the necessary binary types? I would recommend opening your API Gateway console and taking a look: https://take.ms/OXzz7

API+Gateway+2019-05-22+01-13-00

This can be useful as well: https://docs.aws.amazon.com/apigateway/latest/developerguide/api-gateway-payload-encodings.html

Response 2 :

Hi,

Yes. Checked. There were missing binary types. Specifically: application/x-www-form-urlencoded

Now it seems to work. Thanks!

pake-perez commented 1 year ago

Hi @edi9999, thank you so much for this. At first glance I thought it could have been something like that, and I tried adding the docx and x-www-form-urlencoded binary types. The thing here, though, is that I am not sending the file to Lambda; it is reading it from the same file system.

Then the ZIP file is being created and sent as application/vnd.openxmlformats-officedocument.wordprocessingml.document. I was thinking that maybe the zip generation within Lambda creates that difference, but I haven't been able to prove it. I just know that the file is being read exactly the same.

Any other idea? :S

edi9999 commented 1 year ago

I think for sure it has nothing to do with docxtemplater.

Docxtemplater does not read the ENV variables, and works in all supported node versions.

So it has something to do with how lambda allows to generate binary files.

I think you could change the following (dropping docxtemplater completely), and still have the bug :

/**
 * Reduced template loader: reads the template and hands back the zip only.
 */
let Document = {
  /**
   * Reads `docName` from the folder named by `process.env.TEMPLATES_PATH`
   * (resolved against the current working directory) as binary content.
   *
   * @param {string} docName - File name of the template.
   * @returns {object} The `q` deferred's promise; resolves with a `pizzip`
   *   instance built from the file content, rejects with the read error.
   */
  readTemplate(docName) {
    const deferred = q.defer();
    const templateFile = `${path.resolve('./' + process.env.TEMPLATES_PATH)}/${docName}`;

    // Settle the deferred from the read result.
    const onRead = (readError, contents) => {
      if (readError) {
        deferred.reject(readError);
      } else {
        deferred.resolve(new pizzip(contents));
      }
    };

    fs.readFile(templateFile, 'binary', onRead);
    return deferred.promise;
  },
};
module.exports = Document;

const document = require('./document.js');
/**
 * Reduced request handler: loads the `templateNames.fotoMulta` template as a
 * zip, regenerates it without any rendering step and sends it as an
 * attachment. On any error it logs the error and responds with HTTP 500.
 *
 * @param {object} req - Request; `req.body.name`, `req.body.plate` and `req.body.model` are read.
 * @param {object} res - Response; `set`, `end`, `status` and `send` are called on it.
 */
const myFunc = async (req, res) => {
    let name = req.body.name;
    let plate = req.body.plate;
    let model = req.body.model;
    try {
      // Load the template zip and serialize it straight back to a Buffer.
      let zip = await document.readTemplate(templateNames.fotoMulta);
      const buf = zip.generate({
        type: 'nodebuffer',
      });

      // The request fields are only used to build the download file name.
      let fileName = `${name}-${plate}-${model}-${new Date().getTime()}`;
      //Set headers to send a docx file
      res.set({
        'Content-Type': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'Content-Disposition': `attachment; filename=${fileName}`,
        'Content-Length': buf.length,
      });
      res.end(buf);
    } catch (error) {
      console.error(error);
      res.status(500).send('There was an internal error');
    }
},
}

It must have something to do with AWS lambda

edi9999 commented 1 year ago

I'm closing this issue since I don't think this is something that can be fixed by me, if you have any other info, don't hesitate to write back

edi9999 commented 1 year ago

Did you find a solution @pake-perez ?