unidoc / unipdf

Golang PDF library for creating and processing PDF files (pure go)
https://unidoc.io
Other
2.6k stars 255 forks source link

[BUG] Rendering of Page Blocks #95

Closed peterjiz closed 5 years ago

peterjiz commented 5 years ago

Description

Any attempt to scale, translate, or otherwise modify the source page block results in a damaged copy of the PDF (headers and bullet points are missing from the destination PDF).

Expected Behavior

Expectation: intact copy of the source document with scaling/translation transformation applied

Actual Behavior

Headers, bullet points, and possibly other elements are not rendered in the destination PDF.

Attachments

Code: `c := creator.New()

srcHeight, srcWidth := ExtractPDFDimensions(inputFilepath)
minRatio := math.Min(destinationHeight/srcHeight, destinationWidth/srcWidth)

f, err := os.Open(inputFilepath)
if err != nil {
    return "", errors.New("Could not read the input pdf to be padded")
}

defer f.Close()

pdfReader, err := pdf.NewPdfReader(f)
if err != nil {
    return "", errors.New("Could not read the input pdf to be padded")
}

c.SetPageSize(creator.PageSize{destinationWidth, destinationHeight})

for _, page := range pdfReader.PageList {

    c.NewPage()

    //Create the page block from the page
    pageBlock, err := creator.NewBlockFromPage(page)
    if err != nil {
        return "", errors.New("Could not create block from page")
    }

    // Play with the page
    pageBlock.Scale(minRatio, minRatio)
    pageBlock.Scale(xscale, yscale)
    pageBlockHeight, pageBlockWidth := pageBlock.Height(), pageBlock.Width()
    xDiff := (destinationWidth - pageBlockWidth)
    yDiff := (destinationHeight - pageBlockHeight)
    pageBlock.SetPos(xPosition*xDiff, yPosition*yDiff)

    //Draw
    c.Draw(pageBlock)
}

err = c.WriteToFile(outputFilepath)` 

Workaround code to deal with this:

`func updateMediaboxX(mediabox *pdf.PdfRectangle, xPosition, destinationWidth, blockWidth float64) (float64, float64, float64) {
// x Gap is the amount of extra space between the block and the page
xGap := math.Abs(math.Abs(mediabox.Urx-mediabox.Llx) - blockWidth)

//x Diff is the amount we need to pad our pdf by
xDiff := (destinationWidth - blockWidth)

//Assuming x-Position is center, we need to padContents by the same amount @ left and @ right, while accounting for the gap
newLLx := mediabox.Llx + (-((xPosition) * xDiff))
newUrx := mediabox.Urx + (((1 - xPosition) * xDiff) - xGap)

return xDiff, newLLx, newUrx

}

// updateMediaboxY computes the vertical padding and the new lower and upper
// Y bounds for a media box, given the height of the block drawn on it and the
// height of the destination page.
//
// yPosition weights how the padding is split between the two edges:
// yPosition*yDiff is added to Ury and (1-yPosition)*yDiff is subtracted from
// Lly. The gap between the current media box height and blockHeight is then
// added back onto the new Lly.
//
// It returns, in order: yDiff (destinationHeight - blockHeight), the new Lly
// and the new Ury. The mediabox argument is only read, never modified.
func updateMediaboxY(mediabox *pdf.PdfRectangle, yPosition, destinationHeight, blockHeight float64) (float64, float64, float64) {
	// y Gap is the amount of extra space between the page and the block.
	// NOTE: this statement must sit on its own line; when it shares a line
	// with the comment above, the comment swallows it and yGap is undefined.
	yGap := math.Abs(math.Abs(mediabox.Ury-mediabox.Lly) - blockHeight)

	// y Diff is the amount we need to pad our pdf by.
	yDiff := (destinationHeight - blockHeight)

	// Assuming y-Position is center, we need to padContents by the same amount @ top and @ bottom, while accounting for the gap
	newLLy := mediabox.Lly - ((1 - yPosition) * yDiff) + yGap
	newUry := mediabox.Ury + (yPosition * yDiff)

	return yDiff, newLLy, newUry
}

func pad(...){ c := creator.New()

srcHeight, srcWidth := ExtractPDFDimensions(inputFilepath)
minRatio := math.Min(destinationHeight/srcHeight, destinationWidth/srcWidth)

// compensate for unipdf's non-functioning pageBlock.scale
scaledBlockHeight, scaledBlockWidth := minRatio*yscale*srcHeight, minRatio*xscale*srcWidth
resizedPDF, err := resizePDF_cpdf(tempdir, inputFilepath, scaledBlockHeight, scaledBlockWidth)
if err != nil {
    return "", err
}

f, err := os.Open(resizedPDF)
if err != nil {
    return "", errors.New("Could not read the input pdf to be padded")
}

defer f.Close()

pdfReader, err := pdf.NewPdfReader(f)
if err != nil {
    return "", errors.New("Could not read the input pdf to be padded")
}

     // only way to get all elements to render is if the destination dimensions are exactly = the page block dimensions (pre-modifications of the page block)
c.SetPageSize(creator.PageSize{scaledBlockWidth, scaledBlockHeight})

for _, page := range pdfReader.PageList {

    //edited the source library to return the page created by the creator
    headeredPage := c.NewPage() 

    //Create the scaled page block from page
    pageBlock, err := creator.NewBlockFromPage(page)
    if err != nil {
        return "", errors.New("Could not scale the block from page")
    }

    pageBlock.SetPos(0, 0) //only way to get all pdf elements to render is if position = 0,0

    //Deal with x - compensate for unipdf's non-functioning pageBlock.scale and setPos
    pageBlockWidth := pageBlock.Width()
    _, headeredPage.MediaBox.Llx, headeredPage.MediaBox.Urx = updateMediaboxX(headeredPage.MediaBox, xPosition, destinationWidth, pageBlockWidth)

    //Deal with y - compensate for unipdf's non-functioning pageBlock.scale and setPos
    pageBlockHeight := pageBlock.Height()
    _, headeredPage.MediaBox.Lly, headeredPage.MediaBox.Ury = updateMediaboxY(headeredPage.MediaBox, yPosition, destinationHeight, pageBlockHeight)

    c.Draw(pageBlock)
}

err = c.WriteToFile(outputFilepath)

}`

Src.pdf Expectations.pdf Reality.pdf

adrg commented 5 years ago

Hi @peterjiz

The following code produces the attached PDF. I did not find any issues with it. Is there something I am missing? It's not an exact match of Expectations.pdf because I don't know the values of destinationWidth and destinationHeight, or what the ExtractPDFDimensions function does. I am using the development branch here, but the latest release of unipdf should produce the same results.

Output: out.pdf

package main

import (
    "log"
    "math"
    "os"

    "github.com/unidoc/unipdf/creator"
    "github.com/unidoc/unipdf/model"
)

// main reads src.pdf, draws each of its pages as a scaled block onto a new
// 400x600 page, and writes the result to out.pdf. Any error aborts the
// program through log.Fatal.
func main() {
    const (
        srcPath = "src.pdf"
        dstPath = "out.pdf"

        // Output page geometry.
        pageWidth  = 400.0
        pageHeight = 600.0
        marginH    = 60.0
        marginV    = 15.0

        // Factor applied to the source dimensions to get the target ones.
        shrink = 0.65
    )

    c := creator.New()

    in, err := os.Open(srcPath)
    if err != nil {
        log.Fatal(err)
    }
    defer in.Close()

    reader, err := model.NewPdfReader(in)
    if err != nil {
        log.Fatal(err)
    }

    c.SetPageSize(creator.PageSize{pageWidth, pageHeight})
    c.SetPageMargins(marginH, marginH, marginV, marginV)

    for _, srcPage := range reader.PageList {
        // Source page size, taken from its media box.
        box := srcPage.MediaBox
        w := box.Urx - box.Llx
        h := box.Ury - box.Lly

        // Uniform scale: the smaller of the two target/source ratios.
        targetW := w * shrink
        targetH := h * shrink
        ratio := math.Min(targetH/h, targetW/w)

        // Start the output page before building the block for it.
        c.NewPage()

        block, blockErr := creator.NewBlockFromPage(srcPage)
        if blockErr != nil {
            log.Fatal(blockErr)
        }

        block.Scale(ratio, ratio)

        // x = page width - scaled block width - horizontal margin;
        // y = vertical margin.
        block.SetPos(pageWidth-block.Width()-marginH, marginV)

        if drawErr := c.Draw(block); drawErr != nil {
            log.Fatal(drawErr)
        }
    }

    if writeErr := c.WriteToFile(dstPath); writeErr != nil {
        log.Fatal(writeErr)
    }
}
peterjiz commented 5 years ago

@adrg

I'll try the development branch some time next week, and report back on the issue!

Ended up replacing unipdf with cli calls to ghostscript and cpdf, but I would much prefer a programmatic approach: 1 portable binary with 0 external dependencies.

Edit: dimensions are 595x841. ExtractPDFDimensions just returns the height, width of the first page. Values are swapped if page is rotated.

gunnsth commented 5 years ago

@peterjiz Did you have a chance to test? Would like to close the issue if fixed.