galkahana / PDF-Writer

High performance library for creating, modifying and parsing PDF files in C++
http://www.pdfhummus.com
Apache License 2.0
900 stars 216 forks source link

Strange behavior of CalculateTextDimensions with trailing spaces #151

Open box8 opened 5 years ago

box8 commented 5 years ago

test.cpp

#include <iostream>
#include "PDFUsedFont.h"
#include "PDFWriter.h"

int main(int argc, char **argv) {
    PDFWriter pdfWriter;
    PDFUsedFont* font = pdfWriter.GetFontForFile("NotoSerifCJKjp-Regular.otf");

    // SPACE
    PDFUsedFont::TextMeasures tm1 = font->CalculateTextDimensions(" test", 11);
    std::cout << tm1.width  << std::endl;
    PDFUsedFont::TextMeasures tm2 = font->CalculateTextDimensions("te st", 11);
    std::cout << tm2.width  << std::endl;
    PDFUsedFont::TextMeasures tm3 = font->CalculateTextDimensions("test ", 11);
    std::cout << tm3.width  << std::endl;

    // NO-BREAK SPACE
    PDFUsedFont::TextMeasures tm4 = font->CalculateTextDimensions("\xc2\xa0test", 11);
    std::cout << tm4.width  << std::endl;
    PDFUsedFont::TextMeasures tm5 = font->CalculateTextDimensions("te\xc2\xa0st", 11);
    std::cout << tm5.width  << std::endl;
    PDFUsedFont::TextMeasures tm6 = font->CalculateTextDimensions("test\xc2\xa0", 11);
    std::cout << tm6.width  << std::endl;

    return 0;
}

result:

21.747
21.626
18.876
21.747
21.626
18.876
box8 commented 5 years ago

Example PDF: test-spaces.pdf

Trailing spaces are clearly missing.

#include "PageContentContext.h"
#include "PDFPage.h"
#include "PDFUsedFont.h"
#include "PDFWriter.h"

#define FONT_SIZE 30

// Visual repro: writes "test-spaces.pdf" with one row per sample string.
// Each row draws the rectangle reported by CalculateTextDimensions
// (xMin, yMin, width, height) and then the string itself on top of it,
// followed by the numeric xMin and width in a smaller size, so the measured
// box can be compared with the rendered text.
//
// Returns 0 on success, 1 if the font could not be loaded.
int main(int argc, char **argv)
{
    // Command-line arguments are not used by this repro.
    (void)argc;
    (void)argv;

    // "test" with a SPACE, then a NO-BREAK SPACE (UTF-8 bytes C2 A0),
    // placed before, inside and after the word.
    const char *str[6] = {
        " test",
        "te st",
        "test ",
        "\xc2\xa0test",
        "te\xc2\xa0st",
        "test\xc2\xa0"
    };

    PDFWriter pdf;
    auto font = pdf.GetFontForFile("NotoSerifCJKjp-Regular.otf");
    if (!font) {
        // A null font (the file could not be loaded) would otherwise be
        // dereferenced in the loop below; bail out before creating the PDF.
        return 1;
    }

    pdf.StartPDF("test-spaces.pdf", ePDFVersionMax);

    // The page lives on the stack: no manual delete, nothing to leak.
    PDFPage page;
    page.SetMediaBox(PDFRectangle(0, 0, 595, 842));
    auto pcc = pdf.StartPageContentContext(&page);

    for (int i = 0; i < 6; ++i) {
        auto tm = font->CalculateTextDimensions(str[i], FONT_SIZE);

        pcc->q();
        // Move the origin to this row; rows are spaced 50 units apart, top down.
        pcc->cm(1, 0, 0, 1, 50, 800 - 50 * i);

        // Measured bounding box, filled and stroked in the same CMYK color.
        pcc->q();
        pcc->K(0, 0.8, 0.8, 0);
        pcc->k(0, 0.8, 0.8, 0);
        pcc->w(0);
        pcc->re(tm.xMin, tm.yMin, tm.width, tm.height);
        pcc->B();
        pcc->Q();

        // The sample text at the row origin, then its measures at size 12.
        pcc->BT();
        pcc->Tf(font, FONT_SIZE);
        pcc->Tm(1, 0, 0, 1, 0, 0);
        pcc->Tj(str[i]);
        pcc->Tf(font, 12);
        pcc->Tj("     ");
        pcc->Tj(std::to_string(tm.xMin));
        pcc->Tj(" ");
        pcc->Tj(std::to_string(tm.width));
        pcc->ET();

        pcc->Q();
    }

    pdf.EndPageContentContext(pcc);
    pdf.WritePage(&page);
    pdf.EndPDF();
    return 0;
}
ka-ba commented 1 year ago

This still seems to be an issue (with both trailing and leading blanks). I'm using a monospaced font and release 4.5.6 (according to the release notes, there are no relevant changes up to the most recent release). This is the output of my test (each string and its width):

size of "x00:00x": 48.948
size of "   :   ": 43.2
size of "x  :  x": 48.948
size of "   :  x": 49.68
size of "x  :   ": 42.468

The output is produced by the line

BOOST_LOG_TRIVIAL(trace) << "size of \"" << text_[0] << "\": " << font_->CalculateTextDimensions(text_[0],fontSize_).width;

(with the Boost.Log metadata removed manually).

It shows that blanks within the string take up the same room as other characters, but leading or trailing blanks are handled differently. Is there a solution or a workaround to get correct numbers?