UglyToad / PdfPig

Read and extract text and other content from PDFs in C# (port of PDFBox)
https://github.com/UglyToad/PdfPig/wiki
Apache License 2.0
1.73k stars 241 forks source link

Support for Writing custom document properties in PDFs? #732

Closed Nieg closed 1 year ago

Nieg commented 1 year ago

Hi, can I ask whether there are any plans for (or any possibility of) supporting writing custom document properties to PDF files, in addition to the 'known' ones (Author, Keywords, and so on)? This would be the write-side counterpart of "Support for custom document properties in PDFs?".

I found that PdfDocumentBuilder can add document properties. Is there any way to write custom properties like the ones in the PDFs below?

Pdf_Simple_With_MetaData.pdf newPdf2.pdf newPdf2 _special_prop.pdf

Numpsy commented 1 year ago

If you mean writing them into the document info dictionary, then I asked about that and the current code has this extra change : https://github.com/UglyToad/PdfPig/pull/715

Nieg commented 1 year ago

Yes, I will test it tomorrow by copying the metadata of the PDFs into a newly created one. The PDFs for the test are already uploaded in this issue.

Nieg commented 1 year ago

Thanks for replying. I tested it with the test below, and it works well. I'm looking forward to your next update.

newPdf2_special_prop.pdf Pdf_Simple_With_MetaData.pdf

/// <summary>
/// Round-trip test for custom document information entries: every entry of a
/// source PDF's document information dictionary is copied into a freshly built
/// PDF through <c>PdfDocumentBuilder.DocumentInformation.CustomMetadata</c>, and
/// the rebuilt document is then re-opened and compared against the source.
/// </summary>
public class PdfBuilderMetaDataTest
{
    // Source PDFs are read from, and the generated copies are written to, the
    // current user's desktop folder.
    private readonly string baseDir = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);

    /// <summary>
    /// Copies the information dictionary of <paramref name="file"/> into a new
    /// single-page A4 document, asserts that the rebuilt document exposes the
    /// same entries, and saves the result next to the source as <c>Gen_{file}</c>.
    /// </summary>
    /// <param name="file">File name of the source PDF inside the desktop folder.</param>
    [Theory]
    [InlineData(@"newPdf2_special_prop.pdf")]
    [InlineData(@"Pdf_Simple_With_MetaData.pdf")]
    public async Task CopyMetaDataToAnotherShouldBeSaveAsTheSame(string file)
    {
        var path = Path.Combine(baseDir, file);
        byte[] bytes;

        // Dispose both documents deterministically so the underlying file and
        // stream handles are released even when an assertion fails.
        using (var doc = PdfDocument.Open(path))
        {
            var dic = doc.Information.DocumentInformationDictionary;
            Assert.NotNull(dic);

            var builder = new PdfDocumentBuilder();
            builder.AddPage(PageSize.A4);

            var infoBuilder = builder.DocumentInformation;
            foreach (var pair in dic.Data)
            {
                // Only string tokens can be copied as custom metadata; report any
                // other token type as a test failure instead of a NullReferenceException.
                var text = Assert.IsAssignableFrom<StringToken>(pair.Value);
                infoBuilder.CustomMetadata.Add(pair.Key, text.Data);
            }

            bytes = builder.Build();

            using (var doc2 = PdfDocument.Open(bytes))
            {
                var dic2 = doc2.Information.DocumentInformationDictionary;
                Assert.NotNull(dic2);
                Assert.True(DicHasSaveValues(dic.Data, dic2.Data));
            }
        }

        // WriteAllBytesAsync overwrites an existing file, so no prior delete is needed.
        var savePath = Path.Combine(baseDir, $"Gen_{file}");
        await File.WriteAllBytesAsync(savePath, bytes);
    }

    /// <summary>
    /// Determines whether two token dictionaries contain exactly the same keys and,
    /// for each key, string tokens carrying the same text.
    /// </summary>
    /// <param name="dic1">The expected entries.</param>
    /// <param name="dic2">The actual entries.</param>
    /// <returns>
    /// <c>true</c> when both dictionaries have the same number of entries, every key
    /// of <paramref name="dic1"/> exists in <paramref name="dic2"/>, and both values
    /// are <c>StringToken</c>s with ordinally equal data; otherwise <c>false</c>.
    /// </returns>
    private static bool DicHasSaveValues(IReadOnlyDictionary<string, IToken> dic1, IReadOnlyDictionary<string, IToken> dic2)
    {
        if (dic1.Count != dic2.Count)
        {
            return false;
        }

        // Equal counts plus "every key of dic1 is present in dic2" implies identical
        // key sets, so a direct lookup replaces sorting and walking both sides.
        foreach (var pair in dic1)
        {
            if (!dic2.TryGetValue(pair.Key, out var other))
            {
                return false;
            }

            var expected = pair.Value as StringToken;
            var actual = other as StringToken;
            if (expected == null || actual == null)
            {
                // Non-string tokens cannot be compared by text; treat as a mismatch.
                return false;
            }

            if (!string.Equals(expected.Data, actual.Data, StringComparison.Ordinal))
            {
                return false;
            }
        }

        return true;
    }
}