empira / PDFsharp

PDFsharp and MigraDoc Foundation for .NET 6 and .NET Framework
https://docs.pdfsharp.net/
Other
504 stars 124 forks source link

6.0.0-preview-4 - unable to set /Metadata at /Catalog level #42

Open podprad opened 1 year ago

podprad commented 1 year ago

Hi,

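In 6.0.0-preview-4 I'm unable to set XMP metadata. I add my own /Metadata dictionary and it is written to the output PDF, but in the end PDFsharp points the catalog's /Metadata entry at its own indirect object instead (in the dump below, object 8 is my stream, while the catalog references object 9, which PDFsharp generated):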

1 0 obj
<<
/Type/Catalog
/Pages 2 0 R
/Metadata 9 0 R
>>
endobj
...
8 0 obj
<<
/Length 340
/Type/Metadata
/Subtype/XML
>>
stream
<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="Adobe XMP Core 6.1.10">
  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <rdf:Description rdf:about=""
        xmlns:xmp="http://ns.adobe.com/xap/1.0/"
      xmp:CreatorTool="Test"/>
  </rdf:RDF>
</x:xmpmeta>
endstream
endobj
...
9 0 obj
<<
/Type/Metadata
/Subtype/XML
/Length 1469
>>
stream
<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
  <x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="3.1-701">
    <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
      <rdf:Description rdf:about="" xmlns:pdf="http://ns.adobe.com/pdf/1.3/">
        <pdf:Producer>PDFsharp 6.0.0-preview-4 under Microsoft Windows 10.0.19045</pdf:Producer><pdf:Keywords></pdf:Keywords>
      </rdf:Description>
      <rdf:Description rdf:about="" xmlns:dc="http://purl.org/dc/elements/1.1/">
        <dc:title><rdf:Alt><rdf:li xml:lang="x-default"></rdf:li></rdf:Alt></dc:title>
        <dc:creator><rdf:Seq><rdf:li></rdf:li></rdf:Seq></dc:creator>
        <dc:description><rdf:Alt><rdf:li xml:lang="x-default"></rdf:li></rdf:Alt></dc:description>
      </rdf:Description>
      <rdf:Description rdf:about="" xmlns:xmp="http://ns.adobe.com/xap/1.0/">
        <xmp:CreatorTool>PDFsharp 6.0.0-preview-4 (www.pdfsharp.net)</xmp:CreatorTool>
        <xmp:CreateDate>0001-01-01T00:00:00.0000000</xmp:CreateDate>
        <xmp:ModifyDate>0001-01-01T00:00:00.0000000</xmp:ModifyDate>
      </rdf:Description>
      <rdf:Description rdf:about="" xmlns:xmpMM="http://ns.adobe.com/xap/1.0/mm/">
        <xmpMM:DocumentID>uuid:cd4cf696-81b0-4f4a-8296-ad24a0482084</xmpMM:DocumentID>
        <xmpMM:InstanceID>uuid:97a97879-905c-4417-86f7-8ac6a426854a</xmpMM:InstanceID>
      </rdf:Description>
    </rdf:RDF>
  </x:xmpmeta>
<?xpacket end="w"?>                
endstream
endobj

In 1.50.5147 it was possible.

Here is the source code to reproduce the issue: https://github.com/podprad/misc_public/tree/main/playgrounds/csharp/PDFSharpMetaIssue

Pasted code:

<!-- Console project used to reproduce the /Metadata issue against two PDFsharp versions. -->
<Project Sdk="Microsoft.NET.Sdk">

    <PropertyGroup>
        <OutputType>Exe</OutputType>

        <!-- Active target. Swap with the commented-out line below to build for net48 instead;
             the PackageReference conditions further down pick the PDFsharp version accordingly. -->
        <TargetFramework>net6.0</TargetFramework>
<!--        <TargetFramework>net48</TargetFramework>-->
    </PropertyGroup>

    <!-- Ship the input PDF next to the executable: it is removed from the None items and
         re-added as Content that is copied to the output directory when newer. -->
    <ItemGroup>
      <None Remove="Pdf14Simplest.pdf" />
      <Content Include="Pdf14Simplest.pdf">
        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
      </Content>
    </ItemGroup>

    <!-- Exactly one of these references applies, depending on the TargetFramework above:
         6.0.0-preview-4 for net6.0, 1.50.5147 for net48. -->
    <ItemGroup>
      <PackageReference Include="PDFsharp" Version="6.0.0-preview-4" Condition="'$(TargetFramework)' == 'net6.0'"/>
      <PackageReference Include="PDFsharp" Version="1.50.5147" Condition="'$(TargetFramework)' == 'net48'"/>
    </ItemGroup>

</Project>
namespace PDFSharpMetaIssue
{
    using System.Text;
    using PdfSharp.Pdf;
    using PdfSharp.Pdf.Advanced;

    /// <summary>
    /// Minimal reproduction program: opens an existing PDF, replaces the catalog's
    /// <c>/Metadata</c> entry with a custom XMP stream, and saves the document.
    /// </summary>
    public static class Program
    {
        /// <summary>
        /// Reads <c>Pdf14Simplest.pdf</c>, swaps any existing catalog <c>/Metadata</c> entry
        /// for a new metadata stream built from a fixed XMP packet, and writes
        /// <c>Output.pdf</c>.
        /// </summary>
        public static void Main()
        {
            const string MetadataKey = "/Metadata";

            const string XmpToSet = @"<?xpacket begin="""" id=""W5M0MpCehiHzreSzNTczkc9d""?>
<x:xmpmeta xmlns:x=""adobe:ns:meta/"" x:xmptk=""Adobe XMP Core 6.1.10"">
  <rdf:RDF xmlns:rdf=""http://www.w3.org/1999/02/22-rdf-syntax-ns#"">
    <rdf:Description rdf:about=""""
        xmlns:xmp=""http://ns.adobe.com/xap/1.0/""
      xmp:CreatorTool=""Test""/>
  </rdf:RDF>
</x:xmpmeta>";

            // UTF-8 without a byte order mark, so the stream starts directly with the xpacket header.
            var xmpBytes = new UTF8Encoding(false).GetBytes(XmpToSet);

            var filePath = "Pdf14Simplest.pdf";

            using (var document = PdfSharp.Pdf.IO.PdfReader.Open(filePath))
            {
                var catalog = document.Internals.Catalog;

                if (catalog.Elements.TryGetValue(MetadataKey, out var oldMetadata))
                {
                    // Always drop the existing catalog entry, whatever form it takes.
                    // Previously the key was removed only for indirect references, so a
                    // direct value stayed in place and the Add call below then hit a
                    // duplicate key.
                    catalog.Elements.Remove(MetadataKey);

                    // Only an indirect object has an entry in the document's object table
                    // that needs to be removed as well.
                    if (oldMetadata is PdfReference oldMetadataReference)
                    {
                        document.Internals.RemoveObject(oldMetadataReference.Value);
                    }
                }

                var newMetadata = new PdfDictionary();
                newMetadata.CreateStream(xmpBytes);
                newMetadata.Elements.Add("/Type", new PdfName("/Metadata"));
                newMetadata.Elements.Add("/Subtype", new PdfName("/XML"));

                // Register the dictionary with the document first; its Reference is used
                // on the next line to link it from the catalog.
                document.Internals.AddObject(newMetadata);

                catalog.Elements.Add(MetadataKey, newMetadata.Reference);

                document.Save("Output.pdf");
            }
        }
    }
}
julienrffr commented 1 year ago

Hi,

I've just fixed this issue in pull request #44

PdfSharp was simply setting its own new /Metadata object, without taking into account any existing one that had already been set.

julienrffr commented 1 year ago

I've updated my fix, making the behavior depend on a document option (it gives more control). See details in pull request.

podprad commented 1 year ago

Thank you for analysing it and for your time.

I added a comment directly in the PR.