ssimms / pdfapi2

Create, modify, and examine PDF files in Perl
Other
15 stars 20 forks source link

Several issues with bookmarks (+fix) #83

Open vadim-160102 opened 2 months ago

vadim-160102 commented 2 months ago

I totally forgot about this.

use strict;
use warnings;
use feature 'say';
use utf8;
use Test::More;
use PDF::API2 2.047;

my $pdf;

########################################################

$pdf = PDF::API2-> new; $pdf-> page;
$pdf-> outline-> outline for 1 .. 2;
$pdf-> outline-> last-> delete;
is $pdf-> outline-> count, 1, 
    'looks like we successfully deleted the last item...';

$pdf = PDF::API2-> from_string( $pdf-> to_string );
is $pdf-> outline-> count, 1, 
    '... (save, re-open and check to be sure)';

########################################################

$pdf = PDF::API2-> new; $pdf-> page;
$pdf-> outline-> outline-> title( 'bookmark' );
$pdf = PDF::API2-> from_string( $pdf-> to_string );

my $title = eval { $pdf-> outline-> first-> title };
is $@, '', 'we can examine bookmarks in existing file';

$pdf-> outline-> count;             # fix for test #3

$title = $pdf-> outline-> first-> title;
is $title, 'bookmark', 'yes we can';

########################################################

my $chinese = '中國的';
$pdf = PDF::API2-> new; $pdf-> page;
$pdf-> outline-> outline-> title( $chinese );
$pdf = PDF::API2-> from_string( $pdf-> to_string );

$pdf-> outline-> count;             # fix for test #3

is join( ' ', unpack 'U*', $pdf-> outline-> first-> title ),
   join( ' ', unpack 'U*', $chinese ),
    '...and read interesting characters';

########################################################

$pdf = PDF::API2-> new; $pdf-> page;
$pdf-> outline-> outline-> title( $_ ) for qw/ a B c D e F /;
$pdf = PDF::API2-> from_string( $pdf-> to_string );

$pdf-> outline-> count;             # fix for test #3

my $bm = $pdf-> outline-> first;
while ( $bm ) {
    my $next = $bm-> next;
    $bm-> delete if $bm-> title =~ /\p{Lu}/;
    $bm = $next
}
$pdf = PDF::API2-> from_string( $pdf-> to_string );

is $pdf-> outline-> count, 3,
    'we can modify outline in existing file and save changes';

########################################################

done_testing;

The output:

ok 1 - looks like we successfully deleted the last item...
not ok 2 - ... (save, re-open and check to be sure)
#   Failed test '... (save, re-open and check to be sure)'
#   at outline.pl line 19.
#          got: '2'
#     expected: '1'
not ok 3 - we can examine bookmarks in existing file
#   Failed test 'we can examine bookmarks in existing file'
#   at outline.pl line 29.
#          got: 'Can't locate object method "title" via package "PDF::API2::Basic::PDF::Objind" at outline.pl line 28.
# '
#     expected: ''
ok 4 - yes we can
not ok 5 - ...and read interesting characters
#   Failed test '...and read interesting characters'
#   at outline.pl line 45.
#          got: '254 255 78 45 87 11 118 132'
#     expected: '20013 22283 30340'
not ok 6 - we can modify outline in existing file and save changes
#   Failed test 'we can modify outline in existing file and save changes'
#   at outline.pl line 67.
#          got: '6'
#     expected: '3'
1..6
# Looks like you failed 4 tests of 6.

2 files to patch:

--- PDF/API2_orig.pm    Sat May 18 21:19:41 2024
+++ PDF/API2.pm Sat Jun 22 11:28:39 2024
@@ -833,6 +833,7 @@
         bless $obj, 'PDF::API2::Outlines';
         $obj->{' api'} = $self;
         weaken $obj->{' api'};
+        $obj->count();
     }
     else {
         $obj = PDF::API2::Outlines->new($self);
--- PDF/API2/Outline_orig.pm    Sat May 18 21:19:41 2024
+++ PDF/API2/Outline.pm Sat Jun 22 11:27:46 2024
@@ -90,26 +90,24 @@
     if ($count) {
         $self->{'Count'} = PDFNum($self->is_open() ? $count : -$count);
     }
-
+    else {
+        delete $self->{'Count'}
+    }
     return $count;
 }

 sub _load_children {
     my $self = shift();
     my $item = $self->{'First'};
-    return unless $item;
-    $item->realise();
-    bless $item, __PACKAGE__;

-    push @{$self->{' children'}}, $item;
-    while ($item->next()) {
-        $item = $item->next();
+    while ($item) {
         $item->realise();
         bless $item, __PACKAGE__;
+        $item->{' api'} = $self->{' api'};
         push @{$self->{' children'}}, $item;
-    }
-    return $self;
-}
+        $item = $item->next()
+     }
+ }

 =head3 first

@@ -124,6 +122,9 @@
     if (defined $self->{' children'} and defined $self->{' children'}->[0]) {
         $self->{'First'} = $self->{' children'}->[0];
     }
+    else {
+        delete $self->{'First'}
+    }
     return $self->{'First'};
 }

@@ -140,6 +141,9 @@
     if (defined $self->{' children'} and defined $self->{' children'}->[-1]) {
         $self->{'Last'} = $self->{' children'}->[-1];
     }
+    else {
+        delete $self->{'Last'}
+    }
     return $self->{'Last'};
 }

@@ -154,7 +158,7 @@

 sub parent {
     my $self = shift();
-    $self->{'Parent'} = shift() if defined $_[0];
+#    $self->{'Parent'} = shift() if defined $_[0];
     return $self->{'Parent'};
 }

@@ -167,8 +171,11 @@
 =cut

 sub prev {
-    my $self = shift();
-    $self->{'Prev'} = shift() if defined $_[0];
+    my ($self, $other) = @_;
+    if ($other) {
+        $self->{'Prev'} = $other;
+        $self->{' api'}{'pdf'}->out_obj($self);
+    }
     return $self->{'Prev'};
 }

@@ -181,8 +188,11 @@
 =cut

 sub next {
-    my $self = shift();
-    $self->{'Next'} = shift() if defined $_[0];
+    my ($self, $other) = @_;
+    if ($other) {
+        $self->{'Next'} = $other;
+        $self->{' api'}{'pdf'}->out_obj($self);
+    }
     return $self->{'Next'};
 }

@@ -208,6 +218,7 @@
         $self->{' api'}->{'pdf'}->new_obj($child);
     }

+    $self->{' api'}{'pdf'}->out_obj($self);
     return $child;
 }

@@ -268,6 +279,7 @@
         $item = $item->next();
         push @{$self->{' children'}}, $item;
     }
+    $self->{' api'}{'pdf'}->out_obj($self);
     return $self;
 }

@@ -286,12 +298,20 @@

     my $prev = $self->prev();
     my $next = $self->next();
-    $prev->next($next) if defined $prev;
-    $next->prev($prev) if defined $next;

+    if (defined $prev and defined $next) {
+        $prev->next($next);
+        $next->prev($prev);
+    }
+    else {
+        delete $prev->{'Next'} if defined $prev;
+        delete $next->{'Prev'} if defined $next;
+    }
+
     my $siblings = $self->parent->{' children'};
     @$siblings = grep { $_ ne $self } @$siblings;
     delete $self->parent->{' children'} unless $self->parent->has_children();
+    $self->{' api'}{'pdf'}->out_obj($self->parent);

     return;
 }
@@ -326,6 +346,7 @@
     # Set
     my $is_open = shift();
     $self->{' closed'} = (not $is_open);
+    $self->{' api'}{'pdf'}->out_obj($self);

     return $self;
 }
@@ -362,12 +383,17 @@
     # Get
     unless (@_) {
         return unless $self->{'Title'};
-        return $self->{'Title'}->val();
+        my $s = $self->{'Title'}->val();
+        if ( $s =~ s/^\x{fe}\x{ff}// ) {
+            $s = Encode::decode( 'UTF-16BE', $s )
+        }
+        return $s;
     }

     # Set
     my $text = shift();
     $self->{'Title'} = PDFStr($text);
+    $self->{' api'}{'pdf'}->out_obj($self);
     return $self;
 }

@@ -403,6 +429,7 @@
         $self->{'Dest'} = PDFStr($destination);
     }

+    $self->{' api'}{'pdf'}->out_obj($self);
     return $self;
 }

@@ -443,6 +470,7 @@
     $self->{'A'}->{'S'}   = PDFName('URI');
     $self->{'A'}->{'URI'} = PDFStr($uri);

+    $self->{' api'}{'pdf'}->out_obj($self);
     return $self;
 }

@@ -465,6 +493,7 @@
     $self->{'A'}->{'S'} = PDFName('Launch');
     $self->{'A'}->{'F'} = PDFStr($file);

+    $self->{' api'}{'pdf'}->out_obj($self);
     return $self;
 }

@@ -511,6 +540,7 @@

     $self->{'A'}->{'D'} = _destination(PDFNum($page_number), $location, @args);

+    $self->{' api'}{'pdf'}->out_obj($self);
     return $self;
 }

Lines marked 'fix for test #3' in the script are no longer required and can be deleted, and:

ok 1 - looks like we successfully deleted the last item...
ok 2 - ... (save, re-open and check to be sure)
ok 3 - we can examine bookmarks in existing file
ok 4 - yes we can
ok 5 - ...and read interesting characters
ok 6 - we can modify outline in existing file and save changes
1..6

(and outline.t passes OK)

Edit: I think I missed something indeed! (Rubber duck strikes again.) The count() can't reliably be used to re-bless because it doesn't follow items which are "closed". Perhaps dedicated internal traversing sub will be required.

Edit 2 : So, then, the sub below should be added/appended to PDF/API2/Outline.pm; and line patched in PDF/API2.pm should be a call to this sub i.e. $obj->_fix_on_open();

sub _fix_on_open {
    my $self = shift();

    if ($self->has_children()) {
        $self->_load_children() unless exists $self->{' children'};
        foreach my $child (@{$self->{' children'}}) {
            $child->_fix_on_open();
        }
    }
}