Closed p5pRT closed 20 years ago
This is a bug report for perl from lvirden@cas.org\, generated with the help of perlbug 1.31 running under perl v5.6.0.
----------------------------------------------------------------- While trying to build the htpl package\, I received this core dump from perl:
$ /ldatae/gnu/perl*/perl htpl-crp.pl assertion botched (chunk's tail overwrite?): *(unsigned int *)((caddr_t)ovp + nbytes - sizeof (unsigned int)) == 0x55555555
$ cat /home/lwv26/.cpan/build/htpl-2.83/htpl-crp.pl #!/usr/local/bin/perl
use strict vars; use vars qw($glob_ary $subname);
require "parse-exp.pl";
use XML::Parser; my $parser = new XML::Parser(Style => 'Tree');
my (%scopes\, @subs);
# Parse input
my @files; push(@files\, $ARGV[0] || (-f 'macros.xpl' ? 'macros.xpl' : 'htpl.subs')); foreach (qw(/usr/share/htpl-site.xpl /usr/local/share/htpl-site.xpl htpl-site.xpl)) { push(@files\, $_) if (-f $_); }
my @nodes;
foreach (@files) { my $tree = $parser->parsefile($_); die "Root must be HTPL" unless($tree->[0] eq 'HTPL'); my @these = @{$tree->[1]}; shift @these; push(@nodes\, @these); }
my $result = ['HTPL'\, [{}\, @nodes]];
# Output parser
open(OP\, ">htpl-parse.c");
select(OP);
print \<\<EOM; /** HTPL Macro parser ********************************** ** This file is created automatically by htpl-crp.pl ** ** Do not attempt to edit *****************************/
#define __HTPARSE__ #include "htpl.h"
#define RETURN(x) {int v = (x); destroypersist(); return v;} #define numtokens (persist->tokens->num)
EOM
# Recurse over tree
&recur($result);
select(STDOUT); close(OP);
# Output header file
my $nmacs = $#subs + 1;
open(O\, ">htpl-sh.h"); foreach (@subs) { next unless ($_); print O "int parse_$_(STR\, int);\n"; }
my @thescopes = keys %scopes; unshift(@thescopes\, "none"); $scopes{"none"} = "no_scope"; if (@thescopes) { print O "\nenum scopevalues {" . join("\,\n "\, (map {$scopes{$_};} @thescopes)) . "};\n"; print O "#ifdef __HTPARSE__\n"; print O "char *scope_names[] = {\"" . join("\"\,\n \""\, @thescopes) . "\"};\n"; print O "int scope_ids[] = {" . join("\, "\, map {"0"} @thescopes) . "};\n"; print O \<\<EOI #else extern char *scope_names[]; extern int scope_ids[]; #endif EOI }
print O "\n#define NUM_MACROS $nmacs\n"; close(O);
print $nmacs . " macros compiled.\n";
sub recur { my ($node\, @stack) = @_; my @array = @$node;
my $item;
my $code;
my $atref = {};
$atref = shift @array unless ($#array % 2); # $atref contains the tag
# Convert sub tags to hash for quick find
my %hash = @array;
# Create function name
my $sub = join("_"\, @stack);
$sub =~ s/-/_/g;
$subname = uc(join(" "\, grep {!/^__/} @stack[1 .. $#stack]));
# Create function header push (@subs\, $sub);
# Initialize scoping
my $precode = ''; my $postcode = '';
my ($max\, $min);
# Check if this macro has minimum/maximum parameters if (defined($max = $atref->{'MAX'}) + ($min = $atref->{'MIN'})) { # + and not ||\, so both sides are evaluated $code .= &outparamcount($min\, $max\, $subname); }
if ($atref->{'ASSERT'}) { $code .= &outassert($atref->{'ASSERT'}\, $subname); }
# Check if this macro can be used by the user\, or is it for inner use only # The attribute PRIVATE limits a macro to calling from another macro
if ($atref->{'PRIVATE'}) { $code .= &outensurenest; }
# Check if this domain has prerequisites # Code under __PRE sub tag will be evaluated once for every script in # first occurunce. Useful for requiring optional modules
my $pre = $hash{'__PRE'};
$code .= &outpre(&juice($pre)) if ($pre);
# Check if this macro is aliased
# __ALIAS reduced a macro to a unification of another macro
my $alias = $hash{'__ALIAS'};
if ($alias) { $precode = &outpersist . $precode; my ($todo\, %that) = &juice($alias); $code .= &outterminal($subname); $postcode = $postcode . &outsuccess; foreach (split(/\n/\, $todo)) { $code .= &outunify($_\, $that{'DIR'}); } goto done ; }
# SCOPE is an attribute used to group perl code in { } to enable scoping # Not usable in areas
if (($atref->{'SCOPE'} || $atref->{'PARAMS'}) && !$atref->{'AREA'}) { $precode .= &outcode("{"); $postcode = &outcode("}") . $postcode; }
# PARAMS is an attribute used for tags in SGML notion
if ($atref->{'PARAMS'}) { $precode .= &outgettags($atref->{'MANDATORY'}\, $subname); }
# The FRIEND attribute is used for non blocking macros\, and allows them to # masquerade another scope. This is useful only for calling other macros.
if ($atref->{'FRIEND'} && !$atref->{'AREA'}) { $precode .= &outpush($atref->{'FRIEND'}\, 1); $postcode = &outpop($atref->{'FRIEND'}\, $subname) . $postcode; }
# The BROTHER attribute on a non blocking macro (or on a blocking macro # inherited to the forward tag enforces a SCOPE in the entrance to a # macro. An Additional CHANGE attribute might be specified to change the # current scope to a different one once verified
if ($atref->{'BROTHER'} && !$atref->{'AREA'}) { if ($atref->{'CHANGE'}) { $code .= &outpop($atref->{'BROTHER'}\, $subname) . &outpush($atref->{'CHANGE'}); } else { if ($atref->{' SYS '} eq '__REV') { $postcode = &outnopop($atref->{'BROTHER'}\, $subname) . $postcode; } else { $precode .= &outnopop($atref->{'BROTHER'}\, $subname); } } }
# The POP attribute enforces a scope check in the entrance for a macro\, # and pops the scope from the stack
if ($atref->{'POP'}) { $postcode = &outpop($atref->{'POP'}\, $subname) . $postcode; }
# The PUSH attribute supplies a scope to be pushed into the stack
if ($atref->{'PUSH'}) { $code .= &outpush($atref->{'PUSH'}); }
# Now lets's check if this tag is a leaf - if so\, we should reduce to the # code
my $this = $hash{'0'};
if ($this =~ /\S+/) { $precode = &outpersist . $precode; $code .= &outcode($this); $postcode .= &outsuccess(); goto done; }
my $codet = &operations($sub\, \%hash\, @array);
if ($codet || $atref->{'NOOP'}) { $precode = &outpersist . $precode; $postcode .= &outsuccess; $code .= $codet; goto done; }
# This tag is a nonterminal
my @ks = keys %hash; my ($key\, $ref);
# IF this is a blocking tag
if ($atref->{'AREA'}) { my %tiny = qw(__FWD PUSH __REV POP); foreach $key (qw(__FWD __REV)) { $ref = $hash{$key}; my @ary = @$ref; my $attr = shift @ary; $attr->{$tiny{$key}} = $atref->{'BLOCK'} if ($atref->{'BLOCK'}); $attr->{'BROTHER'} = $atref->{'BROTHER'}; $attr->{' SYS '} = $key; if ($atref->{'SCOPE'}) { @ary = makedo($ary[1]) if ($#ary == 1 && $ary[0] eq '0'); unshift(@ary\, makedo("{")) if ($key eq '__FWD'); push(@ary\, makedo("}")) if ($key eq '__REV'); } unshift(@ary\, $attr); &recur(\@ary\, (@stack\, lc($key))); } $code .= &outarea($sub); goto done; }
# This is a matching node
$code .= &outnonterminal;
# Check all children
foreach $key (@ks) { next if ($key =~ /^__/ || $key eq '0'); $code .= &outtoken(lc($key)\, @stack); $ref = $hash{$key}; my @ary = @$ref; # shift @ary; &recur(\@ary\, (@stack\, lc($key))); }
# Add a check for unification failure
$code .= outendsub(0); done: print &outheader($sub) . $precode . $code . $postcode . &outfooter($sub); print "\n"; }
sub outheader { my $sub = shift; return \<\<EOM; int parse_$sub(stack\, untag) int untag; STR stack; {
TOKEN token; static done = 0; STR buff; int code; static int nesting = 0;
EOM }
sub outnonterminal { return \<\<EOM; eat(&stack\, token); EOM }
sub outfooter { return "}\n"; }
sub outpre { my $code = &escape(shift); return \<\<EOM; if (!done) { done = 1; printcode("$code"); } EOM }
sub outbeginsub { return \<\<EOM; code = 1; EOM }
sub outterminal { my $sub = shift; return \<\<EOM; nesting++; if (nesting > 1) RETURN(croak("Infinite loop in $sub")) EOM }
sub outunify { my $alias = &escape(shift\, 1); $alias =~ s/\\n$//; my $dir = shift; my $dn = "untag"; $dn = "0" if ($dir eq 'FWD'); $dn = "1" if ($dir eq 'REV'); my $alias_parsed = &wrapcode(&assemble($alias)); return \<\<EOM; buff = (STR)mysprintf($alias_parsed); nest++; code = parse_htpl(buff\, $dn); nest--; if (!code) { croak("Unification of '%s' failed"\, buff); free(buff); RETURN(0) } free(buff);
EOM }
sub outcond { my $code = shift; my ($min\, $max\, $assert\, $scope\, $sub) = @_; my $txt;
return $code unless (join(""\, @_) ne "");
die "Non numeric minimum $min in $sub" if ($min && $min !~ /^\d+$/); die "Non numeric maximum $max in $sub" if ($max && $max !~ /^\d+$/);
my $ret = ""; if ($min =~ /\d/ || $max =~ /\d/) { $ret .= \<\<EOM; EOM } my @conds; push(@conds\, "numtokens >= $min") if ($min =~ /\d/); push(@conds\, "numtokens \<= $max") if ($max =~ /\d/); push(@conds\, &code2c($assert)) if ($assert); push(@conds\, &ifscope($scope)) if ($scope);
return $code unless(@conds);
my @lines = split(/\n/\, $code); @lines = map {" $_";} @lines; $code = join("\n"\, @lines); $ret = " " . &wrapcode("if (" . join(" && "\, @conds) . ") ") . " { $code } "; return $ret; }
sub ifscope { my @scopes = map {&getscope($_)} split(/\,\s*/\, shift); my @conds = map {"currscope->scope == $_"} @scopes; return "currscope && (" . join(" || "\, @conds) . ")"; }
sub outcode { my $code = shift; my @p = @_; my $txt;
my ($ret\, $scode\, $tcode\, $l); foreach $l (split(/\r?\n/\, $code)) { next unless ($l); $tcode = &escape($l); $scode = &wrapcode(&assemble($tcode)); if ($l =~ /^\s*\#\w/) { $ret .= \<\<EOM; buff = (STR)mysprintf($scode); nest++; code = parse_htpl(strchr(buff\, '#') + 1\, 0); nest--; if (!code) { croak("Unification of '%s' failed"\, buff); free(buff); RETURN(0) } free(buff); EOM next; } $scode =~ s/(\s+)%#/$1#/; if ($glob_ary) { $ret .= \<\<EOM; printfcode($scode); EOM } else { $tcode =~ s/\%\%/%/g; $ret .= \<\<EOM; printcode("$tcode"); EOM } } $ret; }
sub outtoken { my ($this\, @stack) = @_; my $uthis = uc($this); my $sub2 = join("_"\, (@stack\, $this)); $sub2 =~ s/-/_/g; return \<\<EOM; if (!strcasecmp(token\, "$uthis")) return parse_$sub2(stack\, untag); EOM }
sub juice { my $obj = shift; # return $obj unless (ref($obj)); my @ary = @$obj; my $att = shift @ary; # return $att unless (ref($att)); my %hash = @ary; return ($hash{'0'}\, %$att); }
sub escape { my $s = shift; $s .= "\n" unless ($s =~ /\n$/ || $_[0]); $s =~ s/\\/\\\\/g; $s =~ s/"/\\"/g; $s =~ s/^\n//; $s .= "\n"; $s =~ s/\n+/\n/g; $s =~ s/\n/\\n/g; return $s; }
sub outsuccess { return \<\<EOM; nesting = 0; RETURN(1) EOM }
sub outensurenest { return \<\<EOM; if (!nest) RETURN(0) EOM }
sub outgettags { my $mand = shift; my $sub = shift; my $ret = &outcode("my %%tags = &HTML::HTPL::Sys::parse_tags('%1*%\');"); $ret .= &outcode("&publish(&proper(sub {uc(\$_);}\, %%tags));"); $ret .= &outcode("&HTML::HTPL::Sys::enforce_tags('$mand'\, '$sub'\, %%tags);") if ($mand); $ret; }
sub outarea { my $sub = shift; return \<\<EOM; if (!untag) return parse_${sub}___fwd(stack\, untag); else return parse_${sub}___rev(stack\, untag); EOM }
sub outassert { my ($assert\, $sub) = @_;
my $c = &code2c($assert);
my $ret = \<\<EOM; if (!($c)) { RETURN(croak("Assert failed on $sub: $assert")); } EOM $ret; }
sub outparamcount { my ($min\, $max\, $sub) = @_; die "Non numeric minimum $min in $sub" if ($min && $min !~ /^\d+$/); die "Non numeric maximum $max in $sub" if ($max && $max !~ /^\d+$/); my $ret; $ret .= \<\<EOM if ($min); if (numtokens \< $min) RETURN(croak("$sub called with %d arguments\, minimum needed is $min"\, numtokens)) EOM $ret .= \<\<EOM if (defined($max)); if (numtokens > $max) RETURN(croak("$sub called with %d arguments\, maximum needed is $max"\, numtokens))
EOM $ret; }
sub outpop { return &outnopop(@_) . \<\<EOM; popscope(); EOM }
sub outnopop { my ($scope\, $sub) = @_; my ($sym\, $op) = ("!="\, "&&"); ($sym\, $op) = ("=="\, "||") if ($scope =~ s/^\!\s*//); my @conds = map { "currscope->scope $sym " . &getscope($_) } split(/\,\s*/\, $scope); my $cond = join(" $op "\, @conds); my $ret = \<\<EOM; if (!currscope) RETURN(croak("Unexpected $sub")) if ($cond) RETURN(croak("Now in scope %s from %d and met $sub\, expecting: $scope"\, scope_names[currscope->scope]\, currscope->nline)) EOM $ret; }
sub outpush { my ($scope\, $noinc) = @_; $noinc = $noinc * 1; my $code = &getscope($scope); return \<\<EOM; pushscope($code\, $noinc); EOM }
sub getscope { my $name = shift; return $scopes{$name} if ($scopes{$name}); my $val = $name; $val =~ tr/A-Z/a-z/; $val =~ s/[^a-z0-9]/_/g; $val = "scope_$val"; $scopes{$name} = $val; $val; }
sub outconst { my $val = shift; return \<\<EOM; return $val; EOM }
sub outc { my $code = shift; return \<\<EOM; { $code } EOM }
sub outpersist { " makepersist(stack);\n"; }
sub makedo { ('__DO'\, [{}\, "0"\, shift]); }
sub outendsub { my $code = shift; return \<\<EOM; return $code; EOM }
sub outxfer { my ($dir\, $var\, $scope) = @_; return \<\<EOM; if (!${dir}var("$var"\, "$scope")) RETURN(croak("Scope $scope not found in stack")); EOM }
sub outset { my ($var\, $val) = @_; my $s = expandstr($var); return \<\<EOM; setvar("$var"\, $s); EOM }
sub outcroak { my $scode = &wrapcode(&assemble(shift)); return \<\<EOM; RETURN(croak($scode)) EOM }
sub operations {
# Now let's iterate over all the sons and process them # $flag is turned if we found out this token was a terminal # $this is the contents of the tag # %that ts the attributes # $codet gets the code # All of this tags can be used with MIN or MAX to apply a parameter count # condition. Otherwise the tag will not execute but the macro will not # fail. ASSERT can be used to check the parameters.
my $sub = shift; my $superhash = shift; my @a = @_; my $code = ""; while (@a) { my $key = shift @a; my $this = shift @a; my ($todo\, %that); ($todo\, %that) = &juice($this); my @params = @that{qw(MIN MAX ASSERT BROTHER)}; my $codet = undef; my $doneterminal = undef;
if ($key eq '__MACRO') { my $atts = $this->[0]; my $name = $atts->{'NAME'} || $atts->{'ID'}; delete $atts->{'NAME'}; delete $atts->{'ID'}; $superhash->{$name} = $this; next; } # __INCLUDE parses another macro and includes it
if ($key eq '__INCLUDE') { if (!$doneterminal) { $codet = &outbeginsub; $doneterminal = 1; } foreach (split(/\n/\, $todo)) { $codet .= &outunify($_\, $that{'DIR'}); } }
# __BROTHER is an operative simillar to the BROTHER attribute # It enables verifying scopes in the middle of a macro
if ($key eq '__BROTHER') { $codet = &outnopop(&juice($this)\, $subname); }
# __TRUE makes a macro succeed
if ($key eq '__TRUE') { $codet = &outconst(1); }
# __FALSE makes a macro fail
if ($key eq '__FALSE') { $codet = &outconst(0); }
# __DO adds actual code to the buffer
if ($key eq '__DO') { $codet = &outcode($todo); }
# __POP pops a scope
if ($key eq '__POP') { $codet = &outpop($that{'SCOPE'} || $todo\, $subname); }
# __PUSH pushes a scope
if ($key eq '__PUSH') { $codet = &outpush($that{'SCOPE'} || $todo); }
if ($key eq '__SET') { $codet = &outset($that{'VAR'} || $todo\, $that{'VALUE'}\,); }
if ($key eq '__IMPORT') { $codet = &outxfer('import'\, $that{'VAR'} || $todo\, $that{'SCOPE'}); }
if ($key eq '__EXPORT') { $codet = &outxfer('export'\, $that{'VAR'} || $todo\, $that{'SCOPE'}); }
# __C adds C code to the parser
if ($key eq '__C') { $codet = &outc($todo); }
if ($key eq '__NOOP') { $codet = "/* do nothing */\n"; }
if ($key eq '__BLOCK') { my @those = @$this; shift @those; $codet = &operations($sub\, $superhash\, @those); }
if ($key eq '__CROAK') { $codet = &outcroak($that{'MSG'} || $todo); }
if ($codet) { $code .= &outcond($codet\, @params\, $sub); } } return $code; }
In \200008151125\.HAA09656@​lwv26awu\.cas\.org\, "Larry W. Virden" writes: :While trying to build the htpl package\, I received this core dump from :perl: : :$ /ldatae/gnu/perl*/perl htpl-crp.pl :assertion botched (chunk's tail overwrite?): *(unsigned int *)((caddr_t)ovp + :nbytes - sizeof (unsigned int)) == 0x55555555 [...]
This still occurs with @6713.
I've been having a hell of a time trying to debug this. I suspect\, but have not been able to confirm\, that the problem is caused by pp_concat()\, in some subset of the cases when TARG == right\, left is not UTF\, right is UTF.
I've been trying to cut the code down to a more usable test case: currently down to 288 lines of code + XML::Parser\, and a 48 line input file. I'll keep working on it\, but if anyone wants what I've done so far I'll be happy to pass it over.
Is there any easy way to turn on UTF8 on a string?
Hugo
In \200008191857\.TAA09579@​crypt\.compulink\.co\.uk\, I wrote: :I've been trying to cut the code down to a more usable test case: :currently down to 288 lines of code + XML::Parser\, and a 48 line :input file. I'll keep working on it\, but if anyone wants what I've :done so far I'll be happy to pass it over. : :Is there any easy way to turn on UTF8 on a string?
Thanks to Spider for the hint of 'pack("U0a*"\, $string)'.
I managed to get the test case down to this:
#!/usr/local/bin/perl
use strict vars;
sub r2 {
my $s = '' . pack "U0a*"\, 'mnopqrstuvwx';
$s = "abcdefghijkl" . $s;
}
r2($_) for 0\, 1;
This occurred because the wrong string was being freed in pp_concat. Attached patch fixes this\, and tests the fix.
I also noticed that in some branches the pp_concat code only copied until it saw a nul byte; the patch also fixes and tests this.
Hugo
On Sun\, Aug 20\, 2000 at 07:30:46AM +0100\, Hugo wrote:
In \200008191857\.TAA09579@​crypt\.compulink\.co\.uk\, I wrote: :I've been trying to cut the code down to a more usable test case: :currently down to 288 lines of code + XML::Parser\, and a 48 line :input file. I'll keep working on it\, but if anyone wants what I've :done so far I'll be happy to pass it over. : :Is there any easy way to turn on UTF8 on a string?
Thanks to Spider for the hint of 'pack("U0a*"\, $string)'.
I managed to get the test case down to this: #!/usr/local/bin/perl use strict vars; sub r2 { my $s = '' . pack "U0a*"\, 'mnopqrstuvwx'; $s = "abcdefghijkl" . $s; }
r2($_) for 0\, 1;This occurred because the wrong string was being freed in pp_concat. Attached patch fixes this\, and tests the fix.
Applied\, thanks.
Hugo \hv@​crypt\.compulink\.co\.uk wrote
Is there any easy way to turn on UTF8 on a string?
If you mean "How can I create a string with the UTF8 flag set but with all its characters less than 256?"\, then substr() is your friend:
DB\<7> $x = substr "abc\x{100}"\, -1
DB\<8> Dump $x SV = PV(0x10a2f8) at 0x20dea8 REFCNT = 1 FLAGS = (POK\,pPOK\,UTF8) PV = 0xf49f8 "\304\200"\0 CUR = 2 LEN = 6
DB\<9>
Of course\, that's making assumptions about the implementation which might not remain true.
OTOH\, if you mean "How can I force the UTF8 flag on?"\, that shouldn't be possible with a Perl program. Otherwise there's a bug.
Mike Guy
On Wed\, Aug 23\, 2000 at 04:55:43PM +0100\, Mike Guy wrote:
OTOH\, if you mean "How can I force the UTF8 flag on?"\, that shouldn't be possible with a Perl program. Otherwise there's a bug.
Uhm\, the behaviour of pack("U0C*"\,$foo) is very\, very deliberate.
Seems a harmless change that could be useful.
On Fri\, Nov 17\, 2000 at 08:57:45AM -0500\, Stephen P. Potter wrote:
Seems a harmless change that could be useful.
Thanks\, applied. Though I can't see how this relates to 20000815.006...?
--- perlmodlib.PLo Tue Nov 14 10:27:45 2000 +++ perlmodlib.PL Tue Nov 14 10:30:02 2000 @@ -580\,6 +580\,12 @@ standards for naming modules and the interface to methods in those modules.
+If developing modules for private internal or project specific use\, +that will never be released to the public\, then you should ensure +that their names will not clash with any future public module. You +can do this either by using the reserved Local::* category or by +using a category name that includes an underscore like Foo_Corp::*. + To be portable each component of a module name should be limited to 11 characters. If it might be used on MS-DOS then try to ensure each is unique in the first 8 characters. Nested modules make this easier.
Lightning flashed\, thunder crashed and Jarkko Hietaniemi \jhi@​iki\.fi whispered : | On Fri\, Nov 17\, 2000 at 08:57:45AM -0500\, Stephen P. Potter wrote: | > Seems a harmless change that could be useful. |
---|---|---|
Thanks\, applied. Though I can't see how this relates to 20000815.006...? |
Dyslexia.... That should have been 20000518.006.
-spp
Migrated from rt.perl.org#3694 (status was 'resolved')
Searchable as RT3694$