Perl / perl5

🐪 The Perl programming language
https://dev.perl.org/perl5/
Other
1.91k stars 542 forks source link

No subject provided #2206

Closed p5pRT closed 20 years ago

p5pRT commented 24 years ago

Migrated from rt.perl.org#3505 (status was 'resolved')

Searchable as RT3505$

p5pRT commented 24 years ago

From @simoncozens

This should be a nice easy job for someone.

  $a=v120.300; $b = v200.400;   $a^=$a;

This'll put illegal UTF8 characters in $a\, and here's why.

Take a look at Perl_do_vop in doop.c of cfgperl.

We're reading from the PV in left\, which we store in lc\, and the PV in right\, which we store in rc\, and we're or'ing each character. We put the result in the PV of the target\, sv\, which we call dc.

With me so far? So here's what happens​:

  120 ^ 200 = 176   200 ^ 300 = 188

So\, the first character of dc becomes 176\, but we're in Unicode now\, so that encodes as \302\260. You'll note this is two bytes.

Here's the kicker​: Since the target is also the left-hand side of the op\, ($a = $a^$b) we've just written two bytes to the start of the string. And since the original character\, 120\, was only one byte\, we've scribbled over the start of the next character!

To fix this​: if (sv==left | sv==right)   * Keep dc as a new\, separate string until we've finished   * Set SvPV(sv) to dc when we're done   * Free whichever of lc or rc it replaces.

Perl Info ``` Flags: category=core severity=low Site configuration information for perl v5.6.0: Configured by root at Thu Apr 13 08:15:45 JST 2000. Summary of my perl5 (revision 5.0 version 6 subversion 0) configuration: Platform: osname=linux, osvers=2.3.99-pre3, archname=i686-linux uname='linux othersideofthe.earth.li 2.3.99-pre3 #3 smp sat mar 25 23:54:38 jst 2000 i686 unknown ' config_args='-d' hint=recommended, useposix=true, d_sigaction=define usethreads=undef use5005threads=undef useithreads=undef usemultiplicity=undef useperlio=undef d_sfio=undef uselargefiles=define use64bitint=undef use64bitall=undef uselongdouble=undef usesocks=undef Compiler: cc='cc', optimize='-O2', gccversion=2.95.2 20000313 (Debian GNU/Linux) cppflags='-fno-strict-aliasing -I/usr/local/include' ccflags ='-fno-strict-aliasing -I/usr/local/include -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64' stdchar='char', d_stdstdio=define, usevfork=false intsize=4, longsize=4, ptrsize=4, doublesize=8 d_longlong=define, longlongsize=8, d_longdbl=define, longdblsize=12 ivtype='long', ivsize=4, nvtype='double', nvsize=8, Off_t='off_t', lseeksize=8 alignbytes=4, usemymalloc=n, prototype=define Linker and Libraries: ld='cc', ldflags =' -L/usr/local/lib' libpth=/usr/local/lib /lib /usr/lib libs=-lnsl -lndbm -lgdbm -ldb -ldl -lm -lc -lposix -lcrypt libc=/lib/libc-2.1.3.so, so=so, useshrplib=false, libperl=libperl.a Dynamic Linking: dlsrc=dl_dlopen.xs, dlext=so, d_dlsymun=undef, ccdlflags='-rdynamic' cccdlflags='-fpic', lddlflags='-shared -L/usr/local/lib' Locally applied patches: @INC for perl v5.6.0: /usr/local/lib/perl5/5.6.0/i686-linux /usr/local/lib/perl5/5.6.0 /usr/local/lib/perl5/site_perl/5.6.0/i686-linux /usr/local/lib/perl5/site_perl/5.6.0 /usr/local/lib/perl5/site_perl/5.5.670 /usr/local/lib/perl5/site_perl . Environment for perl v5.6.0: HOME=/home/simon LANG (unset) LANGUAGE (unset) LD_LIBRARY_PATH (unset) LOGDIR (unset) PATH=/home/simon/bin:/bin:/usr/bin:/usr/X11R6/bin:/usr/rhs/bin:/usr/local/bin:/usr/local/sbin:/usr/sbin:/sbin:/opt/kde/bin:/home/simon/bin PERL_BADLANG (unset) SHELL=/bin/zsh ```
p5pRT commented 24 years ago

From [Unknown Contact. See original ticket]

In message \20000714151309\.7170\.qmail@​othersideofthe\.earth\.li   "Simon Cozens" \simon@​othersideofthe\.earth\.li wrote​:

To fix this​: if (sv==left | sv==right) * Keep dc as a new\, separate string until we've finished * Set SvPV(sv) to dc when we're done * Free whichever of lc or rc it replaces.

Something like this you mean​:

diff -c 'perl-5.6.0/doop.c' 'perl-20000714-002/doop.c' Index​: ./doop.c *** ./doop.c Sun Mar 12 03​:36​:32 2000 --- ./doop.c Fri Jul 14 23​:56​:54 2000 *************** *** 1062\,1067 **** --- 1062\,1068 ----   char *rsave;   bool left_utf = DO_UTF8(left);   bool right_utf = DO_UTF8(right); + I32 needlen;

  if (left_utf && !right_utf)   sv_utf8_upgrade(right); *************** *** 1074\,1090 ****   rsave = rc = SvPV(right\, rightlen);   len = leftlen \< rightlen ? leftlen : rightlen;   lensave = len; ! if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {   STRLEN n_a;   dc = SvPV_force(sv\, n_a);   if (SvCUR(sv) \< len) {   dc = SvGROW(sv\, len + 1);   (void)memzero(dc + SvCUR(sv)\, len - SvCUR(sv) + 1);   }   }   else { ! I32 needlen = ((optype == OP_BIT_AND) ! ? len : (leftlen > rightlen ? leftlen : rightlen));   Newz(801\, dc\, needlen + 1\, char);   (void)sv_usepvn(sv\, dc\, needlen);   dc = SvPVX(sv); /* sv_usepvn() calls Renew() */ --- 1075\,1097 ----   rsave = rc = SvPV(right\, rightlen);   len = leftlen \< rightlen ? leftlen : rightlen;   lensave = len; ! if ((left_utf || right_utf) && (sv == left || sv == right)) { ! needlen = optype == OP_BIT_AND ? len : leftlen + rightlen; ! Newz(801\, dc\, needlen + 1\, char); ! } ! else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {   STRLEN n_a;   dc = SvPV_force(sv\, n_a);   if (SvCUR(sv) \< len) {   dc = SvGROW(sv\, len + 1);   (void)memzero(dc + SvCUR(sv)\, len - SvCUR(sv) + 1);   } + if (optype != OP_BIT_AND && (left_utf || right_utf)) + dc = SvGROW(sv\, leftlen + rightlen + 1);   }   else { ! needlen = ((optype == OP_BIT_AND) ! ? len : (leftlen > rightlen ? leftlen : rightlen));   Newz(801\, dc\, needlen + 1\, char);   (void)sv_usepvn(sv\, dc\, needlen);   dc = SvPVX(sv); /* sv_usepvn() calls Renew() */ *************** *** 1093\,1106 ****   (void)SvPOK_only(sv);   if (left_utf || right_utf) {   UV duc\, luc\, ruc;   STRLEN lulen = leftlen;   STRLEN rulen = rightlen; - STRLEN dulen = 0;   I32 ulen;

- if (optype != OP_BIT_AND) - dc = SvGROW(sv\, leftlen+rightlen+1); -   switch (optype) {   case OP_BIT_AND​:   while (lulen && rulen) { --- 1100\,1110 ----   (void)SvPOK_only(sv);   if (left_utf || right_utf) {   UV duc\, luc\, ruc; + char *dcsave = dc;   STRLEN lulen = leftlen;   STRLEN rulen = rightlen;   I32 ulen;

  switch (optype) {   case OP_BIT_AND​:   while (lulen && rulen) { *************** *** 1113\,1120 ****   duc = luc & ruc;   dc = (char*)uv_to_utf8((U8*)dc\, duc);   } ! dulen = dc - SvPVX(sv); ! SvCUR_set(sv\, dulen);   break;   case OP_BIT_XOR​:   while (lulen && rulen) { --- 1117\,1125 ----   duc = luc & ruc;   dc = (char*)uv_to_utf8((U8*)dc\, duc);   } ! if (sv == left || sv == right) ! (void)sv_usepvn(sv\, dcsave\, needlen); ! SvCUR_set(sv\, dc - dcsave);   break;   case OP_BIT_XOR​:   while (lulen && rulen) { *************** *** 1140\,1147 ****   dc = (char*)uv_to_utf8((U8*)dc\, duc);   }   mop_up_utf​: ! dulen = dc - SvPVX(sv); ! SvCUR_set(sv\, dulen);   if (rulen)   sv_catpvn(sv\, rc\, rulen);   else if (lulen) --- 1145\,1153 ----   dc = (char*)uv_to_utf8((U8*)dc\, duc);   }   mop_up_utf​: ! if (sv == left || sv == right) ! (void)sv_usepvn(sv\, dcsave\, needlen); ! SvCUR_set(sv\, dc - dcsave);   if (rulen)   sv_catpvn(sv\, rc\, rulen);   else if (lulen) diff -c 'perl-5.6.0/t/op/bop.t' 'perl-20000714-002/t/op/bop.t' Index​: ./t/op/bop.t *** ./t/op/bop.t Mon Feb 28 09​:38​:33 2000 --- ./t/op/bop.t Sat Jul 15 00​:07​:03 2000 *************** *** 9\,15 ****   unshift @​INC\, '../lib';   }

! print "1..30\n";

  # numerics   print ((0xdead & 0xbeef) == 0x9ead ? "ok 1\n" : "not ok 1\n"); --- 9\,15 ----   unshift @​INC\, '../lib';   }

! print "1..35\n";

  # numerics   print ((0xdead & 0xbeef) == 0x9ead ? "ok 1\n" : "not ok 1\n"); *************** *** 81\,83 **** --- 81\,98 ----   print "ok 28\n" if sprintf("%vd"\, v4095.801.4095 & v801.4095) eq '801.801';   print "ok 29\n" if sprintf("%vd"\, v4095.801.4095 | v801.4095) eq '4095.4095.4095';   print "ok 30\n" if sprintf("%vd"\, v801.4095 ^ v4095.801.4095) eq '3294.3294.4095'; + + # + print "ok 31\n" if sprintf("%vd"\, v120.v300 & v200.v400) eq '72.256'; + print "ok 32\n" if sprintf("%vd"\, v120.v300 | v200.v400) eq '248.444'; + print "ok 33\n" if sprintf("%vd"\, v120.v300 ^ v200.v400) eq '176.188'; + + # + my $a = v120.300; + my $b = v200.400; + $a ^= $b; + print "ok 34\n" if sprintf("%vd"\, $a) eq '176.188'; + my $a = v120.300; + my $b = v200.400; + $a |= $b; + print "ok 35\n" if sprintf("%vd"\, $a) eq '248.444';

Tom

p5pRT commented 24 years ago

From [Unknown Contact. See original ticket]

Tom Hughes (lists.p5p)​:

In message \20000714151309\.7170\.qmail@&#8203;othersideofthe\.earth\.li "Simon Cozens" \simon@&#8203;othersideofthe\.earth\.li wrote​:

To fix this​: if (sv==left | sv==right) * Keep dc as a new\, separate string until we've finished * Set SvPV(sv) to dc when we're done * Free whichever of lc or rc it replaces.

Something like this you mean​:

A bit like that\, yes. Thanks!

Bug-people\, close that ticket!

p5pRT commented 24 years ago

From [Unknown Contact. See original ticket]

In message \slrn8n07i0\.viv\.simon@&#8203;justanother\.perlhacker\.org   simon@​brecon.co.uk (Simon Cozens) wrote​:

Tom Hughes (lists.p5p)​:

Something like this you mean​:

A bit like that\, yes. Thanks!

Bug-people\, close that ticket!

Done.

Tom

p5pRT commented 24 years ago

From @jhi

On Sat\, Jul 15\, 2000 at 12​:21​:56AM +0100\, Tom Hughes wrote​:

In message \20000714151309\.7170\.qmail@&#8203;othersideofthe\.earth\.li "Simon Cozens" \simon@&#8203;othersideofthe\.earth\.li wrote​:

To fix this​: if (sv==left | sv==right) * Keep dc as a new\, separate string until we've finished * Set SvPV(sv) to dc when we're done * Free whichever of lc or rc it replaces.

Something like this you mean​:

In it is.