Closed p5pRT closed 20 years ago
This should be a nice easy job for someone.
$a=v120.300; $b = v200.400; $a^=$a;
This'll put illegal UTF8 characters in $a\, and here's why.
Take a look at Perl_do_vop in doop.c of cfgperl.
We're reading from the PV in left\, which we store in lc\, and the PV in right\, which we store in rc\, and we're or'ing each character. We put the result in the PV of the target\, sv\, which we call dc.
With me so far? So here's what happens:
120 ^ 200 = 176 200 ^ 300 = 188
So\, the first character of dc becomes 176\, but we're in Unicode now\, so that encodes as \302\260. You'll note this is two bytes.
Here's the kicker: Since the target is also the left-hand side of the op\, ($a = $a^$b) we've just written two bytes to the start of the string. And since the original character\, 120\, was only one byte\, we've scribbled over the start of the next character!
To fix this: if (sv==left | sv==right) * Keep dc as a new\, separate string until we've finished * Set SvPV(sv) to dc when we're done * Free whichever of lc or rc it replaces.
In message \20000714151309\.7170\.qmail@​othersideofthe\.earth\.li "Simon Cozens" \simon@​othersideofthe\.earth\.li wrote:
To fix this: if (sv==left | sv==right) * Keep dc as a new\, separate string until we've finished * Set SvPV(sv) to dc when we're done * Free whichever of lc or rc it replaces.
Something like this you mean:
diff -c 'perl-5.6.0/doop.c' 'perl-20000714-002/doop.c' Index: ./doop.c *** ./doop.c Sun Mar 12 03:36:32 2000 --- ./doop.c Fri Jul 14 23:56:54 2000 *************** *** 1062\,1067 **** --- 1062\,1068 ---- char *rsave; bool left_utf = DO_UTF8(left); bool right_utf = DO_UTF8(right); + I32 needlen;
if (left_utf && !right_utf) sv_utf8_upgrade(right); *************** *** 1074\,1090 **** rsave = rc = SvPV(right\, rightlen); len = leftlen \< rightlen ? leftlen : rightlen; lensave = len; ! if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) { STRLEN n_a; dc = SvPV_force(sv\, n_a); if (SvCUR(sv) \< len) { dc = SvGROW(sv\, len + 1); (void)memzero(dc + SvCUR(sv)\, len - SvCUR(sv) + 1); } } else { ! I32 needlen = ((optype == OP_BIT_AND) ! ? len : (leftlen > rightlen ? leftlen : rightlen)); Newz(801\, dc\, needlen + 1\, char); (void)sv_usepvn(sv\, dc\, needlen); dc = SvPVX(sv); /* sv_usepvn() calls Renew() */ --- 1075\,1097 ---- rsave = rc = SvPV(right\, rightlen); len = leftlen \< rightlen ? leftlen : rightlen; lensave = len; ! if ((left_utf || right_utf) && (sv == left || sv == right)) { ! needlen = optype == OP_BIT_AND ? len : leftlen + rightlen; ! Newz(801\, dc\, needlen + 1\, char); ! } ! else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) { STRLEN n_a; dc = SvPV_force(sv\, n_a); if (SvCUR(sv) \< len) { dc = SvGROW(sv\, len + 1); (void)memzero(dc + SvCUR(sv)\, len - SvCUR(sv) + 1); } + if (optype != OP_BIT_AND && (left_utf || right_utf)) + dc = SvGROW(sv\, leftlen + rightlen + 1); } else { ! needlen = ((optype == OP_BIT_AND) ! ? len : (leftlen > rightlen ? leftlen : rightlen)); Newz(801\, dc\, needlen + 1\, char); (void)sv_usepvn(sv\, dc\, needlen); dc = SvPVX(sv); /* sv_usepvn() calls Renew() */ *************** *** 1093\,1106 **** (void)SvPOK_only(sv); if (left_utf || right_utf) { UV duc\, luc\, ruc; STRLEN lulen = leftlen; STRLEN rulen = rightlen; - STRLEN dulen = 0; I32 ulen;
- if (optype != OP_BIT_AND) - dc = SvGROW(sv\, leftlen+rightlen+1); - switch (optype) { case OP_BIT_AND: while (lulen && rulen) { --- 1100\,1110 ---- (void)SvPOK_only(sv); if (left_utf || right_utf) { UV duc\, luc\, ruc; + char *dcsave = dc; STRLEN lulen = leftlen; STRLEN rulen = rightlen; I32 ulen;
switch (optype) { case OP_BIT_AND: while (lulen && rulen) { *************** *** 1113\,1120 **** duc = luc & ruc; dc = (char*)uv_to_utf8((U8*)dc\, duc); } ! dulen = dc - SvPVX(sv); ! SvCUR_set(sv\, dulen); break; case OP_BIT_XOR: while (lulen && rulen) { --- 1117\,1125 ---- duc = luc & ruc; dc = (char*)uv_to_utf8((U8*)dc\, duc); } ! if (sv == left || sv == right) ! (void)sv_usepvn(sv\, dcsave\, needlen); ! SvCUR_set(sv\, dc - dcsave); break; case OP_BIT_XOR: while (lulen && rulen) { *************** *** 1140\,1147 **** dc = (char*)uv_to_utf8((U8*)dc\, duc); } mop_up_utf: ! dulen = dc - SvPVX(sv); ! SvCUR_set(sv\, dulen); if (rulen) sv_catpvn(sv\, rc\, rulen); else if (lulen) --- 1145\,1153 ---- dc = (char*)uv_to_utf8((U8*)dc\, duc); } mop_up_utf: ! if (sv == left || sv == right) ! (void)sv_usepvn(sv\, dcsave\, needlen); ! SvCUR_set(sv\, dc - dcsave); if (rulen) sv_catpvn(sv\, rc\, rulen); else if (lulen) diff -c 'perl-5.6.0/t/op/bop.t' 'perl-20000714-002/t/op/bop.t' Index: ./t/op/bop.t *** ./t/op/bop.t Mon Feb 28 09:38:33 2000 --- ./t/op/bop.t Sat Jul 15 00:07:03 2000 *************** *** 9\,15 **** unshift @INC\, '../lib'; }
! print "1..30\n";
# numerics print ((0xdead & 0xbeef) == 0x9ead ? "ok 1\n" : "not ok 1\n"); --- 9\,15 ---- unshift @INC\, '../lib'; }
! print "1..35\n";
# numerics print ((0xdead & 0xbeef) == 0x9ead ? "ok 1\n" : "not ok 1\n"); *************** *** 81\,83 **** --- 81\,98 ---- print "ok 28\n" if sprintf("%vd"\, v4095.801.4095 & v801.4095) eq '801.801'; print "ok 29\n" if sprintf("%vd"\, v4095.801.4095 | v801.4095) eq '4095.4095.4095'; print "ok 30\n" if sprintf("%vd"\, v801.4095 ^ v4095.801.4095) eq '3294.3294.4095'; + + # + print "ok 31\n" if sprintf("%vd"\, v120.v300 & v200.v400) eq '72.256'; + print "ok 32\n" if sprintf("%vd"\, v120.v300 | v200.v400) eq '248.444'; + print "ok 33\n" if sprintf("%vd"\, v120.v300 ^ v200.v400) eq '176.188'; + + # + my $a = v120.300; + my $b = v200.400; + $a ^= $b; + print "ok 34\n" if sprintf("%vd"\, $a) eq '176.188'; + my $a = v120.300; + my $b = v200.400; + $a |= $b; + print "ok 35\n" if sprintf("%vd"\, $a) eq '248.444';
Tom
Tom Hughes (lists.p5p):
In message \20000714151309\.7170\.qmail@​othersideofthe\.earth\.li "Simon Cozens" \simon@​othersideofthe\.earth\.li wrote:
To fix this: if (sv==left | sv==right) * Keep dc as a new\, separate string until we've finished * Set SvPV(sv) to dc when we're done * Free whichever of lc or rc it replaces.
Something like this you mean:
A bit like that\, yes. Thanks!
Bug-people\, close that ticket!
In message \slrn8n07i0\.viv\.simon@​justanother\.perlhacker\.org simon@brecon.co.uk (Simon Cozens) wrote:
Tom Hughes (lists.p5p):
Something like this you mean:
A bit like that\, yes. Thanks!
Bug-people\, close that ticket!
Done.
Tom
On Sat\, Jul 15\, 2000 at 12:21:56AM +0100\, Tom Hughes wrote:
In message \20000714151309\.7170\.qmail@​othersideofthe\.earth\.li "Simon Cozens" \simon@​othersideofthe\.earth\.li wrote:
To fix this: if (sv==left | sv==right) * Keep dc as a new\, separate string until we've finished * Set SvPV(sv) to dc when we're done * Free whichever of lc or rc it replaces.
Something like this you mean:
In it is.
Migrated from rt.perl.org#3505 (status was 'resolved')
Searchable as RT3505$