Perl / perl5

🐪 The Perl programming language
https://dev.perl.org/perl5/
Other
1.85k stars 527 forks source link

allow space or tab before unquoted string terminators in addition to before quoted ones #9773

Closed p5pRT closed 14 years ago

p5pRT commented 14 years ago

Migrated from rt.perl.org#66834 (status was 'rejected')

Searchable as RT66834$

p5pRT commented 14 years ago

From @davidnicol

Created by @davidnicol

I should have fixed this a decade ago\, but I fixed it tonight.

Perl's commitment to allowing white space most anywhere has been broken in the case of here-document string terminators without quotes\, such as

  print \<\< blah;   test   blah

which is a syntax error\, while

  print \<\< "blah";   test   blah

prints "test\n" as desired.

I think I reported this a long time ago and someone agreed that it was a bug\, but I couldn't find my report in rt so here's a new ticket.

And a patch to duplicate the existing logic for skipping spaces and tabs before the quoted terminators for bare terminators and backslash-prefaced terminators\, consisting of two new lines in toke.c (not dependent on my other work there\, but the comment in the earlier patch does show up in this one) and two new tests\, expected to go in heredoc.t which doesn't exist in origin yet.

Please note the use of test.pl's C\ method and string eval as a method for testing syntax which without repair simply doesn't parse. Is what I have done the standard way to do that?

Inline Patch ```diff diff --git a/t/op/heredoc.t b/t/op/heredoc.t index b1ab684..607b19f 100644 --- a/t/op/heredoc.t +++ b/t/op/heredoc.t @@ -7,7 +7,7 @@ BEGIN { } use strict; -plan(tests => 6); +plan(tests => 8); # heredoc without newline (#65838) @@ -65,3 +65,19 @@ HEREDOC "long terminator fails correctly" ); } + +# allow whitespace before the termstring +is( << 'WITH', <<'WITHOUT', "allow SPACE_OR_TAB before quoted terminator"); +test +WITH +test +WITHOUT + +eval <<\SYNTAX or fail( "allow SPACE_OR_TAB before terminator"); +is( << WITH, <
Perl Info ``` Flags: category=core severity=low This perlbug was built using Perl v5.8.5 in the Red Hat build system. It is being executed now by Perl v5.8.5 - Mon Jul 24 18:27:47 EDT 2006. Site configuration information for perl v5.8.5: Configured by Red Hat, Inc. at Mon Jul 24 18:27:47 EDT 2006. Summary of my perl5 (revision 5 version 8 subversion 5) configuration: Platform: osname=linux, osvers=2.6.9-22.18.bz155725.el, archname=s390x-linux-thread-multi uname='linux spark.z900.redhat.com 2.6.9-22.18.bz155725.el #1 smp thu nov 17 15:25:33 est 2005 s390x s390x s390x gnulinux ' config_args='-des -Doptimize=-O2 -g -pipe -m64 -Dversion=5.8.5 -Dmyhostname=localhost -Dperladmin=root@localhost -Dcc=gcc -Dcf_by=Red Hat, Inc. -Dinstallprefix=/usr -Dprefix=/usr -Dlibpth=/usr/local/lib64 /lib64 /usr/lib64 -Dprivlib=/usr/lib/perl5/5.8.5 -Dsitelib=/usr/lib/perl5/site_perl/5.8.5 -Dvendorlib=/usr/lib/perl5/vendor_perl/5.8.5 -Darchlib=/usr/lib64/perl5/5.8.5/s390x-linux-thread-multi -Dsitearch=/usr/lib64/perl5/site_perl/5.8.5/s390x-linux-thread-multi -Dvendorarch=/usr/lib64/perl5/vendor_perl/5.8.5/s390x-linux-thread-multi -Darchname=s390x-linux -Dvendorprefix=/usr -Dsiteprefix=/usr -Duseshrplib -Dusethreads -Duseithreads -Duselargefiles -Dd_dosuid -Dd_semctl_semun -Di_db -Ui_ndbm -Di_gdbm -Di_shadow -Di_syslog -Dman3ext=3pm -Duseperlio -Dinstallusrbinperl -Ubincompat5005 -Uversiononly -Dpager=/usr/bin/less -isr -Dinc_version_list=5.8.4 5.8.3 5.8.2 5.8.1 5.8.0' hint=recommended, useposix=true, d_sigaction=define usethreads=define use5005threads=undef useithreads=define usemultiplicity=define useperlio=define d_sfio=undef uselargefiles=define usesocks=undef use64bitint=define use64bitall=define uselongdouble=undef usemymalloc=n, bincompat5005=undef Compiler: cc='gcc', ccflags ='-D_REENTRANT -D_GNU_SOURCE -DDEBUGGING -fno-strict-aliasing -pipe -I/usr/local/include -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -I/usr/include/gdbm', optimize='-O2 -g -pipe -m64', cppflags='-D_REENTRANT -D_GNU_SOURCE -DDEBUGGING -fno-strict-aliasing -pipe -I/usr/local/include -I/usr/include/gdbm' ccversion='', gccversion='3.4.6 20060404 (Red Hat 3.4.6-2)', gccosandvers='' intsize=4, longsize=8, ptrsize=8, doublesize=8, byteorder=87654321 d_longlong=define, longlongsize=8, d_longdbl=define, longdblsize=8 ivtype='long', ivsize=8, nvtype='double', nvsize=8, Off_t='off_t', lseeksize=8 alignbytes=8, prototype=define Linker and Libraries: ld='gcc', ldflags ='' libpth=/usr/local/lib64 /lib64 /usr/lib64 libs=-lresolv -lnsl -lgdbm -ldb -ldl -lm -lcrypt -lutil -lpthread -lc perllibs=-lresolv -lnsl -ldl -lm -lcrypt -lutil -lpthread -lc libc=/lib/libc-2.3.3.so, so=so, useshrplib=true, libperl=libperl.so gnulibc_version='2.3.4' Dynamic Linking: dlsrc=dl_dlopen.xs, dlext=so, d_dlsymun=undef, ccdlflags='-Wl,-E -Wl,-rpath,/usr/lib64/perl5/5.8.5/s390x-linux-thread-multi/CORE' cccdlflags='-fPIC', lddlflags='-shared' Locally applied patches: @INC for perl v5.8.5: /usr/lib64/perl5/5.8.5/s390x-linux-thread-multi /usr/lib/perl5/5.8.5 /usr/lib64/perl5/site_perl/5.8.5/s390x-linux-thread-multi /usr/lib64/perl5/site_perl/5.8.4/s390x-linux-thread-multi /usr/lib64/perl5/site_perl/5.8.3/s390x-linux-thread-multi /usr/lib64/perl5/site_perl/5.8.2/s390x-linux-thread-multi /usr/lib64/perl5/site_perl/5.8.1/s390x-linux-thread-multi /usr/lib64/perl5/site_perl/5.8.0/s390x-linux-thread-multi /usr/lib/perl5/site_perl/5.8.5 /usr/lib/perl5/site_perl/5.8.4 /usr/lib/perl5/site_perl/5.8.3 /usr/lib/perl5/site_perl/5.8.2 /usr/lib/perl5/site_perl/5.8.1 /usr/lib/perl5/site_perl/5.8.0 /usr/lib/perl5/site_perl /usr/lib64/perl5/vendor_perl/5.8.5/s390x-linux-thread-multi /usr/lib64/perl5/vendor_perl/5.8.4/s390x-linux-thread-multi /usr/lib64/perl5/vendor_perl/5.8.3/s390x-linux-thread-multi /usr/lib64/perl5/vendor_perl/5.8.2/s390x-linux-thread-multi /usr/lib64/perl5/vendor_perl/5.8.1/s390x-linux-thread-multi /usr/lib64/perl5/vendor_perl/5.8.0/s390x-linux-thread-multi /usr/lib/perl5/vendor_perl/5.8.5 /usr/lib/perl5/vendor_perl/5.8.4 /usr/lib/perl5/vendor_perl/5.8.3 /usr/lib/perl5/vendor_perl/5.8.2 /usr/lib/perl5/vendor_perl/5.8.1 /usr/lib/perl5/vendor_perl/5.8.0 /usr/lib/perl5/vendor_perl . Environment for perl v5.8.5: HOME=/root LANG=en_US.UTF-8 LANGUAGE (unset) LD_LIBRARY_PATH (unset) LOGDIR (unset) PATH=/usr/kerberos/sbin:/usr/kerberos/bin:/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/usr/X11R6/bin:/root/bin PERL_BADLANG (unset) SHELL=/bin/bash ```
p5pRT commented 14 years ago

From @tamias

On Sun\, Jun 21\, 2009 at 10​:25​:43PM -0700\, David Nicol wrote​:

I should have fixed this a decade ago\, but I fixed it tonight.

Perl's commitment to allowing white space most anywhere has been broken in the case of here-document string terminators without quotes\, such as

print \<\< blah; test blah

which is a syntax error\, while

print \<\< "blah"; test blah

prints "test\n" as desired.

I think I reported this a long time ago and someone agreed that it was a bug\, but I couldn't find my report in rt so here's a new ticket.

That is actually the documented behavior.

perldoc perlop​:

  \<\<EOF

  ... There must be no space between the "\<\<" and the   identifier\, unless the identifier is quoted. (If you put a   space it will be treated as a null identifier\, which is valid\,   and matches the first empty line.) ...

Ronald

p5pRT commented 14 years ago

The RT System itself - Status changed from 'new' to 'open'

p5pRT commented 14 years ago

From @davidnicol

On Thu\, Jul 2\, 2009 at 8​:13 PM\, Ronald J Kimball\rjk\-perl\-p5p@&#8203;tamias\.net wrote​:

That is actually the documented behavior.

perldoc perlop​:

   \<\<EOF

      ... There must be no space between the "\<\<" and the       identifier\, unless the identifier is quoted.  (If you put a       space it will be treated as a null identifier\, which is valid\,       and matches the first empty line.) ...

Ronald

Hmm. No hint of deprecation of the unquoted null identifier in that paragraph.

Before I go and include a touch to perlop.pod in an addendum to my Big Heredoc Omnibus Patch\, is there any situation where HEREDOC WHITESPACE BAREWORD makes sense? I have not been able to contrive one\, the "juxtaposition operator" having been rejected in http​://dev.perl.org/perl6/doc/design/apo/A03.html

until now​:

  push @​pieces\, split \<\< while (@​pieces \< 20)   padding padding

is that worth saving?

also there might be variables interpolated in the doc with Tie​::Function type side effects.

p5pRT commented 14 years ago

@rgs - Status changed from 'open' to 'rejected'

p5pRT commented 14 years ago

From @davidnicol

Are there any situations where HEREDOC WHITESPACE BAREWORD makes sense and BAREWORD is not in the following short list?

  and eq if for foreach ge gt le lt ne or unless until while x

Everything else always binds rightward\, and there is still no way to define infix subroutines.

What I'd like to do is check the possible terminator against that list and treat the situation like the implied null terminator (issue an optional syntax warning that does not mention deprecation) when there is a match\, otherwise treat the unlisted bareword as the terminator.

p5pRT commented 14 years ago

From @davidnicol

On Mon\, Jul 6\, 2009 at 5​:35 PM\, David Nicol\davidnico@&#8203;cpan\.org wrote​:

and eq if for foreach ge gt le lt ne or unless until while x

also cmp. Here's a revised patch.

http​://tipjar.com/perlhacking/heredocpatch_revised.txt

-- Gaming Edison's ratio by shunning air conditioning doesn't actually work.

p5pRT commented 14 years ago

From @davidnicol

Inline Patch ```diff diff --git a/MANIFEST b/MANIFEST index 8006cd7..628e586 100644 --- a/MANIFEST +++ b/MANIFEST @@ -4141,6 +4141,7 @@ t/op/gv.t See if typeglobs work t/op/hashassign.t See if hash assignments work t/op/hash.t See if the complexity attackers are repelled t/op/hashwarn.t See if warnings for bad hash assignments work +t/op/heredoc.t See if heredoc edge and corner cases work t/op/inccode.t See if coderefs work in @INC t/op/inccode-tie.t See if tie to @INC works t/op/incfilter.t See if the source filters in coderef-in-@INC work diff --git a/pod/perlop.pod b/pod/perlop.pod index f52f79f..0170202 100644 --- a/pod/perlop.pod +++ b/pod/perlop.pod @@ -1709,10 +1709,14 @@ the terminating string are the value of the item. The terminating string may be either an identifier (a word), or some quoted text. An unquoted identifier works like double quotes. -There may not be a space between the C<< << >> and the identifier, -unless the identifier is explicitly quoted. (If you put a space it -will be treated as a null identifier, which is valid, and matches the -first empty line.) The terminating string must appear by itself + +Prior to perl version 5.12, any space between +the C<< << >> and an unquoted alphanumeric identifier was treated +as the null identifier, which is valid, and matches the +first empty line, but now space is allowed before unquoted identifiers +as long as they are not infix keywords. + +The terminating string must appear by itself (unquoted and with no surrounding whitespace) on the terminating line. If the terminating string is quoted, the type of quotes used determine @@ -1746,6 +1750,9 @@ This is the only form of quoting in perl where there is no need to worry about escaping content, something that code generators can and do make good use of. +A bare terminator prefixed with a backslash also triggers this +quoting mode. + =item Backticks The content of the here doc is treated just as it would be if the @@ -1798,8 +1805,8 @@ you'll need to remove leading whitespace from each line manually: down from the door where it began. FINIS -If you use a here-doc within a delimited construct, such as in C, -the quoted material must come on the lines following the final delimiter. +When using a here-doc within a delimited construct, such as in C, +the quoted material starts on the line following the final delimiter. So instead of s/this/<. - Additionally, the quoting rules for the end of string identifier are not related to Perl's quoting rules -- C, C, and the like are not supported in place of C<''> and C<"">, and the only interpolation is for diff --git a/t/lib/warnings/toke b/t/lib/warnings/toke index e5ca400..caddeb5 100644 --- a/t/lib/warnings/toke +++ b/t/lib/warnings/toke @@ -141,14 +141,14 @@ Use of comma-less variable list is deprecated at - line 5. Use of comma-less variable list is deprecated at - line 5. ######## # toke.c -use warnings 'deprecated' ; +use warnings 'syntax' ; $a = <<; -no warnings 'deprecated' ; +no warnings 'syntax' ; $a = <<; EXPECT -Use of bare << to mean <<"" is deprecated at - line 3. + << interpreted as heredoc with null terminator at - line 3. ######## # toke.c use warnings 'syntax' ; diff --git a/t/test.pl b/t/test.pl index 32c4a37..e96d2c7 100644 --- a/t/test.pl +++ b/t/test.pl @@ -660,8 +660,8 @@ sub _fresh_perl { my($prog, $resolve, $runperl_args, $name) = @_; $runperl_args ||= {}; - $runperl_args->{progfile} = $tmpfile; - $runperl_args->{stderr} = 1; + $runperl_args->{progfile} ||= $tmpfile; + $runperl_args->{stderr} = 1 unless exists $runperl_args->{stderr}; open TEST, ">$tmpfile" or die "Cannot open $tmpfile: $!"; @@ -698,12 +698,6 @@ sub _fresh_perl { } my $pass = $resolve->($results); - unless ($pass) { - _diag "# PROG: \n$prog\n"; - _diag "# EXPECTED:\n", $resolve->(), "\n"; - _diag "# GOT:\n$results\n"; - _diag "# STATUS: $status\n"; - } # Use the first line of the program as a name if none was given unless( $name ) { @@ -712,6 +706,15 @@ sub _fresh_perl { } _ok($pass, _where(), "fresh_perl - $name"); + + unless ($pass) { + _diag "# PROG: \n$prog\n"; + _diag "# EXPECTED:\n", $resolve->(), "\n"; + _diag "# GOT:\n$results\n"; + _diag "# STATUS: $status\n"; + } + + return $pass; } # diff --git a/toke.c b/toke.c index a15dca6..0342ced 100644 --- a/toke.c +++ b/toke.c @@ -11240,6 +11240,29 @@ S_scan_trans(pTHX_ char *start) return s; } +/* and cmp eq if for foreach ge gt le lt ne or unless until while x */ +#define LeftBindingBareword(p) (\ +(*p == 'x' && !isALNUM_lazy_if(&p[1],UTF)) ||\ +(*p == 'e' && p[1] == 'q' && !isALNUM_lazy_if(&p[2],UTF)) ||\ +(*p == 'i' && p[1] == 'f' && !isALNUM_lazy_if(&p[2],UTF)) ||\ +(*p == 'g' && p[1] == 'e' && !isALNUM_lazy_if(&p[2],UTF)) ||\ +(*p == 'g' && p[1] == 't' && !isALNUM_lazy_if(&p[2],UTF)) ||\ +(*p == 'l' && p[1] == 'e' && !isALNUM_lazy_if(&p[2],UTF)) ||\ +(*p == 'l' && p[1] == 't' && !isALNUM_lazy_if(&p[2],UTF)) ||\ +(*p == 'n' && p[1] == 'e' && !isALNUM_lazy_if(&p[2],UTF)) ||\ +(*p == 'o' && p[1] == 'r' && !isALNUM_lazy_if(&p[2],UTF)) ||\ +(*p == 'a' && p[1] == 'n' && p[2] == 'd' && !isALNUM_lazy_if(&p[3],UTF)) ||\ +(*p == 'c' && p[1] == 'm' && p[2] == 'p' && !isALNUM_lazy_if(&p[3],UTF)) ||\ +(*p == 'f' && p[1] == 'o' && p[2] == 'r' && (!isALNUM_lazy_if(&p[3],UTF) ||\ + (p[3] == 'e' && p[4] == 'a' && p[5] == 'c' && p[6] == 'h' && \ + !isALNUM_lazy_if(&p[3],UTF)))) ||\ +(*p == 'u' && p[1] == 'n' && ( \ + (p[2] == 't' && p[3]=='i' && p[4]=='l' && !isALNUM_lazy_if(&p[3],UTF)) || \ + (p[2] == 'l' && p[3]=='e' && p[4]=='s' && p[5] == 's' && \ + !isALNUM_lazy_if(&p[6],UTF)))) ||\ +(*p == 'w' && p[1] == 'h' && p[2] == 'i' && p[3]== 'l' && p[4] == 'e' && \ + !isALNUM_lazy_if(&p[5],UTF)) \ +) STATIC char * S_scan_heredoc(pTHX_ register char *s) @@ -11268,25 +11291,29 @@ S_scan_heredoc(pTHX_ register char *s) d = PL_tokenbuf; e = PL_tokenbuf + sizeof PL_tokenbuf - 1; if (!outer) - *d++ = '\n'; + *d++ = '\n'; peek = s; while (SPACE_OR_TAB(*peek)) - peek++; + peek++; if (*peek == '`' || *peek == '\'' || *peek =='"') { - s = peek; - term = *s++; - s = delimcpy(d, e, s, PL_bufend, term, &len); - d += len; - if (s < PL_bufend) - s++; + s = peek; + term = *s++; + s = delimcpy(d, e, s, PL_bufend, term, &len); + d += len; + if (s < PL_bufend) + s++; } else { + if (*peek == '\\' || ( isALNUM_lazy_if(peek,UTF) && !LeftBindingBareword(peek))) + s = peek; if (*s == '\\') + /* <<\FOO is equivalent to <<'FOO' */ s++, term = '\''; else term = '"'; - if (!isALNUM_lazy_if(s,UTF)) - deprecate_old("bare << to mean <<\"\""); + if (!isALNUM_lazy_if(s,UTF) && ckWARN(WARN_SYNTAX)) + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + " << interpreted as heredoc with null terminator"); for (; isALNUM_lazy_if(s,UTF); s++) { if (d < e) *d++ = *s; @@ -11448,8 +11475,30 @@ S_scan_heredoc(pTHX_ register char *s) #endif if (!outer || !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) { - CopLINE_set(PL_curcop, (line_t)PL_multi_start); - missingterm(PL_tokenbuf); + I32 len_without; + /* in other words, we are out of input */ + /* maybe the string terminator didn't have a newline? */ + sv_catpvn(tmpstr,"\n",1); /* add a newline */ + len_without = SvCUR(tmpstr) - len; + s = SvPVX(tmpstr) + len_without; + if ( len_without > -1 /* there has been enough data */ + && *(s-1) == '\n' /* the terminator starts at line-begin */ + && *s == term /* the terminator starts correctly */ + && memEQ(s,PL_tokenbuf,len) /* and checks out okay */ + ) + { + /* chop the quoted heredoc at the right place */ + SvCUR_set(tmpstr,len_without); + /* pretend we got the string terminator from filter_gets */ + sv_setpvn(PL_linestr,s,len); + PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);; + } + else + { + /* No, we have reached EOF without finding the terminator. */ + CopLINE_set(PL_curcop, (line_t)PL_multi_start); + missingterm(PL_tokenbuf); + } } #ifdef PERL_MAD stuffstart = s - SvPVX(PL_linestr); ```