Closed p5pRT closed 20 years ago
This is a bug report for perl from rick@consumercontact.com, generated with the help of perlbug 1.26 running under perl 5.00503.
/[\S]/ is matching more stuff than /\S/
I get the same results with 5.003 on DGUX, 5.004_03 on Linux, 5.005_03 on Windows and 5.5.650 on this platform.
$ perl -e '@a=map chr,0..255;die if grep(/\S/,@a)!=grep(/[^\s]/,@a)'
Died at -e line 1.
$ perl -e '@a=map chr,0..255;die if grep(/\S/,@a)!=grep(/[\S]/,@a)'
Died at -e line 1.
$ perl -e '@a=map chr,0..255;die if grep(/\s/,@a)!=grep(/[\s]/,@a)'
$ perl -e '@a=map chr,0..255;die if grep(/\s/,@a)!=grep(/[^\S]/,@a)'
These are the characters that don't match \S and [\S] on this system.
$ perl -le 'for (0..255) {print unless chr($_) =~ /\S/}'
0 9 10 11 12 13 32
$ perl -le 'for (0..255) {print unless chr($_) =~ /[\S]/}'
9 10 12 13 32
Site configuration information for perl 5.00503:
Configured by rick at Mon Jul 26 15:51:49 EDT 1999.
Summary of my perl5 (5.0 patchlevel 5 subversion 3) configuration: Platform: osname=svr4\, osvers=\, archname=i386-svr4 uname='unix_sv consumer 4.2mp 2.1.3 i386 x86at ' hint=recommended\, useposix=true\, d_sigaction=define usethreads=undef useperlio=undef d_sfio=undef Compiler: cc='/bin/cc'\, optimize='-O'\, gccversion= cppflags='-I/usr/include -I/usr/ucbinclude -I/usr/local/include' ccflags ='-I/usr/include -I/usr/ucbinclude -I/usr/local/include' stdchar='unsigned char'\, d_stdstdio=define\, usevfork=false intsize=4\, longsize=4\, ptrsize=4\, doublesize=8 d_longlong=undef\, longlongsize=\, d_longdbl=define\, longdblsize=12 alignbytes=4\, usemymalloc=y\, prototype=define Linker and Libraries: ld='/bin/cc'\, ldflags ='-L/usr/ccs/lib -L/usr/ucblib -L/usr/local/lib -L/usr/gnu/lib' libpth=/usr/local/lib /usr/gnu/lib /shlib /lib /usr/lib /usr/ccs/lib /usr/ucblib libs=-lsocket -lnsl -ldbm -ldl -lld -lm -lc -lcrypt -lucb libc=\, so=so\, useshrplib=true\, libperl=libperl.so Dynamic Linking: dlsrc=dl_dlopen.xs\, dlext=so\, d_dlsymun=undef\, ccdlflags=' ' cccdlflags='-KPIC'\, lddlflags='-G -L/usr/ccs/lib -L/usr/ucblib -L/usr/local/lib -L/usr/gnu/lib'
Locally applied patches:
@INC for perl 5.00503: /usr/local/lib/perl5/5.00503/i386-svr4 /usr/local/lib/perl5/5.00503 /usr/local/lib/perl5/site_perl/5.005/i386-svr4 /usr/local/lib/perl5/site_perl/5.005 .
Environment for perl 5.00503: HOME=/home1/rick LANG=C LANGUAGE (unset) LD_LIBRARY_PATH=/usr/opt/dash/lib:.:/usr/lib/ARCserve LOGDIR (unset) PATH=/usr/opt/dash/ccl_custom:/usr/opt/dash:/usr/opt/vsifax/obin:/data1/ubl:/usr/local/bin:/usr/local/shbin:/usr/bin:/usr/ccs/bin:/usr/ucb:/usr/opt/vsifax/bin:/usr/lib/ARCserve:/usr/sbin:/usr/X/bin:/data1/time/PROG:/home1/rick/bin:/opt/bin:. PERL_BADLANG (unset) SHELL=/usr/bin/ksh
It looks like /\D/ isn't the same as /[\D]/ in 5.5.650 either (it is in 5.005_03).
I have probably naively gone too far on this, but it seems to work. It looked to me like the code for DIGIT and SPACE should just be symmetric with that for ALNUM, so I made it that way.
I don't know how to test the UTF8 stuff.
Rick
*** perl5.5.650/regexec.c.old Thu Feb 17 09:28:07 2000 --- perl5.5.650/regexec.c Thu Feb 17 13:36:34 2000 *************** *** 2084\,2090 **** PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case SPACE: ! if (!nextchr && locinput >= PL_regeol) sayNO; if (!(OP(scan) == SPACE ? isSPACE(nextchr) : isSPACE_LC(nextchr))) --- 2084\,2090 ---- PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case SPACE: ! if (!nextchr) sayNO; if (!(OP(scan) == SPACE ? isSPACE(nextchr) : isSPACE_LC(nextchr))) *************** *** 2095\,2105 **** PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case SPACEUTF8: ! if (!nextchr && locinput >= PL_regeol) sayNO; if (nextchr & 0x80) { if (!(OP(scan) == SPACEUTF8 ! ? swash_fetch(PL_utf8_space\,(U8*)locinput) : isSPACE_LC_utf8((U8*)locinput))) { sayNO; --- 2095\,2105 ---- PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case SPACEUTF8: ! if (!nextchr) sayNO; if (nextchr & 0x80) { if (!(OP(scan) == SPACEUTF8 ! ? swash_fetch(PL_utf8_space\, (U8*)locinput) : isSPACE_LC_utf8((U8*)locinput))) { sayNO; *************** *** 2117\,2125 **** PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case NSPACE: ! if (!nextchr) sayNO; ! if (OP(scan) == SPACE ? isSPACE(nextchr) : isSPACE_LC(nextchr)) sayNO; nextchr = UCHARAT(++locinput); --- 2117\,2125 ---- PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case NSPACE: ! if (!nextchr && locinput >= PL_regeol) sayNO; ! if (OP(scan) == NSPACE ? isSPACE(nextchr) : isSPACE_LC(nextchr)) sayNO; nextchr = UCHARAT(++locinput); *************** *** 2128\,2138 **** PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case NSPACEUTF8: ! if (!nextchr) sayNO; if (nextchr & 0x80) { if (OP(scan) == NSPACEUTF8 ! ? swash_fetch(PL_utf8_space\,(U8*)locinput) : isSPACE_LC_utf8((U8*)locinput)) { sayNO; --- 2128\,2138 ---- PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case NSPACEUTF8: ! if (!nextchr && locinput >= PL_regeol) sayNO; if (nextchr & 0x80) { if (OP(scan) == NSPACEUTF8 ! ? 
swash_fetch(PL_utf8_space\, (U8*)locinput) : isSPACE_LC_utf8((U8*)locinput)) { sayNO; *************** *** 2150\,2156 **** PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case DIGIT: ! if (!nextchr && locinput >= PL_regeol) sayNO; if (!(OP(scan) == DIGIT ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))) --- 2150\,2156 ---- PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case DIGIT: ! if (!nextchr) sayNO; if (!(OP(scan) == DIGIT ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))) *************** *** 2164\,2172 **** if (!nextchr) sayNO; if (nextchr & 0x80) { ! if (OP(scan) == NDIGITUTF8 ! ? swash_fetch(PL_utf8_digit\,(U8*)locinput) ! : isDIGIT_LC_utf8((U8*)locinput)) { sayNO; } --- 2164\,2172 ---- if (!nextchr) sayNO; if (nextchr & 0x80) { ! if (!(OP(scan) == DIGITUTF8 ! ? swash_fetch(PL_utf8_digit\, (U8*)locinput) ! : isDIGIT_LC_utf8((U8*)locinput))) { sayNO; } *************** *** 2174\,2180 **** nextchr = UCHARAT(locinput); break; } ! if (!isDIGIT(nextchr)) sayNO; nextchr = UCHARAT(++locinput); break; --- 2174\,2181 ---- nextchr = UCHARAT(locinput); break; } ! if (!(OP(scan) == DIGITUTF8 ! ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))) sayNO; nextchr = UCHARAT(++locinput); break; *************** *** 2182\,2190 **** PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case NDIGIT: ! if (!nextchr) sayNO; ! if (OP(scan) == DIGIT ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)) sayNO; nextchr = UCHARAT(++locinput); --- 2183\,2191 ---- PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case NDIGIT: ! if (!nextchr && locinput >= PL_regeol) sayNO; ! if (OP(scan) == NDIGIT ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)) sayNO; nextchr = UCHARAT(++locinput); *************** *** 2196\,2208 **** if (!nextchr && locinput >= PL_regeol) sayNO; if (nextchr & 0x80) { ! if (swash_fetch(PL_utf8_digit\,(U8*)locinput)) sayNO; locinput += PL_utf8skip[nextchr]; nextchr = UCHARAT(locinput); break; } ! 
if (isDIGIT(nextchr)) sayNO; nextchr = UCHARAT(++locinput); break; --- 2197\,2214 ---- if (!nextchr && locinput >= PL_regeol) sayNO; if (nextchr & 0x80) { ! if (OP(scan) == NDIGITUTF8 ! ? swash_fetch(PL_utf8_digit\, (U8*)locinput) ! : isDIGIT_LC_utf8((U8*)locinput)) ! { sayNO; + } locinput += PL_utf8skip[nextchr]; nextchr = UCHARAT(locinput); break; } ! if (OP(scan) == NDIGITUTF8 ! ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)) sayNO; nextchr = UCHARAT(++locinput); break;
Migrated from rt.perl.org#2152 (status was 'resolved')
Searchable as RT2152$