Front page | perl.perl5.porters |
Postings from January 2019
[perl #133782] length($^R) sometimes wrong
From:
Chris Wagner via RT
Date:
January 25, 2019 05:53
Subject:
[perl #133782] length($^R) sometimes wrong
Message ID:
rt-4.0.24-30496-1548020994-1249.133782-15-0@perl.org
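To me, this looks like an issue in the UTF-8 character counter. length() has to iterate through a UTF-8 string and decode it in order to count its characters. It could be as simple as an off-by-one error due to magic being present: in the second Dump of $^R below, the value is "c" (CUR = 1), yet the scalar carries PERL_MAGIC_utf8 with MG_LEN = 2, which matches the wrong length that is reported.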
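Notice especially that "\N{U+E4}" gives a different result than "\x{E4}", even though the two strings compare equal (see the last example): with "\x{E4}" the captured string is not UTF8-flagged and length($^R) is correct, whereas with "\N{U+E4}" it is UTF8-flagged and length($^R) is wrong on the second match.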
Examples:
Perl v5.22.4
$ perl
use Devel::Peek; while ( "\N{U+E4}bc" =~ /(..?)(?{$^N})/g ) { Dump($^R); Dump(my $r = $^R); print STDERR length($^R), "\n"; }
SV = PVMG(0x6000af300) at 0x60002b5b8
REFCNT = 1
FLAGS = (POK,pPOK,UTF8)
IV = 0
NV = 0
PV = 0x60009f8d0 "\303\244b"\0 [UTF8 "\x{e4}b"]
CUR = 3
LEN = 10
SV = PVMG(0x6000af2d0) at 0x60008d300
REFCNT = 1
FLAGS = (POK,pPOK,UTF8)
IV = 0
NV = 0
PV = 0x60009f8b0 "\303\244b"\0 [UTF8 "\x{e4}b"]
CUR = 3
LEN = 10
2
SV = PVMG(0x6000af300) at 0x60002b5b8
REFCNT = 1
FLAGS = (SMG,POK,pPOK,UTF8)
IV = 0
NV = 0
PV = 0x60009f8d0 "c"\0 [UTF8 "c"]
CUR = 1
LEN = 10
MAGIC = 0x600069df0
MG_VIRTUAL = &PL_vtbl_utf8
MG_TYPE = PERL_MAGIC_utf8(w)
MG_LEN = 2
SV = PVMG(0x6000af2d0) at 0x60008d300
REFCNT = 1
FLAGS = (POK,pPOK,UTF8)
IV = 0
NV = 0
PV = 0x60009f8b0 "c"\0 [UTF8 "c"]
CUR = 1
LEN = 10
2
$ perl
use B; while( "\N{U+E4}bc" =~ /(..?)(?{$^N})/g ) { printf "^R len: %s; Str ^R len: %s; ^R: %s\n", length($^R), length("$^R"), B::perlstring $^R }
^R len: 2; Str ^R len: 2; ^R: "\x{e4}b"
^R len: 2; Str ^R len: 1; ^R: "c"
$ perl
use B; while( "\N{U+E4}bc" =~ /(..?)(?{$^N})/g ) { printf "Str ^R len: %s; ^R len: %s; ^R: %s\n", length("$^R"), length($^R), B::perlstring $^R }
Str ^R len: 2; ^R len: 2; ^R: "\x{e4}b"
Str ^R len: 1; ^R len: 2; ^R: "c"
$ perl
use bytes "length";use B; while( "\N{U+E4}bc" =~ /(..?)(?{$^N})/g ) { printf "Str ^R len: %s; ^R len: %s; ^R: %s\n", length("$^R"), length($^R), B::perlstring $^R }
Str ^R len: 2; ^R len: 2; ^R: "\303\244"
Str ^R len: 2; ^R len: 2; ^R: "bc"
$ perl
use bytes(); use B; while( "\N{U+E4}bc" =~ /(..?)(?{$^N})/g ) { printf "Str ^R len: %s; ^R len: %s; ^R: %s\n", bytes::length("$^R"), bytes::length($^R), B::perlstring $^R }
Str ^R len: 3; ^R len: 3; ^R: "\x{e4}b"
Str ^R len: 1; ^R len: 1; ^R: "c"
$ perl
use bytes(); use B; while( "\N{U+E4}bc" =~ /(..?)(?{$^N})/g ) { printf "Str ^R len: %s; ^R len: %s; ^R: %s\n", length("$^R"), length($^R), B::perlstring $^R }
Str ^R len: 2; ^R len: 2; ^R: "\x{e4}b"
Str ^R len: 1; ^R len: 2; ^R: "c"
$ perl
use Devel::Peek; Dump "\xFE";
SV = PV(0x60002c410) at 0x6000683b0
REFCNT = 1
FLAGS = (POK,IsCOW,READONLY,PROTECT,pPOK)
PV = 0x60006f570 "\376"\0
CUR = 1
LEN = 10
COW_REFCNT = 0
$ perl
use Devel::Peek; Dump my $x = "\x{FEFE}";
SV = PV(0x60002c1c0) at 0x6000683b0
REFCNT = 1
FLAGS = (POK,IsCOW,pPOK,UTF8)
PV = 0x60006f570 "\357\273\276"\0 [UTF8 "\x{fefe}"]
CUR = 3
LEN = 10
COW_REFCNT = 1
$ perl
use bytes(); use B; while( "\x{E4}bc" =~ /(..?)(?{$^N})/g ) { printf "Str ^R len: %s; ^R len: %s; ^R: %s\n", length("$^R"), length($^R), B::perlstring $^R }
Str ^R len: 2; ^R len: 2; ^R: "\344b"
Str ^R len: 1; ^R len: 1; ^R: "c"
$ perl
use bytes(); use B; while( "\N{U+E4}bc" =~ /(..?)(?{$^N})/g ) { printf "Str ^R len: %s; ^R len: %s; ^R: %s\n", length("$^R"), length($^R), B::perlstring $^R }
Str ^R len: 2; ^R len: 2; ^R: "\x{e4}b"
Str ^R len: 1; ^R len: 2; ^R: "c"
$ perl
print "\N{U+E4}" eq "\x{E4}";
1
$
---
via perlbug: queue: perl5 status: open
https://rt.perl.org/Ticket/Display.html?id=133782
-
[perl #133782] length($^R) sometimes wrong
by Chris Wagner via RT