develooper Front page | perl.perl5.porters | Postings from January 2019

[perl #133782] length($^R) sometimes wrong

From:
Chris Wagner via RT
Date:
January 25, 2019 05:53
Subject:
[perl #133782] length($^R) sometimes wrong
Message ID:
rt-4.0.24-30496-1548020994-1249.133782-15-0@perl.org
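To me, this looks like an issue in the UTF-8 character counter.  length() has to iterate through a UTF-8 string, decoding characters, in order to count them.  It could be as simple as an off-by-one error due to magic being present: in the second pair of dumps below, $^R holds the one-character string "c" (CUR = 1) but still carries PERL_MAGIC_utf8 with MG_LEN = 2, which matches the wrong length that is reported.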

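Notice especially that "\N{U+E4}" gives a different result than "\x{E4}" (compare the last two while-loop examples below), even though the two strings compare equal with eq (see the final example).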

Examples:
Perl v5.22.4

$ perl
use Devel::Peek; while ( "\N{U+E4}bc" =~ /(..?)(?{$^N})/g ) { Dump($^R); Dump(my $r = $^R); print STDERR length($^R), "\n"; }
SV = PVMG(0x6000af300) at 0x60002b5b8
  REFCNT = 1
  FLAGS = (POK,pPOK,UTF8)
  IV = 0
  NV = 0
  PV = 0x60009f8d0 "\303\244b"\0 [UTF8 "\x{e4}b"]
  CUR = 3
  LEN = 10
SV = PVMG(0x6000af2d0) at 0x60008d300
  REFCNT = 1
  FLAGS = (POK,pPOK,UTF8)
  IV = 0
  NV = 0
  PV = 0x60009f8b0 "\303\244b"\0 [UTF8 "\x{e4}b"]
  CUR = 3
  LEN = 10
2
SV = PVMG(0x6000af300) at 0x60002b5b8
  REFCNT = 1
  FLAGS = (SMG,POK,pPOK,UTF8)
  IV = 0
  NV = 0
  PV = 0x60009f8d0 "c"\0 [UTF8 "c"]
  CUR = 1
  LEN = 10
  MAGIC = 0x600069df0
    MG_VIRTUAL = &PL_vtbl_utf8
    MG_TYPE = PERL_MAGIC_utf8(w)
    MG_LEN = 2
SV = PVMG(0x6000af2d0) at 0x60008d300
  REFCNT = 1
  FLAGS = (POK,pPOK,UTF8)
  IV = 0
  NV = 0
  PV = 0x60009f8b0 "c"\0 [UTF8 "c"]
  CUR = 1
  LEN = 10
2

$ perl
use B; while( "\N{U+E4}bc" =~ /(..?)(?{$^N})/g ) { printf "^R len: %s; Str ^R len: %s; ^R: %s\n", length($^R), length("$^R"), B::perlstring $^R }
^R len: 2; Str ^R len: 2; ^R: "\x{e4}b"
^R len: 2; Str ^R len: 1; ^R: "c"

$ perl
use B; while( "\N{U+E4}bc" =~ /(..?)(?{$^N})/g ) { printf "Str ^R len: %s; ^R len: %s; ^R: %s\n", length("$^R"), length($^R), B::perlstring $^R }
Str ^R len: 2; ^R len: 2; ^R: "\x{e4}b"
Str ^R len: 1; ^R len: 2; ^R: "c"

$ perl
use bytes "length";use B; while( "\N{U+E4}bc" =~ /(..?)(?{$^N})/g ) { printf "Str ^R len: %s; ^R len: %s; ^R: %s\n", length("$^R"), length($^R), B::perlstring $^R }
Str ^R len: 2; ^R len: 2; ^R: "\303\244"
Str ^R len: 2; ^R len: 2; ^R: "bc"

$ perl
use bytes(); use B; while( "\N{U+E4}bc" =~ /(..?)(?{$^N})/g ) { printf "Str ^R len: %s; ^R len: %s; ^R: %s\n", bytes::length("$^R"), bytes::length($^R), B::perlstring $^R }
Str ^R len: 3; ^R len: 3; ^R: "\x{e4}b"
Str ^R len: 1; ^R len: 1; ^R: "c"


$ perl
use bytes(); use B; while( "\N{U+E4}bc" =~ /(..?)(?{$^N})/g ) { printf "Str ^R len: %s; ^R len: %s; ^R: %s\n", length("$^R"), length($^R), B::perlstring $^R }
Str ^R len: 2; ^R len: 2; ^R: "\x{e4}b"
Str ^R len: 1; ^R len: 2; ^R: "c"

$ perl
use Devel::Peek; Dump "\xFE";
SV = PV(0x60002c410) at 0x6000683b0
  REFCNT = 1
  FLAGS = (POK,IsCOW,READONLY,PROTECT,pPOK)
  PV = 0x60006f570 "\376"\0
  CUR = 1
  LEN = 10
  COW_REFCNT = 0

$ perl
use Devel::Peek; Dump my $x = "\x{FEFE}";
SV = PV(0x60002c1c0) at 0x6000683b0
  REFCNT = 1
  FLAGS = (POK,IsCOW,pPOK,UTF8)
  PV = 0x60006f570 "\357\273\276"\0 [UTF8 "\x{fefe}"]
  CUR = 3
  LEN = 10
  COW_REFCNT = 1

$ perl
use bytes(); use B; while( "\x{E4}bc" =~ /(..?)(?{$^N})/g ) { printf "Str ^R len: %s; ^R len: %s; ^R: %s\n", length("$^R"), length($^R), B::perlstring $^R }
Str ^R len: 2; ^R len: 2; ^R: "\344b"
Str ^R len: 1; ^R len: 1; ^R: "c"

$ perl
use bytes(); use B; while( "\N{U+E4}bc" =~ /(..?)(?{$^N})/g ) { printf "Str ^R len: %s; ^R len: %s; ^R: %s\n", length("$^R"), length($^R), B::perlstring $^R }
Str ^R len: 2; ^R len: 2; ^R: "\x{e4}b"
Str ^R len: 1; ^R len: 2; ^R: "c"

$ perl
print "\N{U+E4}" eq "\x{E4}";
1
$



---
via perlbug:  queue: perl5 status: open
https://rt.perl.org/Ticket/Display.html?id=133782



nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About