develooper Front page | perl.perl5.porters | Postings from July 2012

[PATCH] minimal copy of SVs, retain SVp_POK on copy

Thread Next
From:
Rev. Chip
Date:
July 28, 2012 20:32
Subject:
[PATCH] minimal copy of SVs, retain SVp_POK on copy
Message ID:
20120729033147.GA19120@tytlal.tinsaucer.com
The below is pushed to branch chip/mincopy.  It saves memory and fixes the
meaning of private vs. public POK.

Taking advantage of the magic flags fixes, already applied, this patch
finally allows us to distinguish cached string values from original string
values in order to copy only that part of an SV that is actually helpful.
As part of the same fix, it also allows the string vs. number nature of an
SV to be determined, now that cached and original strings are correctly
flagged and (when useful) copied.

For example, in the past, if you use a $var in string operations at all,
then store numbers in it and copy those numbers elsewhere e.g. into a huge
array, _all_ those array elements will be SVt_PVNV even though they hold
only NV values.  This patch fixes that waste.


commit f8a1870fd36d5627ef2eea07fb1b0fc691d495fb
Author: Chip Salzenberg <chip@pobox.com>
Date:   Sat Jul 28 20:12:44 2012 -0700

    Minimal SV copy
     1. Bug fix in sv_setsv: Stop automatically and erroneously turning a cached
        string conversion (SVp_POK) into to a public string value (SVf_POK).
     2. Bug fix for Data::Dumper so it no longer depends on the above bug.
     3. Feature in sv_setsv: Only upgrade target SV as needed, instead of
        assuming that the target SV will need the same representation as the
        source SV.  Should save a lot of memory.
     4. Feature in sv.h: Provide macros to C code that can distinguish original
        value of an SV (string vs number), now that SVf_POK is correct.  Macros
        are SvPOK_pure, SvNIOK_pure, etc.

diff --git a/dist/Data-Dumper/Dumper.pm b/dist/Data-Dumper/Dumper.pm
index a099277..d8fc831 100644
--- a/dist/Data-Dumper/Dumper.pm
+++ b/dist/Data-Dumper/Dumper.pm
@@ -10,7 +10,7 @@
 package Data::Dumper;
 
 BEGIN {
-    $VERSION = '2.135_06'; # Don't forget to set version and release
+    $VERSION = '2.135_07'; # Don't forget to set version and release
 }			   # date in POD!
 
 #$| = 1;
@@ -1332,7 +1332,7 @@ modify it under the same terms as Perl itself.
 
 =head1 VERSION
 
-Version 2.135_06  (March 20 2012)
+Version 2.135_07  (July 20 2012)
 
 =head1 SEE ALSO
 
diff --git a/dist/Data-Dumper/Dumper.xs b/dist/Data-Dumper/Dumper.xs
index 91e4c6c..6b3828f 100644
--- a/dist/Data-Dumper/Dumper.xs
+++ b/dist/Data-Dumper/Dumper.xs
@@ -1185,8 +1185,8 @@ Data_Dumper_Dumpxs(href, ...)
 			val = &PL_sv_undef;
 		    if ((svp = av_fetch(namesav, i, TRUE))) {
 			sv_setsv(name, *svp);
-			if (SvOK(*svp) && !SvPOK(*svp))
-			    (void)SvPV_nolen_const(name);
+			if (SvOK(name))
+			    (void)SvPV_force_nolen(name);
 		    }
 		    else
 			(void)SvOK_off(name);
diff --git a/gv.c b/gv.c
index c165285..c904061 100644
--- a/gv.c
+++ b/gv.c
@@ -2681,6 +2681,7 @@ Perl_amagic_call(pTHX_ SV *left, SV *right, int method, int flags)
 		 * additional cases sv_setsv is safe, too.
 		 */
 		SV* const newref = newSVsv(tmpRef);
+		SvUPGRADE(newref, SVt_PVMG);
 		SvOBJECT_on(newref);
 		/* No need to do SvAMAGIC_on here, as SvAMAGIC macros
 		   delegate to the stash. */
diff --git a/pad.c b/pad.c
index 0077e5b..7ea4245 100644
--- a/pad.c
+++ b/pad.c
@@ -1317,9 +1317,10 @@ S_pad_findlex(pTHX_ const char *namepv, STRLEN namelen, U32 flags, const CV* cv,
 	   type as the source, independent of the flags set, and on it being
 	   "good" and only copying flag bits and pointers that it understands.
 	*/
-	SV *new_namesv = newSVsv(*out_name_sv);
 	AV *  const ocomppad_name = PL_comppad_name;
 	PAD * const ocomppad = PL_comppad;
+	SV *  const new_namesv = newSVsv(*out_name_sv);
+	SvUPGRADE(new_namesv, SVt_PVNV); /* pad names use SvNVX explicitly */
 	PL_comppad_name = MUTABLE_AV(AvARRAY(padlist)[0]);
 	PL_comppad = MUTABLE_AV(AvARRAY(padlist)[1]);
 	PL_curpad = AvARRAY(PL_comppad);
diff --git a/sv.c b/sv.c
index b5950d6..94e5c1d 100644
--- a/sv.c
+++ b/sv.c
@@ -1802,17 +1802,14 @@ ignored.
 I32
 Perl_looks_like_number(pTHX_ SV *const sv)
 {
-    register const char *sbegin;
-    STRLEN len;
-
     PERL_ARGS_ASSERT_LOOKS_LIKE_NUMBER;
 
-    if (SvPOK(sv) || SvPOKp(sv)) {
-	sbegin = SvPV_nomg_const(sv, len);
-    }
+    if (SvNIOK_pure(sv))
+	return TRUE;  /* it doesn't just look like a number, it *is* a number */
+    else if (SvPOK(sv))
+	return grok_number(SvPVX_const(sv), SvCUR(sv), NULL);
     else
-	return SvFLAGS(sv) & (SVf_NOK|SVp_NOK|SVf_IOK|SVp_IOK);
-    return grok_number(sbegin, len, NULL);
+	return FALSE; /* potential stringifications of refs, globs, etc. do not look like numbers */
 }
 
 STATIC bool
@@ -2746,6 +2743,7 @@ Perl_sv_2pv_flags(pTHX_ register SV *const sv, STRLEN *const lp, const I32 flags
 {
     dVAR;
     register char *s;
+    bool priv = FALSE;
 
     if (!sv) {
 	if (lp)
@@ -2905,6 +2903,7 @@ Perl_sv_2pv_flags(pTHX_ register SV *const sv, STRLEN *const lp, const I32 flags
 	Move(ptr, s, len, char);
 	s += len;
 	*s = '\0';
+	priv = TRUE;
     }
     else if (SvNOK(sv)) {
 	if (SvTYPE(sv) < SVt_PVNV)
@@ -2926,6 +2925,7 @@ Perl_sv_2pv_flags(pTHX_ register SV *const sv, STRLEN *const lp, const I32 flags
 	if (s[-1] == '.')
 	    *--s = '\0';
 #endif
+	priv = TRUE;
     }
     else if (isGV_with_GP(sv)) {
 	GV *const gv = MUTABLE_GV(sv);
@@ -2959,7 +2959,10 @@ Perl_sv_2pv_flags(pTHX_ register SV *const sv, STRLEN *const lp, const I32 flags
 	    *lp = len;
 	SvCUR_set(sv, len);
     }
-    SvPOK_on(sv);
+    if (priv)
+	SvPOKp_on(sv);
+    else
+	SvPOK_on(sv);
     DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2pv(%s)\n",
 			  PTR2UV(sv),SvPVX_const(sv)));
     if (flags & SV_CONST_RETURN)
@@ -3919,6 +3922,7 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV* sstr, const I32 flags)
     register U32 sflags;
     register int dtype;
     register svtype stype;
+    MAGIC *svmg = NULL;
 
     PERL_ARGS_ASSERT_SV_SETSV_FLAGS;
 
@@ -3949,8 +3953,10 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV* sstr, const I32 flags)
 	    return;
 	}
 	break;
+
     case SVt_IV:
 	if (SvIOK(sstr)) {
+	  iv_sstr:
 	    switch (dtype) {
 	    case SVt_NULL:
 		sv_upgrade(dstr, SVt_IV);
@@ -3976,12 +3982,18 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV* sstr, const I32 flags)
 	}
 	if (!SvROK(sstr))
 	    goto undef_sstr;
-	if (dtype < SVt_PV && dtype != SVt_IV)
+      rv_sstr:
+	if (dtype < SVt_IV)
 	    sv_upgrade(dstr, SVt_IV);
+	else if (dtype < SVt_PVIV)
+	    sv_upgrade(dstr, SVt_PVIV);
 	break;
 
     case SVt_NV:
+	if (SvIOK(sstr))
+	    goto iv_sstr;
 	if (SvNOK(sstr)) {
+	  nv_sstr:
 	    switch (dtype) {
 	    case SVt_NULL:
 	    case SVt_IV:
@@ -4016,17 +4028,53 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV* sstr, const I32 flags)
 	/* Fall through */
 #endif
     case SVt_PV:
+	if (!SvOK(sstr))
+	    goto undef_sstr;
+      pv_sstr:
 	if (dtype < SVt_PV)
 	    sv_upgrade(dstr, SVt_PV);
 	break;
+
     case SVt_PVIV:
-	if (dtype < SVt_PVIV)
-	    sv_upgrade(dstr, SVt_PVIV);
+	if (!SvOK(sstr))
+	    goto undef_sstr;
+	if (SvIOK_pure(sstr))
+	    goto iv_sstr;
+      pviv_sstr:
+	if (SvIOKp(sstr)) {
+	    if (dtype < SVt_PVIV)
+		sv_upgrade(dstr, SVt_PVIV);
+	}
+	else {
+	    if (dtype < SVt_PV)
+		sv_upgrade(dstr, SVt_PV);
+	}
 	break;
+
     case SVt_PVNV:
-	if (dtype < SVt_PVNV)
-	    sv_upgrade(dstr, SVt_PVNV);
+	if (!SvOK(sstr))
+	    goto undef_sstr;
+	if (SvNIOK_pure(sstr)) {
+	    if (SvIOK(sstr))
+		goto iv_sstr;   /* authoritative IV can stand alone */
+	    else
+		goto nv_sstr;
+	}
+      pvnv_sstr:
+	if (SvNOKp(sstr)) {
+	    if (dtype < SVt_PVNV)
+		sv_upgrade(dstr, SVt_PVNV);
+	}
+	else if (SvIOKp(sstr)) {
+	    if (dtype < SVt_PVIV)
+		sv_upgrade(dstr, SVt_PVIV);
+	}
+	else {
+	    if (dtype < SVt_PV)
+		sv_upgrade(dstr, SVt_PV);
+	}
 	break;
+
     default:
 	{
 	const char * const type = sv_reftype(sstr,0);
@@ -4043,6 +4091,8 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV* sstr, const I32 flags)
 	    sv_upgrade(dstr, SVt_REGEXP);
 	break;
 
+	/* Fall through */
+
 	/* case SVt_BIND: */
     case SVt_PVLV:
     case SVt_PVGV:
@@ -4056,6 +4106,38 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV* sstr, const I32 flags)
 		    glob_assign_glob(dstr, sstr, dtype);
 		    return;
 	}
+	svmg = SvVSTRING_mg(sstr);
+	if ((stype == SVt_PVMG || stype == SVt_PVLV)
+	    && !svmg   /* vstring magic is copied; will need SVt_PVMG */
+	    && !PL_tainting
+	    && !PL_tainted)
+	{
+	    switch (SvFLAGS(sstr) & (SVf_POK|SVf_IOK|SVf_NOK|SVf_ROK|SVpgv_GP|SVf_FAKE)) {
+	    case 0:
+		goto undef_sstr;
+	    case SVf_IOK:
+		goto iv_sstr;
+	    case SVf_NOK:
+		goto nv_sstr;
+	    case SVf_POK:
+		/* a string that has been used as a number probably will be again */
+		switch (SvFLAGS(sstr) & (SVp_IOK|SVp_NOK)) {
+		case 0:
+		    goto pv_sstr;
+		case SVp_IOK:
+		    goto pviv_sstr;
+		default:
+		    goto pvnv_sstr;
+		}
+	    case SVf_POK|SVf_IOK:
+		goto pviv_sstr;
+	    case SVf_POK|SVf_NOK:
+	    case SVf_POK|SVf_NOK|SVf_IOK:
+		goto pvnv_sstr;
+	    case SVf_ROK:
+		goto rv_sstr;
+	    }
+	}
 	if (stype == SVt_PVLV)
 	    SvUPGRADE(dstr, SVt_PVNV);
 	else
@@ -4170,7 +4252,7 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV* sstr, const I32 flags)
     else if (dtype == SVt_REGEXP && stype == SVt_REGEXP) {
 	reg_temp_copy((REGEXP*)dstr, (REGEXP*)sstr);
     }
-    else if (sflags & SVp_POK) {
+    else if ((sflags & SVp_POK) && !SvNIOK_pure(sstr)) {
         bool isSwipe = 0;
 
 	/*
@@ -4305,9 +4387,6 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV* sstr, const I32 flags)
                 SvTEMP_off(sstr);
             }
         }
-	if (sflags & SVp_NOK) {
-	    SvNV_set(dstr, SvNVX(sstr));
-	}
 	if (sflags & SVp_IOK) {
 	    SvIV_set(dstr, SvIVX(sstr));
 	    /* Must do this otherwise some other overloaded use of 0x80000000
@@ -4315,14 +4394,17 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV* sstr, const I32 flags)
 	    if (sflags & SVf_IVisUV)
 		SvIsUV_on(dstr);
 	}
-	SvFLAGS(dstr) |= sflags & (SVf_IOK|SVp_IOK|SVf_NOK|SVp_NOK|SVf_UTF8);
-	{
-	    const MAGIC * const smg = SvVSTRING_mg(sstr);
-	    if (smg) {
-		sv_magic(dstr, NULL, PERL_MAGIC_vstring,
-			 smg->mg_ptr, smg->mg_len);
-		SvRMAGICAL_on(dstr);
-	    }
+	SvFLAGS(dstr) |= sflags & (SVf_IOK|SVp_IOK|SVf_UTF8);
+	/* NV assignment can be skipped when IOK, to reduce upgrades; see PVNV case in top switch */
+	if ((sflags & SVp_NOK) && PL_valid_types_NV_set[dtype & SVt_MASK]) {
+	    SvNV_set(dstr, SvNVX(sstr));
+	    SvFLAGS(dstr) |= sflags & (SVf_NOK|SVp_NOK);
+	}
+
+	/* copy vstring magic, if any */
+	if (svmg) {
+	    sv_magic(dstr, NULL, PERL_MAGIC_vstring, svmg->mg_ptr, svmg->mg_len);
+	    SvRMAGICAL_on(dstr);
 	}
     }
     else if (sflags & (SVp_IOK|SVp_NOK)) {
diff --git a/sv.h b/sv.h
index 4c58ee4..8985d5c 100644
--- a/sv.h
+++ b/sv.h
@@ -918,6 +918,12 @@ in gv.h: */
 #define SvNOK_nogthink(sv)	((SvFLAGS(sv) & (SVf_NOK|SVf_THINKFIRST|SVs_GMG)) == SVf_NOK)
 #define SvNIOK_nogthink(sv)	(SvNIOK(sv) && !(SvFLAGS(sv) & (SVf_THINKFIRST|SVs_GMG)))
 
+#define SvPOK_pure(sv)		((SvFLAGS(sv) & (SVf_POK|SVf_IOK|SVf_NOK|SVf_ROK|SVpgv_GP)) == SVf_POK)
+#define SvIOK_pure(sv)		((SvFLAGS(sv) & (SVf_POK|SVf_IOK|SVf_NOK|SVf_ROK|SVpgv_GP)) == SVf_IOK)
+#define SvUOK_pure(sv)		((SvFLAGS(sv) & (SVf_POK|SVf_IOK|SVf_NOK|SVf_ROK|SVpgv_GP|SVf_IVisUV)) == (SVf_IOK|SVf_IVisUV))
+#define SvNOK_pure(sv)		((SvFLAGS(sv) & (SVf_POK|SVf_IOK|SVf_NOK|SVf_ROK|SVpgv_GP)) == SVf_NOK)
+#define SvNIOK_pure(sv)		(SvNIOK(sv) && !(SvFLAGS(sv) &(SVf_POK|SVf_ROK|SVpgv_GP)))
+
 #define SvPOK_utf8_nog(sv)	((SvFLAGS(sv) & (SVf_POK|SVf_UTF8|SVs_GMG)) == (SVf_POK|SVf_UTF8))
 #define SvPOK_utf8_nogthink(sv)	((SvFLAGS(sv) & (SVf_POK|SVf_UTF8|SVf_THINKFIRST|SVs_GMG)) == (SVf_POK|SVf_UTF8))
 
diff --git a/t/op/eval.t b/t/op/eval.t
index 9866ca7..bccc3f6 100644
--- a/t/op/eval.t
+++ b/t/op/eval.t
@@ -479,8 +479,9 @@ SKIP: {
     open $prog, ">", $tempfile or die "Can't create test file";
     print $prog <<'END_EVAL_TEST';
     use Devel::Peek;
-    $! = 0;
-    $@ = $!;
+    $@ = "."; pos($@) = 1;  # force upgrade to PVMG
+    eval { 1 };
+    $@ = "";
     Dump($@);
     print STDERR "******\n";
     eval { die "\x{a10d}"; };
@@ -496,7 +497,6 @@ END_EVAL_TEST
 
     is($tombstone, "Done\n", 'Program completed successfully');
 
-    $first =~ s/p?[NI]OK,//g;
     s/ PV = 0x[0-9a-f]+/ PV = 0x/ foreach $first, $second;
     s/ LEN = [0-9]+/ LEN = / foreach $first, $second;
     # Dump may double newlines through pipes, though not files
diff --git a/t/op/upgrade.t b/t/op/upgrade.t
index 5f2ffa6..f7728fc 100644
--- a/t/op/upgrade.t
+++ b/t/op/upgrade.t
@@ -12,6 +12,16 @@ BEGIN {
 
 use strict;
 
+my $can_peek;
+if (!is_miniperl()) {
+    require Config;
+    $can_peek = $Config::Config{extensions} =~ /\bDevel\/Peek\b/;
+    if ($can_peek) {
+        require Devel::Peek;
+        Devel::Peek->import('Dump');
+    }
+}
+
 my $null;
 
 $! = 1;
@@ -23,17 +33,23 @@ my %types = (
     pv => "Perl rules",
     pviv => 3,
     pvnv => 1==1,
-    pvmg => $^,
+    pvmg => "hello",
 );
+pos($types{pvmg}) = 1; # pos() is stored as magic
 
 # This is somewhat cheating but I can't think of anything built in that I can
 # copy that already has type PVIV
 $types{pviv} = "Perl rules!";
 
-# use Devel::Peek; Dump $pvmg;
-
 my @keys = keys %types;
-plan tests => @keys * @keys;
+plan tests => @keys * @keys + ($can_peek ? @keys + 14 : 0);
+
+if ($can_peek) {
+    foreach my $type (@keys) {
+        my $peek = peek($types{$type});
+        ok(peek_is_type($peek, $type), "$type test value has right type", $peek);
+    }
+}
 
 foreach my $source_type (@keys) {
     foreach my $dest_type (@keys) {
@@ -48,3 +64,61 @@ foreach my $source_type (@keys) {
 	is ($vars->{dest}, $vars->{source});
     }
 }
+
+if ($can_peek) {
+    # test minimal upgrades: according to the source value, not the
+    # source type
+    my @minimal = qw( null iv nv pv pvnv );
+    my %dest;
+    for my $type (@minimal) {
+        my $mg = "hi";
+        pos($mg) = 1;           # start as PVMG
+        $mg = $types{$type};    # does not downgrade
+        my $peek = peek($mg);
+        ok(peek_is_type($peek, 'pvmg'), "\U$type\E assign does not downgrade PVMG", $peek);
+        $dest{$type} = $mg;     # should only upgrade as needed
+        $peek = peek($dest{$type});
+        ok(peek_is_type($peek, $type), "\U$type\E minimal upgrade", $peek);
+    }
+
+    # test specific case of strings used as numbers
+    {
+        my $pviv = "1";
+        my $foo = 1 << $pviv;
+        my $peek = peek($pviv);
+        ok(peek_is_type($peek, 'pviv'), "string used as integer should be PVIV", $peek);
+        my $copy = $pviv;
+        $peek = peek($copy);
+        ok(peek_is_type($peek, 'pviv'), "string used as integer should copy to PVIV", $peek);
+    }
+    {
+        my $pvnv = "1.1";
+        my $foo = 2.2 * $pvnv;
+        my $peek = peek($pvnv);
+        ok(peek_is_type($peek, 'pvnv'), "string used as number should be PVNV", $peek);
+        my $copy = $pvnv;
+        $peek = peek($copy);
+        ok(peek_is_type($peek, 'pvnv'), "string used as number should copy to PVNV", $peek);
+    }
+}
+
+
+sub peek {
+    open SAVERR, '>&STDERR';
+    open STDERR, '>', "upgrade$$.tmp";
+    Dump($_[0]);
+    open STDERR, '>&SAVERR';
+    close SAVERR;
+    open my $fh, '<', "upgrade$$.tmp";
+    local $/;
+    <$fh>
+}
+sub peek_is_type {
+    my ($peek, $expect) = @_;
+    $expect =~ s/^rv/iv/;  # SVt_IV holds references these days
+    $peek =~ /^SV = \U$expect\E\b/;
+}
+sub peek_flags {
+    $_[0] =~ m{^ \s* FLAGS \s* = \s* \( (.+) \) }xm
+      ? $1 : undef
+}

-- 
Chip Salzenberg

Thread Next


nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About