develooper Front page | perl.perl5.porters | Postings from November 2003

perl's hash API

Thread Next
From:
Nicholas Clark
Date:
November 16, 2003 14:01
Subject:
perl's hash API
Message ID:
20031116220047.GF6287@plum.flirble.org
Since 5.003_01 Perl's hash functions have been available with 2 APIs.
The original API takes a char */STRLEN pair for the key. 5.003_01
introduced a second API that takes SVs, and for fetch and store returns
HE*s rather than SV**s. The implementation arrived in this patch from
Sarathy, which also introduced the shared string table:

http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/9605/msg00790.html

The second API suffixes the 4 main functions (delete, exists, fetch, store)
with _ent. Subsequently perl has moved the PP functions used by the ops over to
the *_ent variants. Internally the *_ent variants are complete code
duplications of the originals, and as the OPs don't use the originals,
they aren't tested.

It seems that all 4 non-*_ent functions were broken for tied hashes with
utf8 keys. I've patched the core and XS::APItest as appended, and added
ext/XS/APItest/t/hash.t to test the non-*_ent variants.

I don't like the code duplication.
It makes hv.c (and hv.o) larger than they need to be (which may cause
cache misses)
It makes maintenance harder (eg see changes 21446 and 21469)
It makes bugs more likely
 
I'm proposing (in order)

1: To merge the implementations of the *_ent and non-ent functions.
   This may mean that 4 new static functions need to be created in hv.c,
   and the existing 8 functions call them

2: To merge fetch and store
   [Trace the code path for hv_fetch_ent called with lval true, where the
    key is not yet present.
    Now look at pp_helem in pp_hot.c, which does most hash assignments
    including new stores.
    D'oh!]

Comments?

Nicholas Clark

PS If anyone is wondering where constant subs => constants came from:
   http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/9606/msg00276.html

ext/XS/APItest/t/hash. is new, not appended, see
http://public.activestate.com/cgi-bin/perlbrowse?file=ext%2FXS%2FAPItest%2Ft%2Fhash.t&rev=


--- hv.c.orig	Sat Nov 15 15:18:11 2003
+++ hv.c	Sun Nov 16 21:23:14 2003
@@ -226,15 +226,19 @@ S_hv_fetch_flags(pTHX_ HV *hv, const cha
 	return 0;
 
     if (SvRMAGICAL(hv)) {
-        /* All this clause seems to be utf8 unaware.
-           By moving the utf8 stuff out to hv_fetch_flags I need to ensure
-           key doesn't leak. I've not tried solving the utf8-ness.
-           NWC.
-        */
 	if (mg_find((SV*)hv, PERL_MAGIC_tied) || SvGMAGICAL((SV*)hv)) {
 	    sv = sv_newmortal();
 	    sv_upgrade(sv, SVt_PVLV);
-	    mg_copy((SV*)hv, sv, key, klen);
+	    if (flags & HVhek_UTF8) {
+		/* This hack based on the code in hv_exists_ent seems to be
+		   the easiest way to pass the utf8 flag through and fix
+		   the bug in hv_exists for tied hashes with utf8 keys.  */
+		SV *keysv = sv_2mortal(newSVpvn(key, klen));
+		SvUTF8_on(keysv);
+		mg_copy((SV*)hv, sv, (char *)keysv, HEf_SVKEY);
+	    } else {
+		mg_copy((SV*)hv, sv, key, klen);
+	    }
             if (flags & HVhek_FREEKEY)
                 Safefree(key);
 	    LvTYPE(sv) = 't';
@@ -627,7 +631,16 @@ Perl_hv_store_flags(pTHX_ HV *hv, const 
 	bool needs_store;
 	hv_magic_check (hv, &needs_copy, &needs_store);
 	if (needs_copy) {
-	    mg_copy((SV*)hv, val, key, klen);
+	    if (flags & HVhek_UTF8) {
+		/* This hack based on the code in hv_exists_ent seems to be
+		   the easiest way to pass the utf8 flag through and fix
+		   the bug in hv_exists for tied hashes with utf8 keys.  */
+		SV *keysv = sv_2mortal(newSVpvn(key, klen));
+		SvUTF8_on(keysv);
+		mg_copy((SV*)hv, val, (char *)keysv, HEf_SVKEY);
+	    } else {
+		mg_copy((SV*)hv, val, key, klen);
+	    }
 	    if (!xhv->xhv_array /* !HvARRAY */ && !needs_store) {
                 if (flags & HVhek_FREEKEY)
                     Safefree(key);
@@ -957,7 +970,9 @@ Perl_hv_delete(pTHX_ HV *hv, const char 
 	bool needs_store;
 	hv_magic_check (hv, &needs_copy, &needs_store);
 
-	if (needs_copy && (svp = hv_fetch(hv, key, klen, TRUE))) {
+// XXX PerlIO_printf(PerlIO_stderr(), "%d %d\n", is_utf8, klen);
+	if (needs_copy
+	    && (svp = hv_fetch(hv, key, is_utf8 ? -klen : klen, TRUE))) {
 	    sv = *svp;
 	    if (SvMAGICAL(sv)) {
 	        mg_clear(sv);
@@ -1276,6 +1291,15 @@ Perl_hv_exists(pTHX_ HV *hv, const char 
     if (SvRMAGICAL(hv)) {
 	if (mg_find((SV*)hv, PERL_MAGIC_tied) || SvGMAGICAL((SV*)hv)) {
 	    sv = sv_newmortal();
+	    if (is_utf8) {
+		/* This hack based on the code in hv_exists_ent seems to be
+		   the easiest way to pass the utf8 flag through and fix
+		   the bug in hv_exists for tied hashes with utf8 keys.  */
+		SV *keysv = sv_2mortal(newSVpvn(key, klen));
+		SvUTF8_on(keysv);
+		key = (char *)keysv;
+		klen = HEf_SVKEY;
+	    }
 	    mg_copy((SV*)hv, sv, key, klen);
 	    magic_existspack(sv, mg_find(sv, PERL_MAGIC_tiedelem));
 	    return (bool)SvTRUE(sv);
--- ext/XS/APItest/APItest.xs.orig	Tue Nov 11 21:05:36 2003
+++ ext/XS/APItest/APItest.xs	Sun Nov 16 21:07:20 2003
@@ -2,6 +2,99 @@
 #include "perl.h"
 #include "XSUB.h"
 
+
+MODULE = XS::APItest:Hash		PACKAGE = XS::APItest::Hash
+
+bool
+exists(hash, key_sv)
+	PREINIT:
+	STRLEN len;
+	const char *key;
+	INPUT:
+	HV *hash
+	SV *key_sv
+	CODE:
+	key = SvPV(key_sv, len);
+	RETVAL = hv_exists(hash, key, SvUTF8(key_sv) ? -len : len);
+        OUTPUT:
+        RETVAL
+
+SV *
+delete(hash, key_sv)
+	PREINIT:
+	STRLEN len;
+	const char *key;
+	INPUT:
+	HV *hash
+	SV *key_sv
+	CODE:
+	key = SvPV(key_sv, len);
+	/* It's already mortal, so need to increase reference count.  */
+	RETVAL = SvREFCNT_inc(hv_delete(hash, key,
+					SvUTF8(key_sv) ? -len : len, 0));
+        OUTPUT:
+        RETVAL
+
+SV *
+store(hash, key_sv, value)
+	PREINIT:
+	STRLEN len;
+	const char *key;
+	SV *copy;
+	SV **result;
+	INPUT:
+	HV *hash
+	SV *key_sv
+	SV *value
+	CODE:
+	key = SvPV(key_sv, len);
+	copy = newSV(0);
+	result = hv_store(hash, key, SvUTF8(key_sv) ? -len : len, copy, 0);
+	SvSetMagicSV(*result, value);
+	if (!result) {
+	    SvREFCNT_dec(copy);
+	    XSRETURN_EMPTY;
+	}
+	/* It's about to become mortal, so need to increase reference count.
+	 */
+	RETVAL = SvREFCNT_inc(*result);
+        OUTPUT:
+        RETVAL
+
+
+SV *
+fetch(hash, key_sv)
+	PREINIT:
+	STRLEN len;
+	const char *key;
+	SV **result;
+	INPUT:
+	HV *hash
+	SV *key_sv
+	CODE:
+	key = SvPV(key_sv, len);
+	result = hv_fetch(hash, key, SvUTF8(key_sv) ? -len : len, 0);
+	if (!result) {
+	    XSRETURN_EMPTY;
+	}
+	/* Force mg_get  */
+	RETVAL = newSVsv(*result);
+        OUTPUT:
+        RETVAL
+
+=pod
+
+sub TIEHASH  { bless {}, $_[0] }
+sub STORE    { $_[0]->{$_[1]} = $_[2] }
+sub FETCH    { $_[0]->{$_[1]} }
+sub FIRSTKEY { my $a = scalar keys %{$_[0]}; each %{$_[0]} }
+sub NEXTKEY  { each %{$_[0]} }
+sub EXISTS   { exists $_[0]->{$_[1]} }
+sub DELETE   { delete $_[0]->{$_[1]} }
+sub CLEAR    { %{$_[0]} = () }
+
+=cut
+
 MODULE = XS::APItest		PACKAGE = XS::APItest
 
 PROTOTYPES: DISABLE

Thread Next


nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About