develooper Front page | perl.perl5.porters | Postings from August 2012

Re: [perl.git] branch yves/superfasthash, updated. v5.17.3-95-g3b015b1

Thread Next
From:
demerphq
Date:
August 26, 2012 09:53
Subject:
Re: [perl.git] branch yves/superfasthash, updated. v5.17.3-95-g3b015b1
Message ID:
CANgJU+UvbNgWpPkVXe-RnBsLZfJfyj-CDY92hvQXwvz1ih9s0Q@mail.gmail.com
Im playing around with being able to choose a different hash function.
There are some weird errors if we use a different hash function, some
of which I cant explain at all yet, but which I suspect are bugs in
our hash usage. May not compile on all architectures right now, the
hash function is designed for x86.

I plan to rework these patch before I push any of them, if I push them at all.

Comments welcome, especial insight into any errors it causes.

Yves

On 26 August 2012 18:48, Yves Orton <demerphq@gmail.com> wrote:
> In perl.git, the branch yves/superfasthash has been updated
>
> <http://perl5.git.perl.org/perl.git/commitdiff/3b015b14c604d9f14083b25af9c7761e6c9bcb17?hp=79e381332b183e6685843de082c6f5359a1687fc>
>
> - Log -----------------------------------------------------------------
> commit 3b015b14c604d9f14083b25af9c7761e6c9bcb17
> Author: Yves Orton <demerphq@gmail.com>
> Date:   Sun Aug 26 18:48:01 2012 +0200
>
>     many changes to be rebased into separate commits later
> -----------------------------------------------------------------------
>
> Summary of changes:
>  cpan/CGI/lib/CGI.pm                   |    6 +++---
>  cpan/CGI/lib/CGI/Util.pm              |    2 +-
>  cpan/CGI/t/function.t                 |    2 +-
>  cpan/CGI/t/html.t                     |    2 +-
>  cpan/autodie/lib/autodie/exception.pm |    1 -
>  cpan/autodie/t/hints_pod_examples.t   |   11 +++++++----
>  hv.h                                  |   15 +++++++++++----
>  perl.c                                |   10 +++++++---
>  t/op/hash.t                           |   28 ++++------------------------
>  universal.c                           |   19 +++++++++++++++++++
>  10 files changed, 54 insertions(+), 42 deletions(-)
>
> diff --git a/cpan/CGI/lib/CGI.pm b/cpan/CGI/lib/CGI.pm
> index f510680..2fc31d3 100644
> --- a/cpan/CGI/lib/CGI.pm
> +++ b/cpan/CGI/lib/CGI.pm
> @@ -129,9 +129,6 @@ sub initialize_globals {
>
>  # ------------------ START OF THE LIBRARY ------------
>
> -#### Method: endform
> -# This method is DEPRECATED
> -*endform = \&end_form;
>
>  # make mod_perlhappy
>  initialize_globals();
> @@ -1974,6 +1971,9 @@ sub end_form {
>          }
>      }
>  }
> +#### Method: endform
> +# This method is DEPRECATED
> +*endform = \&end_form; # deprecated!
>  END_OF_FUNC
>
>  #### Method: end_multipart_form
> diff --git a/cpan/CGI/lib/CGI/Util.pm b/cpan/CGI/lib/CGI/Util.pm
> index b059281..1beccc0 100644
> --- a/cpan/CGI/lib/CGI/Util.pm
> +++ b/cpan/CGI/lib/CGI/Util.pm
> @@ -133,7 +133,7 @@ sub make_attributes {
>      my $quote = $do_not_quote ? '' : '"';
>
>      my(@att);
> -    foreach (keys %{$attr}) {
> +    foreach (sort keys %{$attr}) {
>         my($key) = $_;
>         $key=~s/^\-//;     # get rid of initial - if present
>
> diff --git a/cpan/CGI/t/function.t b/cpan/CGI/t/function.t
> index e0c0845..a07cf61 100644
> --- a/cpan/CGI/t/function.t
> +++ b/cpan/CGI/t/function.t
> @@ -103,4 +103,4 @@ test(30, !charset("") && header() eq "Content-Type: text/html${CRLF}${CRLF}", "E
>
>  test(31, header(-foo=>'bar') eq "Foo: bar${CRLF}Content-Type: text/html${CRLF}${CRLF}", "Custom header");
>
> -test(32, start_form(-action=>'one',name=>'two',onsubmit=>'three') eq qq(<form method="post" action="one" enctype="multipart/form-data" onsubmit="three" name="two">), "initial dash followed by undashe ... [14 chars truncated]
> +test(32, start_form(-action=>'one',name=>'two',onsubmit=>'three') eq qq(<form method="post" action="one" enctype="multipart/form-data" name="two" onsubmit="three">), "initial dash followed by undashe ... [14 chars truncated]
> diff --git a/cpan/CGI/t/html.t b/cpan/CGI/t/html.t
> index 09a3e33..1add22f 100644
> --- a/cpan/CGI/t/html.t
> +++ b/cpan/CGI/t/html.t
> @@ -98,7 +98,7 @@ is start_html(
>  <html xmlns="http://www.w3.org/1999/xhtml" lang="en-US" xml:lang="en-US">
>  <head>
>  <title>The world of foo</title>
> -<script src="foo.js" charset="utf-8" type="text/javascript"></script>
> +<script charset="utf-8" src="foo.js" type="text/javascript"></script>
>  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
>  </head>
>  <body>
> diff --git a/cpan/autodie/lib/autodie/exception.pm b/cpan/autodie/lib/autodie/exception.pm
> index cd06639..7d6e1a3 100644
> --- a/cpan/autodie/lib/autodie/exception.pm
> +++ b/cpan/autodie/lib/autodie/exception.pm
> @@ -591,7 +591,6 @@ sub format_default {
>      }
>
>      # Format our beautiful error.
> -
>      return "Can't $call(".  join(q{, }, @args) . "): $!" ;
>
>      # TODO - Handle user-defined errors from hash.
> diff --git a/cpan/autodie/t/hints_pod_examples.t b/cpan/autodie/t/hints_pod_examples.t
> index a3c6f0f..069d1c9 100644
> --- a/cpan/autodie/t/hints_pod_examples.t
> +++ b/cpan/autodie/t/hints_pod_examples.t
> @@ -154,16 +154,19 @@ my $perl58_fix = (
>  # Some of the tests provide different hints for scalar or list context
>
>  while (my ($test, $exception_expected) = each %scalar_tests) {
> -    eval "
> +    my $ok= eval(my $code= "
>          $perl58_fix
>          my \$scalar = $test;
> -    ";
> +        1;
> +    ");
>
>      if ($exception_expected) {
> -        isnt("$@", "", "scalar test - $test");
> +        isnt($ok ? "" : "$@", "", "scalar test - $test")
> +            or diag($code);
>      }
>      else {
> -        is($@, "", "scalar test - $test");
> +        is($ok ? "" : "$@", "", "scalar test - $test")
> +            or diag($code);
>      }
>  }
>
> diff --git a/hv.h b/hv.h
> index fdfa7cf..766853d 100644
> --- a/hv.h
> +++ b/hv.h
> @@ -142,9 +142,9 @@ struct xpvhv {
>   * (http://burtleburtle.net/bob/hash/doobs.html) */
>  #define PERL_HASH_INTERNAL_(hash,str,len,internal) \
>       STMT_START        { \
> -       const char * const s_PeRlHaSh_tmp = str; \
> +       const char * const s_PeRlHaSh_tmp = (str); \
>         const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \
> -       I32 i_PeRlHaSh = len; \
> +       I32 i_PeRlHaSh = (len); \
>         U32 hash_PeRlHaSh = ((internal) ? PL_rehash_seed : PERL_HASH_SEED); \
>         while (i_PeRlHaSh--) { \
>             hash_PeRlHaSh += *s_PeRlHaSh++; \
> @@ -177,8 +177,8 @@ struct xpvhv {
>        STMT_START       { \
>         register const char * const strtmp_PeRlHaSh = (str); \
>         register const unsigned char *str_PeRlHaSh = (const unsigned char *)strtmp_PeRlHaSh; \
> -       register I32 len_PeRlHaSh = (len); \
> -       register U32 hash_PeRlHaSh = ((internal) ? PL_rehash_seed : PERL_HASH_SEED); \
> +       register U32 len_PeRlHaSh = (len); \
> +       register U32 hash_PeRlHaSh = ((internal) ? PL_rehash_seed : PERL_HASH_SEED) ^ len; \
>         register U32 tmp_PeRlHaSh; \
>         register int rem_PeRlHaSh= len_PeRlHaSh & 3; \
>          len_PeRlHaSh >>= 2; \
> @@ -214,6 +214,13 @@ struct xpvhv {
>          hash_PeRlHaSh += hash_PeRlHaSh >> 17; \
>          hash_PeRlHaSh ^= hash_PeRlHaSh << 25; \
>          (hash) = (hash_PeRlHaSh + (hash_PeRlHaSh >> 6)); \
> +        if (0){ \
> +            SV *tmp= newSV(len); \
> +            PerlIO_printf(Perl_debug_log,"internal: %d seed: %"UVuf" hashvalue %"UVuf" len: %"UVuf" str: %s\n", \
> +                internal, (UV)((internal) ? PL_rehash_seed : PERL_HASH_SEED),\
> +                (UV)(hash), (UV)len, pv_pretty(tmp, (str), (len), 0, NULL, NULL, 0)); \
> +            SvREFCNT_dec(tmp);\
> +        }\
>      } STMT_END
>  #endif
>
> diff --git a/perl.c b/perl.c
> index 9d2831c..b87f011 100644
> --- a/perl.c
> +++ b/perl.c
> @@ -1484,13 +1484,17 @@ perl_parse(pTHXx_ XSINIT_t xsinit, int argc, char **argv, char **env)
>       * If you set PL_rehash_seed (and presumably also PL_rehash_seed_set)
>       * yourself, it is your responsibility to provide a good random seed!
>       * You can also define PERL_HASH_SEED in compile time, see hv.h. */
> -    if (!PL_rehash_seed_set)
> +    if (!PL_rehash_seed_set) {
>          PL_rehash_seed = get_hash_seed();
> +         /* PL_hash_seed= get_hash_seed(); */
> +    }
>      {
>         const char * const s = PerlEnv_getenv("PERL_HASH_SEED_DEBUG");
>
> -       if (s && (atoi(s) == 1))
> -           PerlIO_printf(Perl_debug_log, "HASH_SEED = %"UVuf"\n", PL_rehash_seed);
> +       if (s && (atoi(s) == 1)) {
> +           PerlIO_printf(Perl_debug_log, "HASH_SEED = %"UVuf"\n", PL_hash_seed);
> +           PerlIO_printf(Perl_debug_log, "REHASH_SEED = %"UVuf"\n", PL_rehash_seed);
> +        }
>      }
>  #endif /* #if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT) */
>
> diff --git a/t/op/hash.t b/t/op/hash.t
> index 4093f2e..f101e83 100644
> --- a/t/op/hash.t
> +++ b/t/op/hash.t
> @@ -8,7 +8,7 @@ BEGIN {
>
>  use strict;
>
> -plan tests => 15;
> +plan tests => 16;
>
>  my %h;
>
> @@ -44,10 +44,12 @@ my %h2 = map {$_ => 1} 'a'..'cc';
>  ok (!Internals::HvREHASH(%h2),
>      "starting with pre-populated non-pathological hash (rehash flag if off)");
>
> +my $foo_hash= Internals::PERL_HASH(%h2,"foo");
>  my @keys = get_keys(\%h2);
>  $h2{$_}++ for @keys;
>  ok (Internals::HvREHASH(%h2),
>      scalar(@keys) . " colliding into the same bucket keys are triggering rehash");
> +isnt(Internals::PERL_HASH(%h2,"foo"), $foo_hash,"hash of 'foo' changes after rehash");
>
>  sub get_keys {
>      my $hr = shift;
> @@ -74,7 +76,7 @@ sub get_keys {
>      my $hash;
>      while (@keys < THRESHOLD+2) {
>          # next if exists $hash->{$s};
> -        $hash = hash($s);
> +        $hash = Internals::PERL_HASH(%$hr,$s);
>          next unless ($hash & $mask) == 0;
>          $c++;
>          printf "# %2d: %5s, %10s\n", $c, $s, $hash;
> @@ -87,28 +89,6 @@ sub get_keys {
>  }
>
>
> -# trying to provide the fastest equivalent of C macro's PERL_HASH in
> -# Perl - the main complication is that it uses U32 integer, which we
> -# can't do in perl, without doing some tricks
> -sub hash {
> -    my $s = shift;
> -    my @c = split //, $s;
> -    my $u = HASH_SEED;
> -    for (@c) {
> -        # (A % M) + (B % M) == (A + B) % M
> -        # This works because '+' produces a NV, which is big enough to hold
> -        # the intermediate result. We only need the % before any "^" and "&"
> -        # to get the result in the range for an I32.
> -        # and << doesn't work on NV, so using 1 << 10
> -        $u += ord;
> -        $u += $u * (1 << 10); $u %= MASK_U32;
> -        $u ^= $u >> 6;
> -    }
> -    $u += $u << 3;  $u %= MASK_U32;
> -    $u ^= $u >> 11; $u %= MASK_U32;
> -    $u += $u << 15; $u %= MASK_U32;
> -    $u;
> -}
>
>  # This will crash perl if it fails
>
> diff --git a/universal.c b/universal.c
> index cb49e0b..71b9318 100644
> --- a/universal.c
> +++ b/universal.c
> @@ -1122,6 +1122,24 @@ XS(XS_Internals_HvREHASH)        /* Subject to change  */
>      Perl_croak(aTHX_ "Internals::HvREHASH $hashref");
>  }
>
> +XS(XS_Internals_PERL_HASH)     /* Subject to change  */
> +{
> +    dVAR;
> +    dXSARGS;
> +    PERL_UNUSED_ARG(cv);
> +    if (items == 2 && SvROK(ST(0))) {
> +       const HV * const hv = (const HV *) SvRV(ST(0));
> +       if (SvTYPE(hv) == SVt_PVHV) {
> +           STRLEN len;
> +            char *pv= SvPV(ST(1),len);
> +            UV uv;
> +            PERL_HASH_INTERNAL_(uv,pv,len,HvREHASH(hv));
> +            XSRETURN_UV(uv);
> +       }
> +    }
> +    Perl_croak(aTHX_ "Internals::PERL_HASH $hashref, $value");
> +}
> +
>  XS(XS_re_is_regexp)
>  {
>      dVAR;
> @@ -1401,6 +1419,7 @@ struct xsub_details details[] = {
>      {"Internals::hash_seed", XS_Internals_hash_seed, ""},
>      {"Internals::rehash_seed", XS_Internals_rehash_seed, ""},
>      {"Internals::HvREHASH", XS_Internals_HvREHASH, "\\%"},
> +    {"Internals::PERL_HASH", XS_Internals_PERL_HASH, "\\%$"},
>      {"re::is_regexp", XS_re_is_regexp, "$"},
>      {"re::regname", XS_re_regname, ";$$"},
>      {"re::regnames", XS_re_regnames, ";$"},
>
> --
> Perl5 Master Repository



-- 
perl -Mre=debug -e "/just|another|perl|hacker/"

Thread Next


nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About