develooper Front page | perl.perl5.porters | Postings from September 2017

Re: [perl #131683] Encode::ONLY_PRAGMA_WARNINGS in$PerlIO::encoding::fallback

Thread Previous | Thread Next
From:
pali
Date:
September 12, 2017 23:06
Subject:
Re: [perl #131683] Encode::ONLY_PRAGMA_WARNINGS in$PerlIO::encoding::fallback
Message ID:
201709130049.43954@pali
From 8e44ad64885c76f49e155ddb8cf6c5fcb5a2a011 Mon Sep 17 00:00:00 2001
From: Pali <pali@cpan.org>
Date: Wed, 13 Sep 2017 00:30:29 +0200
Subject: [PATCH] Rewrite encode, decode, encode_utf8, decode_utf8 and from_to
 functions to XS

---
 Encode.pm |   68 ---------------------
 Encode.xs |  196 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 196 insertions(+), 68 deletions(-)

diff --git a/Encode.pm b/Encode.pm
index ce30bd5..6ed4a77 100644
--- a/Encode.pm
+++ b/Encode.pm
@@ -171,74 +171,6 @@ sub clone_encoding($) {
     return Storable::dclone($obj);
 }
 
-sub encode($$;$) {
-    my $name = $_[0];
-    my $check = $_[2];
-    Carp::croak("Encoding name should not be undef") unless defined $name;
-    my $enc = find_encoding($name);
-    Carp::croak("Unknown encoding '$name'") unless defined $enc;
-    my $encode = $enc->can('encode');
-    Carp::croak("No function 'encode' for encoding '$name'") unless defined $encode;
-    $check ||= 0;
-    splice(@_, 0, 1, $enc);
-    if (ref $check or !$check or ($check & LEAVE_SRC)) {
-        my $string = $_[1];
-        splice(@_, 1, 1, $string);
-    }
-    splice(@_, 2, 1, $check);
-    goto &$encode;
-}
-*str2bytes = \&encode;
-
-sub decode($$;$) {
-    my $name = $_[0];
-    my $check = $_[2];
-    Carp::croak("Encoding name should not be undef") unless defined $name;
-    my $enc = find_encoding($name);
-    Carp::croak("Unknown encoding '$name'") unless defined $enc;
-    my $decode = $enc->can('decode');
-    Carp::croak("No function 'decode' for encoding '$name'") unless defined $decode;
-    $check ||= 0;
-    splice(@_, 0, 1, $enc);
-    if (ref $check or !$check or ($check & LEAVE_SRC)) {
-        my $octets = $_[1];
-        splice(@_, 1, 1, $octets);
-    }
-    splice(@_, 2, 1, $check);
-    goto &$decode;
-}
-*bytes2str = \&decode;
-
-sub from_to($$$;$) {
-    my ( $string, $from, $to, $check ) = @_;
-    Carp::croak("Encoding name should not be undef") unless defined $from and defined $to;
-    my $f = find_encoding($from);
-    Carp::croak("Unknown encoding '$from'") unless defined $f;
-    my $t = find_encoding($to);
-    Carp::croak("Unknown encoding '$to'") unless defined $t;
-    return undef unless defined $string;
-    $check ||= 0;
-    my $uni = $f->decode($string);
-    $_[0] = $string = $t->encode( $uni, $check );
-    return undef if ( $check && length($uni) );
-    return defined( $_[0] ) ? length($string) : undef;
-}
-
-sub encode_utf8($) {
-    my ($str) = @_;
-    return undef unless defined $str;
-    utf8::encode($str);
-    return $str;
-}
-
-my $utf8enc;
-
-sub decode_utf8($;$) {
-    $utf8enc ||= find_encoding('utf8');
-    unshift(@_, $utf8enc);
-    goto &{$utf8enc->can('decode')};
-}
-
 onBOOT;
 
 if ($ON_EBCDIC) {
diff --git a/Encode.xs b/Encode.xs
index c1222a1..2cd6f4b 100644
--- a/Encode.xs
+++ b/Encode.xs
@@ -35,6 +35,14 @@ UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
 #define SvIV_nomg SvIV
 #endif
 
+#ifndef SvTRUE_nomg
+#define SvTRUE_nomg SvTRUE
+#endif
+
+#ifndef SVfARG
+#define SVfARG(p) ((void*)(p))
+#endif
+
 static void
 Encode_XSEncoding(pTHX_ encode_t * enc)
 {
@@ -589,6 +597,83 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
     return s;
 }
 
+static SV *
+find_encoding(pTHX_ SV *enc)
+{
+    dSP;
+    I32 count;
+    SV *m_enc;
+    SV *obj = &PL_sv_undef;
+#ifndef SV_NOSTEAL
+    U32 tmp;
+#endif
+
+    ENTER;
+    SAVETMPS;
+    PUSHMARK(sp);
+
+    m_enc = sv_newmortal();
+#ifndef SV_NOSTEAL
+    tmp = SvFLAGS(enc) & SVs_TEMP;
+    SvTEMP_off(enc);
+    sv_setsv_flags(m_enc, enc, 0);
+    SvFLAGS(enc) |= tmp;
+#else
+    sv_setsv_flags(m_enc, enc, SV_NOSTEAL);
+#endif
+    XPUSHs(m_enc);
+
+    PUTBACK;
+
+    count = call_pv("Encode::find_encoding", G_SCALAR);
+
+    SPAGAIN;
+
+    if (count > 0) {
+        obj = POPs;
+        SvREFCNT_inc(obj);
+    }
+
+    PUTBACK;
+    FREETMPS;
+    LEAVE;
+    return sv_2mortal(obj);
+}
+
+static SV *
+call_encoding(pTHX_ const char *method, SV *obj, SV *src, SV *check)
+{
+    dSP;
+    I32 count;
+    SV *dst = &PL_sv_undef;
+
+    PUSHMARK(sp);
+
+    if (check)
+        check = sv_2mortal(newSVsv(check));
+
+    if (!check || SvROK(check) || !SvTRUE_nomg(check) || (SvIV_nomg(check) & ENCODE_LEAVE_SRC))
+        src = sv_2mortal(newSVsv(src));
+
+    XPUSHs(obj);
+    XPUSHs(src);
+    XPUSHs(check ? check : &PL_sv_no);
+
+    PUTBACK;
+
+    count = call_method(method, G_SCALAR);
+
+    SPAGAIN;
+
+    if (count > 0) {
+        dst = POPs;
+        SvREFCNT_inc(dst);
+    }
+
+    PUTBACK;
+    return dst;
+}
+
 
 MODULE = Encode		PACKAGE = Encode::utf8	PREFIX = Method_
 
@@ -1054,6 +1139,117 @@ CODE:
 OUTPUT:
     RETVAL
 
+SV *
+decode(encoding, octets, check = NULL)
+SV *	encoding
+SV *	octets
+SV *	check
+ALIAS:
+    bytes2str = 0
+PREINIT:
+    SV *obj;
+INIT:
+    SvGETMAGIC(encoding);
+CODE:
+    if (!SvOK(encoding))
+        croak("Encoding name should not be undef");
+    obj = find_encoding(aTHX_ encoding);
+    if (!SvOK(obj))
+        croak("Unknown encoding '%" SVf "'", SVfARG(encoding));
+    RETVAL = call_encoding(aTHX_ "decode", obj, octets, check);
+OUTPUT:
+    RETVAL
+
+SV *
+encode(encoding, string, check = NULL)
+SV *	encoding
+SV *	string
+SV *	check
+ALIAS:
+    str2bytes = 0
+PREINIT:
+    SV *obj;
+INIT:
+    SvGETMAGIC(encoding);
+CODE:
+    if (!SvOK(encoding))
+        croak("Encoding name should not be undef");
+    obj = find_encoding(aTHX_ encoding);
+    if (!SvOK(obj))
+        croak("Unknown encoding '%" SVf "'", SVfARG(encoding));
+    RETVAL = call_encoding(aTHX_ "encode", obj, string, check);
+OUTPUT:
+    RETVAL
+
+SV *
+decode_utf8(octets, check = NULL)
+SV *	octets
+SV *	check
+PREINIT:
+    HV *hv;
+    SV **sv;
+CODE:
+    hv = get_hv("Encode::Encoding", 0);
+    if (!hv)
+        croak("utf8 encoding was not found");
+    sv = hv_fetch(hv, "utf8", 4, 0);
+    if (!sv || !*sv || !SvOK(*sv))
+        croak("utf8 encoding was not found");
+    RETVAL = call_encoding(aTHX_ "decode", *sv, octets, check);
+OUTPUT:
+    RETVAL
+
+SV *
+encode_utf8(string)
+SV *	string
+CODE:
+    RETVAL = newSVsv(string);
+    if (SvOK(RETVAL))
+        sv_utf8_encode(RETVAL);
+OUTPUT:
+    RETVAL
+
+SV *
+from_to(octets, from, to, check = NULL)
+SV *	octets
+SV *	from
+SV *	to
+SV *	check
+PREINIT:
+    SV *from_obj;
+    SV *to_obj;
+    SV *string;
+    SV *new_octets;
+    U8 *ptr;
+    STRLEN len;
+INIT:
+    SvGETMAGIC(from);
+    SvGETMAGIC(to);
+CODE:
+    if (!SvOK(from) || !SvOK(to))
+        croak("Encoding name should not be undef");
+    from_obj = find_encoding(aTHX_ from);
+    if (!SvOK(from_obj))
+        croak("Unknown encoding '%" SVf "'", SVfARG(from));
+    to_obj = find_encoding(aTHX_ to);
+    if (!SvOK(to_obj))
+        croak("Unknown encoding '%" SVf "'", SVfARG(to));
+    string = sv_2mortal(call_encoding(aTHX_ "decode", from_obj, octets, NULL));
+    new_octets = sv_2mortal(call_encoding(aTHX_ "encode", to_obj, string, check));
+    SvGETMAGIC(new_octets);
+    if (SvOK(new_octets) && (!check || SvROK(check) || !SvTRUE_nomg(check) || sv_len(string) == 0)) {
+        ptr = (U8 *)SvPV_nomg(new_octets, len);
+        if (SvUTF8(new_octets))
+            len = utf8_length(ptr, ptr+len);
+        RETVAL = newSVuv(len);
+    } else {
+        RETVAL = &PL_sv_undef;
+    }
+    sv_setsv_nomg(octets, new_octets);
+    SvSETMAGIC(octets);
+OUTPUT:
+    RETVAL
+
 void
 onBOOT()
 CODE:
-- 
1.7.9.5


Thread Previous | Thread Next


nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About