develooper Front page | perl.perl5.changes | Postings from January 2019

[perl.git] branch blead updated. v5.29.7-12-g20797ee136

From:
James Keenan
Date:
January 21, 2019 14:28
Subject:
[perl.git] branch blead updated. v5.29.7-12-g20797ee136
Message ID:
E1glaYe-0001mK-K6@git.dc.perl.space
In perl.git, the branch blead has been updated

<https://perl5.git.perl.org/perl.git/commitdiff/20797ee136d20e2b1a349821622af71c5da520cf?hp=515c395bcca24c55c85b5aeea239e5e836c36059>

- Log -----------------------------------------------------------------
commit 20797ee136d20e2b1a349821622af71c5da520cf
Author: Dan Kogai <dankogai@dan.co.jp>
Date:   Mon Jan 21 09:25:18 2019 -0500

    Encode: synch with CPAN version 2.99

-----------------------------------------------------------------------

Summary of changes:
 MANIFEST                       |    1 -
 Porting/Maintainers.pl         |    2 +-
 cpan/Encode/Encode.pm          |  139 +----
 cpan/Encode/Encode.xs          |  480 +++++++---------
 cpan/Encode/Encode/encode.h    | 1230 ++++++++++++++++++++++++++++++++++++++++
 cpan/Encode/Unicode/Unicode.pm |    2 +-
 cpan/Encode/Unicode/Unicode.xs |  173 +++---
 cpan/Encode/encengine.c        |    4 +-
 cpan/Encode/t/decode.t         |    9 +-
 cpan/Encode/t/enc_eucjp.t      |    4 +
 cpan/Encode/t/utf8messages.t   |   33 --
 cpan/Encode/t/utf8warnings.t   |  109 +---
 12 files changed, 1606 insertions(+), 580 deletions(-)
 delete mode 100644 cpan/Encode/t/utf8messages.t

diff --git a/MANIFEST b/MANIFEST
index 5d1d5cca7b..e282452028 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -629,7 +629,6 @@ cpan/Encode/t/undef.t
 cpan/Encode/t/unibench.pl		benchmark script
 cpan/Encode/t/Unicode.t			test script
 cpan/Encode/t/use-Encode-Alias.t
-cpan/Encode/t/utf8messages.t
 cpan/Encode/t/utf8ref.t			test script
 cpan/Encode/t/utf8strict.t		test script
 cpan/Encode/t/utf8warnings.t
diff --git a/Porting/Maintainers.pl b/Porting/Maintainers.pl
index 429ed4f673..4b0551b8aa 100755
--- a/Porting/Maintainers.pl
+++ b/Porting/Maintainers.pl
@@ -386,7 +386,7 @@ use File::Glob qw(:case);
     },
 
     'Encode' => {
-        'DISTRIBUTION' => 'DANKOGAI/Encode-2.97.tar.gz',
+        'DISTRIBUTION' => 'DANKOGAI/Encode-2.99.tar.gz',
         'FILES'        => q[cpan/Encode],
         'CUSTOMIZED'   => [
 	    # TODO test passes on blead
diff --git a/cpan/Encode/Encode.pm b/cpan/Encode/Encode.pm
index f90f929f07..ec625b9f20 100644
--- a/cpan/Encode/Encode.pm
+++ b/cpan/Encode/Encode.pm
@@ -1,5 +1,5 @@
 #
-# $Id: Encode.pm,v 2.97 2018/02/21 12:14:24 dankogai Exp $
+# $Id: Encode.pm,v 2.99 2019/01/21 03:11:41 dankogai Exp $
 #
 package Encode;
 use strict;
@@ -7,13 +7,14 @@ use warnings;
 use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
 our $VERSION;
 BEGIN {
-    $VERSION = sprintf "%d.%02d", q$Revision: 2.97 $ =~ /(\d+)/g;
+    $VERSION = sprintf "%d.%02d", q$Revision: 2.99 $ =~ /(\d+)/g;
     require XSLoader;
     XSLoader::load( __PACKAGE__, $VERSION );
 }
 
 use Exporter 5.57 'import';
 
+use Carp ();
 our @CARP_NOT = qw(Encode::Encoder);
 
 # Public, encouraged API is exported by default
@@ -170,134 +171,6 @@ sub clone_encoding($) {
     return Storable::dclone($obj);
 }
 
-sub encode($$;$) {
-    my ( $name, $string, $check ) = @_;
-    return undef unless defined $string;
-    $string .= '';    # stringify;
-    $check ||= 0;
-    unless ( defined $name ) {
-        require Carp;
-        Carp::croak("Encoding name should not be undef");
-    }
-    my $enc = find_encoding($name);
-    unless ( defined $enc ) {
-        require Carp;
-        Carp::croak("Unknown encoding '$name'");
-    }
-    # For Unicode, warnings need to be caught and re-issued at this level
-    # so that callers can disable utf8 warnings lexically.
-    my $octets;
-    if ( ref($enc) eq 'Encode::Unicode' ) {
-        my $warn = '';
-        {
-            local $SIG{__WARN__} = sub { $warn = shift };
-            $octets = $enc->encode( $string, $check );
-        }
-        warnings::warnif('utf8', $warn) if length $warn;
-    }
-    else {
-        $octets = $enc->encode( $string, $check );
-    }
-    $_[1] = $string if $check and !ref $check and !( $check & LEAVE_SRC );
-    return $octets;
-}
-*str2bytes = \&encode;
-
-sub decode($$;$) {
-    my ( $name, $octets, $check ) = @_;
-    return undef unless defined $octets;
-    $octets .= '';
-    $check ||= 0;
-    my $enc = find_encoding($name);
-    unless ( defined $enc ) {
-        require Carp;
-        Carp::croak("Unknown encoding '$name'");
-    }
-    # For Unicode, warnings need to be caught and re-issued at this level
-    # so that callers can disable utf8 warnings lexically.
-    my $string;
-    if ( ref($enc) eq 'Encode::Unicode' ) {
-        my $warn = '';
-        {
-            local $SIG{__WARN__} = sub { $warn = shift };
-            $string = $enc->decode( $octets, $check );
-        }
-        warnings::warnif('utf8', $warn) if length $warn;
-    }
-    else {
-        $string = $enc->decode( $octets, $check );
-    }
-    $_[1] = $octets if $check and !ref $check and !( $check & LEAVE_SRC );
-    return $string;
-}
-*bytes2str = \&decode;
-
-sub from_to($$$;$) {
-    my ( $string, $from, $to, $check ) = @_;
-    return undef unless defined $string;
-    $check ||= 0;
-    my $f = find_encoding($from);
-    unless ( defined $f ) {
-        require Carp;
-        Carp::croak("Unknown encoding '$from'");
-    }
-    my $t = find_encoding($to);
-    unless ( defined $t ) {
-        require Carp;
-        Carp::croak("Unknown encoding '$to'");
-    }
-
-    # For Unicode, warnings need to be caught and re-issued at this level
-    # so that callers can disable utf8 warnings lexically.
-    my $uni;
-    if ( ref($f) eq 'Encode::Unicode' ) {
-        my $warn = '';
-        {
-            local $SIG{__WARN__} = sub { $warn = shift };
-            $uni = $f->decode($string);
-        }
-        warnings::warnif('utf8', $warn) if length $warn;
-    }
-    else {
-        $uni = $f->decode($string);
-    }
-
-    if ( ref($t) eq 'Encode::Unicode' ) {
-        my $warn = '';
-        {
-            local $SIG{__WARN__} = sub { $warn = shift };
-            $_[0] = $string = $t->encode( $uni, $check );
-        }
-        warnings::warnif('utf8', $warn) if length $warn;
-    }
-    else {
-        $_[0] = $string = $t->encode( $uni, $check );
-    }
-
-    return undef if ( $check && length($uni) );
-    return defined( $_[0] ) ? length($string) : undef;
-}
-
-sub encode_utf8($) {
-    my ($str) = @_;
-    return undef unless defined $str;
-    utf8::encode($str);
-    return $str;
-}
-
-my $utf8enc;
-
-sub decode_utf8($;$) {
-    my ( $octets, $check ) = @_;
-    return undef unless defined $octets;
-    $octets .= '';
-    $check   ||= 0;
-    $utf8enc ||= find_encoding('utf8');
-    my $string = $utf8enc->decode( $octets, $check );
-    $_[0] = $octets if $check and !ref $check and !( $check & LEAVE_SRC );
-    return $string;
-}
-
 onBOOT;
 
 if ($ON_EBCDIC) {
@@ -824,6 +697,12 @@ code to do exactly that:
 This is the same as C<FB_QUIET> above, except that instead of being silent
 on errors, it issues a warning.  This is handy for when you are debugging.
 
+B<CAVEAT>: All warnings from Encode module are reported, independently of
+L<pragma warnings|warnings> settings. If you want to follow settings of
+lexical warnings configured by L<pragma warnings|warnings> then append
+also check value C<ENCODE::ONLY_PRAGMA_WARNINGS>. This value is available
+since Encode version 2.99.
+
 =head3 FB_PERLQQ FB_HTMLCREF FB_XMLCREF
 
 =over 2
diff --git a/cpan/Encode/Encode.xs b/cpan/Encode/Encode.xs
index 774c2b1fec..ddc1b1f366 100644
--- a/cpan/Encode/Encode.xs
+++ b/cpan/Encode/Encode.xs
@@ -1,8 +1,9 @@
 /*
- $Id: Encode.xs,v 2.43 2018/02/21 12:14:33 dankogai Exp dankogai $
+ $Id: Encode.xs,v 2.45 2019/01/21 03:13:35 dankogai Exp $
  */
 
 #define PERL_NO_GET_CONTEXT
+#define IN_ENCODE_XS
 #include "EXTERN.h"
 #include "perl.h"
 #include "XSUB.h"
@@ -20,26 +21,16 @@
    encode_method().  1 is recommended. 2 restores NI-S original */
 #define ENCODE_XS_USEFP   1
 
-#define UNIMPLEMENTED(x,y) static y x (SV *sv, char *encoding) {	\
-			Perl_croak_nocontext("panic_unimplemented");	\
-                        PERL_UNUSED_VAR(sv); \
-                        PERL_UNUSED_VAR(encoding); \
-             return (y)0; /* fool picky compilers */ \
-                         }
-/**/
-
-UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
-UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
-
 #ifndef SvIV_nomg
 #define SvIV_nomg SvIV
 #endif
 
-#ifndef UTF8_DISALLOW_ILLEGAL_INTERCHANGE
-#  define UTF8_DISALLOW_ILLEGAL_INTERCHANGE 0
-#  define UTF8_ALLOW_NON_STRICT (UTF8_ALLOW_FE_FF|UTF8_ALLOW_SURROGATE|UTF8_ALLOW_FFFF)
-#else
-#  define UTF8_ALLOW_NON_STRICT 0
+#ifndef SvTRUE_nomg
+#define SvTRUE_nomg SvTRUE
+#endif
+
+#ifndef SVfARG
+#define SVfARG(p) ((void*)(p))
 #endif
 
 static void
@@ -66,16 +57,6 @@ Encode_XSEncoding(pTHX_ encode_t * enc)
     SvREFCNT_dec(sv);
 }
 
-static void
-call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig)
-{
-    /* Exists for breakpointing */
-    PERL_UNUSED_VAR(routine);
-    PERL_UNUSED_VAR(done);
-    PERL_UNUSED_VAR(dest);
-    PERL_UNUSED_VAR(orig);
-}
-
 static void
 utf8_safe_downgrade(pTHX_ SV ** src, U8 ** s, STRLEN * slen, bool modify)
 {
@@ -164,7 +145,7 @@ do_bytes_fallback_cb(pTHX_ U8 *s, STRLEN slen, SV *fallback_cb)
 
 static SV *
 encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, U8 * s, STRLEN slen,
-	      int check, STRLEN * offset, SV * term, int * retcode, 
+	      IV check, STRLEN * offset, SV * term, int * retcode, 
 	      SV *fallback_cb)
 {
     STRLEN tlen  = slen;
@@ -258,7 +239,7 @@ encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, U8 *
                    (UV)ch, enc->name[0]);
             return &PL_sv_undef; /* never reaches but be safe */
         }
-        if (check & ENCODE_WARN_ON_ERR){
+        if (encode_ckWARN(check, WARN_UTF8)) {
             Perl_warner(aTHX_ packWARN(WARN_UTF8),
                 ERR_ENCODE_NOMAP, (UV)ch, enc->name[0]);
         }
@@ -297,7 +278,7 @@ encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, U8 *
                               enc->name[0], (UV)s[slen]);
             return &PL_sv_undef; /* never reaches but be safe */
         }
-        if (check & ENCODE_WARN_ON_ERR){
+        if (encode_ckWARN(check, WARN_UTF8)) {
             Perl_warner(
             aTHX_ packWARN(WARN_UTF8),
             ERR_DECODE_NOMAP,
@@ -386,70 +367,6 @@ strict_utf8(pTHX_ SV* sv)
     return SvTRUE(*svp);
 }
 
-/* Modern perls have the capability to do this more efficiently and portably */
-#ifdef utf8n_to_uvchr_msgs
-# define CAN_USE_BASE_PERL
-#endif
-
-#ifndef CAN_USE_BASE_PERL
-
-/*
- * https://github.com/dankogai/p5-encode/pull/56#issuecomment-231959126
- */
-#ifndef UNICODE_IS_NONCHAR
-#define UNICODE_IS_NONCHAR(c) ((c >= 0xFDD0 && c <= 0xFDEF) || (c & 0xFFFE) == 0xFFFE)
-#endif
-
-#ifndef UNICODE_IS_SUPER
-#define UNICODE_IS_SUPER(c) (c > PERL_UNICODE_MAX)
-#endif
-
-#define UNICODE_IS_STRICT(c) (!UNICODE_IS_SURROGATE(c) && !UNICODE_IS_NONCHAR(c) && !UNICODE_IS_SUPER(c))
-
-#ifndef UTF_ACCUMULATION_OVERFLOW_MASK
-#ifndef CHARBITS
-#define CHARBITS CHAR_BIT
-#endif
-#define UTF_ACCUMULATION_OVERFLOW_MASK (((UV) UTF_CONTINUATION_MASK) << ((sizeof(UV) * CHARBITS) - UTF_ACCUMULATION_SHIFT))
-#endif
-
-/*
- * Convert non strict utf8 sequence of len >= 2 to unicode codepoint
- */
-static UV
-convert_utf8_multi_seq(U8* s, STRLEN len, STRLEN *rlen)
-{
-    UV uv;
-    U8 *ptr = s;
-    bool overflowed = 0;
-
-    uv = NATIVE_TO_UTF(*s) & UTF_START_MASK(UTF8SKIP(s));
-
-    len--;
-    s++;
-
-    while (len--) {
-        if (!UTF8_IS_CONTINUATION(*s)) {
-            *rlen = s-ptr;
-            return 0;
-        }
-        if (uv & UTF_ACCUMULATION_OVERFLOW_MASK)
-            overflowed = 1;
-        uv = UTF8_ACCUMULATE(uv, *s);
-        s++;
-    }
-
-    *rlen = s-ptr;
-
-    if (overflowed || *rlen > (STRLEN)UNISKIP(uv)) {
-        return 0;
-    }
-
-    return uv;
-}
-
-#endif  /* CAN_USE_BASE_PERL */
-
 static U8*
 process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
              bool encode, bool strict, bool stop_at_partial)
@@ -472,16 +389,20 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
     UV uv;
     STRLEN ulen;
     SV *fallback_cb;
-    int check;
+    IV check;
     U8 *d;
     STRLEN dlen;
     char esc[UTF8_MAXLEN * 6 + 1];
     STRLEN i;
     const U32 flags = (strict)
                     ? UTF8_DISALLOW_ILLEGAL_INTERCHANGE
-                    : UTF8_ALLOW_NON_STRICT;
+                    : 0;
 
-    if (SvROK(check_sv)) {
+    if (!SvOK(check_sv)) {
+	fallback_cb = &PL_sv_undef;
+	check = 0;
+    }
+    else if (SvROK(check_sv)) {
 	/* croak("UTF-8 decoder doesn't support callback CHECK"); */
 	fallback_cb = check_sv;
 	check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as perlqq */
@@ -501,9 +422,6 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
 
     while (s < e) {
 
-#ifdef CAN_USE_BASE_PERL    /* Use the much faster, portable implementation if
-                               available */
-
         /* If there were no errors, this will be 'e'; otherwise it will point
          * to the first byte of the erroneous input */
         const U8* e_or_where_failed;
@@ -531,63 +449,7 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
          * point, or the best substitution for it */
         uv = utf8n_to_uvchr(s, e - s, &ulen, UTF8_ALLOW_ANY);
 
-#else   /* Use code for earlier perls */
-
-        ((void)sizeof(flags));  /* Avoid compiler warning */
-
-        if (UTF8_IS_INVARIANT(*s)) {
-            *d++ = *s++;
-            continue;
-        }
-
-        uv = 0;
-        ulen = 1;
-        if (! UTF8_IS_CONTINUATION(*s)) {
-            /* Not an invariant nor a continuation; must be a start byte.  (We
-             * can't test for UTF8_IS_START as that excludes things like \xC0
-             * which are start bytes, but always lead to overlongs */
-
-            U8 skip = UTF8SKIP(s);
-            if ((s + skip) > e) {
-                /* just calculate ulen, in pathological cases can be smaller then e-s */
-                if (e-s >= 2)
-                    convert_utf8_multi_seq(s, e-s, &ulen);
-                else
-                    ulen = 1;
-
-                if (stop_at_partial && ulen == (STRLEN)(e-s))
-                    break;
-
-                goto malformed_byte;
-            }
-
-            uv = convert_utf8_multi_seq(s, skip, &ulen);
-            if (uv == 0)
-                goto malformed_byte;
-            else if (strict && !UNICODE_IS_STRICT(uv))
-                goto malformed;
-
-
-             /* Whole char is good */
-             memcpy(d, s, skip);
-             d += skip;
-             s += skip;
-             continue;
-        }
-
-        /* If we get here there is something wrong with alleged UTF-8 */
-        /* uv is used only when encoding */
-    malformed_byte:
-        if (uv == 0)
-            uv = (UV)*s;
-        if (encode || ulen == 0)
-            ulen = 1;
-
-    malformed:
-
-#endif  /* The two versions for processing come back together here, for the
-         * error handling code.
-         *
+        /*
          * Here, we are looping through the input and found an error.
          * 'uv' is the code point in error if calculable, or the REPLACEMENT
          *      CHARACTER if not.
@@ -602,7 +464,7 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
             else
                 Perl_croak(aTHX_ ERR_DECODE_STR_NOMAP, (strict ? "UTF-8" : "utf8"), esc);
         }
-        if (check & ENCODE_WARN_ON_ERR){
+        if (encode_ckWARN(check, WARN_UTF8)) {
             if (encode)
                 Perl_warner(aTHX_ packWARN(WARN_UTF8),
                             ERR_ENCODE_NOMAP, uv, (strict ? "UTF-8" : "utf8"));
@@ -667,6 +529,88 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
     return s;
 }
 
+static SV *
+find_encoding(pTHX_ SV *enc)
+{
+    dSP;
+    I32 count;
+    SV *m_enc;
+    SV *obj = &PL_sv_undef;
+#ifndef SV_NOSTEAL
+    U32 tmp;
+#endif
+
+    ENTER;
+    SAVETMPS;
+    PUSHMARK(sp);
+
+    m_enc = sv_newmortal();
+#ifndef SV_NOSTEAL
+    tmp = SvFLAGS(enc) & SVs_TEMP;
+    SvTEMP_off(enc);
+    sv_setsv_flags(m_enc, enc, 0);
+    SvFLAGS(enc) |= tmp;
+#else
+#if SV_NOSTEAL == 0
+    #error You have broken SV_NOSTEAL which cause memory corruption in sv_setsv_flags()
+    #error Most probably broken SV_NOSTEAL was defined by buggy version of ppport.h
+#else
+    sv_setsv_flags(m_enc, enc, SV_NOSTEAL);
+#endif
+#endif
+    XPUSHs(m_enc);
+
+    PUTBACK;
+
+    count = call_pv("Encode::find_encoding", G_SCALAR);
+
+    SPAGAIN;
+
+    if (count > 0) {
+        obj = POPs;
+        SvREFCNT_inc(obj);
+    }
+
+    PUTBACK;
+    FREETMPS;
+    LEAVE;
+    return sv_2mortal(obj);
+}
+
+static SV *
+call_encoding(pTHX_ const char *method, SV *obj, SV *src, SV *check)
+{
+    dSP;
+    I32 count;
+    SV *dst = &PL_sv_undef;
+
+    PUSHMARK(sp);
+
+    if (check)
+        check = sv_2mortal(newSVsv(check));
+
+    if (!check || SvROK(check) || !SvTRUE_nomg(check) || (SvIV_nomg(check) & ENCODE_LEAVE_SRC))
+        src = sv_2mortal(newSVsv(src));
+
+    XPUSHs(obj);
+    XPUSHs(src);
+    XPUSHs(check ? check : &PL_sv_no);
+
+    PUTBACK;
+
+    count = call_method(method, G_SCALAR);
+
+    SPAGAIN;
+
+    if (count > 0) {
+        dst = POPs;
+        SvREFCNT_inc(dst);
+    }
+
+    PUTBACK;
+    return dst;
+}
+
 
 MODULE = Encode		PACKAGE = Encode::utf8	PREFIX = Method_
 
@@ -683,7 +627,7 @@ PREINIT:
     U8 *e;
     SV *dst;
     bool renewed = 0;
-    int check;
+    IV check;
     bool modify;
     dSP;
 INIT:
@@ -744,7 +688,7 @@ PREINIT:
     U8 *s;
     U8 *e;
     SV *dst;
-    int check;
+    IV check;
     bool modify;
 INIT:
     SvGETMAGIC(src);
@@ -848,7 +792,7 @@ SV *	off
 SV *	term
 SV *    check_sv
 PREINIT:
-    int check;
+    IV check;
     SV *fallback_cb;
     bool modify;
     encode_t *enc;
@@ -886,7 +830,7 @@ SV *	obj
 SV *	src
 SV *	check_sv
 PREINIT:
-    int check;
+    IV check;
     SV *fallback_cb;
     bool modify;
     encode_t *enc;
@@ -917,7 +861,7 @@ SV *	obj
 SV *	src
 SV *	check_sv
 PREINIT:
-    int check;
+    IV check;
     SV *fallback_cb;
     bool modify;
     encode_t *enc;
@@ -988,102 +932,6 @@ MODULE = Encode         PACKAGE = Encode
 
 PROTOTYPES: ENABLE
 
-I32
-_bytes_to_utf8(sv, ...)
-SV *    sv
-PREINIT:
-    SV * encoding;
-INIT:
-    encoding = items == 2 ? ST(1) : Nullsv;
-CODE:
-    if (encoding)
-    RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
-    else {
-    STRLEN len;
-    U8*    s = (U8*)SvPV(sv, len);
-    U8*    converted;
-
-    converted = bytes_to_utf8(s, &len); /* This allocs */
-    sv_setpvn(sv, (char *)converted, len);
-    SvUTF8_on(sv); /* XXX Should we? */
-    Safefree(converted);                /* ... so free it */
-    RETVAL = len;
-    }
-OUTPUT:
-    RETVAL
-
-I32
-_utf8_to_bytes(sv, ...)
-SV *    sv
-PREINIT:
-    SV * to;
-    SV * check;
-INIT:
-    to    = items > 1 ? ST(1) : Nullsv;
-    check = items > 2 ? ST(2) : Nullsv;
-CODE:
-    if (to) {
-    RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
-    } else {
-    STRLEN len;
-    U8 *s = (U8*)SvPV(sv, len);
-
-    RETVAL = 0;
-    if (SvTRUE(check)) {
-        /* Must do things the slow way */
-        U8 *dest;
-            /* We need a copy to pass to check() */
-        U8 *src  = s;
-        U8 *send = s + len;
-        U8 *d0;
-
-        New(83, dest, len, U8); /* I think */
-        d0 = dest;
-
-        while (s < send) {
-                if (*s < 0x80){
-            *dest++ = *s++;
-                } else {
-            STRLEN ulen;
-            UV uv = *s++;
-
-            /* Have to do it all ourselves because of error routine,
-               aargh. */
-            if (!(uv & 0x40)){ goto failure; }
-            if      (!(uv & 0x20)) { ulen = 2;  uv &= 0x1f; }
-            else if (!(uv & 0x10)) { ulen = 3;  uv &= 0x0f; }
-            else if (!(uv & 0x08)) { ulen = 4;  uv &= 0x07; }
-            else if (!(uv & 0x04)) { ulen = 5;  uv &= 0x03; }
-            else if (!(uv & 0x02)) { ulen = 6;  uv &= 0x01; }
-            else if (!(uv & 0x01)) { ulen = 7;  uv = 0; }
-            else                   { ulen = 13; uv = 0; }
-        
-            /* Note change to utf8.c variable naming, for variety */
-            while (ulen--) {
-            if ((*s & 0xc0) != 0x80){
-                goto failure;
-            } else {
-                uv = (uv << 6) | (*s++ & 0x3f);
-            }
-          }
-          if (uv > 256) {
-          failure:
-              call_failure(check, s, dest, src);
-              /* Now what happens? */
-          }
-          *dest++ = (U8)uv;
-        }
-        }
-        RETVAL = dest - d0;
-        sv_usepvn(sv, (char *)dest, RETVAL);
-        SvUTF8_off(sv);
-    } else {
-        RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
-    }
-    }
-OUTPUT:
-    RETVAL
-
 bool
 is_utf8(sv, check = 0)
 SV *	sv
@@ -1132,6 +980,117 @@ CODE:
 OUTPUT:
     RETVAL
 
+SV *
+decode(encoding, octets, check = NULL)
+SV *	encoding
+SV *	octets
+SV *	check
+ALIAS:
+    bytes2str = 0
+PREINIT:
+    SV *obj;
+INIT:
+    SvGETMAGIC(encoding);
+CODE:
+    if (!SvOK(encoding))
+        croak("Encoding name should not be undef");
+    obj = find_encoding(aTHX_ encoding);
+    if (!SvOK(obj))
+        croak("Unknown encoding '%" SVf "'", SVfARG(encoding));
+    RETVAL = call_encoding(aTHX_ "decode", obj, octets, check);
+OUTPUT:
+    RETVAL
+
+SV *
+encode(encoding, string, check = NULL)
+SV *	encoding
+SV *	string
+SV *	check
+ALIAS:
+    str2bytes = 0
+PREINIT:
+    SV *obj;
+INIT:
+    SvGETMAGIC(encoding);
+CODE:
+    if (!SvOK(encoding))
+        croak("Encoding name should not be undef");
+    obj = find_encoding(aTHX_ encoding);
+    if (!SvOK(obj))
+        croak("Unknown encoding '%" SVf "'", SVfARG(encoding));
+    RETVAL = call_encoding(aTHX_ "encode", obj, string, check);
+OUTPUT:
+    RETVAL
+
+SV *
+decode_utf8(octets, check = NULL)
+SV *	octets
+SV *	check
+PREINIT:
+    HV *hv;
+    SV **sv;
+CODE:
+    hv = get_hv("Encode::Encoding", 0);
+    if (!hv)
+        croak("utf8 encoding was not found");
+    sv = hv_fetch(hv, "utf8", 4, 0);
+    if (!sv || !*sv || !SvOK(*sv))
+        croak("utf8 encoding was not found");
+    RETVAL = call_encoding(aTHX_ "decode", *sv, octets, check);
+OUTPUT:
+    RETVAL
+
+SV *
+encode_utf8(string)
+SV *	string
+CODE:
+    RETVAL = newSVsv(string);
+    if (SvOK(RETVAL))
+        sv_utf8_encode(RETVAL);
+OUTPUT:
+    RETVAL
+
+SV *
+from_to(octets, from, to, check = NULL)
+SV *	octets
+SV *	from
+SV *	to
+SV *	check
+PREINIT:
+    SV *from_obj;
+    SV *to_obj;
+    SV *string;
+    SV *new_octets;
+    U8 *ptr;
+    STRLEN len;
+INIT:
+    SvGETMAGIC(from);
+    SvGETMAGIC(to);
+CODE:
+    if (!SvOK(from) || !SvOK(to))
+        croak("Encoding name should not be undef");
+    from_obj = find_encoding(aTHX_ from);
+    if (!SvOK(from_obj))
+        croak("Unknown encoding '%" SVf "'", SVfARG(from));
+    to_obj = find_encoding(aTHX_ to);
+    if (!SvOK(to_obj))
+        croak("Unknown encoding '%" SVf "'", SVfARG(to));
+    string = sv_2mortal(call_encoding(aTHX_ "decode", from_obj, octets, NULL));
+    new_octets = sv_2mortal(call_encoding(aTHX_ "encode", to_obj, string, check));
+    SvGETMAGIC(new_octets);
+    if (SvOK(new_octets) && (!check || SvROK(check) || !SvTRUE_nomg(check) || sv_len(string) == 0)) {
+        ptr = (U8 *)SvPV_nomg(new_octets, len);
+        if (SvUTF8(new_octets))
+            len = utf8_length(ptr, ptr+len);
+        RETVAL = newSVuv(len);
+    } else {
+        RETVAL = &PL_sv_undef;
+    }
+    sv_setsv_nomg(octets, new_octets);
+    SvSETMAGIC(octets);
+OUTPUT:
+    RETVAL
+
 void
 onBOOT()
 CODE:
@@ -1141,11 +1100,12 @@ CODE:
 
 BOOT:
 {
-    HV *stash = gv_stashpvn("Encode", strlen("Encode"), GV_ADD);
+    HV *stash = gv_stashpvn("Encode", (U32)strlen("Encode"), GV_ADD);
     newCONSTSUB(stash, "DIE_ON_ERR", newSViv(ENCODE_DIE_ON_ERR));
     newCONSTSUB(stash, "WARN_ON_ERR", newSViv(ENCODE_WARN_ON_ERR));
     newCONSTSUB(stash, "RETURN_ON_ERR", newSViv(ENCODE_RETURN_ON_ERR));
     newCONSTSUB(stash, "LEAVE_SRC", newSViv(ENCODE_LEAVE_SRC));
+    newCONSTSUB(stash, "ONLY_PRAGMA_WARNINGS", newSViv(ENCODE_ONLY_PRAGMA_WARNINGS));
     newCONSTSUB(stash, "PERLQQ", newSViv(ENCODE_PERLQQ));
     newCONSTSUB(stash, "HTMLCREF", newSViv(ENCODE_HTMLCREF));
     newCONSTSUB(stash, "XMLCREF", newSViv(ENCODE_XMLCREF));
diff --git a/cpan/Encode/Encode/encode.h b/cpan/Encode/Encode/encode.h
index df5554f1cb..8de56ebe21 100644
--- a/cpan/Encode/Encode/encode.h
+++ b/cpan/Encode/Encode/encode.h
@@ -99,6 +99,7 @@ extern void Encode_DefineEncoding(encode_t *enc);
 #define  ENCODE_WARN_ON_ERR    0x0002 /* warn on error; may proceed */
 #define  ENCODE_RETURN_ON_ERR  0x0004 /* immediately returns on NOREP */
 #define  ENCODE_LEAVE_SRC      0x0008 /* $src updated unless set */
+#define  ENCODE_ONLY_PRAGMA_WARNINGS 0x0010 /* when enabled report only warnings configured by pragma warnings, otherwise report all warnings; no effect without ENCODE_WARN_ON_ERR */
 #define  ENCODE_PERLQQ         0x0100 /* perlqq fallback string */
 #define  ENCODE_HTMLCREF       0x0200 /* HTML character ref. fb mode */
 #define  ENCODE_XMLCREF        0x0400 /* XML  character ref. fb mode */
@@ -112,4 +113,1233 @@ extern void Encode_DefineEncoding(encode_t *enc);
 #define  ENCODE_FB_HTMLCREF    (ENCODE_HTMLCREF|ENCODE_LEAVE_SRC)
 #define  ENCODE_FB_XMLCREF     (ENCODE_XMLCREF|ENCODE_LEAVE_SRC)
 
+#define encode_ckWARN(c, w) ((c & ENCODE_WARN_ON_ERR)                         \
+                        && (!(c & ENCODE_ONLY_PRAGMA_WARNINGS) || ckWARN(w)))
+
+#ifdef UTF8SKIP
+#  ifdef EBCDIC   /* The value on early perls is wrong */
+#    undef UTF8_MAXBYTES 
+#    define UTF8_MAXBYTES 14
+#  endif
+#  ifndef UNLIKELY
+#    define UNLIKELY(x) (x)
+#  endif
+#  ifndef LIKELY
+#    define LIKELY(x) (x)
+#  endif
+
+/* EBCDIC requires a later perl to work, so the next two definitions are for
+ * ASCII machines only */
+#  ifndef NATIVE_UTF8_TO_I8
+#    define NATIVE_UTF8_TO_I8(x) (x)
+#  endif
+#  ifndef I8_TO_NATIVE_UTF8
+#    define I8_TO_NATIVE_UTF8(x)  (x)
+#  endif
+#  ifndef OFFUNISKIP
+#    define OFFUNISKIP(x)  UNISKIP(x)
+#  endif
+#  ifndef uvoffuni_to_utf8_flags
+#    define uvoffuni_to_utf8_flags(a,b,c) uvuni_to_utf8_flags(a,b,c)
+#  endif
+#  ifndef WARN_SURROGATE    /* Use the overarching category if these
+                               subcategories are missing */
+#    define WARN_SURROGATE WARN_UTF8
+#    define WARN_NONCHAR WARN_UTF8
+#    define WARN_NON_UNICODE WARN_UTF8
+     /* If there's only one possible category, then packing is a no-op */
+#    define encode_ckWARN_packed(c, w) encode_ckWARN(c, w)
+#  else
+#    define encode_ckWARN_packed(c, w)                                      \
+            ((c & ENCODE_WARN_ON_ERR)                                       \
+        && (!(c & ENCODE_ONLY_PRAGMA_WARNINGS) || Perl_ckwarn(aTHX_ w)))
+#  endif
+
+/* All these formats take a single UV code point argument */
+static const char surrogate_cp_format[] = "UTF-16 surrogate U+%04" UVXf;
+static const char nonchar_cp_format[]   = "Unicode non-character U+%04" UVXf
+                                   " is not recommended for open interchange";
+static const char super_cp_format[]     = "Code point 0x%" UVXf " is not Unicode,"
+                                   " may not be portable";
+
+/* If the perl doesn't have the 5.28 functions, this file includes
+ * stripped-down versions of them but containing enough functionality to be
+ * suitable for Encode's needs.  Many of the comments have been removed.  But
+ * you can inspect the 5.28 source if you get stuck.
+ *
+ * These could be put in Devel::PPPort, but Encode is likely the only user */
+
+#if    (defined(IN_ENCODE_XS) || defined(IN_UNICODE_XS))                     \
+  && (! defined(utf8n_to_uvchr_msgs) && ! defined(uvchr_to_utf8_flags_msgs))
+
+#  ifndef hv_stores
+#    define hv_stores(hv, key, val) hv_store((hv), ("" key ""), (sizeof(key)-1), (val), 0)
+#  endif
+
+static HV *
+S_new_msg_hv(const char * const message, /* The message text */
+                   U32 categories)  /* Packed warning categories */
+{
+    /* Creates, populates, and returns an HV* that describes an error message
+     * for the translators between UTF8 and code point */
+
+    dTHX;
+    SV* msg_sv = newSVpv(message, 0);
+    SV* category_sv = newSVuv(categories);
+
+    HV* msg_hv = newHV();
+
+    (void) hv_stores(msg_hv, "text", msg_sv);
+    (void) hv_stores(msg_hv, "warn_categories",  category_sv);
+
+    return msg_hv;
+}
+
+#endif
+
+#if ! defined(utf8n_to_uvchr_msgs)                      \
+  && (defined(IN_ENCODE_XS) || defined(IN_UNICODE_XS))
+
+#  undef utf8n_to_uvchr     /* Don't use an earlier version: use the version
+                               defined in this file */
+#  define utf8n_to_uvchr(a,b,c,d) utf8n_to_uvchr_msgs(a, b, c, d, 0, NULL)
+
+#  undef UTF8_IS_START      /* Early perls wrongly accepted C0 and C1 */
+#  define UTF8_IS_START(c)  (((U8)(c)) >= 0xc2)
+#  ifndef isUTF8_POSSIBLY_PROBLEMATIC
+#    ifdef EBCDIC
+#      define isUTF8_POSSIBLY_PROBLEMATIC(c) ((U8) c > ' ')
+#    else
+#      define isUTF8_POSSIBLY_PROBLEMATIC(c) ((U8) c >= 0xED)
+#    endif
+#  endif
+#  ifndef UTF8_ALLOW_OVERFLOW
+#    define UTF8_ALLOW_OVERFLOW (1U<<31)    /* Choose highest bit to avoid
+                                               potential conflicts */
+#    define UTF8_GOT_OVERFLOW           UTF8_ALLOW_OVERFLOW
+#  endif
+#  undef UTF8_ALLOW_ANY     /* Early perl definitions don't work properly with
+                               the code in this file */
+#  define UTF8_ALLOW_ANY ( UTF8_ALLOW_CONTINUATION                              \
+                          |UTF8_ALLOW_NON_CONTINUATION                          \
+                          |UTF8_ALLOW_SHORT                                     \
+                          |UTF8_ALLOW_LONG                                      \
+                          |UTF8_ALLOW_OVERFLOW)
+
+/* The meanings of these were complemented at some point, but the functions
+ * bundled in this file use the complemented meanings */
+#  ifndef UTF8_DISALLOW_SURROGATE
+#    define UTF8_DISALLOW_SURROGATE     UTF8_ALLOW_SURROGATE
+#    define UTF8_DISALLOW_NONCHAR       UTF8_ALLOW_FFFF
+#    define UTF8_DISALLOW_SUPER         UTF8_ALLOW_FE_FF
+
+     /* In the stripped-down implementation in this file, disallowing is not
+      * independent of warning */
+#    define UTF8_WARN_SURROGATE     UTF8_DISALLOW_SURROGATE
+#    define UTF8_WARN_NONCHAR       UTF8_DISALLOW_NONCHAR
+#    define UTF8_WARN_SUPER         UTF8_DISALLOW_SUPER
+#  endif
+#  ifndef UTF8_DISALLOW_ILLEGAL_INTERCHANGE
+#    define UTF8_DISALLOW_ILLEGAL_INTERCHANGE                                   \
+     (UTF8_DISALLOW_SUPER|UTF8_DISALLOW_SURROGATE|UTF8_DISALLOW_NONCHAR)
+#  endif
+#  ifndef UTF8_WARN_ILLEGAL_INTERCHANGE
+#    define UTF8_WARN_ILLEGAL_INTERCHANGE                                       \
+         (UTF8_WARN_SUPER|UTF8_WARN_SURROGATE|UTF8_WARN_NONCHAR)
+#  endif
+#  ifndef FIRST_START_BYTE_THAT_IS_DEFINITELY_SUPER
+#    ifdef EBCDIC   /* On EBCDIC, these are actually I8 bytes */
+#      define FIRST_START_BYTE_THAT_IS_DEFINITELY_SUPER  0xFA
+#      define IS_UTF8_2_BYTE_SUPER(s0, s1) ((s0) == 0xF9 && (s1) >= 0xA2)
+
+#      define IS_UTF8_2_BYTE_SURROGATE(s0, s1)     ((s0) == 0xF1            \
+                                              && ((s1) & 0xFE ) == 0xB6)
+#    else
+#      define FIRST_START_BYTE_THAT_IS_DEFINITELY_SUPER  0xF5
+#      define IS_UTF8_2_BYTE_SUPER(s0, s1)       ((s0) == 0xF4 && (s1) >= 0x90)
+#      define IS_UTF8_2_BYTE_SURROGATE(s0, s1)   ((s0) == 0xED && (s1) >= 0xA0)
+#    endif
+#    if defined(UV_IS_QUAD) /* These assume IV_MAX is 2**63-1 */
+#      ifdef EBCDIC     /* Actually is I8 */
+#       define HIGHEST_REPRESENTABLE_UTF8                                       \
+                "\xFF\xA7\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF"
+#      else
+#       define HIGHEST_REPRESENTABLE_UTF8                                       \
+                "\xFF\x80\x87\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF"
+#      endif
+#    endif
+#  endif
+
+#  ifndef Newx
+#    define Newx(v,n,t) New(0,v,n,t)
+#  endif
+
+#  ifndef PERL_UNUSED_ARG
+#    define PERL_UNUSED_ARG(x) ((void)x)
+#  endif
+
+static const char malformed_text[] = "Malformed UTF-8 character";
+
+static char *
+_byte_dump_string(const U8 * const start, const STRLEN len)
+{
+    /* Returns a mortalized C string that is a displayable copy of the 'len' */
+
+    const STRLEN output_len = 4 * len + 1;  /* 4 bytes per each input, plus a
+                                               trailing NUL */
+    const U8 * s = start;
+    const U8 * const e = start + len;
+    char * output;
+    char * d;
+    dTHX;
+
+    Newx(output, output_len, char);
+    SAVEFREEPV(output);
+
+    d = output;
+    for (s = start; s < e; s++) {
+        const unsigned high_nibble = (*s & 0xF0) >> 4;
+        const unsigned low_nibble =  (*s & 0x0F);
+
+        *d++ = '\\';
+        *d++ = 'x';
+
+        if (high_nibble < 10) {
+            *d++ = high_nibble + '0';
+        }
+        else {
+            *d++ = high_nibble - 10 + 'a';
+        }
+
+        if (low_nibble < 10) {
+            *d++ = low_nibble + '0';
+        }
+        else {
+            *d++ = low_nibble - 10 + 'a';
+        }
+    }
+
+    *d = '\0';
+    return output;
+}
+
+static char *
+S_unexpected_non_continuation_text(const U8 * const s,
+
+                                         /* Max number of bytes to print */
+                                         STRLEN print_len,
+
+                                         /* Which one is the non-continuation */
+                                         const STRLEN non_cont_byte_pos,
+
+                                         /* How many bytes should there be? */
+                                         const STRLEN expect_len)
+{
+    /* Return the malformation warning text for an unexpected continuation
+     * byte. */
+
+    dTHX;
+    const char * const where = (non_cont_byte_pos == 1)
+                               ? "immediately"
+                               : Perl_form(aTHX_ "%d bytes",
+                                                 (int) non_cont_byte_pos);
+    const U8 * x = s + non_cont_byte_pos;
+    const U8 * e = s + print_len;
+
+    /* We don't need to pass this parameter, but since it has already been
+     * calculated, it's likely faster to pass it; verify under DEBUGGING */
+    assert(expect_len == UTF8SKIP(s));
+
+    /* As a defensive coding measure, don't output anything past a NUL.  Such
+     * bytes shouldn't be in the middle of a malformation, and could mark the
+     * end of the allocated string, and what comes after is undefined */
+    for (; x < e; x++) {
+        if (*x == '\0') {
+            x++;            /* Output this particular NUL */
+            break;
+        }
+    }
+
+    return Perl_form(aTHX_ "%s: %s (unexpected non-continuation byte 0x%02x,"
+                           " %s after start byte 0x%02x; need %d bytes, got %d)",
+                           malformed_text,
+                           _byte_dump_string(s, x - s),
+                           *(s + non_cont_byte_pos),
+                           where,
+                           *s,
+                           (int) expect_len,
+                           (int) non_cont_byte_pos);
+}
+
+static int
+S_does_utf8_overflow(const U8 * const s,
+                       const U8 * e,
+                       const bool consider_overlongs)
+{
+    /* Returns an int indicating whether or not the UTF-8 sequence from 's' to
+     * 'e' - 1 would overflow an IV on this platform. */
+
+#  if ! defined(UV_IS_QUAD)
+
+    const STRLEN len = e - s;
+    int is_overlong;
+
+    assert(s <= e && s + UTF8SKIP(s) >= e);
+    assert(! UTF8_IS_INVARIANT(*s) && e > s);
+
+#    ifdef EBCDIC
+
+    PERL_UNUSED_ARG(consider_overlongs);
+
+    if (*s != 0xFE) {
+        return 0;
+    }
+
+    if (len == 1) {
+        return -1;
+    }
+
+#    else
+
+    if (LIKELY(*s < 0xFE)) {
+        return 0;
+    }
+
+    if (! consider_overlongs) {
+        return 1;
+    }
+
+    if (len == 1) {
+        return -1;
+    }
+
+    is_overlong = S_is_utf8_overlong_given_start_byte_ok(s, len);
+
+    if (is_overlong == 0) {
+        return 1;
+    }
+
+    if (is_overlong < 0) {
+        return -1;
+    }
+
+    if (*s == 0xFE) {
+        return 0;
+    }
+
+#    endif
+
+    /* Here, ASCII and EBCDIC rejoin:
+    *  On ASCII:   We have an overlong sequence starting with FF
+    *  On EBCDIC:  We have a sequence starting with FE. */
+
+    {   /* For C89, use a block so the declaration can be close to its use */
+
+#    ifdef EBCDIC
+        const U8 conts_for_highest_30_bit[] = "\x41\x41\x41\x41\x41\x41\x42";
+#    else
+        const U8 conts_for_highest_30_bit[] = "\x80\x80\x80\x80\x80\x80\x81";
+#    endif
+        const STRLEN conts_len = sizeof(conts_for_highest_30_bit) - 1;
+        const STRLEN cmp_len = MIN(conts_len, len - 1);
+
+        if (cmp_len >= conts_len || memNE(s + 1,
+                                          conts_for_highest_30_bit,
+                                          cmp_len))
+        {
+            return memGT(s + 1, conts_for_highest_30_bit, cmp_len);
+        }
+
+        return -1;
+    }
+
+#  else /* Below is 64-bit word */
+
+    PERL_UNUSED_ARG(consider_overlongs);
+
+    {
+        const STRLEN len = e - s;
+        const U8 *x;
+        const U8 * y = (const U8 *) HIGHEST_REPRESENTABLE_UTF8;
+
+        for (x = s; x < e; x++, y++) {
+
+            if (UNLIKELY(NATIVE_UTF8_TO_I8(*x) == *y)) {
+                continue;
+            }
+            return NATIVE_UTF8_TO_I8(*x) > *y;
+        }
+
+        if (len < sizeof(HIGHEST_REPRESENTABLE_UTF8) - 1) {
+            return -1;
+        }
+
+        return 0;
+    }
+
+#  endif
+
+}
+
+static int
+S_isFF_OVERLONG(const U8 * const s, const STRLEN len);
+
+static int
+S_is_utf8_overlong_given_start_byte_ok(const U8 * const s, const STRLEN len)
+{
+    const U8 s0 = NATIVE_UTF8_TO_I8(s[0]);
+    const U8 s1 = NATIVE_UTF8_TO_I8(s[1]);
+
+    assert(len > 1 && UTF8_IS_START(*s));
+
+#         ifdef EBCDIC
+#             define F0_ABOVE_OVERLONG 0xB0
+#             define F8_ABOVE_OVERLONG 0xA8
+#             define FC_ABOVE_OVERLONG 0xA4
+#             define FE_ABOVE_OVERLONG 0xA2
+#             define FF_OVERLONG_PREFIX "\xfe\x41\x41\x41\x41\x41\x41\x41"
+#         else
+
+    if (s0 == 0xE0 && UNLIKELY(s1 < 0xA0)) {
+        return 1;
+    }
+
+#             define F0_ABOVE_OVERLONG 0x90
+#             define F8_ABOVE_OVERLONG 0x88
+#             define FC_ABOVE_OVERLONG 0x84
+#             define FE_ABOVE_OVERLONG 0x82
+#             define FF_OVERLONG_PREFIX "\xff\x80\x80\x80\x80\x80\x80"
+#         endif
+
+    if (   (s0 == 0xF0 && UNLIKELY(s1 < F0_ABOVE_OVERLONG))
+        || (s0 == 0xF8 && UNLIKELY(s1 < F8_ABOVE_OVERLONG))
+        || (s0 == 0xFC && UNLIKELY(s1 < FC_ABOVE_OVERLONG))
+        || (s0 == 0xFE && UNLIKELY(s1 < FE_ABOVE_OVERLONG)))
+    {
+        return 1;
+    }
+
+    /* Check for the FF overlong */
+    return S_isFF_OVERLONG(s, len);
+}
+
+int
+S_isFF_OVERLONG(const U8 * const s, const STRLEN len)
+{
+    if (LIKELY(memNE(s, FF_OVERLONG_PREFIX,
+                     MIN(len, sizeof(FF_OVERLONG_PREFIX) - 1))))
+    {
+        return 0;
+    }
+
+    if (len >= sizeof(FF_OVERLONG_PREFIX) - 1) {
+        return 1;
+    }
+
+    return -1;
+}
+
+#  ifndef UTF8_GOT_CONTINUATION
+#    define UTF8_GOT_CONTINUATION       UTF8_ALLOW_CONTINUATION
+#    define UTF8_GOT_EMPTY              UTF8_ALLOW_EMPTY
+#    define UTF8_GOT_LONG               UTF8_ALLOW_LONG
+#    define UTF8_GOT_NON_CONTINUATION   UTF8_ALLOW_NON_CONTINUATION
+#    define UTF8_GOT_SHORT              UTF8_ALLOW_SHORT
+#    define UTF8_GOT_SURROGATE          UTF8_DISALLOW_SURROGATE
+#    define UTF8_GOT_NONCHAR            UTF8_DISALLOW_NONCHAR
+#    define UTF8_GOT_SUPER              UTF8_DISALLOW_SUPER
+#  endif
+
+#  ifndef UNICODE_IS_SUPER
+#    define UNICODE_IS_SUPER(uv)    ((UV) (uv) > PERL_UNICODE_MAX)
+#  endif
+#  ifndef UNICODE_IS_32_CONTIGUOUS_NONCHARS
+#    define UNICODE_IS_32_CONTIGUOUS_NONCHARS(uv)      ((UV) (uv) >= 0xFDD0   \
+                                                   && (UV) (uv) <= 0xFDEF)
+#  endif
+#  ifndef UNICODE_IS_END_PLANE_NONCHAR_GIVEN_NOT_SUPER
+#    define UNICODE_IS_END_PLANE_NONCHAR_GIVEN_NOT_SUPER(uv)                  \
+                                          (((UV) (uv) & 0xFFFE) == 0xFFFE)
+#  endif
+#  ifndef is_NONCHAR_utf8_safe
+#    define is_NONCHAR_utf8_safe(s,e)     /*** GENERATED CODE ***/            \
+( ( ( LIKELY((e) > (s)) ) && ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ) ? ( ( 0xEF == ((const U8*)s)[0] ) ?\
+	    ( ( 0xB7 == ((const U8*)s)[1] ) ?                               \
+		( ( 0x90 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0xAF ) ? 3 : 0 )\
+	    : ( ( 0xBF == ((const U8*)s)[1] ) && ( ( ((const U8*)s)[2] & 0xFE ) == 0xBE ) ) ? 3 : 0 )\
+	: ( 0xF0 == ((const U8*)s)[0] ) ?                                   \
+	    ( ( ( ( ((const U8*)s)[1] == 0x9F || ( ( ((const U8*)s)[1] & 0xEF ) == 0xAF ) ) && ( 0xBF == ((const U8*)s)[2] ) ) && ( ( ((const U8*)s)[3] & 0xFE ) == 0xBE ) ) ? 4 : 0 )\
+	: ( 0xF1 <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xF3 ) ?      \
+	    ( ( ( ( ( ((const U8*)s)[1] & 0xCF ) == 0x8F ) && ( 0xBF == ((const U8*)s)[2] ) ) && ( ( ((const U8*)s)[3] & 0xFE ) == 0xBE ) ) ? 4 : 0 )\
+	: ( ( ( ( 0xF4 == ((const U8*)s)[0] ) && ( 0x8F == ((const U8*)s)[1] ) ) && ( 0xBF == ((const U8*)s)[2] ) ) && ( ( ((const U8*)s)[3] & 0xFE ) == 0xBE ) ) ? 4 : 0 ) : 0 )
+#  endif
+
+#  ifndef UTF8_IS_NONCHAR
+#    define UTF8_IS_NONCHAR(s, e) (is_NONCHAR_utf8_safe(s,e) > 0)
+#  endif
+#  ifndef UNICODE_IS_NONCHAR
+#    define UNICODE_IS_NONCHAR(uv)                                    \
+    (   UNICODE_IS_32_CONTIGUOUS_NONCHARS(uv)                       \
+     || (   LIKELY( ! UNICODE_IS_SUPER(uv))                         \
+         && UNICODE_IS_END_PLANE_NONCHAR_GIVEN_NOT_SUPER(uv)))
+#  endif
+
+#  ifndef UTF8_MAXBYTES
+#    define UTF8_MAXBYTES UTF8_MAXLEN
+#  endif
+
+static UV
+utf8n_to_uvchr_msgs(const U8 *s,
+                    STRLEN curlen,
+                    STRLEN *retlen,
+                    const U32 flags,
+                    U32 * errors,
+                    AV ** msgs)
+{
+    const U8 * const s0 = s;
+    const U8 * send = NULL;
+    U32 possible_problems = 0;
+    UV uv = *s;
+    STRLEN expectlen   = 0;
+    U8 * adjusted_s0 = (U8 *) s0;
+    U8 temp_char_buf[UTF8_MAXBYTES + 1];
+    UV uv_so_far = 0;
+    dTHX;
+
+    assert(errors == NULL); /* This functionality has been stripped */
+
+    if (UNLIKELY(curlen == 0)) {
+        possible_problems |= UTF8_GOT_EMPTY;
+        curlen = 0;
+        uv = UNICODE_REPLACEMENT;
+	goto ready_to_handle_errors;
+    }
+
+    expectlen = UTF8SKIP(s);
+
+    if (retlen) {
+	*retlen = expectlen;
+    }
+
+    if (UTF8_IS_INVARIANT(uv)) {
+	return uv;
+    }
+
+    if (UNLIKELY(UTF8_IS_CONTINUATION(uv))) {
+	possible_problems |= UTF8_GOT_CONTINUATION;
+        curlen = 1;
+        uv = UNICODE_REPLACEMENT;
+	goto ready_to_handle_errors;
+    }
+
+    uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
+
+    send = (U8*) s0;
+    if (UNLIKELY(curlen < expectlen)) {
+        possible_problems |= UTF8_GOT_SHORT;
+        send += curlen;
+    }
+    else {
+        send += expectlen;
+    }
+
+    for (s = s0 + 1; s < send; s++) {
+	if (LIKELY(UTF8_IS_CONTINUATION(*s))) {
+	    uv = UTF8_ACCUMULATE(uv, *s);
+            continue;
+        }
+
+        possible_problems |= UTF8_GOT_NON_CONTINUATION;
+        break;
+    } /* End of loop through the character's bytes */
+
+    curlen = s - s0;
+
+#     define UTF8_GOT_TOO_SHORT (UTF8_GOT_SHORT|UTF8_GOT_NON_CONTINUATION)
+
+    if (UNLIKELY(possible_problems & UTF8_GOT_TOO_SHORT)) {
+        uv_so_far = uv;
+        uv = UNICODE_REPLACEMENT;
+    }
+
+    if (UNLIKELY(0 < S_does_utf8_overflow(s0, s, 1))) {
+        possible_problems |= UTF8_GOT_OVERFLOW;
+        uv = UNICODE_REPLACEMENT;
+    }
+
+    if (     (   LIKELY(! possible_problems)
+              && UNLIKELY(expectlen > (STRLEN) OFFUNISKIP(uv)))
+        || (       UNLIKELY(possible_problems)
+            && (   UNLIKELY(! UTF8_IS_START(*s0))
+                || (   curlen > 1
+                    && UNLIKELY(0 < S_is_utf8_overlong_given_start_byte_ok(s0,
+                                                                s - s0))))))
+    {
+        possible_problems |= UTF8_GOT_LONG;
+
+        if (   UNLIKELY(   possible_problems & UTF8_GOT_TOO_SHORT)
+            &&   LIKELY(! (possible_problems & UTF8_GOT_OVERFLOW)))
+        {
+            UV min_uv = uv_so_far;
+            STRLEN i;
+
+            for (i = curlen; i < expectlen; i++) {
+                min_uv = UTF8_ACCUMULATE(min_uv,
+                                     I8_TO_NATIVE_UTF8(UTF_CONTINUATION_MARK));
+            }
+
+            adjusted_s0 = temp_char_buf;
+            (void) uvoffuni_to_utf8_flags(adjusted_s0, min_uv, 0);
+        }
+    }
+
+    /* Here, we have found all the possible problems, except for when the input
+     * is for a problematic code point not allowed by the input parameters. */
+
+                                /* uv is valid for overlongs */
+    if (   (   (      LIKELY(! (possible_problems & ~UTF8_GOT_LONG))
+                   && uv >= UNICODE_SURROGATE_FIRST)
+            || (   UNLIKELY(possible_problems)
+                && isUTF8_POSSIBLY_PROBLEMATIC(*adjusted_s0)))
+	&& ((flags & ( UTF8_DISALLOW_NONCHAR
+                      |UTF8_DISALLOW_SURROGATE
+                      |UTF8_DISALLOW_SUPER))))
+    {
+        if (LIKELY(! (possible_problems & ~UTF8_GOT_LONG))) {
+            if (UNLIKELY(UNICODE_IS_SURROGATE(uv))) {
+                possible_problems |= UTF8_GOT_SURROGATE;
+            }
+            else if (UNLIKELY(uv > PERL_UNICODE_MAX)) {
+                possible_problems |= UTF8_GOT_SUPER;
+            }
+            else if (UNLIKELY(UNICODE_IS_NONCHAR(uv))) {
+                possible_problems |= UTF8_GOT_NONCHAR;
+            }
+        }
+        else {
+            if (UNLIKELY(NATIVE_UTF8_TO_I8(*adjusted_s0)
+                                >= FIRST_START_BYTE_THAT_IS_DEFINITELY_SUPER))
+            {
+                possible_problems |= UTF8_GOT_SUPER;
+            }
+            else if (curlen > 1) {
+                if (UNLIKELY(IS_UTF8_2_BYTE_SUPER(
+                                      NATIVE_UTF8_TO_I8(*adjusted_s0),
+                                      NATIVE_UTF8_TO_I8(*(adjusted_s0 + 1)))))
+                {
+                    possible_problems |= UTF8_GOT_SUPER;
+                }
+                else if (UNLIKELY(IS_UTF8_2_BYTE_SURROGATE(
+                                      NATIVE_UTF8_TO_I8(*adjusted_s0),
+                                      NATIVE_UTF8_TO_I8(*(adjusted_s0 + 1)))))
+                {
+                    possible_problems |= UTF8_GOT_SURROGATE;
+                }
+            }
+        }
+    }
+
+  ready_to_handle_errors:
+
+    if (UNLIKELY(possible_problems)) {
+        bool disallowed = FALSE;
+        const U32 orig_problems = possible_problems;
+
+        if (msgs) {
+            *msgs = NULL;
+        }
+
+        while (possible_problems) { /* Handle each possible problem */
+            UV pack_warn = 0;
+            char * message = NULL;
+            U32 this_flag_bit = 0;
+
+            /* Each 'if' clause handles one problem.  They are ordered so that
+             * the first ones' messages will be displayed before the later
+             * ones; this is kinda in decreasing severity order.  But the
+             * overlong must come last, as it changes 'uv' looked at by the
+             * others */
+            if (possible_problems & UTF8_GOT_OVERFLOW) {
+
+                /* Overflow means also got a super; we handle both here */
+                possible_problems
+                  &= ~(UTF8_GOT_OVERFLOW|UTF8_GOT_SUPER);
+
+                /* Disallow if any of the categories say to */
+                if ( ! (flags &  UTF8_ALLOW_OVERFLOW)
+                    || (flags &  UTF8_DISALLOW_SUPER))
+                {
+                    disallowed = TRUE;
+                }
+
+                /* Likewise, warn if any say to */
+                if (  ! (flags & UTF8_ALLOW_OVERFLOW)) {
+
+                    /* The warnings code explicitly says it doesn't handle the
+                     * case of packWARN2 and two categories which have
+                     * parent-child relationship.  Even if it works now to
+                     * raise the warning if either is enabled, it wouldn't
+                     * necessarily do so in the future.  We output (only) the
+                     * most dire warning */
+                    if (! (flags & UTF8_CHECK_ONLY)) {
+                        if (msgs || ckWARN_d(WARN_UTF8)) {
+                            pack_warn = packWARN(WARN_UTF8);
+                        }
+                        else if (msgs || ckWARN_d(WARN_NON_UNICODE)) {
+                            pack_warn = packWARN(WARN_NON_UNICODE);
+                        }
+                        if (pack_warn) {
+                            message = Perl_form(aTHX_ "%s: %s (overflows)",
+                                            malformed_text,
+                                            _byte_dump_string(s0, curlen));
+                            this_flag_bit = UTF8_GOT_OVERFLOW;
+                        }
+                    }
+                }
+            }
+            else if (possible_problems & UTF8_GOT_EMPTY) {
+                possible_problems &= ~UTF8_GOT_EMPTY;
+
+                if (! (flags & UTF8_ALLOW_EMPTY)) {
+                    disallowed = TRUE;
+                    if (  (msgs
+                        || ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY))
+                    {
+                        pack_warn = packWARN(WARN_UTF8);
+                        message = Perl_form(aTHX_ "%s (empty string)",
+                                                   malformed_text);
+                        this_flag_bit = UTF8_GOT_EMPTY;
+                    }
+                }
+            }
+            else if (possible_problems & UTF8_GOT_CONTINUATION) {
+                possible_problems &= ~UTF8_GOT_CONTINUATION;
+
+                if (! (flags & UTF8_ALLOW_CONTINUATION)) {
+                    disallowed = TRUE;
+                    if ((   msgs
+                         || ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY))
+                    {
+                        pack_warn = packWARN(WARN_UTF8);
+                        message = Perl_form(aTHX_
+                                "%s: %s (unexpected continuation byte 0x%02x,"
+                                " with no preceding start byte)",
+                                malformed_text,
+                                _byte_dump_string(s0, 1), *s0);
+                        this_flag_bit = UTF8_GOT_CONTINUATION;
+                    }
+                }
+            }
+            else if (possible_problems & UTF8_GOT_SHORT) {
+                possible_problems &= ~UTF8_GOT_SHORT;
+
+                if (! (flags & UTF8_ALLOW_SHORT)) {
+                    disallowed = TRUE;
+                    if ((   msgs
+                         || ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY))
+                    {
+                        pack_warn = packWARN(WARN_UTF8);
+                        message = Perl_form(aTHX_
+                             "%s: %s (too short; %d byte%s available, need %d)",
+                             malformed_text,
+                             _byte_dump_string(s0, send - s0),
+                             (int)curlen,
+                             curlen == 1 ? "" : "s",
+                             (int)expectlen);
+                        this_flag_bit = UTF8_GOT_SHORT;
+                    }
+                }
+
+            }
+            else if (possible_problems & UTF8_GOT_NON_CONTINUATION) {
+                possible_problems &= ~UTF8_GOT_NON_CONTINUATION;
+
+                if (! (flags & UTF8_ALLOW_NON_CONTINUATION)) {
+                    disallowed = TRUE;
+                    if ((   msgs
+                         || ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY))
+                    {
+                        int printlen = s - s0;
+                        pack_warn = packWARN(WARN_UTF8);
+                        message = Perl_form(aTHX_ "%s",
+                            S_unexpected_non_continuation_text(s0,
+                                                            printlen,
+                                                            s - s0,
+                                                            (int) expectlen));
+                        this_flag_bit = UTF8_GOT_NON_CONTINUATION;
+                    }
+                }
+            }
+            else if (possible_problems & UTF8_GOT_SURROGATE) {
+                possible_problems &= ~UTF8_GOT_SURROGATE;
+
+                if (flags & UTF8_WARN_SURROGATE) {
+
+                    if (   ! (flags & UTF8_CHECK_ONLY)
+                        && (msgs || ckWARN_d(WARN_SURROGATE)))
+                    {
+                        pack_warn = packWARN(WARN_SURROGATE);
+
+                        /* These are the only errors that can occur with a
+                        * surrogate when the 'uv' isn't valid */
+                        if (orig_problems & UTF8_GOT_TOO_SHORT) {
+                            message = Perl_form(aTHX_
+                                    "UTF-16 surrogate (any UTF-8 sequence that"
+                                    " starts with \"%s\" is for a surrogate)",
+                                    _byte_dump_string(s0, curlen));
+                        }
+                        else {
+                            message = Perl_form(aTHX_ surrogate_cp_format, uv);
+                        }
+                        this_flag_bit = UTF8_GOT_SURROGATE;
+                    }
+                }
+
+                if (flags & UTF8_DISALLOW_SURROGATE) {
+                    disallowed = TRUE;
+                }
+            }
+            else if (possible_problems & UTF8_GOT_SUPER) {
+                possible_problems &= ~UTF8_GOT_SUPER;
+
+                if (flags & UTF8_WARN_SUPER) {
+
+                    if (   ! (flags & UTF8_CHECK_ONLY)
+                        && (msgs || ckWARN_d(WARN_NON_UNICODE)))
+                    {
+                        pack_warn = packWARN(WARN_NON_UNICODE);
+
+                        if (orig_problems & UTF8_GOT_TOO_SHORT) {
+                            message = Perl_form(aTHX_
+                                    "Any UTF-8 sequence that starts with"
+                                    " \"%s\" is for a non-Unicode code point,"
+                                    " may not be portable",
+                                    _byte_dump_string(s0, curlen));
+                        }
+                        else {
+                            message = Perl_form(aTHX_ super_cp_format, uv);
+                        }
+                        this_flag_bit = UTF8_GOT_SUPER;
+                    }
+                }
+
+                if (flags & UTF8_DISALLOW_SUPER) {
+                    disallowed = TRUE;
+                }
+            }
+            else if (possible_problems & UTF8_GOT_NONCHAR) {
+                possible_problems &= ~UTF8_GOT_NONCHAR;
+
+                if (flags & UTF8_WARN_NONCHAR) {
+
+                    if (  ! (flags & UTF8_CHECK_ONLY)
+                        && (msgs || ckWARN_d(WARN_NONCHAR)))
+                    {
+                        /* The code above should have guaranteed that we don't
+                         * get here with errors other than overlong */
+                        assert (! (orig_problems
+                                        & ~(UTF8_GOT_LONG|UTF8_GOT_NONCHAR)));
+
+                        pack_warn = packWARN(WARN_NONCHAR);
+                        message = Perl_form(aTHX_ nonchar_cp_format, uv);
+                        this_flag_bit = UTF8_GOT_NONCHAR;
+                    }
+                }
+
+                if (flags & UTF8_DISALLOW_NONCHAR) {
+                    disallowed = TRUE;
+                }
+            }
+            else if (possible_problems & UTF8_GOT_LONG) {
+                possible_problems &= ~UTF8_GOT_LONG;
+
+                if (flags & UTF8_ALLOW_LONG) {
+                    uv = UNICODE_REPLACEMENT;
+                }
+                else {
+                    disallowed = TRUE;
+
+                    if ((   msgs
+                         || ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY))
+                    {
+                        pack_warn = packWARN(WARN_UTF8);
+
+                        /* These error types cause 'uv' to be something that
+                         * isn't what was intended, so can't use it in the
+                         * message.  The other error types either can't
+                         * generate an overlong, or else the 'uv' is valid */
+                        if (orig_problems &
+                                        (UTF8_GOT_TOO_SHORT|UTF8_GOT_OVERFLOW))
+                        {
+                            message = Perl_form(aTHX_
+                                    "%s: %s (any UTF-8 sequence that starts"
+                                    " with \"%s\" is overlong which can and"
+                                    " should be represented with a"
+                                    " different, shorter sequence)",
+                                    malformed_text,
+                                    _byte_dump_string(s0, send - s0),
+                                    _byte_dump_string(s0, curlen));
+                        }
+                        else {
+                            U8 tmpbuf[UTF8_MAXBYTES+1];
+                            const U8 * const e = uvoffuni_to_utf8_flags(tmpbuf,
+                                                                        uv, 0);
+                            /* Don't use U+ for non-Unicode code points, which
+                             * includes those in the Latin1 range */
+                            const char * preface = (    uv > PERL_UNICODE_MAX
+#  ifdef EBCDIC
+                                                     || uv <= 0xFF
+#  endif
+                                                    )
+                                                   ? "0x"
+                                                   : "U+";
+                            message = Perl_form(aTHX_
+                                "%s: %s (overlong; instead use %s to represent"
+                                " %s%0*" UVXf ")",
+                                malformed_text,
+                                _byte_dump_string(s0, send - s0),
+                                _byte_dump_string(tmpbuf, e - tmpbuf),
+                                preface,
+                                ((uv < 256) ? 2 : 4), /* Field width of 2 for
+                                                         small code points */
+                                UNI_TO_NATIVE(uv));
+                        }
+                        this_flag_bit = UTF8_GOT_LONG;
+                    }
+                }
+            } /* End of looking through the possible flags */
+
+            /* Display the message (if any) for the problem being handled in
+             * this iteration of the loop */
+            if (message) {
+                if (msgs) {
+                    assert(this_flag_bit);
+
+                    if (*msgs == NULL) {
+                        *msgs = newAV();
+                    }
+
+                    av_push(*msgs, newRV_noinc((SV*) S_new_msg_hv(message,
+                                                                pack_warn)));
+                }
+                else if (PL_op)
+                    Perl_warner(aTHX_ pack_warn, "%s in %s", message,
+                                                 OP_DESC(PL_op));
+                else
+                    Perl_warner(aTHX_ pack_warn, "%s", message);
+            }
+        }   /* End of 'while (possible_problems)' */
+
+        if (retlen) {
+            *retlen = curlen;
+        }
+
+        if (disallowed) {
+            if (flags & UTF8_CHECK_ONLY && retlen) {
+                *retlen = ((STRLEN) -1);
+            }
+            return 0;
+        }
+    }
+
+    return UNI_TO_NATIVE(uv);
+}
+
+static STRLEN
+S_is_utf8_char_helper(const U8 * const s, const U8 * e, const U32 flags)
+{
+    STRLEN len;
+    const U8 *x;
+
+    assert(0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE));
+    assert(! UTF8_IS_INVARIANT(*s));
+
+    if (UNLIKELY(! UTF8_IS_START(*s))) {
+        return 0;
+    }
+
+    /* Examine a maximum of a single whole code point */
+    if (e - s > UTF8SKIP(s)) {
+        e = s + UTF8SKIP(s);
+    }
+
+    len = e - s;
+
+    if (flags && isUTF8_POSSIBLY_PROBLEMATIC(*s)) {
+        const U8 s0 = NATIVE_UTF8_TO_I8(s[0]);
+
+        if (  (flags & UTF8_DISALLOW_SUPER)
+            && UNLIKELY(s0 >= FIRST_START_BYTE_THAT_IS_DEFINITELY_SUPER))
+        {
+            return 0;           /* Above Unicode */
+        }
+
+        if (len > 1) {
+            const U8 s1 = NATIVE_UTF8_TO_I8(s[1]);
+
+            if (   (flags & UTF8_DISALLOW_SUPER)
+                &&  UNLIKELY(IS_UTF8_2_BYTE_SUPER(s0, s1)))
+            {
+                return 0;       /* Above Unicode */
+            }
+
+            if (   (flags & UTF8_DISALLOW_SURROGATE)
+                &&  UNLIKELY(IS_UTF8_2_BYTE_SURROGATE(s0, s1)))
+            {
+                return 0;       /* Surrogate */
+            }
+
+            if (  (flags & UTF8_DISALLOW_NONCHAR)
+                && UNLIKELY(UTF8_IS_NONCHAR(s, e)))
+            {
+                return 0;       /* Noncharacter code point */
+            }
+        }
+    }
+
+    for (x = s + 1; x < e; x++) {
+        if (UNLIKELY(! UTF8_IS_CONTINUATION(*x))) {
+            return 0;
+        }
+    }
+
+    if (len > 1 && S_is_utf8_overlong_given_start_byte_ok(s, len) > 0) {
+        return 0;
+    }
+
+    if (0 < S_does_utf8_overflow(s, e, 0)) {
+        return 0;
+    }
+
+    return UTF8SKIP(s);
+}
+
+#  undef is_utf8_valid_partial_char_flags
+
+static bool
+is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
+{
+
+    return S_is_utf8_char_helper(s, e, flags) > 0;
+}
+
+#  undef is_utf8_string_loc_flags
+
+static bool
+is_utf8_string_loc_flags(const U8 *s, STRLEN len, const U8 **ep, const U32 flags)
+{
+    const U8* send = s + len;
+
+    assert(0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE));
+
+    while (s < send) {
+        if (UTF8_IS_INVARIANT(*s)) {
+            s++;
+        }
+        else if (     UNLIKELY(send - s < UTF8SKIP(s))
+                 || ! S_is_utf8_char_helper(s, send, flags))
+        {
+            *ep = s;
+            return 0;
+        }
+        else {
+            s += UTF8SKIP(s);
+        }
+    }
+
+    *ep = send;
+
+    return 1;
+}
+
+#endif
+
+#if defined(IN_UNICODE_XS) && ! defined(uvchr_to_utf8_flags_msgs)
+
+#  define MY_SHIFT   UTF_ACCUMULATION_SHIFT
+#  define MY_MARK    UTF_CONTINUATION_MARK
+#  define MY_MASK    UTF_CONTINUATION_MASK
+
+static const char cp_above_legal_max[] =
+                        "Use of code point 0x%" UVXf " is not allowed; the"
+                        " permissible max is 0x%" UVXf;
+
+/* These two can be dummys, as they are not looked at by the function, which
+ * has hard-coded into it what flags it is expecting are */
+#  ifndef UNICODE_DISALLOW_ILLEGAL_INTERCHANGE
+#    define UNICODE_DISALLOW_ILLEGAL_INTERCHANGE 0
+#  endif
+#  ifndef UNICODE_WARN_ILLEGAL_INTERCHANGE
+#    define UNICODE_WARN_ILLEGAL_INTERCHANGE 0
+#  endif
+
+#  ifndef OFFUNI_IS_INVARIANT
+#    define OFFUNI_IS_INVARIANT(cp) UNI_IS_INVARIANT(cp)
+#  endif
+#  ifndef MAX_EXTERNALLY_LEGAL_CP
+#    define MAX_EXTERNALLY_LEGAL_CP ((UV) (IV_MAX))
+#  endif
+#  ifndef LATIN1_TO_NATIVE
+#    define LATIN1_TO_NATIVE(a) ASCII_TO_NATIVE(a)
+#  endif
+#  ifndef I8_TO_NATIVE_UTF8
+#    define I8_TO_NATIVE_UTF8(a) NATIVE_TO_UTF(a)
+#  endif
+#  ifndef MAX_UTF8_TWO_BYTE
+#    define MAX_UTF8_TWO_BYTE (32 * (1U << UTF_ACCUMULATION_SHIFT) - 1)
+#  endif
+#  ifndef UNICODE_IS_32_CONTIGUOUS_NONCHARS
+#    define UNICODE_IS_32_CONTIGUOUS_NONCHARS(uv)    ((UV) (uv) >= 0xFDD0   \
+                                                 && (UV) (uv) <= 0xFDEF)
+#  endif
+#  ifndef UNICODE_IS_END_PLANE_NONCHAR_GIVEN_NOT_SUPER
+#    define UNICODE_IS_END_PLANE_NONCHAR_GIVEN_NOT_SUPER(uv)                \
+                                          (((UV) (uv) & 0xFFFE) == 0xFFFE)
+#  endif
+#  ifndef UNICODE_IS_SUPER
+#    define UNICODE_IS_SUPER(uv)    ((UV) (uv) > PERL_UNICODE_MAX)
+#  endif
+#  ifndef OFFUNISKIP
+#    define OFFUNISKIP(cp)    UNISKIP(NATIVE_TO_UNI(cp))
+#  endif
+
+#  define HANDLE_UNICODE_SURROGATE(uv, flags, msgs)                 \
+    STMT_START {                                                    \
+        U32 category = packWARN(WARN_SURROGATE);                    \
+        const char * format = surrogate_cp_format;                  \
+        *msgs = S_new_msg_hv(Perl_form(aTHX_ format, uv),           \
+                                 category);                         \
+        return NULL;                                                \
+    } STMT_END;
+
+#  define HANDLE_UNICODE_NONCHAR(uv, flags, msgs)                   \
+    STMT_START {                                                    \
+        U32 category = packWARN(WARN_NONCHAR);                      \
+        const char * format = nonchar_cp_format;                    \
+        *msgs = S_new_msg_hv(Perl_form(aTHX_ format, uv),           \
+                                 category);                         \
+        return NULL;                                                \
+    } STMT_END;
+
+static U8 *
+uvchr_to_utf8_flags_msgs(U8 *d, UV uv, const UV flags, HV** msgs)
+{
+    dTHX;
+
+    assert(msgs);
+
+    PERL_UNUSED_ARG(flags);
+
+    uv = NATIVE_TO_UNI(uv);
+
... 661 lines suppressed ...

-- 
Perl5 Master Repository



nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About