develooper Front page | perl.cvs.parrot | Postings from January 2009

[svn:parrot] r35950 - in branches/strings/pseudocode: . t

From: simon
Date: January 24, 2009 00:21
Subject: [svn:parrot] r35950 - in branches/strings/pseudocode: . t
Message ID: 20090124082103.4D043CB9AE@x12.develooper.com
Author: simon
Date: Sat Jan 24 00:21:01 2009
New Revision: 35950

Modified:
   branches/strings/pseudocode/Charsets.pm
   branches/strings/pseudocode/Encodings.pm
   branches/strings/pseudocode/ParrotString.pm
   branches/strings/pseudocode/t/create.t

Log:
First stab at appending graphemes; we can now convert strings from UTF8 to ParrotNative with NFG.


Modified: branches/strings/pseudocode/Charsets.pm
==============================================================================
--- branches/strings/pseudocode/Charsets.pm	(original)
+++ branches/strings/pseudocode/Charsets.pm	Sat Jan 24 00:21:01 2009
@@ -22,7 +22,7 @@
         # want, but it's easier than that: the forms are hierarchical;
         # to go to NFC you have to go through NFD, and to go to NFG you
         # have to go through NFC. So...
-        if (!$str.normalization or $str.normalization !~~ Normalization::NFD) {
+        if (!$str.normalization or $str.normalization !~~ ParrotNormalization::NFD) {
             # Everyone starts in fully decomposed form (NFD)
             # This is code-heavy and we'll do it in ICU in Parrot.
             # Here, we just pretend we've already done it.

Modified: branches/strings/pseudocode/Encodings.pm
==============================================================================
--- branches/strings/pseudocode/Encodings.pm	(original)
+++ branches/strings/pseudocode/Encodings.pm	Sat Jan 24 00:21:01 2009
@@ -1,5 +1,6 @@
 class ParrotEncoding::Base::Fixed {
     our $.width;
+    method setup($str) { }
     method string_length($str) { return $str.strlen / $str.encoding.width; }
 
     method string_char_iterate($str, $callback, $parameter) {
@@ -25,6 +26,7 @@
 }
 
 class ParrotEncoding::Base::Variable {
+    method setup($str) { }
     method string_length($str) {
         # This code written funny to be a bit more C-like
         my $data = 0; 
@@ -109,6 +111,19 @@
 class ParrotEncoding::ParrotNative is ParrotEncoding::Base::Fixed {
     our $.width = 1;
 
+    method setup($str) { $str.normalization = ParrotNormalization::NFG.new(); }
+    method append_grapheme ($str, $g) {
+        my $item;
+        if (@($g) > 1) {
+            $item = $str.normalization.get_grapheme_table_entry(@($g));
+        } else {
+            ($item) = @($g);
+        }
+        $str.buffer.push($item);
+        $str.bufused++;
+        $str.strlen++;
+    }
+
     method string_char_iterate ($str, $callback, $parameter) {
         for (0..$str.bufused-1) { 
             my $grapheme = grapheme_at_index($str, $_);
@@ -131,7 +146,7 @@
         }
         my $c = $str.buffer[$index];
         if $c >= 0 { return [ $c ]; }
-        return $str.charset.normalization.grapheme_table.[-$c];
+        return $str.normalization.grapheme_table.[-$c];
         # We are allowed to be pally with the normalization internals
         # because NFG is specific to ParrotEncoding.
     }

Modified: branches/strings/pseudocode/ParrotString.pm
==============================================================================
--- branches/strings/pseudocode/ParrotString.pm	(original)
+++ branches/strings/pseudocode/ParrotString.pm	Sat Jan 24 00:21:01 2009
@@ -8,11 +8,12 @@
     has $.hashval is rw;
     has ParrotString::Encoding      $.encoding is rw;
     has ParrotString::Charset       $.charset  is rw;
-    has ParrotString::Normalization $.normalization is rw;
+    has $.normalization is rw;
 };
 
 use Charsets;
 use Encodings;
+use Normalizations;
 
 ## COW stuff
 sub Parrot_string_new_COW($src) { ... }
@@ -42,6 +43,7 @@
     my $news = ParrotString.new();
     $news.charset  = $charset;
     $news.encoding = $encoding;
+    $news.encoding.setup($news);
     $news.buffer   = map { ord $_ }, split("", $s);
     $news.bufused = $news.strlen = $len || $s.chars;
     return $news;
@@ -81,7 +83,7 @@
      if ($src.encoding ~~ $dst.encoding and $src.charset ~~ $dst.charset) {
         return Parrot_string_append($src, $dst);
      }
-     my $append_to = sub ($g, $dst) { $dst.encoding.append_grapheme($src, $g) };
+     my $append_to = sub ($g, $dst) { $dst.encoding.append_grapheme($dst, $g) };
      $src.encoding.string_grapheme_iterate($src, $append_to, $dst);
      return $src;
 }

Modified: branches/strings/pseudocode/t/create.t
==============================================================================
--- branches/strings/pseudocode/t/create.t	(original)
+++ branches/strings/pseudocode/t/create.t	Sat Jan 24 00:21:01 2009
@@ -1,6 +1,6 @@
 use Test;
 use ParrotString;
-plan 10;
+plan 11;
 
 my $str = Parrot_string_new_init("flurble", 4, ParrotCharset::ASCII, ParrotEncoding::Byte);
 ok($str.charset ~~ ParrotCharset::ASCII, "Charset set properly");
@@ -21,3 +21,4 @@
 $str = Parrot_string_new_init("ABC \xd0\xb8\xcc\x8f", 8, ParrotCharset::Unicode, ParrotEncoding::UTF8);
 my $str2 = Parrot_string_new_init("", 0, ParrotCharset::Unicode, ParrotEncoding::ParrotNative);
 Parrot_string_grapheme_copy($str, $str2);
+is(Parrot_string_grapheme_length($str2), 5, "Four UTF8 bytes = one grapheme");



nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About