develooper Front page | perl.cvs.parrot | Postings from January 2009

[svn:parrot] r35620 - branches/strings/pseudocode

From:
simon
Date:
January 16, 2009 03:34
Subject:
[svn:parrot] r35620 - branches/strings/pseudocode
Message ID:
20090116113303.453BDCB9AE@x12.develooper.com
Author: simon
Date: Fri Jan 16 03:33:02 2009
New Revision: 35620

Modified:
   branches/strings/pseudocode/Encodings.pm

Log:
Abstract fixed-width encodings into a base class.


Modified: branches/strings/pseudocode/Encodings.pm
==============================================================================
--- branches/strings/pseudocode/Encodings.pm	(original)
+++ branches/strings/pseudocode/Encodings.pm	Fri Jan 16 03:33:02 2009
@@ -1,3 +1,37 @@
+class ParrotEncoding::Base::Fixed {
+    our $.width;  # buffer units (as counted by bufused) per character; set by each subclass
+    method string_length($str) { return $str.bufused / $str.encoding.width; }
+
+    method string_char_iterate($str, $callback, $parameter) {
+        for (0..self.string_length($str)-1) { 
+            $callback(self.char_at_index($str,$_), $parameter); 
+        }
+    }
+
+    # We assume in the base case that grapheme == char, which is true for
+    # legacy, non-Unicode fixed-width formats. Unicode fixed-width
+    # formats that care about graphemes can override these two methods.
+   
+    method grapheme_at_index($str, $index) { 
+        return [ self.char_at_index($str, $index) ]; 
+    }
+    method string_grapheme_iterate($str, $callback, $parameter) {
+        for (0..self.string_length($str)-1) { 
+            $callback($str.encoding.grapheme_at_index($str,$_), $parameter); 
+        }
+    }
+}
+
+class ParrotEncoding::Base::Variable {
+    method string_length($str) {
+        # This code is written a little oddly so that it reads more like C.
+        my $data = 0; 
+        my $callback = sub ($char, $data is rw) { $data++ };  # adds one per character visited
+        $str.encoding.string_char_iterate($str, $callback, $data);
+        return $data;
+    }
+}
+
 class ParrotEncoding::UTF8   {  
     sub _skip($c) {
         if $c <= 191 { return 1 }
@@ -37,7 +71,9 @@
 class ParrotEncoding::UTF16  {  };
 class ParrotEncoding::UTF32  {  };
 class ParrotEncoding::EBCDIC {  };
-class ParrotEncoding::ParrotNative {
+
+class ParrotEncoding::ParrotNative is ParrotEncoding::Base::Fixed {
+    our $.width = 1;
 
     method string_char_iterate ($str, $callback, $parameter) {
         for (0..$str.bufused-1) { 
@@ -48,11 +84,10 @@
         }
     }
 
-    method string_grapheme_iterate($str, $callback, $parameter) {
-        for (0..$str.bufused-1) { $callback($str.buffer.[$_], $parameter); }
-    }
-
     method char_at_index($str, $index) { 
+        # We need to look inside each grapheme, since NFG stores individual
+        # graphemes and a grapheme may be composed of multiple characters.
+        # This could be improved with caching later. (Body not yet written.)
         ...
     }
 
@@ -67,5 +102,8 @@
         # because NFG is specific to ParrotEncoding.
     }
 };
-class ParrotEncoding::Byte is ParrotEncoding::ParrotNative; # Just a bit thinner
 
+class ParrotEncoding::Byte is ParrotEncoding::Base::Fixed {
+    our $.width = 1;  # one buffer element per character (see char_at_index)
+    method char_at_index($str, $index) { return $str.buffer[$index]; }
+};



nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About