Front page | perl.perl5.porters |
Postings from June 2001
[PATCH] Encode.pm to use escape-sequence encoding
Thread Next
From:
SADAHIRO Tomoyuki
Date:
June 29, 2001 15:34
Subject:
[PATCH] Encode.pm to use escape-sequence encoding
Message ID:
20010630073226.7C79.BQW10602@nifty.com
Hello, here is a patch for Encode.pm
to use escape-sequence encoding.
Known problems:
(1)
For present, any compiled encodings
(ASCII, ISO-8859-*, etc.) are not available
for the code extension of escape-sequence encoding.
(2) encodings with SINGLE SHIFTs (SS2, SS3)
are not avaliable.
Modification:
(1) iso2022-jp.enc and iso2022-kr.enc may contain
the GR characters ("\xA0" .. "\xFF").
According to RFC1554 (ISO-2022-JP-2) and
RFC1557 (Korean Character Encoding for Internet Messages),
they must be in 7 bit format.
So, the following files are added.
7bit.enc (ASCII, not including ESC, SI, SO)
7bit-jis.enc
7bit-kana.enc
7bit-kr.enc
(these names might be not so good...
please comment and/or tell better names)
(2) A new parameter, 'standard'. It means the
escape sequence omitted at the beginning of the string
and added at the end of the string if neccessary
(but not always. if the last character is an ASCII,
the final \x1b(B is not appended).
(ex.)
# Encoding file: 7bit-jis, escape-driven
E
name 7bit-jis
init {}
final {}
standard \x1b(B
7bit \x1b(B
7bit \x1b(J
(snip..)
==============
diff -Pur Encode.orig/7bit-jis.enc Encode/7bit-jis.enc
--- Encode.orig/7bit-jis.enc Thu Jan 01 09:00:00 1970
+++ Encode/7bit-jis.enc Sat Jun 30 05:55:08 2001
@@ -0,0 +1,13 @@
+# Encoding file: 7bit-jis, escape-driven
+E
+name 7bit-jis
+init {}
+final {}
+standard \x1b(B
+7bit \x1b(B
+7bit \x1b(J
+7bit-kana \x1b(I
+jis0208 \x1b$B
+jis0208 \x1b$@
+jis0208 \x1b&@\x1b$B
+jis0212 \x1b$(D
diff -Pur Encode.orig/7bit-kana.enc Encode/7bit-kana.enc
--- Encode.orig/7bit-kana.enc Thu Jan 01 09:00:00 1970
+++ Encode/7bit-kana.enc Sat Jun 30 07:21:10 2001
@@ -0,0 +1,20 @@
+# Encoding file: 7bit-kana, single-byte
+S
+0025 0 1
+00
+0000000100020003000400050006000700080009000A000B000C000D00000000
+0010001100120013001400150016001700180019001A001B001C001D0000001F
+0000FF61FF62FF63FF64FF65FF66FF67FF68FF69FF6AFF6BFF6CFF6DFF6EFF6F
+FF70FF71FF72FF73FF74FF75FF76FF77FF78FF79FF7AFF7BFF7CFF7DFF7EFF7F
+FF80FF81FF82FF83FF84FF85FF86FF87FF88FF89FF8AFF8BFF8CFF8DFF8EFF8F
+FF90FF91FF92FF93FF94FF95FF96FF97FF98FF99FF9AFF9BFF9CFF9DFF9EFF9F
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
diff -Pur Encode.orig/7bit-kr.enc Encode/7bit-kr.enc
--- Encode.orig/7bit-kr.enc Thu Jan 01 09:00:00 1970
+++ Encode/7bit-kr.enc Sat Jun 30 05:54:52 2001
@@ -0,0 +1,7 @@
+# Encoding file: 7bit-kr, escape-driven
+E
+name 7bit-kr
+init \x1b$)C
+final {}
+7bit \x0f
+ksc5601 \x0e
diff -Pur Encode.orig/7bit.enc Encode/7bit.enc
--- Encode.orig/7bit.enc Thu Jan 01 09:00:00 1970
+++ Encode/7bit.enc Sat Jun 30 06:59:28 2001
@@ -0,0 +1,20 @@
+# Encoding file: 7bit (ASCII for E encodings), single-byte
+S
+003F 0 1
+00
+0000000100020003000400050006000700080009000A000B000C000D00000000
+0010001100120013001400150016001700180019001A001B001C001D0000001F
+0020002100220023002400250026002700280029002A002B002C002D002E002F
+0030003100320033003400350036003700380039003A003B003C003D003E003F
+0040004100420043004400450046004700480049004A004B004C004D004E004F
+0050005100520053005400550056005700580059005A005B005C005D005E005F
+0060006100620063006400650066006700680069006A006B006C006D006E006F
+0070007100720073007400750076007700780079007A007B007C007D007E007F
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
diff -Pur Encode.orig/Tcl.pm Encode/Tcl.pm
--- Encode.orig/Tcl.pm Tue Jun 26 22:26:56 2001
+++ Encode/Tcl.pm Sat Jun 30 07:27:46 2001
@@ -229,27 +229,115 @@
sub read
{
- my ($class,$fh,$name) = @_;
- my %self = (Name => $name, Num => 0);
+ my ($obj,$fh,$name) = @_;
+ my(%tbl, @esc, $enc);
while (<$fh>)
{
my ($key,$val) = /^(\S+)\s+(.*)$/;
$val =~ s/^\{(.*?)\}/$1/g;
$val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
- $self{$key} = $val;
+ if($enc = Encode->getEncoding($key)){
+ $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
+ push @esc, $val;
+ }else{
+ $obj->{$key} = $val;
+ }
}
- return bless \%self,$class;
+ $obj->{'Ctl'} = \@esc;
+ $obj->{'Tbl'} = \%tbl;
+ return $obj;
}
sub decode
{
- croak("Not implemented yet");
+ my ($obj,$str,$chk) = @_;
+ my $tbl = $obj->{'Tbl'};
+ my $ctl = $obj->{'Ctl'};
+ my $ini = $obj->{'init'};
+ my $fin = $obj->{'final'};
+ my $std = $ctl->[0];
+ my $cur = $std;
+ my $uni = '';
+ while (length($str)){
+ my $uch = substr($str,0,1,'');
+ if($uch eq "\e"){
+ $str =~ s/^([\x20-\x2F]*[\x30-\x7E](?:\x1b[\x20-\x2F]*[\x30-\x7E])*)//;
+ my $esc = "\e$1";
+ if($tbl->{$esc}){ $cur = $esc }
+ elsif($esc eq $ini || $esc eq $fin){ $cur = $std }
+ else{carp "unknown escape sequence" }
+ next;
+ }
+ if($uch eq "\x0e" || $uch eq "\x0f"){
+ $cur = $uch and next;
+ }
+ my $x;
+ my $ch = ord($uch);
+ my $rep = $tbl->{$cur}->{'Rep'};
+ my $touni = $tbl->{$cur}->{'ToUni'};
+ if (&$rep($ch) eq 'C')
+ {
+ $x = $touni->[0][$ch];
+ }
+ else
+ {
+ $x = $touni->[$ch][ord(substr($str,0,1,''))];
+ }
+ unless (defined $x)
+ {
+ last if $chk;
+ # What do we do here ?
+ $x = '';
+ }
+ $uni .= $x;
+ }
+ $_[1] = $str if $chk;
+ return $uni;
}
sub encode
{
- croak("Not implemented yet");
-}
+ my ($obj,$uni,$chk) = @_;
+ my $tbl = $obj->{'Tbl'};
+ my $ctl = $obj->{'Ctl'};
+ my $ini = $obj->{'init'};
+ my $fin = $obj->{'final'};
+ my $std = $obj->{'standard'} || '';
+ my $str = $ini;
+ my $pre = $std;
+ my $cur = $pre;
+ while (length($uni)){
+ my $ch = chr(ord(substr($uni,0,1,'')));
+ my $x = $tbl->{$pre}->{FmUni}->{$ch};
+ unless(defined $x){
+ foreach my $esc (@$ctl){
+ $x = $tbl->{$esc}->{FmUni}->{$ch};
+ $cur = $esc and last if defined $x;
+ }
+ }
+ if($x == 0x0d && !($ini eq '' && $fin eq '') && substr($uni,0,1) eq "\x0a")
+ {
+ $str .= $cur unless $cur eq $pre;
+ $str .= $fin."\x0d\x0a".$ini;
+ substr($uni,0,1,'');
+ $pre = $std;
+ next;
+ }
+ my $def = $tbl->{$cur}->{'Def'};
+ my $rep = $tbl->{$cur}->{'Rep'};
+ unless (defined $x){
+ last if ($chk);
+ $x = $def;
+ }
+ $str .= $cur unless $cur eq $pre;
+ $str .= pack(&$rep($x),$x);
+ $pre = $cur;
+ }
+ $str .= $std unless $cur eq $std;
+ $str .= $fin;
+ $_[1] = $uni if $chk;
+ return $str;
+}
1;
__END__
regards,
SADAHIRO Tomoyuki
E-mail: bqw10602@nifty.com
URL: http://homepage1.nifty.com/nomenclator/perl/
Thread Next
-
[PATCH] Encode.pm to use escape-sequence encoding
by SADAHIRO Tomoyuki