develooper Front page | perl.perl5.porters | Postings from September 2005

[PATCH] Re: [BUG 5.8.7] Another major bug in PerlIO layer

Thread Previous | Thread Next
From:
Ilya Zakharevich
Date:
September 27, 2005 05:26
Subject:
[PATCH] Re: [BUG 5.8.7] Another major bug in PerlIO layer
Message ID:
20050927090734.GB3687@math.berkeley.edu
On Fri, Sep 23, 2005 at 10:40:27PM -0700, Ilya Zakharevich wrote:
> Let me repeat it yet again: it should be easier to rewrite PerlIO
> stuff from scratch than try to fix bugs in it.
> 
> Example (on unix):
> 
>  unix2dos .tcshrc | perl -we 'binmode STDIN, ":crlf" or die; print while read STDIN, $_, 32000'

Patched files:
   MANIFEST perlio.c
New files
   t/io/crlf_through.t t/io/through.t

0) This patch adds 2 new test files to the distribution.

a) checking eof() when no data is present in the buffer and the pending char
   is "\n" loses this character.  To reproduce:

	perl -we "$|=1;binmode STDOUT; sleep 1, print for split //, qq(a\nb)"
	 | perl -wle "binmode STDIN, q(:crlf); print ord while $_=getc STDIN"

   I indicated this test case and the mechanism of this bug several years
   ago already, and explained how to code a "workaround" for this design
   flaw of PerlIO.  The fact that this design flaw was not fixed in these
   years is just another indication of how broken the PerlIO architecture
   is...

   The edit to PerlIOCrlf_unread() is not "a real fix"; it just implements
   the workaround mentioned above.

b) The bug mentioned in the beginning of this thread is worked around
   by the PerlIOBase_read() edit.  Again, it is not a real fix.

c) I also added some comments with minimal explanations of semantic of
   several entry points to PerlIO.  I do not think they are documented in
   any other place...

d) Search for "XXXX" in this patch to find other potential bugs in PerlIO.

e) Finally, as you know, PerlIO comes without any test of its basic
   functionality.  I designed some placeholder which at least could find
   two bugs fixed by this patch.

Hope this helps,
Ilya

--- ./MANIFEST-pre	Fri May 27 15:49:02 2005
+++ ./MANIFEST	Tue Sep 27 01:26:54 2005
@@ -2489,6 +2489,7 @@ thread.h			Threading header
 t/io/argv.t			See if ARGV stuff works
 t/io/binmode.t			See if binmode() works
 t/io/crlf.t			See if :crlf works
+t/io/crlf_through.t		See if pipe passes data intact with :crlf
 t/io/dup.t			See if >& works right
 t/io/fflush.t			See if auto-flush on fork/exec/system/qx works
 t/io/fs.t			See if directory manipulations work
@@ -2502,6 +2503,7 @@ t/io/pipe.t			See if secure pipes work
 t/io/print.t			See if print commands work
 t/io/read.t			See if read works
 t/io/tell.t			See if file seeking works
+t/io/through.t			See if pipe passes data intact
 t/io/utf8.t			See if file seeking works
 t/japh/abigail.t		Obscure tests
 t/lib/1_compile.t		See if the various libraries and extensions compile
--- ./perlio.c-pre	Fri Apr  8 02:31:44 2005
+++ ./perlio.c	Tue Sep 27 01:30:00 2005
@@ -2050,6 +2050,8 @@ PerlIOBase_read(pTHX_ PerlIO *f, void *v
 	    return 0;
 	}
 	while (count > 0) {
+	 get_cnt:
+	  {
 	    SSize_t avail = PerlIO_get_cnt(f);
 	    SSize_t take = 0;
 	    if (avail > 0)
@@ -2060,11 +2062,14 @@ PerlIOBase_read(pTHX_ PerlIO *f, void *v
 		PerlIO_set_ptrcnt(f, ptr + take, (avail -= take));
 		count -= take;
 		buf += take;
+		if (avail == 0)		/* set_ptrcnt could have reset avail */
+		    goto get_cnt;
 	    }
 	    if (count > 0 && avail <= 0) {
 		if (PerlIO_fill(f) != 0)
 		    break;
 	    }
+	  }
 	}
 	return (buf - (STDCHAR *) vbuf);
     }
@@ -3506,7 +3511,11 @@ PerlIOBuf_open(pTHX_ PerlIO_funcs *self,
 
 /*
  * This "flush" is akin to sfio's sync in that it handles files in either
- * read or write state
+ * read or write state.  For write state, we put the postponed data through
+ * the next layers.  For read state, we seek() the next layers to the
+ * offset given by current position in the buffer, and discard the buffer
+ * state (XXXX supposed to be for seek()able buffers only, but now it is done
+ * in any case?).  Then the pass the stick further in chain.
  */
 IV
 PerlIOBuf_flush(pTHX_ PerlIO *f)
@@ -3565,6 +3574,10 @@ PerlIOBuf_flush(pTHX_ PerlIO *f)
     return code;
 }
 
+/* This discards the content of the buffer after b->ptr, and rereads
+ * the buffer from the position off in the layer downstream; here off
+ * is at offset corresponding to b->ptr - b->buf.
+ */
 IV
 PerlIOBuf_fill(pTHX_ PerlIO *f)
 {
@@ -3575,7 +3588,7 @@ PerlIOBuf_fill(pTHX_ PerlIO *f)
      * Down-stream flush is defined not to loose read data so is harmless.
      * we would not normally be fill'ing if there was data left in anycase.
      */
-    if (PerlIO_flush(f) != 0)
+    if (PerlIO_flush(f) != 0)	/* XXXX Check that its seek() succeeded?! */
 	return -1;
     if (PerlIOBase(f)->flags & PERLIO_F_TTY)
 	PerlIOBase_flush_linebuf(aTHX);
@@ -4052,6 +4065,14 @@ PerlIO_funcs PerlIO_pending = {
  * crlf - translation On read translate CR,LF to "\n" we do this by
  * overriding ptr/cnt entries to hand back a line at a time and keeping a
  * record of which nl we "lied" about. On write translate "\n" to CR,LF
+ *
+ * c->nl points on the first byte of CR LF pair when it is temporarily
+ * replaced by LF, or to the last CR of the buffer.  In the former case
+ * the caller thinks that the buffer ends at c->nl + 1, in the latter
+ * that it ends at c->nl; these two cases can be distinguished by
+ * *c->nl.  c->nl is set during _getcnt() call, and unset during
+ * _unread() and _flush() calls.
+ * It only matters for read operations.
  */
 
 typedef struct {
@@ -4096,7 +4117,7 @@ SSize_t
 PerlIOCrlf_unread(pTHX_ PerlIO *f, const void *vbuf, Size_t count)
 {
     PerlIOCrlf *c = PerlIOSelf(f, PerlIOCrlf);
-    if (c->nl) {
+    if (c->nl) {	/* XXXX Shouldn't it be done only if b->ptr > c->nl? */
 	*(c->nl) = 0xd;
 	c->nl = NULL;
     }
@@ -4126,8 +4147,10 @@ PerlIOCrlf_unread(pTHX_ PerlIO *f, const
 			count--;
 		    }
 		    else {
-			buf++;
-			break;
+		    /* If b->ptr - 1 == b->buf, we are undoing reading 0xa */
+			*--(b->ptr) = 0xa;	/* Works even if 0xa == '\r' */
+			unread++;
+			count--;
 		    }
 		}
 		else {
@@ -4141,6 +4164,7 @@ PerlIOCrlf_unread(pTHX_ PerlIO *f, const
     }
 }
 
+/* XXXX This code assumes that buffer size >=2, but does not check it... */
 SSize_t
 PerlIOCrlf_get_cnt(pTHX_ PerlIO *f)
 {
--- ./t/io/crlf_through.t-pre	Tue Sep 27 01:27:26 2005
+++ ./t/io/crlf_through.t	Tue Sep 27 01:25:18 2005
@@ -0,0 +1,9 @@
+#!./perl
+
+BEGIN {
+    chdir 't' if -d 't';
+    @INC = '../lib';
+}
+
+$main::use_crlf = 1;
+do './io/through.t' or die "no kid script";
--- ./t/io/through.t-pre	Fri Sep 23 17:14:12 2005
+++ ./t/io/through.t	Tue Sep 27 01:24:54 2005
@@ -0,0 +1,139 @@
+#!./perl
+
+BEGIN {
+    chdir 't' if -d 't';
+    @INC = '../lib';
+}
+
+use strict;
+require './test.pl';
+
+my $Perl = which_perl();
+
+my $data = <<'EOD';
+x
+ yy
+z
+EOD
+
+(my $data2 = $data) =~ s/\n/\n\n/g;
+
+my $t1 = { data => $data,  write_c => [1,2,length $data],  read_c => [1,2,3,length $data]};
+my $t2 = { data => $data2, write_c => [1,2,length $data2], read_c => [1,2,3,length $data2]};
+
+$_->{write_c} = [1..length($_->{data})],
+  $_->{read_c} = [1..length($_->{data})+1, 0xe000]  # Need <0xffff for REx
+    for (); # $t1, $t2;
+
+my $c;	# len write tests, for each: one _all test, and 3 each len+2
+$c += @{$_->{write_c}} * (1 + 3*@{$_->{read_c}}) for $t1, $t2;
+$c *= 3*2*2;	# $how_w, file/pipe, 2 reports
+
+$c += 6;	# Tests with sleep()...
+
+print "1..$c\n";
+
+my $set_out = '';
+$set_out = "binmode STDOUT, ':crlf'" if $main::use_crlf = 1;
+
+sub testread ($$$$$$$) {
+  my ($fh, $str, $read_c, $how_r, $write_c, $how_w, $why) = @_;
+  my $buf = '';
+  if ($how_r eq 'readline_all') {
+    $buf .= $_ while <$fh>;
+  } elsif ($how_r eq 'readline') {
+    $/ = \$read_c;
+    $buf .= $_ while <$fh>;
+  } elsif ($how_r eq 'read') {
+    my($in, $c);
+    $buf .= $in while $c = read($fh, $in, $read_c);
+  } elsif ($how_r eq 'sysread') {
+    my($in, $c);
+    $buf .= $in while $c = sysread($fh, $in, $read_c);
+  } else {
+    die "Unrecognized read: '$how_r'";
+  }
+  close $fh or die "close: $!";
+  # The only contamination allowed is with sysread/prints
+  $buf =~ s/\r\n/\n/g if $how_r eq 'sysread' and $how_w =~ /print/;
+  is(length $buf, length $str, "length with wrc=$write_c, rdc=$read_c, $how_w, $how_r, $why");
+  is($buf, $str, "content with wrc=$write_c, rdc=$read_c, $how_w, $how_r, $why");
+}
+
+sub testpipe ($$$$$$) {
+  my ($str, $write_c, $read_c, $how_w, $how_r, $why) = @_;
+  (my $quoted = $str) =~ s/\n/\\n/g;;
+  my $fh;
+  if ($how_w eq 'print') {	# AUTOFLUSH???
+    # Should be shell-neutral:
+    open $fh, '-|', qq[$Perl -we "$set_out;print for grep length, split /(.{1,$write_c})/s, qq($quoted)"] or die "open: $!";
+  } elsif ($how_w eq 'print/flush') {
+    # shell-neutral and miniperl-enabled autoflush? qq(\x24\x7c) eq '$|'
+    open $fh, '-|', qq[$Perl -we "$set_out;eval qq(\\x24\\x7c = 1) or die;print for grep length, split /(.{1,$write_c})/s, qq($quoted)"] or die "open: $!";
+  } elsif ($how_w eq 'syswrite') {
+    ### How to protect \$_
+    open $fh, '-|', qq[$Perl -we "$set_out;eval qq(sub w {syswrite STDOUT, \\x24_} 1) or die; w() for grep length, split /(.{1,$write_c})/s, qq($quoted)"] or die "open: $!";
+  } else {
+    die "Unrecognized write: '$how_w'";
+  }
+  binmode $fh, ':crlf' if $main::use_crlf = 1;
+  testread($fh, $str, $read_c, $how_r, $write_c, $how_w, "pipe$why");
+}
+
+sub testfile ($$$$$$) {
+  my ($str, $write_c, $read_c, $how_w, $how_r, $why) = @_;
+  my @data = grep length, split /(.{1,$write_c})/s, $str;
+
+  open my $fh, '>', 'io_io.tmp' or die;
+  select $fh;
+  binmode $fh, ':crlf' if $main::use_crlf = 1;
+  if ($how_w eq 'print') {	# AUTOFLUSH???
+    $| = 0;
+    print $fh $_ for @data;
+  } elsif ($how_w eq 'print/flush') {
+    $| = 1;
+    print $fh $_ for @data;
+  } elsif ($how_w eq 'syswrite') {
+    syswrite $fh, $_ for @data;
+  } else {
+    die "Unrecognized write: '$how_w'";
+  }
+  close $fh or die "close: $!";
+  open $fh, '<', 'io_io.tmp' or die;
+  binmode $fh, ':crlf' if $main::use_crlf = 1;
+  testread($fh, $str, $read_c, $how_r, $write_c, $how_w, "file$why");
+}
+
+# shell-neutral and miniperl-enabled autoflush? qq(\x24\x7c) eq '$|'
+open my $fh, '-|', qq[$Perl -we "eval qq(\\x24\\x7c = 1) or die; binmode STDOUT; sleep 1, print for split //, qq(a\nb\n\nc\n\n\n)"] or die "open: $!";
+ok(1, 'open pipe');
+binmode $fh, q(:crlf);
+ok(1, 'binmode');
+my (@c, $c);
+push @c, ord $c while $c = getc $fh;
+ok(1, 'got chars');
+is(scalar @c, 9, 'got 9 chars');
+is("@c", '97 10 98 10 10 99 10 10 10', 'got expected chars');
+ok(close($fh), 'close');
+
+for my $s (1..2) {
+  my $t = ($t1, $t2)[$s-1];
+  my $str = $t->{data};
+  my $r = $t->{read_c};
+  my $w = $t->{write_c};
+  for my $read_c (@$r) {
+    for my $write_c (@$w) {
+      for my $how_r (qw(readline_all readline read sysread)) {
+	next if $how_r eq 'readline_all' and $read_c != 1;
+        for my $how_w (qw(print print/flush syswrite)) {
+	  testfile($str, $write_c, $read_c, $how_w, $how_r, $s);
+	  testpipe($str, $write_c, $read_c, $how_w, $how_r, $s);
+        }
+      }
+    }
+  }
+}
+
+unlink 'io_io.tmp';
+
+1;

Thread Previous | Thread Next


nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About