Front page | perl.perl5.porters |
Postings from April 2008
patch fixes several problems in a2p
Thread Next
From:
Gregg Weber
Date:
April 22, 2008 14:56
Subject:
patch fixes several problems in a2p
Message ID:
480E5F1A.20103@tdl.com
Hi,
Here are fixes for some problems in the a2p program.
- Gregg Weber
1. "\061" gets converted to "\\061" should be "\061"
2. getline does not split correctly when FS is " ", and line has fields
separated by 2 or more spaces.
3. exit was not implemented correctly.
4. split fails if FS=' ' and more than one adjacent space in string.
5. assigning to $0 does not re-split $_ into @Fld.
6. assigning to $n (n>0) does not re-build $_ using OFS and $Fld.
7. tolower and toupper should be converted to lc and uc
Also added two gawk, (or recent awk) extentions:
1. fflush
2. delete array
Here is an awk program that tests 11 things that work in awk, failed in
old a2p, fixed in my new a2p.
FYI, I am using GNU Awk 3.1.3 on Linux - Centos 4.3
BEGIN {
failed=0;
# test constant with backslashed octal number
result="\061\062\063";
check_result(0,"123");
# test field splitting
FS=":";
$0 = "abc:def:ghi:jkl mno:pqr";
result=sprintf("%s,%s", $2,$4);
check_result(1,"def,jkl mno");
FS=" ";
$0 = "abc def ghi jkl mno pqr";
result=sprintf("%s,%s", $2,$4);
check_result(2,"def,jkl");
# test field splitting in getline with fields separated by multiple spaces
FS=" ";
"echo 'xabc xdef xghi xjkl'" | getline;
result=sprintf("%s,%s", $2,$4);
check_result(3,"xdef,xjkl");
FS="[ \t]+";
"echo 'yabc ydef yghi yjkl'" | getline;
result=sprintf("%s,%s", $2,$4);
check_result(4,"ydef,yjkl");
# test split operation
s=" abc def ghi jkl mno";
fs=" ";
split(s,a,fs);
result=sprintf("%s,%s", a[2],a[4]);
check_result(5,"def,jkl");
# test assignment to $2, should rebuild $0 using OFS and $1..n
FS="/";
OFS="=";
$0="a:b:c/d:e:f/g:h:i";
FS=":";
$2 = "xyz";
result=sprintf("%s,%s,%s,%s", $0,$1,$2,$3);
check_result(6,"a:b:c=xyz=g:h:i,a:b:c,xyz,g:h:i");
$3 = "swd";
result=sprintf("%s,%s,%s,%s", $0,$1,$2,$3);
check_result(7,"a:b:c=xyz=swd,a:b:c,xyz,swd");
# test assignment to $0, should re-split using current FS
$0=$0;
result=sprintf("%s,%s,%s,%s", $0,$1,$2,$3);
check_result(8,"a:b:c=xyz=swd,a,b,c=xyz=swd");
# test exit in begin block
exit(-3);
print "You should not see this line.";
}
END {
# test tolower() toupper()
result=tolower("ABC");
check_result(9,"abc");
result=toupper("def");
check_result(10,"DEF");
print ntests " tests run, " failed " failed."
}
function check_result(test,expected ) {
ntests++;
if (expected != result) {
failed++;
print "Test " test " Error: expected " expected " got " result
}
}
-------------------------------------------------------
Here is a gawk program that exercises fflush and delete array
BEGIN {
# test fflush
somefile="data.dat";
fflush();
fflush("");
fflush(somefile);
# test delete array
somearray["abc"] = 4;
delete somearray["abc"];
delete somearray;
}
Here are the diffs against perl-5.8.8
note - a2p.c diffs not included, since you should just type "make
run_byacc" to make a2p.c
diff -ruN perl-5.8.8/x2p/a2p.h perl-5.8.8.orig/x2p/a2p.h
--- perl-5.8.8/x2p/a2p.h 2008-04-21 16:51:58.000000000 -0700
+++ perl-5.8.8.orig/x2p/a2p.h 2005-11-07 04:53:14.000000000 -0800
@@ -238,10 +238,6 @@
#define ORETURN 86
#define ODEFINED 87
#define OSTAR 88
-#define OTOUPPER 89
-#define OTOLOWER 90
-#define ODELETEARRAY 91
-#define OFFLUSH 92
#ifdef DOINIT
char *opname[] = {
@@ -334,11 +330,7 @@
"RETURN",
"DEFINED",
"STAR",
- "TOUPPER",
- "TOLOWER",
- "DELETEARRAY",
- "FFLUSH",
- "93"
+ "89"
};
#else
extern char *opname[];
@@ -353,7 +345,7 @@
#if defined(iAPX286) || defined(M_I286) || defined(I80286) /* 80286
hack */
#define OPSMAX (64000/sizeof(union u_ops)) /* approx. max segment
size */
#else
-#define OPSMAX 200000
+#define OPSMAX 50000
#endif /* 80286 hack */
EXT union u_ops ops[OPSMAX];
@@ -430,10 +422,8 @@
EXT bool saw_RS INIT(FALSE);
EXT bool saw_OFS INIT(FALSE);
EXT bool saw_ORS INIT(FALSE);
-EXT bool saw_fflush INIT(FALSE);
EXT bool saw_line_op INIT(FALSE);
EXT bool in_begin INIT(TRUE);
-EXT bool saw_end INIT(FALSE);
EXT bool do_opens INIT(FALSE);
EXT bool do_fancy_opens INIT(FALSE);
EXT bool lval_field INIT(FALSE);
diff -ruN perl-5.8.8/x2p/a2p.y perl-5.8.8.orig/x2p/a2p.y
--- perl-5.8.8/x2p/a2p.y 2008-04-20 15:59:25.000000000 -0700
+++ perl-5.8.8.orig/x2p/a2p.y 2005-09-21 06:38:24.000000000 -0700
@@ -58,7 +58,7 @@
;
end : END '{' maybe states '}'
- { ends = oper3(OJUNK,ends,$3,$4); saw_end = TRUE; $$ = Nullop; }
+ { ends = oper3(OJUNK,ends,$3,$4); $$ = Nullop; }
| end NEWLINE
{ $$ = $1; }
;
@@ -396,8 +396,6 @@
{ $$ = oper1(ORETURN,$2); }
| DELETE VAR '[' expr_list ']'
{ $$ = oper2(ODELETE,aryrefarg($2),$4); }
- | DELETE VAR
- { $$ = oper1(ODELETEARRAY,aryrefarg($2)); }
;
redir : '>' %prec FIELD
diff -ruN perl-5.8.8/x2p/a2py.c perl-5.8.8.orig/x2p/a2py.c
--- perl-5.8.8/x2p/a2py.c 2008-04-20 16:17:26.000000000 -0700
+++ perl-5.8.8.orig/x2p/a2py.c 2005-11-07 04:53:14.000000000 -0800
@@ -523,6 +523,7 @@
if (strEQ(d,"else"))
XTERM(ELSE);
if (strEQ(d,"exit")) {
+ saw_line_op = TRUE;
XTERM(EXIT);
}
if (strEQ(d,"exp")) {
@@ -556,10 +557,6 @@
}
ID(tokenbuf);
}
- if (strEQ(d,"fflush")) {
- yylval = OFFLUSH;
- XTERM(FUN1);
- }
if (strEQ(d,"for"))
XTERM(FOR);
else if (strEQ(d,"function"))
@@ -781,14 +778,6 @@
ID(d);
case 't': case 'T':
SNARFWORD;
- if (strEQ(d,"toupper")) {
- yylval = OTOUPPER;
- XTERM(FUN1);
- }
- if (strEQ(d,"tolower")) {
- yylval = OTOLOWER;
- XTERM(FUN1);
- }
if (strEQ(d,"tr"))
*d = toUPPER(*d);
else if (strEQ(d,"tell"))
diff -ruN perl-5.8.8/x2p/walk.c perl-5.8.8.orig/x2p/walk.c
--- perl-5.8.8/x2p/walk.c 2008-04-22 13:08:10.000000000 -0700
+++ perl-5.8.8.orig/x2p/walk.c 2005-11-07 04:53:14.000000000 -0800
@@ -18,9 +18,6 @@
bool saw_FNR = FALSE;
bool saw_argv0 = FALSE;
bool saw_fh = FALSE;
-
-int re_split;
-int re_build_line;
int maxtmp = 0;
char *lparen;
char *rparen;
@@ -148,8 +145,6 @@
}
else if (old_awk)
str_cat(str,"while (<>) { } # (no line actions)\n");
- if (saw_end)
- str_cat(str,"\nendlabel:\n");
if (ops[node+4].ival) {
realexit = TRUE;
str_cat(str,"\n");
@@ -213,42 +208,7 @@
sub Pick {\n\
local($mode,$name,$pipe) = @_;\n\
$fh = $name;\n\
- if (! $opened{$name}) {\n\
- open($name,$mode.$name.$pipe);\n\
- $opened{$name} = 1;\n\
- if (substr($mode,1,1) eq \">\" or substr($mode,1,1) eq \"|\") {\n\
- $opened_for_output{$name} = 1;\n\
- }\n\
- }\n\
-}\n\
-");
- }
- if (saw_fflush) {
- str_cat(str,"\n\
-sub Fflush {\n\
- $nargs = scalar(@_);\n\
- if ($nargs == 0) {\n\
- select STDOUT;\n\
- $| = 1;\n\
- $| = 0;\n\
- }\n\
- else {\n\
- $fh = shift;\n\
- if ($fh eq '') {\n\
- for $fh (keys %opened_for_output) {\n\
- select $fh;\n\
- $| = 1;\n\
- $| = 0;\n\
- }\n\
- select STDOUT;\n\
- }\n\
- else {\n\
- select $fh;\n\
- $| = 1;\n\
- $| = 0;\n\
- select STDOUT;\n\
- }\n\
- }\n\
+ open($name,$mode.$name.$pipe) unless $opened{$name}++;\n\
}\n\
");
}
@@ -490,10 +450,7 @@
break;
case OASSIGN:
prec = P_ASSIGN;
- re_split = re_build_line = 0;
str = walk(0,level,ops[node+2].ival,&numarg,prec+1);
- if (strEQ(str->str_ptr,"$_")) re_split = 1;
- if (strncmp(str->str_ptr,"$Fld[",5) == 0) re_build_line = 1;
str_cat(str," ");
tmpstr = walk(0,level,ops[node+1].ival,&numarg,P_MIN);
str_scat(str,tmpstr);
@@ -506,16 +463,6 @@
numeric |= numarg;
if (strEQ(str->str_ptr,"$/ = ''"))
str_set(str, "$/ = \"\\n\\n\"");
- // if awk $0 is lval, have to re-build fields using current FS, and $_
- if (re_split) {
- str_cat(str,";\n\
-if ($FS eq ' ') { @Fld = split(' ', $_, -1) }\n\
-else { @Fld = split(/$FS/, $_, -1) }");
- }
- // if awk $1..n is lval, have to re-build $0 using current $, and @Fld
- if (re_build_line) {
- str_cat(str,";\n$_ = join (\"$,\", @Fld)");
- }
break;
case OADD:
prec = P_ADD;
@@ -745,13 +692,11 @@
str_free(fstr);
}
else if (const_FS) {
- if (const_FS == ' ')
- sprintf(tokenbuf,"' '");
- else sprintf(tokenbuf,"/[%c\\n]/",const_FS);
+ sprintf(tokenbuf,"/[%c\\n]/",const_FS);
str_cat(str,tokenbuf);
}
else if (saw_FS)
- str_cat(str,"($FS eq ' ') ? '[ \t]+' : $FS");
+ str_cat(str,"$FS");
else {
str_cat(str,"' '");
limit = ")";
@@ -972,14 +917,10 @@
s = "\"";
*d++ = *t++ + 128;
switch (*t) {
- case '0': case '1': case '2': case '3': case '4': case '5':
case '6': case '7':
- break;
- case 'a': case 'b': case 'f': case 'r': case 'v': case 'x':
- break;
case '\\': case '"': case 'n': case 't': case '$':
break;
default: /* hide this from perl */
- *d++ = '\\' + (char)128;
+ *d++ = '\\' + (char)128;
}
}
*d = *t + 128;
@@ -1000,64 +941,6 @@
str = str_new(0);
str_set(str,"delete $");
goto addvar;
- case ODELETEARRAY:
- str = str_new(0);
- str_set(str,"%");
- str_scat(str,tmpstr=walk(1,level,ops[node+1].ival,&numarg,P_MIN));
- if (len == 1) {
- tmp2str = hfetch(symtab,tmpstr->str_ptr);
- if (tmp2str && atoi(tmp2str->str_ptr))
- numeric = 2;
- if (strEQ(str->str_ptr,"$FNR")) {
- numeric = 1;
- saw_FNR++;
- str_set(str,"($.-$FNRbase)");
- }
- else if (strEQ(str->str_ptr,"$NR")) {
- numeric = 1;
- str_set(str,"$.");
- }
- else if (strEQ(str->str_ptr,"$NF")) {
- numeric = 1;
- str_set(str,"$#Fld");
- }
- else if (strEQ(str->str_ptr,"$0"))
- str_set(str,"$_");
- else if (strEQ(str->str_ptr,"$ARGC"))
- str_set(str,"($#ARGV+1)");
- }
- else {
-#ifdef NOTDEF
- if (curargs) {
- sprintf(tokenbuf,"$%s,",tmpstr->str_ptr);
- ??? if (instr(curargs->str_ptr,tokenbuf))
- str_cat(str,"\377"); /* can't translate yet */
- }
-#endif
- str_cat(tmpstr,"[]");
- tmp2str = hfetch(symtab,tmpstr->str_ptr);
- if (tmp2str && atoi(tmp2str->str_ptr))
- str_cat(str,"[");
- else
- str_cat(str,"{");
- str_scat(str,fstr=walk(1,level,ops[node+2].ival,&numarg,P_MIN));
- str_free(fstr);
- if (strEQ(str->str_ptr,"$ARGV[0")) {
- str_set(str,"$ARGV0");
- saw_argv0++;
- }
- else {
- if (tmp2str && atoi(tmp2str->str_ptr))
- strcpy(tokenbuf,"]");
- else
- strcpy(tokenbuf,"}");
- *tokenbuf += (char)128;
- str_cat(str,tokenbuf);
- }
- }
- str_free(tmpstr);
- str_cat(str,"=()");
- break;
case OSTAR:
str = str_new(0);
str_set(str,"*");
@@ -1239,8 +1122,8 @@
str_cat(str,tokenbuf);
}
else {
- sprintf(tokenbuf,"delete $opened{%s} && close(%s);\ndelete
$opened_for_output{%s}",
- tmpstr->str_ptr, tmpstr->str_ptr, tmpstr->str_ptr);
+ sprintf(tokenbuf,"delete $opened{%s} && close(%s)",
+ tmpstr->str_ptr, tmpstr->str_ptr);
str_free(tmpstr);
str_set(str,tokenbuf);
}
@@ -1391,55 +1274,6 @@
case OSQRT:
str = str_make("sqrt(");
goto maybe0;
- case OTOUPPER:
- str = str_make("uc(");
- goto maybe0;
- case OTOLOWER:
- str = str_make("lc(");
- goto maybe0;
- case OFFLUSH:
- saw_fflush = TRUE;
- str = str_make("&Fflush(");
- numeric = 1;
- if (len > 0)
- tmpstr = walk(1,level,ops[node+1].ival,&numarg,P_MIN);
- else
- tmpstr = str_new(0);
- if (!tmpstr->str_ptr || !*tmpstr->str_ptr) {
- if (lval_field) {
- t = (char*)(saw_OFS ? "$," : "' '");
- if (split_to_array) {
- sprintf(tokenbuf,"join(%s,@Fld)",t);
- str_cat(tmpstr,tokenbuf);
- }
- else {
- sprintf(tokenbuf,"join(%s, ",t);
- str_cat(tmpstr,tokenbuf);
- for (i = 1; i < maxfld; i++) {
- if (i <= arymax)
- sprintf(tokenbuf,"$%s,",nameary[i]);
- else
- sprintf(tokenbuf,"$Fld%d,",i);
- str_cat(tmpstr,tokenbuf);
- }
- if (maxfld <= arymax)
- sprintf(tokenbuf,"$%s)",nameary[maxfld]);
- else
- sprintf(tokenbuf,"$Fld%d)",maxfld);
- str_cat(tmpstr,tokenbuf);
- }
- }
- }
- if (strEQ(tmpstr->str_ptr,"$_")) {
- if (type == OLENGTH && !do_chop) {
- str = str_make("(length(");
- str_cat(tmpstr,") - 1");
- }
- }
- str_scat(str,tmpstr);
- str_free(tmpstr);
- str_cat(str,")");
- break;
case OINT:
str = str_make("int(");
maybe0:
@@ -1508,20 +1342,14 @@
}
else {
if (len == 1) {
- if (saw_end) {
- str_set(str,"$ExitValue = ");
- exitval = TRUE;
- }
- else {
- str_set(str,"exit ");
- }
+ str_set(str,"$ExitValue = ");
+ exitval = TRUE;
str_scat(str,
fstr=walk(1,level,ops[node+1].ival,&numarg,P_ASSIGN));
str_free(fstr);
str_cat(str,"; ");
}
- if (saw_end)
- str_cat(str,"goto endlabel");
+ str_cat(str,"last line");
}
break;
case OCONTINUE:
@@ -1761,41 +1589,33 @@
emit_split(register STR *str, int level)
{
register int i;
- STR *tmpstr;
- tmpstr = str_new(0);
if (split_to_array)
- str_cat(tmpstr,"@Fld");
+ str_cat(str,"@Fld");
else {
- str_cat(tmpstr,"(");
+ str_cat(str,"(");
for (i = 1; i < maxfld; i++) {
if (i <= arymax)
sprintf(tokenbuf,"$%s,",nameary[i]);
else
sprintf(tokenbuf,"$Fld%d,",i);
- str_cat(tmpstr,tokenbuf);
+ str_cat(str,tokenbuf);
}
if (maxfld <= arymax)
sprintf(tokenbuf,"$%s)",nameary[maxfld]);
else
sprintf(tokenbuf,"$Fld%d)",maxfld);
- str_cat(tmpstr,tokenbuf);
- }
- if (const_FS) {
- if (const_FS == ' ')
- sprintf(tokenbuf,"%s = split(' ', $_, -1)",tmpstr->str_ptr);
- else sprintf(tokenbuf,"%s = split(/[%c\\n]/, $_,
-1);\n",tmpstr->str_ptr,const_FS);
str_cat(str,tokenbuf);
}
- else if (saw_FS) {
- sprintf(tokenbuf,"if ($FS eq ' ') { %s = split(' ', $_, -1) }\n\
- else { %s = split(/$FS/, $_, -1) }\n", tmpstr->str_ptr,
tmpstr->str_ptr);
+ if (const_FS) {
+ sprintf(tokenbuf," = split(/[%c\\n]/, $_, -1);\n",const_FS);
str_cat(str,tokenbuf);
}
+ else if (saw_FS)
+ str_cat(str," = split($FS, $_, -1);\n");
else
str_cat(str," = split(' ', $_, -1);\n");
tab(str,level);
- free(tmpstr);
}
int
@@ -2072,7 +1892,6 @@
break;
case ODEFINED:
case ODELETE:
- case ODELETEARRAY:
case OSTAR:
case OVAR:
prewalk(0,level,ops[node+1].ival,&numarg);
@@ -2151,12 +1970,6 @@
goto maybe0;
case OSQRT:
goto maybe0;
- case OTOUPPER:
- goto maybe0;
- case OTOLOWER:
- goto maybe0;
- case OFFLUSH:
- goto maybe0;
case OINT:
maybe0:
numeric = 1;
That's all folks.
Thread Next
-
patch fixes several problems in a2p
by Gregg Weber