develooper Front page | perl.perl6.internals | Postings from July 2002

Re: Perl6 grammar (take IV)

Thread Previous | Thread Next
From:
Sean O'Rourke
Date:
July 6, 2002 12:50
Subject:
Re: Perl6 grammar (take IV)
Message ID:
Pine.GSO.4.33.0207061227140.23583-200000@beowulf.ucsd.edu
use Data::Dumper;
use Getopt::Long;
use strict;
$Data::Dumper::Terse = 1;
$Data::Dumper::Indent = 1;
use Term::ReadLine;

######################################################################
# Argument context for functions and control structures
######################################################################

%::WANT = ();

##############################
# Functions (list operators):
# XXX: many of these need their own special want_* rules
my $FUNCTION_ARGS = 'maybe_comma';

my @builtin_funcs = qw(crypt index pack rindex sprintf substr
		       join unpack split
		       push unshift splice
		       warn die print printf read select syscall sysread
		       sysseek syswrite truncate write
		       vec
		       chmod chown fcntl ioctl link open opendir
		       rename symlink sysopen unlink
		       return fail
		       not);
@::WANT{@builtin_funcs} = ($FUNCTION_ARGS) x @builtin_funcs;

sub ::add_function {
    my $fname = shift->[1];
    $::WANT{$fname} = shift || $FUNCTION_ARGS;
    1;
}

##############################
# Loop control
my @loop_control = qw(redo last next continue);
@::WANT{@loop_control} = ('maybe_namepart') x @loop_control;

##############################
# Unary operators
# XXX: need to handle default $_
my @unary_ops = qw(chop chomp chr hex lc lcfirst length
		   ord reverse uc ucfirst
		   abs atan2 cos exp hex int log oct rand sin sqrt srand
		   pop shift
		   delete each exists keys values
		   defined undef
		   chdir chroot glob mkdir rmdir stat umask
		   close);
@::WANT{@unary_ops} = ('prefix') x @unary_ops;

##############################
# Control operators
my @control = qw(for given when default if elsif else grep map);
@::WANT{@control} = map { "want_for_$_" } @control;

##############################
# Named blocks
my @special_blocks = qw(CATCH BEGIN END INIT AUTOLOAD
			PRE POST NEXT LAST FIRST
			try do);
@::WANT{@special_blocks} = ('closure') x @special_blocks;

##############################
# Classes (builtin and otherwise)
%::CLASSES = ();
my @builtin_types = qw(int real str HASH ARRAY SCALAR
		       true false); # XXX: these are really constants
@::CLASSES{@builtin_types} = @builtin_types;

sub ::add_class {		# seen class.
    my $c = shift->[1];
    $::CLASSES{$c} = $c;
    1;
}

# HACK to distinguish between "my ($a, $b) ..." and "foo ($a, $b)".
# Don't need all keywords in here, but only the ones that cause
# problems.
%::KEYWORDS = ();
@::KEYWORDS{qw(my our temp)} = 1;

# (see Parse::RecDescent::Consumer)
sub ::consumer {
    my $t = shift;
    my $old_len = length $t;
    return sub {
	my $len = length($_[0]);
	return substr($t, 0, ($old_len - $len));
    };
}

my %since;

sub ::check_end {
    my ($type, $text) = @_;
    if ($since{$type}) {
	local $_ = $since{$type}->($text);
	return m/\A[\s\n]+\z/ || undef;
    }
    return undef;
}

sub ::mark_end {
    my ($type, $text) = @_;
    $since{$type} = ::consumer($text);
}

######################################################################
my $literals = <<'END';
sv_literal:	  /(?:\d+(?:\.\d+)?|\.\d+)(?:[Ee]-?\d+)?/
		| '{' <commit> hv_seq '}'
		| '[' <commit> av_seq(?) ']'
		| <perl_quotelike>

av_seq:		  semi /[;,]?/
av_literal:	  '(' av_seq(?) ')'

hv_seq:		  <leftop: pair ',' pair> /,?/
hv_literal:	  '(' hv_seq ')'
END

######################################################################
$::NAMEPART = qr/[a-zA-Z_][\w_]*/;
my $variables = <<'END';
variable:	  sigil <skip:''> varname

sigil:		  /[\@\%\$\&]/

varname:	  name
		| /\d+/
		| /[\!_]/
		| '^' <commit> namepart
		| ('*')(?) '{' <skip:'\s*'> (scalar_expr | name) '}'

name:		  /(?:::|\.|\*)?$::NAMEPART(::$::NAMEPART)*/o

namepart:	  /$::NAMEPART/o

END

######################################################################
$::COMPARE = qr{cmp | eq | [gnl]e | [gl]t
		| <=> | [<>=!]= | < | > }x;
$::CONTEXT = qr{[\%\@\$\&*_?] | \+(?!\+)}x;
$::MULDIV = qr{[\%*x] | /(?!/)}x;
$::PREFIX = qr{[!~\\] | -(?![->])}x;
$::ADDSUB = qr{[-+_]};
$::LOG_OR = qr{x?or | err}x;
$::LOGOR = qr{\|\| | ~~ | //}x;
$::FILETEST = qr{-[rwxoRWXOezsfdlpSbctugkTBMAC]+};
$::ASSIGN = qr{(?:
	       ! | :			# != and :=
	       | //			# defined
	       | &&? | \|\|? | ~~?      # Logical and bitwise operators
	       | << | >>		# bitshifts
	       | $::ADDSUB
	       | $::MULDIV
	       | \*\*			# pow
	      )?
	       =}x;
my $operators = <<'END';
hype:		  '^' <commit> <skip:''> <matchrule:$arg[0]>
		| <matchrule:$arg[0]>

maybe_namepart:	  namepart |
maybe_comma:	  comma[$arg[0]] | 	

hv_indices:	  /[\w_]+/ | comma

arglist:	  '(' comma(?) ')'

subscript:	  <skip:''> '{' <commit> <skip:$item[1]> hv_indices '}'
		| <skip:''> '[' <commit> <skip:$item[1]> av_seq ']'
		| '(' comma(?) ')'

subscriptable:	  name <commit>
			{ exists($::KEYWORDS{$item[1][1]}) ? undef : 1 }
			arglist
		| '.' <commit> <skip:''> namepart(?)
		| '(' <commit> av_seq(?) ')'
		| variable

context:	  /$::CONTEXT/o
# context:	  '%' | '@' | '$' | '&' | '*' | '_' | '?'
# 		| /\+(?!\+)/ # numeric context...

term:		  '<' <commit> expr(?) '>'
		| subscriptable <commit> subscript(s?)
		| /$::CONTEXT/o <commit> term
		| sv_literal
		| class
		| closure

apply_rhs:	  namepart <commit> subscript(s?)
		| subscript(s)

apply:		  <leftop: term hype['apply_op'] apply_rhs>
apply_op:	  '.'

incr:		  hype['incr_op'] <commit> apply
		| apply hype['incr_op'](?)
incr_op:	  '++' | '--'

pow:		  <leftop: incr hype['pow_op'] prefix>
pow_op:		  '**'

prefix:		  filetest_op <commit> prefix
		| hype['prefix_op'] <commit> prefix
		| name { $::WANT{$item[1][1]} } <matchrule:$item[2]>
		| pow

# prefix_op:	  '!' | '~' | '\\' | /-(?![->])/
prefix_op:	  /$::PREFIX/o
filetest_op:	  /$::FILETEST/o

pair:		  namepart '=>' <commit> prefix
		| prefix '=>' prefix
maybe_pair:	  namepart '=>' <commit> prefix
		| prefix ('=>' prefix)(?)

match:		  <leftop: maybe_pair hype['match_op'] maybe_pair>
match_op:	  '=~' | '!~'

muldiv:		  <leftop: match hype['muldiv_op'] match>
# muldiv_op:	  '*' | '/' | '%' | 'x'
muldiv_op:	  /$::MULDIV/o

addsub:		  <leftop: muldiv hype['addsub_op'] muldiv>
# addsub_op:	  '+' | '-' | '_'
addsub_op:	  /$::ADDSUB/o

bitshift:	  <leftop: addsub hype['bitshift_op'] addsub>
bitshift_op:	  '<<' | '>>'

compare:	  <leftop: bitshift hype['compare_op'] bitshift>
compare_op:	  /$::COMPARE/o
# compare_op:	  '<=>' | '<=' | '==' | '>=' | '<' | '>' | '!='
# 		| 'eq' | 'ge' | 'ne' | 'le' | 'lt' | 'gt' | 'cmp'

bitand:		  <leftop: compare hype['bitand_op'] compare>
bitand_op:	  '&'

bitor:		  <leftop: bitand hype['bitor_op'] bitand>
bitor_op:	  '|' | '~'

logand:		  <leftop: bitor hype['logand_op'] bitor>
logand_op:	  '&&'

logor:		  <leftop: logand hype['logor_op'] logand>
# logor_op:	  '||' | '~~' | '//'
logor_op:	  /$::LOGOR/o

range:		  logor (range_op logor)(?)
range_op:	  '..'

ternary:	  range ('??' ternary '::' ternary)(?)

scope:		  'my' | 'temp' | 'our'
class:		  name { $::CLASSES{$item[1][1]} }
			{ bless ['class', $item[1]], 'Perl6::class' }

scope_class:	  scope <commit> class(?)
		| class

property:	  name { ($item[1][1] ne $arg[0]) || undef }
			arglist(?)

and_prop:	  "$arg[0]" <commit> property[$arg[0]]
		| property[$arg[0]]
props:		  "$arg[0]" <commit> property and_prop[$arg[0]](s?)
		| # nothing
decl:		  '(' <commit> <leftop: variable ',' variable> ')' props['are']
		|  variable props['is']

assign:		  assign_lhs assign_rhs(s?)

assign_lhs:	  scope_class decl
		| ternary
assign_rhs:	  hype['assign_op'] scalar_expr

assign_op:	  /$::ASSIGN/o
# assign_op:	  /[!:]?=/ <commit>
# 		| assignable_op <skip:''> '='
# assignable_op:	  '//'
# 		| logand_op | logor_op
# 		| bitand_op | bitor_op | bitshift_op
# 		| addsub_op | muldiv_op | pow_op

scalar_expr:	  assign but(s?)
but:		  'but' assign

comma:		  <leftop: <matchrule:@{[$arg[0] || 'scalar_expr']}>
			comma_op <matchrule:@{[$arg[0] || 'scalar_expr']}> >
comma_op:	  ','

semi:		  <leftop: comma semi_op comma>
semi_op:	  ';'

adverb:		  scalar_expr adv_clause(?)
adv_clause:	  /:(?!:)/ comma['scalar_expr']

log_AND:	  <leftop: adverb hype['log_AND_op'] adverb>
log_AND_op:	  'and'

log_OR:		  <leftop: log_AND hype['log_OR_op'] log_AND>
# log_OR_op:	  'or' | 'xor' | 'err'
log_OR_op:	  /$::LOG_OR/o

expr:		  log_OR

END

######################################################################
my $declarations = <<'END';
params:		  '(' (_params ',')(?) '*' <commit> '@' namepart ')'
		| '(' <commit> _params(?) (';' _params)(?) ')'
		| # nothing

_params:	  <leftop: _param ',' _param>
_param:		  scope_class(?) variable props['is'] initializer(?)
initializer:	  hype['assign_op'] expr

END

######################################################################
my $statements = <<'END';
prog:		  /\A/ stmts /\z/
		| <error>

stmts:	  	  <leftop: stmt stmt_sep stmt> stmt_sep(?)
		| # nothing

stmt_sep:	  ';'
		| { ::check_end('block', $text) }
		| { ::check_end('label', $text) }

stmt:		  namepart ':' { ::mark_end('label', $text);1 } ''
		| directive <commit> name comma(?)
		| 'method' <commit> name params props['is'] block
		| 'loop' <commit>
			'(' scalar_expr ';' scalar_expr ';' scalar_expr ')'
			block
		| scope(?) 'sub' <commit> name { ::add_function($item[4]);1 }
			params props['is'] block
		| scope(?) 'class' <commit> name { ::add_class($item[4]);1 }
			props['is'] block
		| expr guard(?)

directive:	  'package' | 'module' | 'use'
guard:		  ('if' | 'unless' | 'while') <commit> scalar_expr
		|  'for' expr

block:		  start_block '...' <commit> '}'
			{ ::mark_end('block', $text);1; } ''
		| start_block stmts '}'
			{ ::mark_end('block', $text);1; } ''

start_block:	  <skip:''> /\s*(?<![^\n\s]){\s*/m

closure:	  '->' '(' <commit> _closure_args(?) ')' block
		| '->' <commit> _closure_args(?) block
		| block

_closure_args:	  <leftop: comma['variable'](?) ';' comma['variable']>
END

######################################################################
my $wants = <<'END';
want_for_for:	  av_seq closure
want_for_given:	  scalar_expr closure
want_for_when:	  comma closure
want_for_default: closure

want_for_if:	  scalar_expr closure elsif(s?) else(?)
elsif:		  'elsif' scalar_expr closure
else:		  'else' closure

want_for_grep:	  scalar_expr comma
want_for_map:	  scalar_expr comma
END

######################################################################
# Parse tree simplification:

sub preorder(&$) {			# walk parse tree pre-order
    my $f = shift;
    $f->($_[0]);
    if (UNIVERSAL::isa($_[0], 'ARRAY')) {
	preorder($f, $_) for @{$_[0]};
    }
}

sub postorder(&$) {			# walk post-order
    my $f = shift;
    if (UNIVERSAL::isa($_[0], 'ARRAY')) {
	postorder($f, $_) for @{$_[0]};
    }
    $f->($_[0]);
}

# postorder_filter BLOCK $tree, \@classes
#
#	call BLOCK on each node whose class is in \@classes, or on
#	every node if \@classes is not given.
#
sub postorder_filter(&$;$) {
    my ($f, $tree, $filter) = @_;
    my %doit;
    my $nofilter;
    if (ref $filter) {
	@doit{map { 'Perl6::'.$_ } @$filter} = (1)x@$filter;
    } else {
	$nofilter = 1;
    }
    postorder {
	if ($nofilter || exists $doit{ref($_[0])}) {
	    $f->($_[0]);
	}
    } $tree;
}

# remove_if BLOCK $tree, \@classes
#
#	call BLOCK on each node whose class is in \@classes, or on
#	every node if \@classes is not given.  If BLOCK returns true,
#	remove the node.
#
sub remove_if(&$;$) {
    my ($f, $tree, $filter) = @_;
    postorder_filter {
	my $x = $_[0];
	if (UNIVERSAL::isa($x, 'ARRAY')) {
	    my $i = 0;
	    while ($i <= $#{$x}) {
		if ($f->($x->[$i])) {
		    splice @$x, $i, 1;
		} else {
		    ++$i;
		}
	    }
	}
    } $tree, $filter;
}

sub remove_names {			# remove first item from each node.
    preorder {
	if(ref($_[0]) =~ /^Perl6::/) {
	    shift @{$_[0]};
	}
    } $_[0];
}

# A couple of specialized pruning functions:
sub Perl6::block::tidy {
    my $x = $_[0];
    my $y = $x->[2];
    splice @$x, 1, scalar(@$x), $y;
}

sub Perl6::and_prop::tidy {
    if (@{$_[0]} == 4) {
	my $y = $_[0][-1];
	splice @{$_[0]}, 1, scalar(@{$_[0]}) - 1, $y;
    }
}

sub prune {
    my $tree = $_[0];
    
    # Custom filtering first:
    preorder {
	$_[0]->tidy if UNIVERSAL::can($_[0], 'tidy')
    } $tree;

    # Remove redundant names:
    remove_names $tree;

    # Elements we never care about:
    remove_if {
	(UNIVERSAL::isa($_[0], 'Perl6::stmt_sep') ||
	 $_[0] eq '\s*' ||
	 UNIVERSAL::isa($_[0], 'ARRAY') && @{$_[0]} == 0)
    } $tree;

    # Uninteresting literals:
    remove_if { !ref($_[0]) } $tree,
	[qw(arglist prog closure _param props)];

    # Ones from successful directives:
    remove_if { $_[0] == 1 } $tree,
	[qw(stmt stmts term class property scope_class incr
	    param params prefix subscript)];

    # flatten infix operators with single operands:
    postorder_filter {
	if (@{$_[0][0]} == 1) {
	    $_[0] = $_[0][0][0];
	} else {
	    splice @{$_[0]}, 0, 1, @{$_[0][0]};
	}
    } $tree, [qw(hv_seq apply pow match muldiv addsub bitshift compare
		 bitand bitor logand logor comma semi log_AND log_OR 
		 _params stmts _closure_args)];

    # Flatten final altnernative:
    postorder_filter {
	my $y = $_[0][-1];
	if (@{$_[0]} > 1 && UNIVERSAL::isa($y, 'ARRAY')) {
	    splice @{$_[0]}, -1, 1, @$y;
	}
    } $tree, [qw(assign props property _param scope_class)];

    # Flatten items that add no information:
    postorder_filter {
	$_[0] = $_[0][0] if @{$_[0]} == 1
    } $tree,
	[qw(sigil name namepart maybe_comma maybe_namepart
	    varname subscriptable term incr prefix maybe_pair
	    expr range ternary assign_lhs assign scalar_expr adverb
	    and_prop hype prog stmt)];
}

######################################################################
# Interaction
my %o;
(GetOptions(\%o, qw(dumper rule=s batch help cache trace silent no-hitem))
 && !$o{help})
    || die <<END;
Usage: $0 [options]
	--batch		read batch on STDIN, write to STDOUT
	--dumper	use Data::Dumper to generate output
	--rule NAME	start with rule NAME (default = 'prog')
	--cache		use precompiled grammar
	--silent	don't do any output (not too useful)
	--trace		set \$::RD_TRACE
	--no-hitem	don't keep track of \%item hash

    In interactive mode, output is terminated by a blank line.
END

$::RD_TRACE = $o{trace};
$::RD_NO_HITEM = $o{"no-hitem"};
$::rule = $o{rule} || 'prog';

my $parser;
my $gname = 'Perl6grammar';
$::RD_AUTOACTION = q { bless [@item], 'Perl6::'.$item[0] };

if ($o{cache} && eval("require $gname")) {
    $parser = eval "new $gname" or die "$gname: $@";
} else {
    print STDERR "Constructing parser...";
    use Parse::RecDescent;
    my $grammar = <<'END';
{
    $SIG{__DIE__} = sub { use Carp 'confess'; confess @_ };
}
END
    $grammar .= $variables
		 .$literals
		 .$operators
		 .$declarations
		 .$statements
		 .$wants;
    if ($o{cache}) {
	Parse::RecDescent->Precompile($grammar, $gname);
	eval "require $gname" or die $@;
	$parser = new $gname;
    } else {
	$parser = new Parse::RecDescent($grammar);
    }
    print STDERR "done\n";
}

sub as_sexp {
    my $x = shift;
    if (UNIVERSAL::isa($x, 'ARRAY')) {
	(my $t = ref($x)) =~ s/.*:://;
	return '('.join(' ', $t, map { as_sexp($_) } @$x).')';
    } else {
	return $x;
    }
}

sub pretty {
    my $x = shift;
    prune($x);
    if ($o{dumper}) {
	Dumper($x);
    } else {
	as_sexp($x);
    }
}

my $in = '';
if ($o{batch}) {
    local $/ = undef;
    $in = <STDIN>;
    my $result = $parser->$::rule($in);
    print STDERR "done\n";
    print pretty($result) unless $o{silent};
    exit;
}

my $term = new Term::ReadLine;
my $prompt = '> ';
while (defined(my $l = $term->readline($prompt))) {
    if ($in =~ /^:(.*)/) {
	print eval $1, "\n";
	$in = '';
	next;
    }
    unless ($l =~ /^$/) {
	$in .= "$l\n";
	$prompt = '? ';
	next;
    }
    print "as $::rule:\n";
    my $result = $parser->$::rule($in);
    print STDERR "done\n";
    if ($result) {
	print pretty($result);
    } else {
	print "parse error\n";
    }
    print "\n";
    $in = '';
    $prompt = '> ';
}

Thread Previous | Thread Next


nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About