#!/usr/bin/perl # Multi-purpose, GRASS 4.x Site_list program. Based on the perl # module Parse::RecDescent (a recursive descent parser generator). # # Basically, a language for describing a GRASS 4.x site_list file # is used for working with GRASS 4.x site_list files. # # -v Vet - just determine if file is a proper site_list # -i Description field has integer data (can be used with -v) # -s Description field is string (can be used with -v) # -S Strict interpretation # -d If re-writing data, discard anything which doesn't fit. # otherwise, if gets appended to the description line. # If the name listed on name line doesn't match file name, # the old_name gets put on desc line and name field is updated. # -t Run internal tests. # -p Print grammar and exit. # # The language doesn't handle blank lines (only end-of-line), so # the file reading code skips those. Everything else gets parsed. #I can be reached at #Gordon Haverland my $Strict = 0; use Getopt::Std; getopts('dDhipsStv'); &usage() if( $opt_h ); &usage('Not both -i and -s') if( $opt_i && $opt_s ); $Strict = 1 if( $opt_S ); # **************************************************** # We are going to build a grammar for parsing Grass 4.x # site_list files from pieces, depending on intent. # The parser is built by the perl Parse::RecDescent module # based on the grammar we build up. use Parse::RecDescent; &init_grammar_elements(); my( @easting, @northing, @description, %hdr, @no_match ); my $dline = 0; my $i_dline = 0; my $s_dline = 0; my $G4_SL_Grammar = ''; # Okay, assemble our grammar $G4_SL_Grammar = &build_grammar(); if( $opt_p ) { print $G4_SL_Grammar; exit 0; } if( $opt_D ) { $::RD_HINT++; $::RD_ERRORS++; $::RD_WARN++; $::RD_TRACE++; #$Parse::RecDescent::skip = ''; # Don't skip leading whitespace. #$::RD_++; #$::RD_++; } # Initialize a Recursive Descent parser for the above grammar $parser = Parse::RecDescent->new( $G4_SL_Grammar ); &test() if( $opt_t ); # Are we reading a file be name, or just reading STDIN? if( $#ARGV == -1 ) { $input = '-'; } else { $input = $ARGV[0]; &usage() unless( (-e $input) && (-r $input) ); # ($atime, $mtime) = (stat($input))[8,9]; # utime( $atime, $mtime, $output_file ); } open( INPUT, "< $input" ) || die "Error: can't open $input for read\n"; if( $opt_v ) { &vet(); } else { &rewrite(); } sub init_grammar_elements { # Strictly speaking, the two choices of labels must be lower case. # We can allow any case $G4_Site_List_Grammar{Header_Label_Strict} = q{ hdr_label : /(name|desc)/ }; $G4_Site_List_Grammar{Header_Label} = q{ hdr_label : /(name|desc)/i }; # The two coordinates (easting and northing) can be integer or # float (will be promoted to %12.3f floats if written). Description # fields in header or data just match anything. $G4_Site_List_Grammar{Components} = q{ integer : /[-+]?\\d+/ float : /[-+]?((\\d+\\.\\d*)|(\\d*\\.\\d+))/ description : /.*/ blank : /^\\s*$/ comment : /^\\s*\\#.*$/ coord : float | integer easting : coord northing : coord }; $G4_Site_List_Grammar{Separator} = q{ separator : '|' }; # If you are looking for either Integer_Data point records or # String_Data point records, Integer must come first in parser # grammar. First match is best. $G4_Site_List_Grammar{Integer_Data} = q{ point_record : easting separator northing separator '#' integer description { #PLACEHOLDER $main::dline++; $main::i_dline++; 1; } }; $G4_Site_List_Grammar{String_Data} = q{ point_record : easting separator northing separator description { #PLACEHOLDER $main::dline++; $main::s_dline++; 1; } }; # Comments are allowed (and ignored) by GRASS functions. $G4_Site_List_Grammar{Comment} = q{ comment_line : blank | comment }; # To Vet structure, we need (at most) a single instance of a # name| or desc| line, before the data. This requires # keeping track of what labels have been seen, and incrementing # the number of data lines seen. Returning undef allows parser # to fail. $G4_Site_List_Grammar{Header_Vet} = q{ hdr_line : hdr_label separator description { if( exists( $main::hdr{lc($item[1])} ) ) { print STDERR "Error: GRASS Site_list cannot have multiple $item[1] \\n"; undef ; } else { $main::hdr{lc($item[1])} = $item[-1] ; if( $main::dline > 0 ) { undef ; } else { 1; } } } }; # If we need a sink for miscellaneous lines (usually seen to be # just after the desc line (extraneous carriage returns)). $G4_Site_List_Grammar{Everything_Else} = q{ no_match : description { push( @main::no_match, sprintf('%d %s', $thisline, $item[1] ) ); 1; } }; # This is the starting point for parsing the file. $G4_Site_List_Grammar{Start} = q{ input : point_record | hdr_line | comment_line }; # As above, but with sink added. $G4_Site_List_Grammar{Start_With_Everything} = q{ input : point_record | hdr_line | comment_line | no_match }; } sub build_grammar { my $G4_SL_Grammar; if( $opt_S ) { $G4_SL_Grammar .= $G4_Site_List_Grammar{Header_Label_Strict}; } else { $G4_SL_Grammar .= $G4_Site_List_Grammar{Header_Label}; } $G4_SL_Grammar .= $G4_Site_List_Grammar{Separator}; $G4_SL_Grammar .= $G4_Site_List_Grammar{Components}; if( $opt_i ) { $string = $G4_Site_List_Grammar{Integer_Data}; } else { $string = $G4_Site_List_Grammar{String_Data}; } if( $opt_v ) { $replace = ''; } else { if( $opt_i ) { $replace = q{ push( @main::easting, $item[1] ); push( @main::northing, $item[3] ); push( @main::description, sprintf('%10d %s', $item[6], $item[7]) ); }; } else { $replace = q{ push( @main::easting, $item[1] ); push( @main::northing, $item[3] ); push( @main::description, $item[5] ); }; } } $PLACEHOLDER = ' #PLACEHOLDER'; $string =~ s/$PLACEHOLDER/$replace/; $G4_SL_Grammar .= $string; $G4_SL_Grammar .= $G4_Site_List_Grammar{Comment}; $G4_SL_Grammar .= $G4_Site_List_Grammar{Header_Vet}; if( $opt_v ) { $G4_SL_Grammar .= $G4_Site_List_Grammar{Start}; } else { $G4_SL_Grammar .= $G4_Site_List_Grammar{Everything_Else}; $G4_SL_Grammar .= $G4_Site_List_Grammar{Start_With_Everything}; } return( $G4_SL_Grammar ); } sub process_line { my $input = shift @_ || ''; $parser->input( $input ) or return undef; return 1; } sub usage { while( @_ ) { print STDERR "$_[0]\n"; shift( @_ ); } print STDERR <<' EOS'; Usage: $0 [-v] [-i|s] [-S] [-d] [-t] -v Vet the file to see if it is a GRASS 4.x site_list -S Strict interpretation of site_list -d If re-writing file, discard non-matching stuff. -i File has integer description field -s File has string description field (anything) -t Run internal tests EOS ; exit 1; } sub test { my( @hdr, @comment, @point); &init_tests(); foreach (@hdr, @comment, @point) { print "$_\n"; if( &process_line( $_ ) ) { print "Legal\n"; } else { print "Not legal\n"; } if( $opt_t ) { if( exists( $main::hdr{name} ) ) { print "name|$main::hdr{name}\n"; delete( $main::hdr{name} ); } if( exists( $main::hdr{desc} ) ) { print "desc|$main::hdr{desc}\n"; delete( $main::hdr{desc} ); } print "\n"; } } } sub init_tests { # **************** GRASS-4.x ******************* # Under GRASS-4.x, we can have (1 or) 2 (optional) header lines # name | or desc | $hdr[0] = 'name|some_file_name'; $hdr[1] = 'desc|some_description'; $hdr[2] = 'name |some_file_name'; $hdr[3] = 'desc |some_description'; $hdr[4] = 'Name|some_file_name'; $hdr[5] = 'DESC|some_description'; $hdr[6] = 'naMe|some_file_name'; $hdr[7] = 'DESC |some_description'; $hdr[8] = 'NAME |some_file_name'; $hdr[9] = 'deSc |some_description'; $hdr[10] = 'name:some_file_name'; $hdr[11] = 'desc:some_description'; # We are allowed to kinds of "comment lines", blank lines or # lines starting with a "#". $comment[0] = ''; $comment[1] = ' '; $comment[2] = ' '; $comment[3] = '#'; $comment[4] = '# '; $comment[5] = '# '; $comment[6] = ' #'; $comment[7] = ' # '; $comment[8] = ' #'; $comment[9] = ' # '; $comment[10] = ' # '; # Remaining lines are called "Point Records", and are of the # format east | north | description # Iff the description field starts with a "#", what follows # is read as an integer (no rounding, converssion stops at # non-numeric). Otherwise, this field is a string. $point[0] = '1|2|hello'; $point[1] = '1|2 |hello'; $point[2] = '1| 2|hello'; $point[3] = '1 |2|hello'; $point[4] = ' 1|2|hello'; $point[5] = '1| 2 |hello'; $point[6] = '1 |2 |hello'; $point[7] = ' 1| 2 |hello'; $point[8] = '1 | 2 |hello'; $point[9] = ' 1 | 2 |hello'; $point[10] = '1 | 2|hello'; $point[11] = ' 1 | 2|hello'; $point[12] = ' 1|2 |hello'; $point[13] = '1:2:hello'; $point[14] = '1:2 :hello'; $point[15] = '1: 2:hello'; $point[16] = '1 :2:hello'; $point[17] = ' 1:2:hello'; $point[18] = '1: 2 :hello'; $point[19] = '1 :2 :hello'; $point[20] = ' 1: 2 :hello'; $point[21] = '1 : 2 :hello'; $point[22] = ' 1 : 2 :hello'; $point[23] = '1 : 2:hello'; $point[24] = ' 1 : 2:hello'; $point[25] = ' 1:2 :hello'; $point[26] = '1|2|#3.4'; $point[26] = '1|2 |#5'; $point[27] = '1| 2|#3.4'; $point[28] = '1 |2|#5'; $point[29] = ' 1|2|#3.4'; $point[30] = '1| 2 |#5'; $point[31] = '1 |2 |#3.4'; $point[32] = ' 1| 2 |#5'; $point[33] = '1 | 2 |#3.4'; $point[34] = ' 1 | 2 |#5'; $point[35] = '1 | 2|#3.4'; $point[36] = ' 1 | 2|#5'; $point[37] = ' 1|2 |#3.4'; $point[38] = '1:2:#3.4'; $point[39] = '1:2 :#5'; $point[40] = '1: 2:#3.4'; $point[41] = '1 :2:#5'; $point[42] = ' 1:2:#3.4'; $point[43] = '1: 2 :#5'; $point[44] = '1 :2 :#3.4'; $point[45] = ' 1: 2 :#5'; $point[46] = '1 : 2 :#3.4'; $point[47] = ' 1 : 2 :#5'; $point[48] = '1 : 2:#3.4'; $point[49] = ' 1 : 2:#5'; $point[50] = ' 1:2 :#3.4'; } sub vet { my $line; while( $line = ) { if( ! &process_line( $line ) ) { exit 1 if( $dline < 1 ); exit 2; } } exit 0; } sub rewrite { my $line; while( $line = ) { &process_line( $line ) || die "Error, not a GRASS Site_list file\n"; } # Is internal name the same as external name? if( $input ne '-' ) { if( exists( $main::hdr{name} ) ) { chomp( $main::hdr{name} ); # More complicated for -S (strict) if( $main::hdr{name} =~ /^\s*(\S+)/ ) { my $name = $1; if( $name ne $input ) { $name = $main::hdr{name}; $main::hdr{name} = $input; my $desc = $main::hdr{desc}; chomp( $desc ); $desc .= "oldname( $name )"; $main::hdr{desc} = $desc; } } } else { $main::hdr{name} = $input; } } # Unless we are dropping extra stuff, append to description unless( $opt_d ) { chomp( $main::hdr{desc} ) if( exists( $main::hdr{desc} ) ); foreach ( @main::no_match ) { chomp( $_ ); $extra .= $_; } $main::hdr{desc} .= "nomatch( $extra )"; } print "name|$main::hdr{name}\n"; print "desc|$main::hdr{desc}\n"; if( ($main::dline > 0) && ($main::dline == $main::i_dline) ) { for( $i = 0; $i <= $#main::easting; $i++ ) { $main::description[$i] =~ /^(.{11})(.*)$/; $int = $1; $desc = $2; $int =~ /(\d+)/; $int = $1; $desc =~ s/^\s+//; $desc =~ s/\s+$//; printf "%12.3f | %12.3f |# %10d %s\n", $main::easting[$i], $main::northing[$i], $int, $desc; } } else { for( $i = 0; $i <= $#main::easting; $i++ ) { printf "%12.3f | %12.3f | %s\n", $main::easting[$i], $main::northing[$i], $main::description[$i]; } } }