#!/usr/bin/perl -w use strict; use Date::Manip; Date_Init("TZ=AKST"); # this call must occur somewhere in the code (I guess) use Getopt::Long; my $csv; my $spdheadings = 0; my $dirheadings = 0; my $nocenterdir = 0; my $showdegrees = 0; my $showdirnum = 0; my $showtotals = 0; my $percent = 0; my $hidezero = 0; my $squeezezero = 0; my $dirbincount = 16; # default 16 bins my $dirbinsize = 360 / $dirbincount; # default 22.5 degrees per bin my $ispeed; # units for input speed data: mph, mps, knots my $ospeed; # output speed units my $speedbins = '9,13,16,19,25,32,39'; # ... in output speed units my $verbose = 0; my $hidecalm; my $intervalmode; my $notimestamp; my $tabs; my $quiet; my $title; my $hidetotalsummary; my $precision = 3; # output decimal places my $rounddir; die unless GetOptions( 'table' => sub { undef $csv }, # ... though this is default 'csv' => \$csv, # comma-separated output in lieu of default table 'tab' => \$tabs, # or tab-separated output table 'showdegrees' => \$showdegrees, # don't show textual direction tags 'showdirnum' => \$showdirnum, # show direction as ordinal bin number 'spdheadings' => \$spdheadings, 'dirheadings' => \$dirheadings, 'nocenterdir' => \$nocenterdir, 'showtotals' => \$showtotals, # display row & column sums (or percent) 'percent' => \$percent, # display values as percent frequency 'hidezero' => \$hidezero, # don't show zero values 'squeezezero' => \$squeezezero, # change "0.000" to "0 ", etc. 'hidecalm' => \$hidecalm, # don't show calm total 'quiet' => \$quiet, # don't show information in header 'hidetotalsummary' => \$hidetotalsummary, # don't show total summary 'verbose' => \$verbose, # print additional stuff after output table 'dirbincount=i' => sub { # number of direction bins (per 360 deg) $dirbincount = $_[1]; $dirbinsize = 360 / $dirbincount; }, 'dirbinsize=f' => sub { # size of direction bins (degrees) $dirbinsize = $_[1]; $dirbincount = 360 / $dirbinsize; }, 'ispeed=s' => \$ispeed, # speed input data units 'ospeed=s' => \$ospeed, # output speed units 'speedbins=s' => \$speedbins, # comma-separated list of min bin speeds 'help|h' => sub { system("pod2text $0"); exit 0 }, 'interval' => sub { $intervalmode = $percent = $notimestamp = 1 }, 'notimestamp' => \$notimestamp, 'title=s' => \$title, 'precision=i' => \$precision, 'rounddir' => \$rounddir, ); # check for integral number of bins: die "invalid direction bin size: $dirbinsize\n" if 360 % $dirbinsize % 2; $speedbins =~ tr/ //d; # lose spaces, just in case... my @minspeed = split ',', $speedbins; do { # validate speed bins my $last; foreach my $spd ( @minspeed ) { # validate die "bin speed ($spd) not a number\n" unless $spd =~ m/^\d*\.?\d+/; die "bin speeds not in increasing order ($spd !> $last)\n" if defined $last && $last >= $spd; $last = $spd; } }; my $i2ospeed = 1; # convert input speed data to output units if ( defined $ispeed ) { # validate, then determine conversion die "must specify output speed units if input specified\n" unless defined $ospeed; for my $units ( $ispeed, $ospeed ) { next if $units =~ m/^(mph)|(knots)|(mps)$/; die qq(speed units "$units" not supported\n); } if ( $ispeed eq 'knots' && $ospeed eq 'mph' ) { $i2ospeed = 6076.1/5280; } elsif ( $ispeed eq 'mps' && $ospeed eq 'mph' ) { $i2ospeed = 2.23694; } elsif ( $ispeed eq 'mps' && $ospeed eq 'knots' ) { $i2ospeed = 1.94385; } else { die qq(conversion "$ispeed to $ospeed" not supported\n); } } sub datetime { # convert input format to seconds since epoch, etc. my ($yyyy, $ddd, $hhmm) = @_; die "ERROR-need day, year, hhmm" unless $ddd && $yyyy && defined $hhmm; my ($hh, $mm) = $hhmm =~ m/^(\d?\d)?(\d?\d)$/; $hh = 0 unless defined $hh; $ddd += ($hh * 60 + $mm) * 60 / 86400; # $day is now decimal value my ($y,$m,$d,$h,$mn,$s) = Date_NthDayOfYear($yyyy, $ddd); unless ( defined $y ) { # kludge workaround for midnight December 31... ($y,$m,$d,$h,$mn,$s) = Date_NthDayOfYear($yyyy, $ddd-.00001); } my $secs = Date_SecsSince1970($m,$d,$y,$h,$mn,$s); $s = int($s); # ad hoc fix for invalid fractional sec for localtime() return ($secs, $y, $m, $d, $h, $mn, $s); } ## prep bins my @bin; # bin matrix, DIR X SPD my $calm_bin = 0; # one bin for calm values (direction is meaningless here) do { # (note: block used to localize @initspeedbins my @initspeedbins; push(@initspeedbins, 0) foreach @minspeed; push(@bin, [ @initspeedbins, # count=0 for each speed bin, then ... 360/$dirbincount * $_, # degrees azimuth for direction identifier ]) for 0 .. $dirbincount-1; }; unless ( $showdegrees ) { # replace degrees identifier with textual tag my @dirtags; # descriptive identifier for direction bins if ( $showdirnum ) { @dirtags = ( 1 .. $dirbincount ); } elsif ( $dirbincount == 4 ) { @dirtags = qw(N E S W); } elsif ( $dirbincount == 8 ) { @dirtags = qw(N NE E SE S SW W NW); } elsif ( $dirbincount == 16 ) { @dirtags = qw(N NNE NE ENE E ESE SE SSE S SSW SW WSW W WNW NW NNW); } if ( @dirtags ) { # replace degrees if tags are defined for ( my $i = 0; $i < $dirbincount; $i++ ) { $bin[$i]->[-1] = $dirtags[$i]; } } } my $f = shift or die "usage: $0 datafile\nfor help: $0 --help\n"; open DATA, "<$f" or die qq(error opening file "$f"); my $output = shift; if ( $output ) { open STDOUT, ">$output" or die qq(unable to redirect STDOUT to "$output"); } my ($firstrec, $lastrec, $lasttime); my $max_interval = 0; my $records = 0; my @culled; my $totaltime = 0; my %errcounts; while ( my $line = ) { chomp $line; if ( $line =~ m/^#(.*)/ ) { # allow comments & meta info unless ( $title ) { # ignore if title already set $title = $1 if $line =~ m/^#TITLE (.*)/; } next; } my $error; my ( $yyyy, $ddd, $hhmm, $spd, $dir, $interval ) = $line =~ m/(\d{4}),(\d{1,3}),(\d{1,4}),(-?\d*\.?\d*),(-?\d*\.?\d*)(,\d+)?/ or $error = 'pattern_error'; $dir = sprintf "%.0f", $dir if $rounddir; $error = 'bad dir input' if $dir =~ m/6999/; $error = 'dir under range' if $dir < 0; $error = 'dir over range' if $dir > 360; $error = 'bad spd input' if $spd =~ m/6999/; $error = 'spd under range' if $spd < 0; $error = 'no interval' if $intervalmode && !$interval; $spd *= $i2ospeed; $interval =~ s/^,// if $interval; # kludge -- remove artifact if ( $error ) { # report, count, and move on push @culled, $line . ' -- ERROR: ' . $error; $error =~ s/ /_/g; $errcounts{$error}++; next; } $ddd = sprintf "%03d", $ddd; # pad day & time with zeros $hhmm = sprintf "%04d", $hhmm; my ($time, $year, $month, $day, $hour, $min, $sec) = datetime($yyyy, $ddd, $hhmm); # store timestamps for header display: $lastrec = "$month/$day/$year $hour:$min:$sec"; $firstrec = $lastrec unless defined $firstrec; ## calc max time interval between records: $lasttime = $time unless $lasttime; $interval = $time - $lasttime unless defined $interval; $lasttime = $time; $max_interval = $interval if $interval > $max_interval; printf STDERR ("skipped %.1g hours before %s\n",$interval/3600, $line) if !$notimestamp && $interval > 3660; ## determine direction bin: my $dirbin; unless ( $nocenterdir ) { $dirbin = int( ($dir + $dirbinsize/2) / $dirbinsize ); } else { $dirbin = int( $dir / $dirbinsize ); } $dirbin = 0 if $dirbin == $dirbincount; ## determine speed bin: my $increment = $intervalmode ? $interval : 1; my $spdbin; for ( $spdbin = $#minspeed; $spdbin>=-1; $spdbin-- ) { if ( $spdbin < 0 ) { # special case, calm $calm_bin += $increment; last; } # print "DEBUG:", $minspeed[$spdbin], "\n"; if ( $spd >= $minspeed[$spdbin] ) { $bin[$dirbin][$spdbin] += $increment; last; } # else speed is in a lesser bin } $totaltime += $interval; # seconds $records++; # number of records processed } ## show results: print "$title\n" if $title; unless ( $quiet ) { print "wind data from file $f\n"; unless ( $notimestamp ) { print "data from $firstrec to $lastrec\n"; my $avg_interval = sprintf "%.1f", $totaltime / $records / 60; # minutes print "average interval between records is $avg_interval minutes\n"; my $max_hours = $max_interval/3600; print "maximum interval between records is $max_hours hours\n"; } print "$#culled records culled\n" if @culled; my $error_summary = ''; foreach my $err ( sort keys %errcounts ) { $error_summary .= "$err=$errcounts{$err}, "; } print "records skipped: $error_summary\n" if $error_summary; my $process_type; if ( $intervalmode ) { $process_type = 'Percent Frequency, Interval-weighted'; } elsif ( $percent ) { $process_type = 'Percent Frequency'; } else { $process_type = 'Raw Data Counts'; } print "$process_type\n"; } print "\n\n"; # whitespace between headers and output data if ( $spdheadings ) { my $fieldwidth = $percent ? 7 : 6; print 'Dir ' if $dirheadings; foreach my $minspeed ( @minspeed ) { printf("%${fieldwidth}s", $minspeed); } printf("%${fieldwidth}s", "All") if $showtotals; print "\n"; } sub format_count { # format for output (see $percent & $hidezero options) my $count = shift; my $result; if ( $intervalmode ) { $result = sprintf("%7.${precision}f", $count/$totaltime*100); $result = " 0" if $squeezezero && $result == 0; } elsif ( $percent ) { $result = sprintf("%7.${precision}f", $count/$records*100); $result = " 0" if $squeezezero && $result == 0; } elsif ( $tabs ) { $result = "\t" . $count; } else { $result = sprintf("%6s", $count); } $result =~ s/\S/ /g if $count == 0 && $hidezero; return $result; } my @speedbin_totals; foreach ( @minspeed ) { # init the speed bins totalizer push @speedbin_totals, 0; } foreach my $dirbin ( @bin ) { # $dirbin is array ref: [ @speedbins, $dirtag ] my $dirtag = pop @$dirbin; # direction identifier my @speedbins = @$dirbin; my $dirtotal = 0; $dirtotal += $_ foreach @speedbins; # sum across for ( my $i = 0; $i < @speedbins; $i++) { # sum down $speedbin_totals[$i] += $speedbins[$i]; } if ( $csv ) { # need to print direction degrees or tag print join(',', $dirtag, @speedbins); print ",$dirtotal" if $showtotals; } elsif ( $tabs ) { # need to print direction degrees or tag my $output = join("\t", $dirtag, @speedbins); $output .= "\t$dirtotal" if $showtotals; $output =~ s/(\s)0/$1/g if $hidezero; print $output; } else { # default table-style output printf "%3s ", $dirtag if $dirheadings; # show direction identifier print format_count($_) foreach @speedbins; print( format_count($dirtotal) ) if $showtotals; } print "\n"; } # print totals if ( $showtotals ) { # speed bin totals my $sum = 0; print "All " if $dirheadings; foreach my $total ( @speedbin_totals ) { print format_count($total); $sum += $total; } print format_count($sum) if $showtotals; print "\n"; } # print "Calm", format_count($calm_bin), "\n" unless $hidecalm; unless ( $hidecalm ) { my $tag = $showdirnum ? "0" : "Calm"; # special case, see help info print $tag, format_count($calm_bin), "\n"; } print "Total Observations: $records\n" unless $hidetotalsummary; exit 0 unless @culled; exit 1 unless $verbose; print "---------\n"; print "the following records were culled as invalid:\n"; print "$_\n" foreach @culled; print "---------\n"; exit 1; # signal an error since some records culled =head1 NAME binify -- convert raw windspeed and direction data to wind rose bin format =head1 SYNOPSIS $ windrose-binify [options] datafile =head1 DESCRIPTION Input data is expected in the following, comma-separated format: YEAR,DAYOFYEAR,HHMM,SPEED,DIR[,INTERVAL] where SPEED is in arbitrary units, DIR is degrees E of N, and INTERVAL is in seconds. The interval will be processed only if the --interval option is specified; normally the interval is calculated based on the record timestamps. Actually the interval is usually for "information only", as the process is based on event counts; the data interval is thus normally assumed to be regular, the same for all records. Speed is processed into bins specified by the --speedbins option (see below). Speed units are arbitrary, and need not be specified if input data and the speed bins are in the same units. If input and output (bin values) differ, however, both the --ispeed and --ospeed options must be provided so that a conversion factor can be determined. (Note: unsupported units will simply need to be added to the program.) Use - (single dash) as the datafile to process data on standard input. =head1 OPTIONS =over 4 =item --dirbincount=N number of direction bins (sets --dirbinsize parameter) =item --dirbinsize size of direction bins (sets --dirbincount parameter) =item --nocenterdir do not center direction bins on nominal direction; the default is to rotate direction bins so that, e.g,. the N bin centers on due north =item --speedbins= list of lower limits of speed bins; must be in increasing order; if the first value is greater than zero, speeds less than the first value will be counted as "calm" example: --speedbins=8.5,12.5,15.5,18.5,24.5,31.5,38.5,45.5 =item --ospeed=UNITS used optionally to label speed bins on output; exception see --ispeed =item --ispeed=UNITS used in conjunction with ospeed option to calculate conversion factor to apply to input data; must be one of "mph", "knots", "mps" =item --help, -h print this information =item --table print output as a table (default); see also --csv =item --csv print output as comma-separated values =item --tab print output as tab-separated values =item --showdegrees show direction bins as degrees, not textual tags =item --showdirnum show direction bins as the ordinal bin number rather than degrees or textual tags; also show "calm" string tag as "0" =item --showtotals show row and column totals =item --percent show results as percent frequency rather than counts =item --hidezero suppress output fields which are zero, replacing with blanks =item --hidecalm suppress output of calm total =item --quiet suppress output of information in header =item --hidetotalsummary suppress output of total records processed =item --title specify a title string for output; see also #TITLE comment =item --spdheadings display speed (columns) headings for output bins =item --dirheadings display direction (rows) headings for output bins =item --interval use the provided interval (in each record) rather than calculating based on record timestamps; automatically sets the --percent option =item --notimestamp ignore timestamps (don't calculate intervals) =item --verbose print additional stuff after output table =item --rounddir round direction value to nearest integer (default is off, no rounding) =item --precision=N output precision (decimal places) for bin values (default is 3, i.e., --precision=3) =back =head1 COMMENTS Comments are allowed in the input data file, consisting of any line beginning with "#" in the first column. Comments are simply ignored, unless the comment has a certain structure, described next. A comment of the form: #TITLE this will be displayed as the title (first line) of the output is processed (as noted) by assigning the text to the $title variable, and printed as the first line of output. See also the --title option. Only the first title specified will be used; the --title option thus overrides a title comment. =head1 BUGS This conversion program is ad hoc, based on the (arbitrary) input file format presented, and the output examples provided. These results should be carefully checked, and used with caution. =head1 AUTHOR Ken Irving Water and Environmental Research Center University of Alaska, Fairbanks 907-474-6152 =cut