#!/usr/local/bin/perl            

# align - filter to align columns of text
# Steve Kinzler, steve@kinzler.com, Jun 00/Oct 01/Dec 01
# see website http://kinzler.com/me/align/
# http://kinzler.com/me/home.html#unix

# Release identifier; it is interpolated into the usage text.
$VERSION = '1.7.5';

# Needs Perl 5 and the core single-letter option parser.
require 5.000;
use Getopt::Std;

# Ask Getopt::Std for its standard-conforming --help/--version handling
# (see the Getopt::Std documentation for the exact effect).
$Getopt::Std::STANDARD_HELP_VERSION = 1;

# Help text, printed through die for -h or a malformed command line.
# $0, $VERSION and the current value of $TABSTOP are interpolated here.
$usage = <<EOF;
usage: $0 [ -s { s+ | s | t+ | t | _+ | _ | /patt } ]
       [ -j { s | t | _ | /string } ] [ -g # ] [ -t # ]
       [ -a { l | r | c | n | N | d | k }... ] [ { -e | -E } perlexpr ]
       [ file ... ]
	-s	split the input into columns defined by
		s+	one or more whitespace characters
		s	every whitespace character
		t+	one or more tab characters
		t	every tab character
		_+	one or more space characters
		_	every space character
		/patt	every occurrence of the given Perl regexp pattern
	-j	join the output into columns separated by
		s	tabs and spaces as needed
		t	only tabs (with all left alignment only)
		_	only spaces
		/string	repetitions and a truncation of the given string
	-g	the minimum character places between output columns (gutter)
	-t	the number of character places taken by a full tab character
	-a	the alignment of each column with the last repeated as needed
		l	left
		r	right
		c	center
		n	numeric (aligned on the decimal point)
		N	numeric with decimal parts zero-padded
		d	default alignment from column contents
		k	no alignment (can unalign all rightward columns)
		may be preceded by a number to repeat that many times
	-e	align only lines for which the given Perl expression is true
	-E	like -e but do not output unaligned lines
The default split method is determined by the following heuristic rules
applied in order to the entire input:
	s+	if there are any adjacent tab and space characters
	t+	if there are any two adjacent tab characters
	t	if there is any tab character
	_+	if there are any two adjacent space characters
	_	otherwise
The default join method is determined by the split method as follows:
	t	if t+ or t (and all left aligned)
	s	if s+ or s or /patt
	_	otherwise
The default gutter value is 1.
The default tab value is \$TABSTOP ($ENV{'TABSTOP'}), otherwise 8.
The default alignment is 'numeric' on columns of all numbers and 'left'
on all other columns.
By default, all lines are aligned.  In a -e expression, \$_ will be set
to the current line.
Visual alignment may not be achieved if the column text or join string
contains tabs, non-printing characters or other characters not occupying
one character place.
Version $VERSION
EOF

# Parse the command line.  -h, or anything getopts rejects, prints the usage.
die $usage unless getopts('hs:j:g:t:a:e:E:') && ! $opt_h;

# Gutter (-g) defaults to 1; tab width (-t) defaults to $TABSTOP, else 8.
if ($opt_g ne '') { $opt_g += 0 } else { $opt_g = 1 }
if ($opt_t ne '') { $opt_t += 0 } else { $opt_t = ($ENV{'TABSTOP'} + 0) || 8 }
if ($opt_g <  0) { die "$0: invalid gutter value ($opt_g)\n" }
if ($opt_t <= 0) { die "$0: invalid tab value ($opt_t)\n" }

# Expand repeat counts in -a ("3r" becomes "rrr"), then drop every
# character that is not an alignment letter.
$opt_a =~ s{(\d+)(.)}{$2 x $1}eg;
$opt_a =~ s{[^lrcnNdk]}{}g;

# Turn -s into regexp source.  The alternatives are tried in order and the
# first that applies wins: unset -> placeholder resolved once the input has
# been read; s/t forms -> \s/\t (any + kept); _ forms -> a space; a literal
# tab or space is accepted as is; /patt is kept for later.
unless (   $opt_s =~ s/^$/DeFaUlT/
	|| $opt_s =~ s/^[st]\+?$/\\$&/
	|| $opt_s =~ s/^_(\+?)$/ $1/
	|| $opt_s =~ /^[\t ]\+?$/
	|| $opt_s =~ /^\//) {
	die $usage;
}

# Same idea for -j: unset -> placeholder; s, t or _ as is; a literal tab
# or space is mapped to its letter; /string needs at least one character.
unless (   $opt_j =~ s/^$/DeFaUlT/
	|| $opt_j =~ /^[st_]?$/
	|| $opt_j =~ s/^\t$/t/
	|| $opt_j =~ s/^ $/_/
	|| $opt_j =~ /^\/./) {
	die $usage;
}

# -e takes precedence over -E.  Evaluate the expression once against an
# empty line so a broken expression is reported and then ignored.
$eexpr = $opt_E;
$eexpr = $opt_e if $opt_e ne '';
$_ = '';
eval $eexpr;
if ($@) {
	warn("$0: ignoring invalid expression ($eexpr) => $@");
	$eexpr = '';
}

# A number: optional sign, then either digits/commas with an optional
# fractional part, or a bare fractional part.  The captures are
# (sign, integer part, fraction) or (sign, undef, undef, fraction).
$numpatt = '^([-+]?)(?:(\d[\d,]*)(\.?\d*)|(\.\d+))$';

###############################################################################

# Read all input lines up front (newlines removed): the input is walked
# twice, once to measure the columns and once to print them.
@in = <>;
chomp(@in);

# Only the lines accepted by the -e/-E expression (every line when no
# expression is set) take part in the measuring.
if ($eexpr eq '') { @do = @in } else { @do = grep(eval $eexpr, @in) }

# No -s given: choose the split pattern from what the input contains.
if ($opt_s eq 'DeFaUlT') {
	if    (grep(/ \t|\t /, @do)) { $opt_s = '\s+' }
	elsif (grep(/\t\t/,    @do)) { $opt_s = '\t+' }
	elsif (grep(/\t/,      @do)) { $opt_s = '\t'  }
	elsif (grep(/  /,      @do)) { $opt_s = ' +'  }
	else                         { $opt_s = ' '   }
}

# No -j given: derive the join method from the split pattern.
if ($opt_j eq 'DeFaUlT') {
	if    ($opt_s =~ /^(\\t|\t)/) { $opt_j = 't' }
	elsif ($opt_s =~ /^(\\s|\/)/) { $opt_j = 's' }
	else                          { $opt_j = '_' }
}

# A user pattern was given as /patt; drop the marker slash.
$opt_s =~ s/^\///;

# First pass: for every column position record the default alignment, the
# widest cell, and the widest sign+integer and fractional parts of any
# numeric cell.  A line with fewer columns leaves the remaining entries
# untouched.
while (@do) {
	@cols = split(/$opt_s/, shift @do);
	my $col = 0;

	foreach (@cols) {
		@isnum = /$numpatt/o;

		# A column stays 'n' only while every cell seen is a number;
		# once it has become 'l' it is never changed back.
		unless ($aligns[$col] =~ /[^n]/) {
			$aligns[$col] = (@isnum) ? 'n' : 'l';
		}

		$widths[$col] = &max($widths[$col], length($_));
		$numwI[$col]  = &max($numwI[$col],  length($isnum[0] . $isnum[1]));
		$numwF[$col]  = &max($numwF[$col],  length($isnum[2] . $isnum[3]));
		$col++;
	}
}
# Replace each 'd' in the -a list by the alignment detected for the column
# at that position ('l' when no column exists there).  Without any -a
# letters, use the detected alignments as they are.
$detected = join('', @aligns);
while (($d = index($opt_a, 'd')) >= 0) {
	substr($opt_a, $d, 1) = substr($detected, $d, 1) || 'l';
}
$opt_a ||= $detected || 'l';

# Tabs alone can only left-align, so fall back to tabs and spaces as soon
# as any other alignment is requested.
if ($opt_j eq 't' && $opt_a =~ /[^l]/) { $opt_j = 's' }

# Built-in join methods are tagged so they cannot be confused with a user
# string; a user string only loses its leading slash.
unless ($opt_j =~ s/^\///) { $opt_j .= ':FlAg' }

# $tab is the repeated filler unit and $tlen the places it occupies: a real
# tab for the built-in methods, otherwise the user's join string.
if ($opt_j =~ /^[ts_]:FlAg$/) {
	($tab, $tlen) = ("\t", $opt_t);
} else {
	($tab, $tlen) = ($opt_j, length($opt_j));
}

# Widen every numeric ('n'/'N') column so that the widest sign+integer part
# and the widest fractional part seen in that column both fit once the
# decimal points are lined up.
#   $widths  - ref to the per-column display widths (updated in place)
#   $int_w   - ref to the per-column widest sign+integer part
#   $frac_w  - ref to the per-column widest fractional part
#   $spec    - alignment letters, the last one repeated as needed
sub widen_numeric_widths {
	my ($widths, $int_w, $frac_w, $spec) = @_;
	my @spec = split(//, $spec);
	my ($i, $align, $need);

	for ($i = 0; $i < @$widths; $i++) {
		$align = ($#spec) ? shift @spec : $spec[0];
		next unless $align =~ /^[nN]$/;

		# Look the part widths up by column index.  Shifting them off
		# work lists only in the numeric case lets them fall out of
		# step after any non-numeric column, so a later numeric column
		# would be sized from an earlier column's numbers.
		$need = $$int_w[$i] + $$frac_w[$i];
		$$widths[$i] = $need unless $$widths[$i] >= $need;
	}
}

widen_numeric_widths(\@widths, \@numwI, \@numwF, $opt_a);

# Second pass: print every input line, padding the cells of the lines that
# are selected for alignment.
foreach (@in) {
	# A line rejected by the -e/-E expression is not aligned: it is
	# printed as is when -e was given and dropped otherwise (-E).
	$opt_e ne '' && print($_, $/), next if $eexpr ne '' && ! eval $eexpr;

	@cols = split(/$opt_s/);
	# $llen: places written on this line so far.
	# $hold: padding that belongs after the previous cell; it is only
	#        written if another column follows, so lines get no trailing
	#        padding.
	$llen = $hold = 0;
	@a    = split(//, $opt_a);
	@w    = @widths;		@nF = @numwF;

	while (@cols) {
		# Alignment letter for this column; once a single letter is
		# left it is reused for all remaining columns.
		$a  = ($#a) ? shift @a : $a[0];
		# NOTE: $_ is still the foreach alias, so this overwrites the
		# current @in element with the cell text.
		$_  = shift @cols;	$nF = shift @nF;

		@isnum = /$numpatt/o if $a =~ /^[nN]$/;
		if ($a eq 'N' && @isnum) {
			# Zero-pad the fraction up to the column's widest
			# fractional part, adding a decimal point first if the
			# number has none; then re-match the padded text.
			s/$/./ if $nF && ! /\./;
			($F) = /(\.\d*)$/, s/$/'0' x ($nF - length($F))/e;
			@isnum = /$numpatt/o;
		}

		$l  = length($_);
		# $G: total padding available for this cell.
		# $g: the part of it written before the text; $og remembers
		#     that part before the gutter is added.
		# For a numeric cell the places missing from its fraction stay
		# on the right so the decimal points line up; a non-number in
		# a numeric column gets all padding on the left.  'k' takes no
		# padding at all.
		$G  = shift(@w) - $l;
		$g  = $og = ($a eq 'l')	? 0	      :
			    ($a eq 'r')	? $G	      :
			    ($a eq 'c')	? int($G / 2) :
			    ($a eq 'k')	? ($G = 0)    :
			    (! @isnum)	? $G	      :
				$G - ($nF - length($isnum[2] . $isnum[3]));
		# True for every column after the first (@w has already lost
		# one entry): add the previous cell's held padding and the
		# gutter.
		$g += $hold + $opt_g if $#w < $#widths - 1;

		if ($opt_j eq 't:FlAg') {
			# Tabs only.  $ex is how far the cell's start lies past
			# a multiple of $tlen; if it is not on one, $g is
			# stretched to the next multiple and one more tab is
			# written.  The "+ $tlen - .000001" makes int() round
			# the division up.
			$ns = &min($g, $ex = ($llen + $g) % $tlen);
			$nt = int(($g - $ns + $tlen - .000001) / $tlen);
			$g += $tlen - $ex if $ns;
			$j  = $tab x $nt . (($ns) ? $tab : '');

		} elsif ($opt_j eq 's:FlAg') {
			# Tabs and spaces: tabs first, then $ns spaces to reach
			# the exact start position.
			$ns = &min($g, ($llen + $g) % $tlen);
			$nt = int(($g - $ns + $tlen - .000001) / $tlen);
			$j  = $tab x $nt . ' ' x $ns;

		} elsif ($opt_j eq '_:FlAg') {
			# Spaces only.
			$j  = ' ' x $g;

		} else {
			# User join string ($tab) of length $tlen.  The held
			# padding is filled with the tail of the string followed
			# by whole copies; the rest of the gap with whole copies
			# followed by a truncated copy.
			$ns = $hold % $tlen;
			$nt = int($hold / $tlen);
			$j  = substr($tab, $tlen - $ns) . $tab x $nt;
			$ns = ($g - $hold) % $tlen;
			$nt = int(($g - $hold) / $tlen);
			$j .= $tab x $nt . substr($tab, 0, $ns);
		}

		print $j, $_;

		$llen += $l + $g;
		# Padding left over on the right of this cell.
		$hold  = $G - $og;
	}

	# The input record separator doubles as the output line terminator.
	print $/;
}

###############################################################################

# Return the larger of two numbers (the first one when they compare equal).
sub max {
	my ($x, $y) = @_;
	return $x if $x >= $y;
	return $y;
}
# Return the smaller of two numbers (the first one when they compare equal).
sub min {
	my ($x, $y) = @_;
	return $x if $x <= $y;
	return $y;
}
