#!/bin/sh
#
# @(#)  /u/des/src/sortmail/recomposemail  1.15  98/09/12  13:24:41
#
# recomposemail: script for merging mail messages by date;
#	messages have been processed by 'decomposemail' into
#	individual files with date.time.seq info appended
#	to the filenames;  'recomposemail' re-assembles them;
#	see USAGE below for options;
#
# D.Singer, 10/95
#
#
# Copyright (c) 1998 by Daniel E. Singer.  All rights reserved.
# Permission is granted to reproduce and distribute this program
# with the following conditions:
#   1) This copyright notice and the author identification below
#      must be left intact in the program and in any copies.
#   2) Any modifications to the program must be clearly identified
#      in the source file.
# 
# Written by:
#   Daniel E. Singer
#   UNIX Systems Administrator
#   Department of Computer Science
#   Duke University, Durham, NC
#   Phone: 919/660-6500
#   Email: des@cs.duke.edu
#

#PATH='/usr/bin:/usr/sbin:/bin:/usr/ucb:/usr/bsd:/usr/etc:/usr/local/bin'
#export PATH

# NOTE: restyled from the 1.15 original; behavior is unchanged.

# name this script was invoked as; prefixes every message it prints
PROG=`basename "$0"`

# this involves email, so everything created from here on
# (files and dirs) is accessible by the owner only
umask 077

#
# platform specific settings:
# the systems listed below use 'awk', everything else uses 'nawk'
#
SYS=`uname -sr`		# OS name and release
case "$SYS" in
  "FreeBSD "* | "HP-UX "* | "Linux "*)
	AWK=awk
	;;
  *)
	AWK=nawk
	;;
esac

#
# extension (suffix) pattern;
# a `grep' pattern is used instead of shell matching, because as
# Kevin Glueck <kglueck@viz.tamu.edu> points out, using "ls pattern"
# will crash on huge mboxes on some systems, whereas "ls | grep pattern"
# will work;
#
#EXT_PAT='[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9].[0-9][0-9][0-9][0-9][0-9][0-9].[0-9][0-9][0-9][0-9][0-9]*'
# NOTE: modified from the 1.15 original -- VFLAG now has a default like
# the other flags, and the -a usage text lists every option that
# causes -a to be ignored (-m, -M, -y and -Y).

# `grep' pattern for the date.time.seq suffix of a message file name:
# 8 digits, ".", 6 digits, ".", then 5 or more digits
GREP_EXT_PAT='[0-9]\{8\}\.[0-9]\{6\}\.[0-9]\{5,\}'

VECHO=':'	# don't echo unless -v is set
VFLAG=0		# verbose (-v) was given
AFLAG=0		# append to existing mailbox
KEEP=0		# don't keep the original files
MFLAG=0		# merge to mbox.yymm
MMFLAG=0	# merge to yymm/mbox
YFLAG=0		# merge to mbox.yyyy
YYFLAG=0	# merge to yyyy/mbox

# help text, printed by -h and after any command line error
USAGE="
Usage:	$PROG [-ahkv] [-m|M] [-y|Y] [base_name...]

	  -a	append to current base_name file, ignored with
		-m, -M, -y or -Y;
	  -h	help, print this message and exit;
	  -k	keep the original files, ie, don't remove them;
	  -m	combine by months into files named with mbox.YYMM;
	  -M	combine by months into dirs named with YYMM/mbox;
	  -y	combine by years into files named with mbox.YYYY;
	  -Y	combine by years into dirs named with YYYY/mbox;
		with -m, yields YYYY/mbox.MM;
		with -M, yields YYYY/MM/mbox;
	  -v	verbose, more messages;

	  base_name  base name of a group of message files;

	'$PROG' takes a group of files created by 'decomposemail', and
	combines them into files based on the selected options.
"

# these are the valid combinations of the date sort flags, written as
# the four digits YYFLAG YFLAG MMFLAG MFLAG; they are compared
# against SMODE
no_SORT="0000"	# no date sorting
 m_SORT="0001"	# -m
 M_SORT="0011"	# -M
 y_SORT="0100"	# -y
 Y_SORT="1100"	# -Y
Ym_SORT="1101"	# -Y and -m
YM_SORT="1111"	# -Y and -M
  SMODE="$no_SORT" # sort mode, a combination of the date sort flags
USE_DATES=0	# will be using dates (any of -[mMyY])
USE_DIRS=0	# will be using subdirs (any of -[MY])


#
# process command line options
#

# NOTE: restyled from the 1.15 original; behavior is unchanged.

# first line of every option syntax complaint
SYNTAX="$PROG: option syntax error."

#
# syntax_error: report a bad command line and give up;
#	takes no arguments;
#	writes $SYNTAX and then $USAGE to stderr, and exits the
#	script with status 1 (it never returns to the caller);
#
syntax_error() {
	{
		echo "$SYNTAX"
		echo "$USAGE"
	} >&2
	exit 1
}

#
# arg_syntax_check: verify that an option which takes a value still
#	has an argument left on the command line;
#	$1 - count of remaining command line arguments (the caller's "$#");
#	calls syntax_error (which exits the script) when $1 is less
#	than 1; otherwise returns 0;
#
# NOTE: modified from the 1.15 original, which was written as
# '[ ... ] && syntax_error' and therefore returned the failed test's
# non-zero status whenever the check actually passed.
#
arg_syntax_check() {
	if [ "$1" -lt 1 ]; then
		syntax_error
	  fi
	return 0
}

# NOTE: modified from the 1.15 original -- compound options are now
# split by the split_compound_opt helper below, which hands the option
# to awk through the environment instead of pasting it into the text
# of the awk program.

#
# split_compound_opt: break a compound option such as "-akv" into
#	separate single-letter options;
#	$1 - the compound option, including its leading "-";
#	prints the expansion on stdout, each character after the "-"
#	as " -c" (so "-akv" gives " -a -k -v");
#	returns non-zero if any of those characters is not a letter,
#	digit or underscore;
#
split_compound_opt() {
	# the option reaches awk as data (ENVIRON), never as program
	# text, so quotes or backslashes in it cannot change the program
	COMPOUND_OPT="$1" $AWK 'BEGIN {
		OPT_STR = ENVIRON["COMPOUND_OPT"];
		LEN = length(OPT_STR);
		NEW_OPTS = "";
		STATUS = 0;
		# position 1 is the leading "-", so start at 2
		for (POS=2; POS+0 <= LEN; ++POS) {
			OPT = substr(OPT_STR,POS,1);
			if (OPT !~ /[a-zA-Z0-9_]/)
				STATUS = 1;
			NEW_OPTS = NEW_OPTS " -" OPT;
		}
		print NEW_OPTS;
		exit STATUS;
	  }' <&-
}

#
# walk the command line, setting the flag variables, until the first
# word that is not an option (or until "--"); what is left in the
# positional parameters afterwards are the base_names
#
while [ "$#" -gt 0 ]; do
	OPT="$1"
	case "$OPT" in
	  -a)
		AFLAG=1
		;;
	  -k)
		KEEP=1
		;;
	  -m)
		MFLAG=1
		;;
	  -M)
		# -M implies -m: months, but as directories
		MMFLAG=1
		MFLAG=1
		;;
	  -y)
		YFLAG=1
		;;
	  -Y)
		# -Y implies -y: years, but as directories
		YYFLAG=1
		YFLAG=1
		;;
	  -v)
		VECHO="echo"
		VFLAG=1
		;;
	  -h)
		echo "$USAGE" >&2
		exit 0
		;;
	#  -c)
	#	shift
	#	arg_syntax_check "$#"
	#	CARG="$1"
	#	CFLAG=1
	#	;;
	  # ...
	  --)
		# explicit end of options
		shift
		break
		;;

	  # unknown option
	  -?)
		syntax_error
		;;
	  # compound option
	  -??*)
		# break up a compound option
		NEW_OPTS=`split_compound_opt "$OPT"` || {
			syntax_error
		  }
		shift
		# put the single options in front of the remaining
		# arguments and go around again without shifting;
		# ${1:+"$@"} expands to nothing when no arguments remain
		set -- $NEW_OPTS ${1:+"$@"}
		continue
		;;
	  # end of options, just command arguments left
	  *)
		break
	  esac
	shift
  done

#
# check for bad option combos
#
# NOTE: restyled from the 1.15 original; behavior is unchanged.

# the sort mode is the four date sort flags written side by side
SMODE="$YYFLAG$YFLAG$MMFLAG$MFLAG"
case "$SMODE" in
  "$no_SORT")
	;;
  "$y_SORT" | "$Y_SORT")
	;;
  "$m_SORT" | "$M_SORT")
	;;
  "$Ym_SORT" | "$YM_SORT")
	;;
  *)
	# anything else is not a supported combination
	echo "$PROG: cannot mix -y with -m or -M." >&2
	echo "$USAGE" >&2
	exit 1
	;;
esac

# any month or year option means destinations are chosen by date
if [ "$MFLAG" = 1 ] || [ "$YFLAG" = 1 ]; then
	USE_DATES=1
fi

# the upper case options put the mailboxes into subdirectories
if [ "$MMFLAG" = 1 ] || [ "$YYFLAG" = 1 ]; then
	USE_DIRS=1
fi

#
# append flag is ignored if a date sort option is selected
#
if [ "$USE_DATES" = 1 ] && [ "$AFLAG" = 1 ]; then
	echo "$PROG: ignoring -a, due to -m, -M, -y or -Y." >&2
	AFLAG=0
fi

#
# check for base_names
#
# NOTE: modified from the 1.15 original -- the awk split() below uses
# the separator "[.]" instead of "\.", an escape sequence that makes
# gawk print a warning on every run; the result of the split is the same.

#
# with no base_names on the command line, work them out from the
# sequenced message files found in the current directory, and put
# them into the positional parameters
#
if [ "$#" = 0 ]; then
	# "ls | grep" rather than a shell pattern, see the note at
	# the definition of the extension pattern
	FILES=`ls | grep "\\.$GREP_EXT_PAT$" 2>&-`

	case "$FILES" in
	  '')
		echo "$PROG: no basenames specified, and no sequenced files found." >&2
		echo "$USAGE" >&2
		exit 1
	  esac

	#
	# extract the basenames from the files that matched the pattern;
	# each distinct basename is printed once, and the list is sorted
	#
	BASE_NAMES=`echo "$FILES" | $AWK '{
		# there should be no path part...
		FILE = $0;

		# now get the basename; the last three dot separated
		# parts are the date, time and sequence
		NUM = split(FILE,STRS,"[.]");
		NAME = STRS[1];

		# a file basename might have additional "."s in it...
		for (I=2; I < NUM - 2; ++I)
			NAME = NAME "." STRS[I];

		# array index used as a set, to drop duplicates
		LIST[NAME] = 1;
	  }
	  END {
		for (I in LIST)
			printf("%s\n", I);
	  }' | sort`

	#
	# set the basenames found into the positional parameters;
	# split on newlines only, with filename expansion turned off,
	# so names with blanks or pattern characters stay intact
	#
	set -f
	_IFS="$IFS"
	# NOTE: next line contains newline
	IFS='
'
	set -- $BASE_NAMES
	IFS="$_IFS"
	set +f
  fi
#echo "BASE_NAMES=\"$BASE_NAMES\""
#echo "\$*=\"$*\""
#exit


#
# now process files for each basename,
# combining into new mailboxes
#
# NOTE: modified from the 1.15 original --
#   1) the message printed when 'rm' fails names the file that could
#      not be removed (it used to name the destination mailbox);
#   2) file names are handed to awk through the environment (in the
#      msg_name_parts helper) instead of being pasted into the text of
#      the awk program, so quotes or backslashes in a name are harmless;
#   3) the awk split() separator is "[.]" instead of "\.", an escape
#      sequence that makes gawk print a warning for every file.

#
# msg_name_parts: pull the mailbox name and the date fields out of a
#	message file name of the form {basename}.date.time.sequence;
#	$1 - message file name, with or without a leading path;
#	prints four lines on stdout: the basename, the 4 digit year,
#	the 2 digit year, and the 2 digit month;
#
msg_name_parts() {
	MSG_FILE="$1" $AWK <&- '
	  BEGIN {
		FILE = ENVIRON["MSG_FILE"];

		# FILE might include a path with dots, etc;
		# this should just get the file part
		NUM = split(FILE,STRS,"/");
		FILE = STRS[NUM];

		# now get the basename and sequence parts
		NUM = split(FILE,STRS,"[.]");
		NAME = STRS[1];

		# a file basename might have additional "."s in it...
		for (I=2; I+0 < NUM - 2; ++I)
			NAME = NAME "." STRS[I];

		# third part from the end is the date, as YYYYMMDD
		DATE_STR = STRS[NUM-2];

		Y4 = substr(DATE_STR,1,4);
		Y2 = substr(DATE_STR,3,2);
		M2 = substr(DATE_STR,5,2);

		printf("%s\n%s\n%s\n%s\n", NAME, Y4, Y2, M2);
	  }'
}

#
# now process files for each basename (the positional parameters),
# combining into new mailboxes
#
for BASE_NAME do

	$VECHO "" >&2
	$VECHO "$PROG: processing \"$BASE_NAME\"..." >&2
	MSG_COUNT=

	#
	# get files with BASE_NAME
	#
	# NOTE(review): BASE_NAME is used as part of a grep pattern here,
	# so pattern characters in it (such as ".") are not matched
	# literally, and a BASE_NAME with a path part cannot match the
	# output of a bare "ls" -- confirm whether that is intended
	#
	FILES=`ls | grep "^$BASE_NAME\\.$GREP_EXT_PAT$" 2>&-`
	if [ -z "$FILES" ]; then
		echo "$PROG: no message files with basename \"$BASE_NAME\"." >&2
		continue
	  fi

	if [ "$USE_DATES" = 0 ]; then
		#
		# without date sorting there is a single destination
		# per basename; strip any path from destination name
		#
		case "$BASE_NAME" in
		  */*)
			BASE_NAME_0=`basename "$BASE_NAME"`
			;;
		  *)
			BASE_NAME_0="$BASE_NAME"
		  esac
		DEST="$BASE_NAME_0"

		#
		# if not appending, make sure that DEST name does not
		# conflict with an existing file
		#
		if [ "$AFLAG" = 0 ]; then
			# find an unused filename to write to, by
			# trying name.1, name.2, ...
			N=
			while [ -f "$DEST" ]; do
				# count by adding a word to N and letting
				# "set" count the words;
				# this might be a little quicker than 'expr'
				N="$N 1"
				set -- $N
				NN="$#"
				DEST="$BASE_NAME_0.$NN"
			  done

			if [ "$DEST" != "$BASE_NAME_0" ]; then
				echo "$PROG: \"$BASE_NAME_0\" exists, using \"$DEST\"." >&2
			  fi
		  elif [ -f "$DEST" ]; then
			echo "$PROG: appending to \"$DEST\"..." >&2
		  fi
	  fi

	# used to keep count of processed files
	FNUMS=

	# set FILES into positional parameters, one per line of the
	# list, with filename expansion turned off
	set -f
	_IFS="$IFS"
	# NOTE: next line contains newline
	IFS='
'
	set -- $FILES
	IFS="$_IFS"
	set +f

	#
	# for each file with BASE_NAME
	#

	for FILE do

		if [ "$USE_DATES" = 1 ]; then
			#
			# get the names for the destination dir and/or file
			# from each filename;
			# file names are {basename}.date.time.sequence
			#
			NAMES=`msg_name_parts "$FILE"`

			#
			# use positional parameters to get at the info
			#
			set -f
			_IFS="$IFS"
			# NOTE: next line contains newline
			IFS='
'
			set -- $NAMES
			IFS="$_IFS"
			set +f

			MBOX="$1"
			Y4="$2"
			Y2="$3"
			M2="$4"

			DDIR= DDIR_= SUFX= _SUFX=

			# directory and/or suffix, according to sort mode
			case "$SMODE" in
			  "$y_SORT")
				SUFX="$Y4"
				;;
			  "$Y_SORT")
				DDIR="$Y4"
				;;
			  "$m_SORT")
				SUFX="$Y2$M2"
				;;
			  "$M_SORT")
				DDIR="$Y2$M2"
				;;
			  "$Ym_SORT")
				DDIR="$Y4"
				SUFX="$M2"
				;;
			  "$YM_SORT")
				DDIR="$Y4/$M2"
			  esac

			# add the "/" and "." only when there is
			# something for them to separate
			DDIR_="${DDIR:+$DDIR/}"
			_SUFX="${SUFX:+.$SUFX}"
			DEST="$DDIR_$MBOX$_SUFX"
		  fi

		# increment FCNT; this is a little quicker than 'expr' (maybe)
		FNUMS="$FNUMS 1"
		set -- $FNUMS
		FCNT="$#"

		$VECHO "" >&2
		$VECHO "$PROG: $FCNT. $FILE => $DEST" >&2

		if [ "$USE_DIRS" = 1 ]; then
			#
			# create the directory if necessary
			#
			if [ ! -d "$DDIR" ]; then
				$VECHO "$PROG: creating directory \"$DDIR\"..." >&2
				mkdir -p "$DDIR" || {
					echo "$PROG: cannot create directory \"$DDIR\"." >&2
					exit 1
				}
			  elif [ ! -w "$DDIR" ]; then
				echo "$PROG: cannot write to directory \"$DDIR\"." >&2
				exit 1
			  fi
		  fi

		#
		# create the file if necessary
		#
		if [ ! -f "$DEST" ]; then
			$VECHO "$PROG: creating file \"$DEST\"..." >&2
			{
			  > "$DEST"
			} || {
			  echo "$PROG: problem creating file \"$DEST\"." >&2
			  exit 1
			}
		  fi

		#
		# append the current file to the mbox file;
		# the original is removed only after a successful append
		#
		if cat "$FILE" >> "$DEST"; then
			if [ "$KEEP" = 0 ]; then
				$VECHO "$PROG: removing \"$FILE\"..." >&2
				rm -f "$FILE" || {
				  echo "$PROG: problem removing file \"$FILE\"." >&2
				  exit 1
				}
			  fi
			MSG_COUNT="$MSG_COUNT 1"
		  else
			echo "$PROG: problem appending to \"$DEST\"." >&2
			exit 1
		  fi
	  done

	#
	# get count of messages, via positional parameter abuse;
	# the X keeps "set" from listing variables when the count is empty
	#
	set -- X $MSG_COUNT
	shift
	MSG_COUNT="$#"

	$VECHO "" >&2
	echo "$PROG: processed $MSG_COUNT messages of \"$BASE_NAME\"." >&2
  done

# closing blank line and message; $VECHO is ':' unless -v was given,
# so these print only in verbose mode
$VECHO "" >&2
$VECHO "$PROG: done." >&2

# no explicit status: the script exits with that of the last command
exit
