#!/bin/sh
#
# Audit the man-spell +ok+ exceptions in man pages
#
# Copyright (c) 2024 Ken McDonell, Inc.  All Rights Reserved.
#

# one-line synopsis, reported when the command line cannot be parsed
usage="Usage: audit-man-spell [-d] [man-src-file ...]"

# Use the POSIX collating sequence for everything run from here, so the
# results do not depend on the locale of whoever runs the script.
# (the variable is LC_COLLATE, two L's ... a misspelt name is silently
# ignored by every tool)
LC_COLLATE=POSIX
export LC_COLLATE

# Both ispell and man-spell are required, give up straight away if
# either one cannot be found.
# command -v is the POSIX shell's own lookup, so unlike which(1) it does
# not depend on an optional external program being installed.
#
if ! command -v ispell >/dev/null 2>&1
then
    echo "Arrgh: ispell not installed, no dice"
    exit 1
fi

if ! command -v man-spell >/dev/null 2>&1
then
    # $PATH is expanded here, so the message lists the directories
    # that were searched
    echo "Arrgh: man-spell not on $PATH, no dice"
    exit 1
fi

# Scratch files are all named $tmp.<suffix> and live in a private
# directory.  mktemp -d creates that directory with an unpredictable
# name, so a file or symlink planted in /tmp ahead of time under a
# guessable name cannot be overwritten by this script.
#
tmpdir=`mktemp -d /tmp/audit-man-spell.XXXXXXXXXX` || exit 1
tmp=$tmpdir/$$
status=1	# failure is the default
# $tmpdir is expanded now, when the trap is set; \$status is expanded
# when the trap fires, so the exit code is whatever $status holds then
trap "rm -rf '$tmpdir'; exit \$status" 0 1 2 3 15

# Does the file $1 look like troff source using the -man macros?
# file(1) does not work for this, so a few heuristics are used instead.
# The caller must already have established that $1 exists.
#
# Returns 0 if $1 is taken to be troff man source, else 1.
#
_istroff()
{
    # first line is a #! line (shell or other script), or says the
    # file was generated by Pod::Man ... either way not for us
    if head -n 1 "$1" | grep -q -e '^#!' -e 'by Pod::Man'
    then
	return 1
    fi

    # otherwise at least one .SH request means -man macros
    if grep -q '^\.SH ' <"$1"
    then
	return 0
    fi

    return 1
}

# Command line options ...
#   -d	bump the debug level by one, each time it appears
# Anything else gets the usage message on stderr and the script exits.
# Afterwards the options are shifted away, leaving only the file
# arguments in "$@".
#
debug=0
while getopts "d?" c
do
    case "$c" in
	d)
	    debug=$((debug + 1))
	    ;;
	*)
	    echo >&2 "$usage"
	    exit
	    ;;
    esac
done
shift $((OPTIND - 1))

# Algorithm ... for each input file ...
# - if there are +ok+ ctl lines, then ...
#   + cut out the ctl lines (exceptions) and split into strings
#     and patterns
#   + man-spell the edited file
#   + for each exception string in the ctl lines ...
#     if it removes one or more misspelled words, that's good, else
#     flag the exception string as redundant
#   + patterns are a bit trickier to detect if they are making a
#     useful contribution (see below)
#
# Main loop ...
#
# The candidate files arrive one per line on the pipe: the command line
# arguments if there are any, else whatever find(1) turns up below the
# current directory with a name like *.[1-9]*
#
# Report (on stdout) for a file with problems ...
#	file:
#	Exceptions not needed ...
#	- string	(exception that matches no misspelled word)
#	Additional exceptions required ...
#	+ word		(misspelled word left after all exceptions applied)
#
# $tmp.errs is a flag file, created once the first complaint about the
# current file has been reported so the headings are not repeated.
#
if [ $# -gt 0 ]
then
    # printf and quotes so each name goes down the pipe exactly as given
    for arg; do printf '%s\n' "$arg"; done
else
    find * -follow -name '*.[1-9]*' -type f
fi \
| while IFS= read -r file	# IFS= and -r: keep white space and backslashes
do
    if [ ! -f "$file" ]
    then
	echo "$file: not found"
	continue
    fi
    if _istroff "$file"
    then
	# smells like troff man source ...
	#
	rm -f $tmp.errs
	[ $debug -gt 0 ] && echo "$file:"
	if grep -q '^\.\\" +ok+ ' <"$file"
	then
	    # has exceptions ...
	    # strip the ctl line prefix, drop every {...} group, squeeze
	    # the white space and finish with one exception string per
	    # line in $tmp.except
	    #
	    grep '^\.\\" +ok+ ' <"$file" \
	    | sed \
		-e 's/^\.\\" +ok+ //' \
		-e 's/{[^}]*}//g' \
		-e 's/[ 	][ 	]*/ /g' \
		-e 's/ *$//' \
		-e 's/^ *//' \
		-e '/^ *$/d' \
	    | tr ' ' '\012' >$tmp.except
	    if [ $debug -gt 1 ]
	    then
		( echo "Current exceptions:"; cat $tmp.except ) | fmt >&2
	    fi
	    # spell check the file with the ctl lines removed, what
	    # man-spell writes to stdout is kept in $tmp.misspell
	    #
	    grep -v '^\.\\" +ok+ ' <"$file" >$tmp.in
	    man-spell $tmp.in 2>$tmp.err >$tmp.misspell
	    if [ $debug -gt 1 ] && [ -s $tmp.err ]
	    then
		echo >&2 "Warnings from man-spell ..."
		cat >&2 $tmp.err
	    fi
	    # tick off each exception string against $tmp.misspell ...
	    # -F -x so the string is compared literally against whole
	    # lines and is never treated as a regular expression
	    #
	    while IFS= read -r except
	    do
		if grep -q -F -x -e "$except" <$tmp.misspell
		then
		    # needed ... remove the word(s) it accounts for
		    [ $debug -gt 2 ] && echo >&2 "= $except"
		    grep -v -F -x -e "$except" <$tmp.misspell >$tmp.tmp
		    mv $tmp.tmp $tmp.misspell
		else
		    # matches nothing (left) in $tmp.misspell
		    if [ ! -f $tmp.errs ]
		    then
			# with -d the file name has already been output
			[ $debug -eq 0 ] && echo "$file:"
			echo "Exceptions not needed ..."
			touch $tmp.errs
		    fi
		    echo "- $except"
		fi
	    done <$tmp.except
	    # anything still in $tmp.misspell is not covered by any
	    # exception string
	    #
	    if [ -s $tmp.misspell ]
	    then
		if [ ! -f $tmp.errs ] && [ $debug -eq 0 ]
		then
		    echo "$file:"
		    touch $tmp.errs
		fi
		echo "Additional exceptions required ..."
		sed -e 's/^/+ /' <$tmp.misspell
	    fi
	else
	    [ $debug -gt 0 ] && echo >&2 "$file: no exceptions"
	fi
    else
	[ $debug -gt 0 ] && echo >&2 "$file: skipped (not troff)"
    fi
done

# The exit trap finishes with "exit $status" and status starts out as 1
# (failure is the default), so success has to be recorded here or else
# every run of the script exits 1.
status=0
exit
