#!/bin/bash
#
# Locate unknown words in C/C++ comments.  Uses "extract-comments"
# and "ablexicon" scripts.
#
# Strict mode for the whole script:
#   nounset  - expanding an unset variable is an error
#   pipefail - a pipeline fails if any of its stages fails
#   errexit  - an unhandled non-zero status terminates the script
#   noglob   - pathname expansion is disabled (same as "set -f")
set -o nounset -o pipefail -o errexit -o noglob

# ANSI escape sequences, stored with a literal backslash; they are only
# turned into real escapes when printed through "echo -e".
BLUE='\e[1;34m'
GREEN='\e[1;32m'
DEFAULTFG='\e[39m'

#
# usage - print the command-line synopsis on stdout and terminate the
# script with exit status 1.  Arguments, if any, are ignored.
#
usage() {
    printf '%s\n' \
        "Find unknown words in C/C++ comments" \
        "" \
        "Usage:" \
        " ${0##*/} [options]" \
        "" \
        "Options:" \
        " -d, --directory    directory to scan (defaults to .)" \
        " -l, --lexicon      lexicon file (one word per line, default 'lexicon.txt')" \
        " -t, --terse        terse output only (enabled if no lexicon available)" \
        " -h, --help         display this help"
    exit 1
}

#
# Verify that required commands are present
#
# External programs this script cannot run without.
REQUIRED=( "extract-comments" "ablexicon" "getopt" )
for i in "${REQUIRED[@]}"
do
    # The lookup must be the condition of the "if": as a stand-alone
    # command its failure would trip errexit and end the script before
    # the diagnostic below could be printed.
    if ! command -v "$i" >/dev/null
    then
        echo "Can't find '$i' , exiting..." >&2
        exit 1
    fi
done

# Let getopt(1) normalise the command line (short and long options).
# The assignment is tested directly by "if": under errexit a failing
# stand-alone assignment would terminate the script before a separate
# "$?" check could run, so the message below would never be shown.
if ! GETOPT_OUT=$(getopt -o htd:l: --long help,terse,directory:,lexicon: -n "${0##*/}" -- "$@")
then
    echo "Exiting..." >&2
    exit 1
fi

# Replace the positional parameters with getopt's quoted, reordered
# output; eval is needed to honour the quoting getopt emits.
eval set -- "$GETOPT_OUT"

# Option defaults.  The scan root defaults to the current directory, as
# the usage text documents; the previous default (/dev/fd/0) is not a
# directory, so running without -d always ended in "Invalid directory".
DIRNAME=.
LEXICON=     # empty: resolved later to a default lexicon
STATUS=      # becomes 1 once an unknown word has been reported
TERSE=       # non-empty: print only the bare list of unknown words
# Consume options until "--" or the first word that is not an option.
# "${1-}" keeps nounset quiet should the parameter list be empty.
while true; do
  case "${1-}" in
    -h | --help ) usage ;;
    -t | --terse ) TERSE=1; shift ;;
    -d | --directory ) DIRNAME="$2"; shift 2 ;;
    -l | --lexicon ) LEXICON="$2"; shift 2 ;;
    -- ) shift; break ;;
    * ) break ;;
  esac
done

# The scan root must be an existing directory.  The expansion is quoted:
# unquoted, a path containing whitespace splits into several words,
# "[" fails with a syntax error and the check is silently skipped.
if [ ! -d "$DIRNAME" ]
then
    # Diagnostics go to stderr, like the script's other error messages.
    echo "Invalid directory: $DIRNAME" >&2
    usage
fi

# No lexicon given: use lexicon.txt from the scanned directory if it
# exists.  Otherwise use an empty lexicon (/dev/null) and switch to terse
# output.  All expansions are quoted so that paths containing whitespace
# are handled as a single word.
if [ -z "$LEXICON" ]
then
    if [ -f "$DIRNAME/lexicon.txt" ]
    then
        LEXICON="$DIRNAME/lexicon.txt"
    else
        LEXICON=/dev/null
        TERSE=1
    fi
fi

# Collect the words that ablexicon reports for the comments of every
# *.c / *.h file below $DIRNAME into the array "unknowns".
#
# The scratch file comes from mktemp (unique name, no dependency on
# $USER being set) and is removed by an EXIT trap if errexit aborts the
# pipeline half-way.
TMPFILE=$(mktemp)
trap 'rm -f -- "$TMPFILE"' EXIT
unknowns=( "not-used" )     # get around empty array with nounset
# find passes the file names straight to extract-comments, so names
# containing whitespace are not split.  grep exits with 1 when its input
# contains no words at all; that is not an error here, whereas a real
# grep failure (status 2) still fails the pipeline.
find "$DIRNAME" -name '*.[ch]' -exec extract-comments {} + |
    tr '[:upper:]' '[:lower:]' |
    { grep -o -E '[a-zA-Z]+' || [ "$?" -eq 1 ]; } |
    ablexicon -l "$LEXICON" > "$TMPFILE"
# Index 0 keeps the placeholder; real words start at index 1.
readarray -O 1 -t unknowns < "$TMPFILE"
rm -f -- "$TMPFILE"
trap - EXIT

#
# print_unknown_banner - print the one-time explanatory banner in green.
# Reads GREEN, DEFAULTFG and LEXICON.
#
print_unknown_banner() {
    local rule="############################################################################"
    local line
    for line in \
        "$rule" \
        "#" \
        "#  Unknown word(s) found.  Please either correct the spelling or add them" \
        "#  to the lexicon file '$LEXICON'." \
        "#" \
        "$rule"
    do
        printf '%b%s%b\n' "$GREEN" "$line" "$DEFAULTFG"
    done
}

#
# report_word_in_file WORD FILE - print the comment lines of FILE that
# contain WORD as a whole word (case-insensitive), with the word and the
# leading line number highlighted.  Prints the banner before the first
# hit of the run and sets STATUS=1.  Prints nothing if there is no hit.
#
report_word_in_file() {
    local word=$1 file=$2
    local comments hits

    # extract-comments can return non-zero.  Its output is captured on
    # its own so that, with pipefail on, that status can neither abort
    # the script nor hide a successful match.
    comments=$(extract-comments "$file") || true

    # One search both detects the word and keeps the colourised lines.
    hits=$(grep --color=always -iw -- "$word" <<<"$comments") || return 0

    if [ "${STATUS:-}" != "1" ]; then
        print_unknown_banner
        STATUS=1  # Return non-zero status if any unidentified words are found
    fi

    printf '\n%b%s%b\n\n' "$BLUE" "$file" "$DEFAULTFG"
    # Keep the lines that start with a line number and highlight it.
    # [[:blank:]] is space or tab; "\t" inside a bracket expression
    # would match a backslash or the letter t, not a tab.
    GREP_COLORS="mt=01;32" grep --color=always -E -e '^[[:blank:]]*[0-9]+' <<<"$hits" || true
}

#
# report_unknown_words - walk the "unknowns" array.  In terse mode
# (TERSE non-empty) print one word per line; otherwise show, for every
# *.c / *.h file below DIRNAME, the comment lines containing the word.
#
report_unknown_words() {
    local word file
    local files_loaded=
    local -a source_files=()

    for word in "${unknowns[@]}"; do
        # Placeholder element that keeps the array non-empty for nounset.
        if [ "$word" = "not-used" ]; then
            continue
        fi

        if [ -n "${TERSE:-}" ]; then
            printf '%s\n' "$word"
            continue
        fi

        # The file list does not depend on the word: build it once, and
        # line by line so that names containing spaces stay intact.
        if [ -z "$files_loaded" ]; then
            readarray -t source_files < <(find "$DIRNAME" -name '*.[ch]')
            files_loaded=1
        fi

        # The ${arr[@]+...} form also works for an empty array on bash
        # versions where nounset rejects a plain "${arr[@]}".
        for file in ${source_files[@]+"${source_files[@]}"}; do
            report_word_in_file "$word" "$file"
        done
    done
}

report_unknown_words

# STATUS is "1" once an unknown word has been reported: finish with exit
# status 1 in that case.  Otherwise this line itself yields status 0 and
# the script falls through to a successful end.
[ "$STATUS" != "1" ] || exit 1
