#!/bin/sh
#####################################################-*-mode:shell-script-*-
##                                                                       ##
##                     Carnegie Mellon University                        ##
##                      Copyright (c) 2012-2013                          ##
##                        All Rights Reserved.                           ##
##                                                                       ##
##  Permission is hereby granted, free of charge, to use and distribute  ##
##  this software and its documentation without restriction, including   ##
##  without limitation the rights to use, copy, modify, merge, publish,  ##
##  distribute, sublicense, and/or sell copies of this work, and to      ##
##  permit persons to whom this work is furnished to do so, subject to   ##
##  the following conditions:                                            ##
##   1. The code must retain the above copyright notice, this list of    ##
##      conditions and the following disclaimer.                         ##
##   2. Any modifications must be clearly marked as such.                ##
##   3. Original authors' names are not deleted.                         ##
##   4. The authors' names are not used to endorse or promote products   ##
##      derived from this software without specific prior written        ##
##      permission.                                                      ##
##                                                                       ##
##  CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK         ##
##  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      ##
##  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   ##
##  SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE      ##
##  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    ##
##  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   ##
##  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          ##
##  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       ##
##  THIS SOFTWARE.                                                       ##
##                                                                       ##
###########################################################################
##                                                                       ##
##  Author: Alan W Black (awb@cs.cmu.edu) Aug 2012                       ##
##                                                                       ##
###########################################################################
##  Search for potential digraphs: assumes standard voice is built       ##
###########################################################################

# Force the C locale for every tool this script runs (sed, awk, sort, ...).
# LANG alone is not enough: it has the lowest precedence of the locale
# variables, so an LC_ALL (or LC_* category) inherited from the caller would
# silently override it.  LC_ALL is therefore pinned as well.
LANG=C; LC_ALL=C; export LANG LC_ALL

# ESTDIR must name the local speech tools directory (it is used further down
# to locate the festival binary).  When it is unset or empty, print setup
# instructions for bash and csh users and stop with status 1.
if [ -z "$ESTDIR" ]; then
   cat <<'EOF'
environment variable ESTDIR is unset
set it to your local speech tools directory e.g.
   bash$ export ESTDIR=/home/awb/projects/speech_tools/
or
   csh% setenv ESTDIR /home/awb/projects/speech_tools/
EOF
   exit 1
fi

# FESTVOXDIR must name the local festvox directory (the build step runs
# scripts from $FESTVOXDIR/src).  When it is unset or empty, print setup
# instructions for bash and csh users and stop with status 1.
if [ -z "$FESTVOXDIR" ]; then
   printf '%s\n' \
      "environment variable FESTVOXDIR is unset" \
      "set it to your local festvox directory e.g." \
      '   bash$ export FESTVOXDIR=/home/awb/projects/festvox/' \
      "or" \
      '   csh% setenv FESTVOXDIR /home/awb/projects/festvox/'
   exit 1
fi

# Make sure the scratch build directory "dg" exists before any command runs.
[ -d dg ] || mkdir dg

# No arguments: run the complete search by re-invoking this script with the
# do_dg_all command.  "$0" is quoted so that a script path containing
# whitespace is still executed as a single word.
if [ $# -eq 0 ]
then
   "$0" do_dg_all
   exit 0
fi

# do_dg_all: the full pipeline -- collect candidate digraphs from the prompt
# file etc/txt.done.data, then evaluate them one at a time.
# "$1" is quoted so an empty or multi-word first argument simply fails the
# comparison instead of making `[` report a syntax error.
if [ "$1" = "do_dg_all" ]
then
    # Find digraphs to test
    "$0" find_digraphs etc/txt.done.data
    # Test each digraph
    "$0" dg_passes

    exit 0
fi

# find_digraphs PROMPTFILE
#   Counts every pair of adjacent characters inside the words of PROMPTFILE
#   and writes the most frequent pairs, one double-quoted pair per line and
#   most frequent first, to ./digraphs.test.
#   PROMPTFILE is presumably in festvox prompt format, ( id "text" ) -- the
#   sed expressions below strip everything outside the quoted text.
#   Needs festival at $ESTDIR/../festival/bin/festival.
if [ "$1" = "find_digraphs" ]
then
    echo Finding digraphs
    # Scratch file handed to festival; lives in the current directory.
    words=dg_$$.words

    # The prompt file is opened by redirection (not `cat $2`) so that a name
    # containing spaces works and a missing argument produces an error
    # instead of silently reading the terminal.
    sed 's/^[^"]*"//g;s/"[ ].*)[ ]*$//;s/")$//' <"$2" |
    awk '{for (i=1; i<=NF; i++) printf("\"%s\"\n",$i)}' >"$words"

    # festival reads the quoted words back and prints each one as a list of
    # its characters (utf8explode); sed then reduces every list to plain
    # space-separated characters for the counting awk below.
    "$ESTDIR/../festival/bin/festival" -b '(mapcar (lambda (x) (format t "%l\n" (utf8explode x)) t) (load "'"$words"'" t))' |
    sed 's/^(//;s/)$//;s/"//g' |
    awk '{for (i=1; i<NF; i++)
          {
             d=sprintf("%s%s",$i,$(i+1));
             freq[d]++;
          }}
         END {for (dg in freq)
                 printf("%d %s\n",freq[dg],dg); }' |
    sort -nr |
    # Keep the first 199 entries (NR < 200) of the frequency-sorted list.
    awk '{if (NR < 200) printf("\"%s\"\n",$2)}' >digraphs.test
    rm -f "$words"

    exit 0
fi

# dg_passes
#   Greedy search over the candidates listed in ./digraphs.test.
#   For the pseudo-entry "base" and then for each candidate in order, a test
#   voice is built (dg_build) with that entry plus every digraph accepted so
#   far, and the third field of the "MCD" line in dg/test/cgp_dg/scores is
#   read back.  An entry is accepted only when that score is strictly lower
#   than the best score seen so far.
#   Files written in the current directory:
#     bestscore          best (lowest) score so far
#     test_digraph.scm   the entry currently being tried
#     good_digraphs.scm  accepted entries, in order of acceptance
#     bad_digraphs.scm   rejected entries
#     all.scores         one line per entry: entry score reference good|bad
if [ "$1" = "dg_passes" ]
then
   # Initial score to beat; presumably higher than any real MCD value so
   # that the "base" build establishes the baseline -- TODO confirm.
   echo 10 >bestscore
   rm -f good_digraphs.scm bad_digraphs.scm all.scores
   touch good_digraphs.scm

   { echo base; cat digraphs.test; } |
   # -r and the quoted printf below keep backslashes in a digraph intact.
   while read -r digraph
   do
      printf '%s\n' "$digraph" >test_digraph.scm

      # stdin is redirected so that nothing run by the build can consume
      # the remaining candidates from the pipe feeding this loop.
      "$0" dg_build </dev/null

      newscore=$(awk '/^MCD/ { print $3; exit }' dg/test/cgp_dg/scores)
      score=$(cat bestscore)
      if [ -z "$newscore" ]
      then
         # No score means the build or the test step failed: reject the
         # entry, but keep all.scores at four columns.
         echo "dg_passes: no MCD score for $digraph" >&2
         newscore=NA
         better=0
      else
         better=$(echo "$newscore $score" |
                  awk '{if ($1 < $2) print 1; else print 0}')
      fi

      if [ "$better" = 1 ]
      then
         echo "$newscore" >bestscore
         printf '%s\n' "$digraph" >>good_digraphs.scm
         printf '%s %s %s good\n' "$digraph" "$newscore" "$newscore" >>all.scores
      else
         printf '%s\n' "$digraph" >>bad_digraphs.scm
         printf '%s %s %s bad\n' "$digraph" "$newscore" "$score" >>all.scores
      fi
   done

   exit 0
fi

# dg_passes_mp
#   Randomised search: 50 passes (01..50).  Each pass picks a random subset
#   of ./digraphs.test (each line kept when rand() > 0.80), builds a test
#   voice with it (dg_build) and credits the resulting MCD score to every
#   digraph in the subset.
#   Files written in the current directory:
#     test_digraph.scm   subset used by the current pass
#     test_NN.scores     "digraph NN score" lines for pass NN
#     raw.scores         all test_NN.scores lines, appended
#     all.NN.scores      "digraph mean-score count", sorted by mean score
#     all.scores         copy of the latest all.NN.scores
#   NOTE(review): raw.scores is appended to but never cleared here, so lines
#   from earlier invocations are averaged in as well -- confirm intended.
if [ "$1" = "dg_passes_mp" ]
then
   echo Find digraphs with random selection
   rm -f good_digraphs.scm all.scores
   touch good_digraphs.scm

   awk 'BEGIN {for (i=1; i<=50; i++)
                  printf("%02d\n",i);}' |
   while read -r pass
   do
      # srand() without an argument seeds from the time of day, so passes
      # that start within the same second draw the same subset.
      awk 'BEGIN {srand()}
           { if (rand() > 0.80)
                print $1 }' digraphs.test >test_digraph.scm

      # stdin is redirected so that nothing run by the build can consume
      # the remaining pass numbers from the pipe feeding this loop.
      "$0" dg_build </dev/null

      # Is it better ?
      newscore=$(awk '/^MCD/ { print $3; exit }' dg/test/cgp_dg/scores)
      if [ -n "$newscore" ]
      then
         # Values go in with -v rather than being pasted into the program
         # text; "score + 0" keeps the number formatted as a numeric value.
         awk -v pass="$pass" -v score="$newscore" \
             '{print $1, pass, score + 0}' test_digraph.scm >"test_$pass.scores"
      else
         # A failed build yields no score: record nothing for this pass.
         echo "dg_passes_mp: pass $pass produced no MCD score" >&2
         : >"test_$pass.scores"
      fi
      cat "test_$pass.scores" >>raw.scores

      # Mean score and number of appearances per digraph over all recorded
      # passes, lowest mean first.
      awk '{ncount[$1] += 1;
            ntot[$1] += $3}
           END { for (dg in ntot)
                    printf("%s %f %d\n",dg,ntot[dg]/ncount[dg],ncount[dg]) }' raw.scores |
      sort -n -k 2 >"all.$pass.scores"
      cp -pr "all.$pass.scores" all.scores

   done

   exit
fi

# dg_build
#   Rebuilds the scratch voice in ./dg from nothing and scores it.
#   Reads (relative to the directory the script is started in):
#     etc/voice.defs            sourced; supplies FV_INST, FV_LANG, FV_NAME
#     wav/*.wav, mcep_deltas, v reused from the existing voice
#     etc/txt.done.data.train   first NUMUTTS lines become the prompt list
#     test_digraph.scm          digraph(s) under test
#     good_digraphs.scm         digraphs accepted so far
#   The digraph list handed to the grapheme setup is the entry under test
#   followed by the accepted digraphs in reverse order; the same list is
#   also copied to extra_chars.
#   The final cg_test step is presumably what writes dg/test/cgp_dg/scores,
#   which the dg_passes commands read -- TODO confirm.
#   Always exits 0 once inside ./dg, whatever the individual steps return.
if [ "$1" = "dg_build" ]
then
   NUMUTTS=500
   rm -rf dg
   # Everything below uses paths relative to ./dg; stop rather than copy and
   # build in the wrong directory if it cannot be created or entered.
   mkdir dg || exit 1
   cd dg || exit 1
   . ../etc/voice.defs

   "$FESTVOXDIR/src/clustergen/setup_cg" "$FV_INST" "$FV_LANG" "$FV_NAME"
   # Hard-link the waveforms instead of copying them.
   ln ../wav/*.wav wav
   cp -pr ../mcep_deltas .
   cp -pr ../v v
   head -n "$NUMUTTS" ../etc/txt.done.data.train >etc/txt.done.data

   cat ../test_digraph.scm >digraph_list.scm
   tac ../good_digraphs.scm >>digraph_list.scm

   cp -pr digraph_list.scm extra_chars

   "$FESTVOXDIR/src/grapheme/make_cg_grapheme"

   ./bin/do_build parallel build_prompts
   ./bin/do_build label
   ./bin/do_clustergen generate_statenames
   ./bin/do_clustergen generate_filters
   ./bin/do_clustergen parallel build_utts
   ./bin/do_clustergen parallel f0
   ./bin/do_clustergen parallel combine_coeffs_v
   ./bin/do_clustergen parallel cluster etc/txt.done.data
   ./bin/do_clustergen cg_test mcdf0 cgp_dg etc/txt.done.data

   exit 0
fi

# Nothing matched (or a branch fell through): stop here with the status of
# the last command that ran.
exit

# NOTE: everything below is unreachable because of the unconditional exit
# above, so "rebuild" is not an available command.  It is kept for reference
# only; the commands are relative to the current directory.
if [ $1 = "rebuild" ]; then
   ./bin/do_clustergen parallel cluster etc/txt.done.data.train
   ./bin/do_clustergen cg_test mcdf0 cgp_ws etc/txt.done.data.test >mcd-ws.out

   exit 0
fi
