#!/bin/sh
#####################################################-*-mode:shell-script-*-
##                                                                       ##
##                     Carnegie Mellon University                        ##
##                         Copyright (c) 2012                            ##
##                        All Rights Reserved.                           ##
##                                                                       ##
##  Permission is hereby granted, free of charge, to use and distribute  ##
##  this software and its documentation without restriction, including   ##
##  without limitation the rights to use, copy, modify, merge, publish,  ##
##  distribute, sublicense, and/or sell copies of this work, and to      ##
##  permit persons to whom this work is furnished to do so, subject to   ##
##  the following conditions:                                            ##
##   1. The code must retain the above copyright notice, this list of    ##
##      conditions and the following disclaimer.                         ##
##   2. Any modifications must be clearly marked as such.                ##
##   3. Original authors' names are not deleted.                         ##
##   4. The authors' names are not used to endorse or promote products   ##
##      derived from this software without specific prior written        ##
##      permission.                                                      ##
##                                                                       ##
##  CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK         ##
##  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      ##
##  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   ##
##  SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE      ##
##  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    ##
##  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   ##
##  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          ##
##  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       ##
##  THIS SOFTWARE.                                                       ##
##                                                                       ##
###########################################################################
##                                                                       ##
##  Author: Alan W Black (awb@cs.cmu.edu) Aug 2012                       ##
##                                                                       ##
###########################################################################
##  Find optimal number of HMM states in built cg voice                  ##
###########################################################################

# Set LANG to C and export it so every command started below inherits it.
LANG=C
export LANG

# ESTDIR has to be set (non-empty) before anything else runs; otherwise
# print how to set it and stop with status 1.
if [ -z "$ESTDIR" ]
then
   cat <<'EOF'
environment variable ESTDIR is unset
set it to your local speech tools directory e.g.
   bash$ export ESTDIR=/home/awb/projects/speech_tools/
or
   csh% setenv ESTDIR /home/awb/projects/speech_tools/
EOF
   exit 1
fi

# FESTVOXDIR has to be set (non-empty) as well; otherwise print how to
# set it and stop with status 1.
if [ -z "$FESTVOXDIR" ]
then
   cat <<'EOF'
environment variable FESTVOXDIR is unset
set it to your local festvox directory e.g.
   bash$ export FESTVOXDIR=/home/awb/projects/festvox/
or
   csh% setenv FESTVOXDIR /home/awb/projects/festvox/
EOF
   exit 1
fi

# With no arguments, run the whole procedure by re-invoking this script
# once per step: mp, then extract_ph_list, then rebuild.
# "$0" is quoted so the re-invocation still works when the script path
# contains whitespace, and a step that cannot be run (or reports failure)
# stops the sequence instead of letting the later steps run regardless.
if [ $# -eq 0 ]
then
   "$0" mp || exit 1
   "$0" extract_ph_list || exit 1

   "$0" rebuild || exit 1

   exit 0
fi

# ---- helpers used by the "mp" step -------------------------------------

# Read "<name> <nstates>" lines on stdin and write them to stdout with the
# count randomly perturbed: roughly 20% of the lines get one state fewer,
# roughly 20% one more, and the rest are passed through unchanged.
# NOTE(review): nothing here stops a count from reaching 0 -- confirm that
# is acceptable to the tools that consume ns/ph_list.
ns_perturb_ph_list () {
   awk 'BEGIN {srand()}
        {
           draw = rand();
           if (draw < 0.2)
              print $1, $2 - 1
           else if (draw > 0.8)
              print $1, $2 + 1
           else
              print $1, $2
        }'
}

# Read "<name> <nstates>" lines on stdin and write
# "<name>_<nstates> <pass> <score>" lines to stdout.
#   $1 - pass label
#   $2 - numeric score obtained in that pass
# The values are handed to awk with -v rather than pasted into the program
# text, so an odd value cannot turn into an awk syntax error.
ns_tag_scores () {
   awk -v pass="$1" -v score="$2" \
       '{printf("%s_%s %s %s\n", $1, $2, pass, score + 0)}'
}

# Read "<key> <pass> <score>" lines on stdin and write one
# "<key> <mean score> <number of scores>" line per key, sorted numerically
# on the mean (lowest first).
ns_average_scores () {
   awk '{ncount[$1] += 1;
         ntot[$1] += $3}
        END { for (dg in ntot)
                 printf("%s %f %d\n", dg, ntot[dg]/ncount[dg], ncount[dg]) }' |
   sort -n -k 2
}

if [ "$1" = "mp" ]
then
   # Multi-pass build to find best number of hmm states
   [ -d ns ] || mkdir ns

   # Start from a clean set of score files
   rm -f ns/all.scores
   rm -f ns/raw.scores

   rm -f ns/all.*.scores
   rm -f ns/test_*.scores

   # Do 50 passes, labelled 01 .. 50
   # NOTE(review): the loop body (including the build) runs with this pipe
   # as its stdin; a command in it that reads stdin would swallow pass labels.
   echo 1 50 |
   awk '{for (i=$1; i<=$2; i++)
            printf("%02d\n",i);}' |
   while read -r pass
   do
      # Candidate state counts for this pass, derived from the base list
      ns_perturb_ph_list <ehmm/etc/ph_list >ns/ph_list

      "$0" build

      # Is it better ?  Third field of the first line starting with MCD.
      newscore=$(awk '/^MCD/ {print $3; exit}' ns_voice/test/cgp_ns/scores)
      case "$newscore" in
         ''|*[!0-9.]*)
            # The build produced no usable score: leave the accumulated
            # scores untouched instead of recording a broken entry.
            echo "pass $pass: no usable MCD score, skipping" >&2
            continue
            ;;
      esac

      ns_tag_scores "$pass" "$newscore" <ns/ph_list >"ns/test_$pass.scores"
      cat "ns/test_$pass.scores" >>ns/raw.scores

      # Re-average everything seen so far; all.scores is always the latest
      ns_average_scores <ns/raw.scores >"ns/all.$pass.scores"
      cp -pr "ns/all.$pass.scores" ns/all.scores
   done

   exit 0
fi

# Read "<name>_<nstates> <mean score> <count>" lines on stdin (in the order
# they should be preferred) and write "<name> <nstates>" to stdout for the
# first line seen for each name, ordered by name.
# The key is split at its LAST "_" so names that themselves contain "_"
# stay distinct, and names are compared literally rather than used as
# regular expressions.  Lines whose key has no "_" are ignored.
ns_best_states () {
   awk '$1 ~ /_/ {
           name = $1
           sub(/_[^_]*$/, "", name)
           states = $1
           sub(/^.*_/, "", states)
           if (!(name in seen)) {
              seen[name] = 1
              print name, states
           }
        }' |
   sort -k 1,1
}

if [ "$1" = "extract_ph_list" ]
then
   # Extract the best ph_list ready for rebuild

   # Without scores there is nothing to extract; stop before the existing
   # etc/ph_list is moved aside and replaced by an empty file.
   if [ ! -s ns/all.scores ]
   then
      echo "ns/all.scores is missing or empty; run the mp step first" >&2
      exit 1
   fi

   # Keep the previous list as etc/ph_list.old
   if [ -f etc/ph_list ]
   then
      mv etc/ph_list etc/ph_list.old
   fi

   # ns/all.scores is sorted on the mean score, so the first line for a
   # name carries its preferred number of states
   ns_best_states <ns/all.scores >etc/ph_list

   exit 0
fi

if [ "$1" = "build" ]
then
   # Build and test with new ph_list, in a fresh ns_voice directory
   rm -rf ns_voice
   # Every command below works relative to the current directory, so stop
   # if the scratch directory cannot be created or entered rather than
   # running the build steps in the base voice directory.
   mkdir ns_voice || exit 1
   cd ns_voice || exit 1

   # Provides FV_INST, FV_LANG and FV_NAME used below
   . ../etc/voice.defs

   "$FESTVOXDIR/src/clustergen/setup_cg" "$FV_INST" "$FV_LANG" "$FV_NAME"
   # Reuse the base voice's data: hard links for the wav and prompt-utt
   # files, symbolic links for the mcep_deltas and v directories
   ln ../wav/*.wav wav
   ln -s ../mcep_deltas mcep_deltas
   ln -s ../v v
   ln ../prompt-utt/* prompt-utt
   # Only do tests on actual train set, but split it into train.train train.test
   cat ../etc/txt.done.data.train >etc/txt.done.data
   cp -pr ../etc/f0.params etc
   ./bin/traintest etc/txt.done.data
   # The candidate state counts written by the mp step
   cp -pr ../ns/ph_list etc/ph_list
   if [ -f "../festvox/${FV_INST}_${FV_LANG}_${FV_NAME}_char_phone_map.scm" ]
   then
      # A grapheme based voice
      "$FESTVOXDIR/src/grapheme/make_cg_grapheme"
   fi

   # Relabel with random number of states
   ./bin/do_build label

   ./bin/do_clustergen generate_statenames
   ./bin/do_clustergen generate_filters
   ./bin/do_clustergen parallel build_utts
   ./bin/do_clustergen parallel f0_v_sptk
   ./bin/do_clustergen parallel combine_coeffs_v
   ./bin/do_clustergen parallel cluster etc/txt.done.data.train
   # Writes the scores the mp step reads from ns_voice/test/cgp_ns/scores
   ./bin/do_clustergen cg_test mcdf0 cgp_ns etc/txt.done.data.test

   # Build done
   cd ..
   exit 0
fi

# "$1" is quoted so an empty argument, or one containing blanks, is compared
# as a single string instead of being re-parsed by the test command.
if [ "$1" = "rebuild" ]
then
   # We have a new set of hmm states, so rebuild base voice with them

   # The first rebuild keeps the existing labelling as ehmm_base; later
   # rebuilds just discard the current ehmm directory.
   if [ ! -d ehmm_base ]
   then
      mv ehmm ehmm_base
   else
      rm -rf ehmm
   fi

   ./bin/do_build label
   ./bin/do_clustergen generate_statenames
   ./bin/do_clustergen parallel build_utts
   ./bin/do_clustergen parallel f0_v_sptk
   ./bin/do_clustergen parallel combine_coeffs_v
   ./bin/do_clustergen parallel cluster etc/txt.done.data.train
   ./bin/do_clustergen dur etc/txt.done.data.train
   # Keep this run's duration output under a separate -ns name
   mv dur.dur.S25.out dur.dur.S25.out-ns
   # Test output is captured in mcd-ns.out
   ./bin/do_clustergen cg_test mcdf0 cgp_ns etc/txt.done.data.test >mcd-ns.out

   exit 0
fi



   