#!/bin/sh
###########################################################################
##                                                                       ##
##                   Language Technologies Institute                     ##
##                     Carnegie Mellon University                        ##
##                         Copyright (c) 2014                            ##
##                        All Rights Reserved.                           ##
##                                                                       ##
##  Permission is hereby granted, free of charge, to use and distribute  ##
##  this software and its documentation without restriction, including   ##
##  without limitation the rights to use, copy, modify, merge, publish,  ##
##  distribute, sublicense, and/or sell copies of this work, and to      ##
##  permit persons to whom this work is furnished to do so, subject to   ##
##  the following conditions:                                            ##
##   1. The code must retain the above copyright notice, this list of    ##
##      conditions and the following disclaimer.                         ##
##   2. Any modifications must be clearly marked as such.                ##
##   3. Original authors' names are not deleted.                         ##
##   4. The authors' names are not used to endorse or promote products   ##
##      derived from this software without specific prior written        ##
##      permission.                                                      ##
##                                                                       ##
##  CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK         ##
##  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      ##
##  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   ##
##  SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE      ##
##  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    ##
##  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   ##
##  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          ##
##  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       ##
##  THIS SOFTWARE.                                                       ##
##                                                                       ##
###########################################################################
##
##  Replacement wagon call that also does random forests
##
##                                                                       ##
###########################################################################
##                                                                       ##
## Example
##
## -track_feats 1-55 -vertex_output mean -desc festival/clunits/mcep.desc -data 'festival/feats/z_3.feats' -test 'festival/feats/z_3.feats' -balance 0 -track 'festival/disttabs/z_3.mcep' -stop 15 -output 'festival/trees/z_3_mcep.tree'
##
##                                                                       ##
###########################################################################

# wagon binary that is invoked for every tree; ESTDIR is taken from the
# environment.
WAGON=$ESTDIR/bin/wagon
#
# Command-line contract: nine "flag value" pairs in exactly the order shown
# in the Example in the header of this file.  Arguments are picked up by
# position only, no option parsing is done, so callers must keep that order:
#   -track_feats V  -vertex_output V  -desc V  -data V  -test V
#   -balance V  -track V  -stop V  -output V
#
WGN_TF=${1}        # flag:  -track_feats
WGN_TFV=${2}       # value: track feature range
WGN_VO=${3}        # flag:  -vertex_output
WGN_VOV=${4}       # value: vertex output type
WGN_desc=${5}      # flag:  -desc
WGN_descv=${6}     # value: description file
WGN_data=${7}      # flag:  -data
WGN_datav=${8}     # value: training data file
WGN_test=${9}      # flag:  -test
WGN_testv=${10}    # value: test data file
WGN_bal=${11}      # flag:  -balance
WGN_balv=${12}     # value: balance factor
WGN_track=${13}    # flag:  -track
WGN_trackv=${14}   # value: track file
WGN_stop=${15}     # flag:  -stop
WGN_stopv=${16}    # value: stop size
WGN_tree=${17}     # flag:  -output
WGN_treev=${18}    # value: output tree file

# Log the call as received.  "$*" is quoted so that the arguments are not
# glob-expanded or re-split, and printf is used so that a leading "-..."
# argument can never be taken as an option to echo.
printf '%s\n' "$*"

# Stuff
# Scratch directory for this invocation, named after the PID of this shell.
WD=wagon_rf_$$

# mkdir -p is idempotent, so a left-over directory or another invocation
# creating all_rf at the same moment is not an error.
mkdir -p "$WD" || exit 1
mkdir -p all_rf

#  Copy in description file and datafile
cat "$WGN_descv" >"$WD/desc.base"
cat "$WGN_datav" >"$WD/data"

# Get some randomness
SEED=$(( $$ % 10000 ))
echo "$SEED" >"$WD/seed"

# rf_randomize_desc SEED
#   Filter: reads a description file on stdin and writes it to stdout.
#   The first two lines are always copied unchanged.  Every later line that
#   starts with "(" gets the word "ignore" inserted after its second field
#   with probability 40%; all other lines are copied unchanged.
#   SEED-1 random numbers are drawn and discarded first so that different
#   seeds lead to different selections.  An empty or non-numeric SEED is
#   treated as 0 (the seed is passed with -v rather than being spliced into
#   the awk source, so it can never cause an awk syntax error).
rf_randomize_desc() {
   awk -v seed="$1" '
      function randint(n) { return int(n * rand()) }
      BEGIN { for (i = 1; i < seed + 0; i++) randint(10) }
      NR >= 3 && substr($0, 1, 1) == "(" && randint(100) < 40 {
         printf("%s %s ignore ", $1, $2)
         for (i = 3; i <= NF; i++)
            printf("%s ", $i)
         printf("\n")
         next
      }
      { print }
   '
}

# rf_next_seed SEED
#   Prints the seed for the next pass: the last of SEED-1 draws from
#   randint(10000).  At least one draw is always made, so a SEED of 0 or 1
#   (or an empty one) still yields a number instead of an empty string.
rf_next_seed() {
   awk -v seed="$1" '
      function randint(n) { return int(n * rand()) }
      BEGIN {
         draws = seed - 1
         if (draws < 1)
            draws = 1
         for (i = 1; i <= draws; i++)
            n = randint(10000)
         print n
      }
   '
}

#  Build models N times
Ntimes=20
pass=1
while [ "$pass" -le "$Ntimes" ]
do
   # Two-digit pass label (01, 02, ...) used in all per-pass file names
   i=$(printf '%02d' "$pass")
   echo "Iteration $i $WGN_treev"
   SSEED=$(cat "$WD/seed")

   # make random description file
   # Keep first two lines always
   rf_randomize_desc "$SSEED" <"$WD/desc.base" >"$WD/desc.$i"

   # Derive the seed for the next pass from the current one
   rf_next_seed "$SSEED" >"$WD/seed"

   # Build a tree (note we don't use our $WD/data.test.$i here yet)
   # The caller's -desc, -data, -test and -output values are replaced by the
   # per-pass description file, the copied data file (used for both training
   # and test) and a per-pass tree inside the scratch directory.
   "$WAGON" "$WGN_TF" "$WGN_TFV" "$WGN_VO" "$WGN_VOV" \
            "$WGN_desc" "$WD/desc.$i" \
            "$WGN_data" "$WD/data" \
            "$WGN_test" "$WD/data" \
            "$WGN_bal" "$WGN_balv" \
            "$WGN_track" "$WGN_trackv" \
            "$WGN_stop" "$WGN_stopv" \
            "$WGN_tree" "$WD/tree.$i"

   mkdir -p "rf_models/trees_$i"

   # Publish this pass's tree under the requested output name, then file a
   # copy of it under the per-pass model directory.
   cp -pr "$WD/tree.$i" "$WGN_treev"
   cp -pr "$WGN_treev" "rf_models/trees_$i"

   pass=$((pass + 1))
done


# Collect them together per unittype, rather than per pass
# (not used yet)
# Zero-padded label of the last pass, matching the per-pass tree file names.
N0times=$(printf '%02d' "$Ntimes")

# The tree of the final pass is what ends up under the requested output name.
cp -pr "$WD/tree.$N0times" "$WGN_treev"

# Unit name = output file name without directory and "_mcep.tree" suffix.
# The dot is escaped and the pattern anchored so only a real suffix is cut.
UNITNAME=$(basename "$WGN_treev" | sed 's/_mcep\.tree$//')

# An empty unit name would turn the rm below into "rm -rf all_rf/" and wipe
# every archived unit, so stop here instead.
: "${UNITNAME:?cannot derive unit name from output tree name}"

# Replace any archive left by an earlier run for this unit with the scratch
# directory of this run.
if [ -d "all_rf/$UNITNAME" ]
then
   rm -rf "all_rf/$UNITNAME"
fi

mv "$WD" "all_rf/$UNITNAME"

exit

##
##  Debugging info .... what a call looks like
##
##  NOTE: everything from here on follows an unconditional `exit`, so none
##  of it is ever executed; it is kept purely as a reference.
##

# Original call
#$WAGON $WGN_TF $WGN_TFV $WGN_VO $WGN_VOV $WGN_desc $WGN_descv $WGN_data $WGN_datav $WGN_test $WGN_testv $WGN_bal $WGN_balv $WGN_track $WGN_trackv $WGN_stop $WGN_stopv $WGN_tree $WGN_treev 


# Example of the same call with all arguments spelled out (unreachable).
$ESTDIR/bin/wagon  -track_feats 1-50 -vertex_output mean -desc festival/clunits/mcep.desc -data 'festival/feats/ih_2.feats' -test 'festival/feats/ih_2.feats' -balance 0 -track 'festival/disttabs/ih_2.mcep' -stop 50 -output 'festival/trees/ih_2_mcep.tree' 

