#!/usr/bin/env python3

#
# This script inserts an occasional "1" into long runs of real "0" samples in a wav file.
# This works around a bug in HCopy that corrupts the energy track of MFCC files.
#

import getopt
import sys
import os
import signal
import re
import lsdir
from math import sqrt

#Usage
def usage():
    """Print the command-line help text for add_noise to standard output."""
    help_lines = (
        "add_noise [-h] <wav> <utts.data file>",
        "\t -h prints this help",
        "\t wav_dir",
        "\t utts.data file",
    )
    # One print per line, so the output matches line-by-line printing exactly.
    for help_line in help_lines:
        print(help_line)

# Parse the command line. ``args`` is the (options, positionals) pair returned
# by getopt.getopt. The option spec is 'h' (no trailing colon) because -h is a
# plain flag: with 'h:' getopt would demand a value for it and swallow the
# first positional argument.
try:
    args = getopt.getopt(sys.argv[1:], 'h', [])

except getopt.GetoptError:
    usage()
    sys.exit(1)

# -h: show the help text and stop successfully.
if any(option == '-h' for option, _value in args[0]):
    usage()
    sys.exit(0)

# Two positional arguments are required: the wav directory and the utts file.
if len(args[1]) < 2:
    usage()
    sys.exit(1)
#-------

# The directory holding the ``est`` module is read from the EST_PYTHON
# environment variable and appended to the module search path before import.
try:
    estmoduledir = os.environ["EST_PYTHON"]
except KeyError:
    print("\n** environment variable EST_PYTHON is unset **\n")
    # Stop here with the message above; falling through would fail with a
    # NameError on ``estmoduledir`` in the sys.path.append call below.
    sys.exit(1)

sys.path.append(estmoduledir)
import est


# window and window shift (s)

## Main program
# Take the two positional arguments from the getopt result (``args[1]``)
# rather than from raw sys.argv, so a leading option cannot shift them.
# ``wavdir`` keeps its trailing '/' and is later passed to lsdir.listFiles.
wavdir = args[1][0] + '/'
utts_file = args[1][1]

# Refuse to continue when the first argument is not an existing directory.
if not os.path.isdir(wavdir):
    print("Usage: {} wav_dir utts_file".format(sys.argv[0]))
    sys.exit(1)

# Entries to process, as returned by lsdir.listFiles for the wav directory and
# the utts file; '.wav' is appended to each entry inside the loop.
filelist = lsdir.listFiles(wavdir, utts_file)

# Longest run of consecutive zero-valued samples that is left untouched.
threshold = 50

for file in filelist:

    file = file + '.wav'
    print("checking file :: {}".format(file))

    wave = est.Wave()
    wave.load(file)

    frames = wave.num_samples()

    changed = False
    count = 0  # length of the current run of zero-valued samples

    for c in range(frames):
        if wave.a(c) != 0:
            # A non-zero sample ends the current run.
            count = 0
            continue

        count += 1
        if count > threshold:
            # The run exceeded the threshold: overwrite this sample with 1 and
            # start counting a fresh run.
            # NOTE(review): set_a arguments are presumably
            # (sample index, channel, value) -- confirm against the est bindings.
            wave.set_a(c, 0, 1)
            count = 0
            changed = True

    # Only rewrite files in which at least one sample was modified.
    if changed:
        print("Fixing file: {}\n".format(file))
        wave.save(file)
