Source code for bob.kaldi.hmm

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Milos Cernak <milos.cernak@idiap.ch>
# September 9, 2017

import os

import numpy as np

from . import io
from subprocess import PIPE, Popen
from os.path import isfile
import tempfile
import logging
import pkg_resources
import shutil

import bob.kaldi

logger = logging.getLogger(__name__)


def _train_mono_discard(path):
    """Delete ``path`` as best-effort cleanup of a scratch file."""
    try:
        os.unlink(path)
    except OSError:
        # Cleanup must never mask the error (or the result) of the training
        # itself, so a file that is already gone or cannot be removed is
        # deliberately ignored here.
        pass


def _train_mono_run(cmd):
    """Run one command line and forward its stderr to the debug log.

    Parameters
    ----------
    cmd : list
        The executable name followed by its arguments, as given to
        :py:class:`subprocess.Popen`.

    Returns
    -------
    int
        The exit status of the process. A non-zero status is reported with a
        warning; it is not turned into an exception.
    """
    with tempfile.NamedTemporaryFile(suffix='.log') as logfile:
        proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=logfile)
        proc.communicate()
        # stderr went straight to the log file; read it back by name.
        with open(logfile.name) as fp:
            logger.debug("%s", fp.read())
    if proc.returncode != 0:
        logger.warning("%s exited with status %s", cmd[0], proc.returncode)
    return proc.returncode


def train_mono(feats, trans_words, fst_L, topology_in, shared_phones='',
               numgauss=1000, power=0.25, num_iters=40, beam=6):
    """Monophone model training.

    Runs the command-line tools ``gmm-init-mono``, ``compile-train-graphs``,
    ``align-equal-compiled``, ``gmm-acc-stats-ali`` and ``gmm-est`` once, and
    then ``num_iters`` passes of ``gmm-align-compiled`` /
    ``gmm-acc-stats-ali`` / ``gmm-est``. All intermediate data lives in
    temporary files that are removed before returning, also when an
    exception is raised.

    Parameters
    ----------
    feats : dict
        The input cepstral features. Each key is an utterance id (a string,
        it is UTF-8 encoded before being written) and each value is a 2D
        array of 32-bit floats; the size of its second axis is passed to
        ``gmm-init-mono`` as the feature dimension.
    trans_words : str
        Text transcription of the `feats` (the word labels). The string is
        written verbatim to a temporary file.
    fst_L : str
        A filename of the lexicon compiled as FST.
    topology_in : str
        The topology that specifies 3-state left-to-right HMM, and default
        transition probs. The string is written verbatim to a temporary
        file whose name is handed to ``gmm-init-mono``.
    shared_phones : :obj:`str`, optional
        A filename of the list of phones whose pdfs should be shared. The
        option is omitted when the string is empty.
    numgauss : :obj:`int`, optional
        A number of Gaussians of GMMs (``--mix-up``).
    power : :obj:`float`, optional
        Power to allocate Gaussians to states (``--power``).
    num_iters : :obj:`int`, optional
        A number of iterations for re-estimation of GMMs. With ``0`` the
        model of the initial estimation is returned.
    beam : :obj:`float`, optional
        Decoding beam used in alignment; the retry beam is ``4 * beam``.

    Returns
    -------
    str
        The mono-phones acoustic models, i.e. the content of the last model
        file written by ``gmm-est --binary=false``.

    Raises
    ------
    OSError
        If one of the executables cannot be started.

    Notes
    -----
    If `feats` is empty, the feature dimension given to ``gmm-init-mono``
    stays at its initial value ``-1``.
    """
    # Paths of every temporary file created with ``delete=False``; they are
    # handed to external processes by name and must be removed by hand.
    scratch = []
    try:
        feat_dim = -1
        with tempfile.NamedTemporaryFile(delete=False, suffix='.ark') as arkf:
            scratch.append(arkf.name)
        with open(arkf.name, 'wb') as f:
            for uttid in feats:
                io.write_mat(f, feats[uttid], key=uttid.encode('utf-8'))
                if feat_dim < 1:
                    # Only the first utterance decides the dimension.
                    _, feat_dim = feats[uttid].shape

        with tempfile.NamedTemporaryFile(delete=False, suffix='.top') as topof:
            scratch.append(topof.name)
        with open(topof.name, 'wt') as f:
            f.write(topology_in)

        with tempfile.NamedTemporaryFile(delete=False, suffix='.tra') as traf:
            scratch.append(traf.name)
        with open(traf.name, 'wt') as f:
            f.write(trans_words)

        binary1 = 'gmm-init-mono'
        binary2 = 'compile-train-graphs'
        binary3 = 'align-equal-compiled'
        binary4 = 'gmm-acc-stats-ali'
        binary5 = 'gmm-est'
        binary6 = 'gmm-align-compiled'

        with tempfile.NamedTemporaryFile(suffix='.mdl') as initf, \
                tempfile.NamedTemporaryFile(suffix='.tree') as treef, \
                tempfile.NamedTemporaryFile(suffix='.fst') as fstf, \
                tempfile.NamedTemporaryFile(suffix='.ali') as alif, \
                tempfile.NamedTemporaryFile(suffix='.acc') as accf, \
                tempfile.NamedTemporaryFile(suffix='.est') as estf:

            # Initial model and tree.
            cmd1 = [binary1]
            if shared_phones != '':
                cmd1 += ['--shared-phones=' + str(shared_phones)]
            cmd1 += [
                '--train-feats=ark:copy-feats ark:' + arkf.name + ' ark:-|',
                topof.name,
                str(feat_dim),
                initf.name,
                treef.name,
            ]
            _train_mono_run(cmd1)

            # Training graphs for the transcription.
            _train_mono_run([
                binary2,
                treef.name,
                initf.name,
                str(fst_L),
                'ark,t:' + traf.name,
                'ark,t:' + fstf.name,
            ])

            # First alignment.
            _train_mono_run([
                binary3,
                'ark,t:' + fstf.name,
                'ark:' + arkf.name,
                'ark,t:' + alif.name,
            ])

            # Statistics and first estimation from that alignment.
            _train_mono_run([
                binary4,
                initf.name,
                'ark:' + arkf.name,
                'ark,t:' + alif.name,
                accf.name,
            ])
            _train_mono_run([
                binary5,
                '--min-gaussian-occupancy=3',
                '--mix-up=' + str(numgauss),
                '--power=' + str(power),
                '--binary=false',
                initf.name,
                accf.name,
                estf.name,
            ])

            model = estf.name
            for x in range(num_iters):
                logger.info("Training pass %s", x)

                # Re-align with the current model.
                _train_mono_run([
                    binary6,
                    '--transition-scale=1.0',
                    '--acoustic-scale=0.1',
                    '--self-loop-scale=0.1',
                    '--beam=' + str(beam),
                    '--retry-beam=' + str(beam * 4),
                    '--careful=false',
                    model,
                    'ark,t:' + fstf.name,
                    'ark:' + arkf.name,
                    'ark:' + alif.name,
                ])

                # Accumulate statistics on the new alignment.
                _train_mono_run([
                    binary4,
                    model,
                    'ark:' + arkf.name,
                    'ark:' + alif.name,
                    accf.name,
                ])

                # Re-estimate into a fresh model file.
                with tempfile.NamedTemporaryFile(delete=False,
                                                 suffix='.est') as itf:
                    scratch.append(itf.name)
                _train_mono_run([
                    binary5,
                    '--binary=false',
                    '--mix-up=' + str(numgauss),
                    '--power=' + str(power),
                    model,
                    accf.name,
                    itf.name,
                ])

                # The previous iteration's model is no longer needed.
                # estf is removed by its own context manager instead.
                if model != estf.name:
                    scratch.remove(model)
                    _train_mono_discard(model)
                model = itf.name

            # Read inside the ``with`` so that estf still exists when
            # num_iters is 0.
            with open(model) as fp:
                return fp.read()
    finally:
        for path in scratch:
            _train_mono_discard(path)