/// <summary>
/// Scans <c>latticeWords</c> for pairs of entries that compare equal and
/// collapses each such pair into the earlier entry.
/// </summary>
/// <remarks>
/// For every duplicate found, the later entry is folded into the earlier one
/// via <c>Merge</c>, then removed from <c>latticeWords</c>, from the
/// start/end node indexes, and from every time slot between its start and
/// end node (inclusive).
/// </remarks>
/// <returns>The number of entries that were merged away.</returns>
private int MergeDuplicates()
{
    int numMerged = 0;
    for (int i = 0; i < latticeWords.Count - 1; i++)
    {
        HTKLatticeReader.LatticeWord first = latticeWords[i];
        for (int j = i + 1; j < latticeWords.Count; j++)
        {
            HTKLatticeReader.LatticeWord second = latticeWords[j];
            if (!first.Equals(second))
            {
                continue;
            }
            if (Debug)
            {
                log.Info("removed duplicate");
            }
            first.Merge(second);
            // Remove by position. Remove(j) would be an item-based removal in the
            // BCL collection API; RemoveAt is the index-based call intended here.
            latticeWords.RemoveAt(j);
            wordsStartAt[second.startNode].Remove(second);
            wordsEndAt[second.endNode].Remove(second);
            for (int k = second.startNode; k <= second.endNode; k++)
            {
                wordsAtTime[k].Remove(second);
            }
            numMerged++;
            // The element that followed the removed one now sits at index j;
            // step back so the loop increment re-examines that position.
            j--;
        }
    }
    return numMerged;
}
/// <summary>
/// Re-targets every word in <paramref name="words"/> so that it ends at
/// <paramref name="newEndTime"/>, keeping <c>latticeWords</c>,
/// <c>wordsStartAt</c>, <c>wordsEndAt</c> and <c>wordsAtTime</c> in step.
/// </summary>
/// <remarks>
/// If moving a word would make it equal to an entry already present in
/// <c>latticeWords</c>, the word is merged into that entry and dropped
/// instead of being moved.
/// </remarks>
/// <param name="words">Words to update; entries that were merged away or moved are removed from this list.</param>
/// <param name="newEndTime">The end node every word should have afterwards.</param>
private void ChangeEndTimes(IList<HTKLatticeReader.LatticeWord> words, int newEndTime)
{
    // Removal from 'words' is deferred until after the loop because the
    // list is being enumerated.
    List<HTKLatticeReader.LatticeWord> pendingRemoval = new List<HTKLatticeReader.LatticeWord>();

    foreach (HTKLatticeReader.LatticeWord entry in words)
    {
        // Take the entry out first so the Contains check below can only
        // match some other, equal entry.
        latticeWords.Remove(entry);
        int previousEnd = entry.endNode;
        entry.endNode = newEndTime;

        if (!latticeWords.Contains(entry))
        {
            // No twin: adjust the time-slot index for the span that was
            // lost or gained, then put the entry back.
            if (previousEnd > newEndTime)
            {
                for (int t = newEndTime + 1; t <= previousEnd; t++)
                {
                    wordsAtTime[t].Remove(entry);
                }
            }
            else
            {
                for (int t = previousEnd + 1; t <= newEndTime; t++)
                {
                    wordsAtTime[t].Add(entry);
                }
            }

            latticeWords.Add(entry);

            if (previousEnd != newEndTime)
            {
                pendingRemoval.Add(entry);
                wordsEndAt[newEndTime].Add(entry);
            }

            continue;
        }

        if (Debug)
        {
            log.Info("duplicate found");
        }

        HTKLatticeReader.LatticeWord twin = latticeWords[latticeWords.IndexOf(entry)];

        // Restore the original end node before merging and before cleaning
        // the indexes, so the cleanup covers the span the entry occupied.
        entry.endNode = previousEnd;
        twin.Merge(entry);
        wordsStartAt[entry.startNode].Remove(entry);
        pendingRemoval.Add(entry);
        for (int t = entry.startNode; t <= entry.endNode; t++)
        {
            wordsAtTime[t].Remove(entry);
        }
    }

    words.RemoveAll(pendingRemoval);
}
/// <summary>
/// Repeatedly sweeps every time slot in <c>wordsAtTime</c>, trying to
/// collapse pairs of entries whose words match case-insensitively, until a
/// full sweep makes no change.
/// </summary>
private void RemoveRedundency()
{
    bool changed;
    do
    {
        changed = false;
        foreach (List<HTKLatticeReader.LatticeWord> slot in wordsAtTime)
        {
            if (slot.Count < 2)
            {
                continue;
            }

            for (int outer = 0; outer < slot.Count - 1; outer++)
            {
                HTKLatticeReader.LatticeWord w1 = slot[outer];
                for (int inner = outer + 1; inner < slot.Count; inner++)
                {
                    HTKLatticeReader.LatticeWord w2 = slot[inner];
                    bool sameWord = Sharpen.Runtime.EqualsIgnoreCase(w1.word, w2.word);
                    if (sameWord && RemoveRedundentPair(w1, w2))
                    {
                        changed = true;
                        // A successful removal ends the scan for this w1;
                        // carry on with the next outer index.
                        break;
                    }
                }
            }
        }
    }
    while (changed);
}
/// <summary>
/// Combines the two scores stored on a lattice word into a single value:
/// <c>am * 100.0 + lm</c>.
/// </summary>
/// <param name="lw">The lattice word whose <c>am</c> and <c>lm</c> fields are read.</param>
/// <returns>The weighted sum of the word's <c>am</c> and <c>lm</c> values.</returns>
private double GetProb(HTKLatticeReader.LatticeWord lw)
{
    // NOTE(review): am/lm are presumably acoustic- and language-model scores,
    // and 100.0 a fixed weighting between them - confirm against the caller.
    double weightedAm = lw.am * 100.0;
    return weightedAm + lw.lm;
}
/// <summary>
/// Reads a lattice description from <paramref name="in"/> and populates
/// <c>latticeWords</c>, <c>numStates</c> and <c>nodeTimes</c>.
/// </summary>
/// <remarks>
/// The input is consumed in this order: leading lines starting with
/// <c>#</c> (skipped); word lines of the form
/// <c>start end lm=X,am=Y word(pron)</c>; one line holding the node count;
/// then one <c>index t=time</c> line per node. Word lines for
/// <c>&lt;s&gt;</c> are skipped and <c>&lt;/s&gt;</c> is replaced by
/// <c>LexiconConstants.Boundary</c>. Node numbers in word lines are
/// converted from 1-based to 0-based. A node line that does not match the
/// expected form is logged and terminates the process with exit code 1.
/// </remarks>
/// <param name="in">The reader supplying the lattice text.</param>
/// <exception cref="System.IO.EndOfStreamException">The input ends before the lattice is complete.</exception>
/// <exception cref="System.Exception"/>
private void ReadInput(BufferedReader @in)
{
    // GET RID OF COMMENT LINES
    string line = ReadLatticeLine(@in);
    while (line.Trim().StartsWith("#", System.StringComparison.Ordinal))
    {
        line = ReadLatticeLine(@in);
    }

    // READ LATTICE
    latticeWords = new List<HTKLatticeReader.LatticeWord>();
    Pattern wordLinePattern = Pattern.Compile("(\\d+)\\s+(\\d+)\\s+lm=(-?\\d+\\.\\d+),am=(-?\\d+\\.\\d+)\\s+([^( ]+)(?:\\((\\d+)\\))?.*");
    Matcher wordLineMatcher = wordLinePattern.Matcher(line);
    while (wordLineMatcher.Matches())
    {
        int startNode = System.Convert.ToInt32(wordLineMatcher.Group(1)) - 1;
        int endNode = System.Convert.ToInt32(wordLineMatcher.Group(2)) - 1;
        // The pattern fixes '.' as the decimal separator, so parse with the
        // invariant culture rather than whatever the current thread uses.
        double lm = double.Parse(wordLineMatcher.Group(3), System.Globalization.CultureInfo.InvariantCulture);
        double am = double.Parse(wordLineMatcher.Group(4), System.Globalization.CultureInfo.InvariantCulture);
        string word = wordLineMatcher.Group(5).ToLower();
        string pronun = wordLineMatcher.Group(6);

        // Entries for "<s>" are skipped entirely.
        if (!Sharpen.Runtime.EqualsIgnoreCase(word, "<s>"))
        {
            if (Sharpen.Runtime.EqualsIgnoreCase(word, "</s>"))
            {
                word = LexiconConstants.Boundary;
            }

            // The "(n)" pronunciation suffix is optional; default to 0.
            int pronunciation = (pronun == null) ? 0 : System.Convert.ToInt32(pronun);

            HTKLatticeReader.LatticeWord lw = new HTKLatticeReader.LatticeWord(word, startNode, endNode, lm, am, pronunciation, mergeType);
            if (Debug)
            {
                log.Info(lw);
            }
            latticeWords.Add(lw);
        }

        line = ReadLatticeLine(@in);
        wordLineMatcher = wordLinePattern.Matcher(line);
    }

    // GET NUMBER OF NODES
    // The first line that is not a word line must hold the node count.
    numStates = System.Convert.ToInt32(line.Trim());
    if (Debug)
    {
        log.Info(numStates);
    }

    // READ NODE TIMES
    nodeTimes = new int[numStates];
    Pattern nodeTimePattern = Pattern.Compile("(\\d+)\\s+t=(\\d+)\\s*");
    for (int i = 0; i < numStates; i++)
    {
        Matcher nodeTimeMatcher = nodeTimePattern.Matcher(ReadLatticeLine(@in));
        if (!nodeTimeMatcher.Matches())
        {
            log.Info("Input File Error");
            System.Environment.Exit(1);
        }
        // assert ((Integer.parseInt(nodeTimeMatcher.group(1))-1) == i) ;
        nodeTimes[i] = System.Convert.ToInt32(nodeTimeMatcher.Group(2));
        if (Debug)
        {
            log.Info(i + "\tt=" + nodeTimes[i]);
        }
    }
}

/// <summary>
/// Reads the next line from <paramref name="reader"/>, treating end of
/// input as an error instead of returning null.
/// </summary>
/// <param name="reader">The reader to pull a line from.</param>
/// <returns>The line that was read; never null.</returns>
/// <exception cref="System.IO.EndOfStreamException">No further line is available.</exception>
private static string ReadLatticeLine(BufferedReader reader)
{
    string line = reader.ReadLine();
    if (line == null)
    {
        throw new System.IO.EndOfStreamException("Unexpected end of lattice input");
    }
    return line;
}
/// <summary>
/// Attempts to collapse two lattice words onto a common span by moving the
/// start/end nodes of the words attached to their current boundaries.
/// </summary>
/// <remarks>
/// The common span runs from the later of the two start nodes to the
/// earlier of the two end nodes. Before anything is changed, the words
/// starting at the earlier start node and the words ending at the later
/// end node are checked; if any of them could not survive the move, the
/// method returns <c>false</c> without modifying the lattice.
/// </remarks>
/// <param name="w1">First word of the pair.</param>
/// <param name="w2">Second word of the pair.</param>
/// <returns><c>true</c> if the boundaries were rewritten; <c>false</c> if the legality check rejected the pair.</returns>
private bool RemoveRedundentPair(HTKLatticeReader.LatticeWord w1, HTKLatticeReader.LatticeWord w2)
{
    if (Debug)
    {
        log.Info("trying to remove:");
        log.Info(w1);
        log.Info(w2);
    }

    // we must pick new start and end times that are legal:
    // the later start becomes the new start, the earlier end the new end.
    int newStart = System.Math.Max(w1.startNode, w2.startNode);
    int oldStart = System.Math.Min(w1.startNode, w2.startNode);
    int newEnd = System.Math.Min(w1.endNode, w2.endNode);
    int oldEnd = System.Math.Max(w1.endNode, w2.endNode);

    // check legality (illegality not guaranteed)
    foreach (HTKLatticeReader.LatticeWord starter in wordsStartAt[oldStart])
    {
        bool endsBeforeNewStart = starter.endNode < newStart;
        bool nonEmptyEndingAtNewStart = starter.endNode == newStart && starter.endNode != starter.startNode;
        if (endsBeforeNewStart || nonEmptyEndingAtNewStart)
        {
            if (Debug)
            {
                log.Info("failed");
            }
            return false;
        }
    }

    foreach (HTKLatticeReader.LatticeWord finisher in wordsEndAt[oldEnd])
    {
        bool startsAfterNewEnd = finisher.startNode > newEnd;
        bool nonEmptyStartingAtNewEnd = finisher.startNode == newEnd && finisher.endNode != finisher.startNode;
        if (startsAfterNewEnd || nonEmptyStartingAtNewEnd)
        {
            if (Debug)
            {
                log.Info("failed");
            }
            return false;
        }
    }

    // change start/end times of adjacent entries
    ChangeStartTimes(wordsStartAt[oldEnd], newEnd);
    ChangeEndTimes(wordsEndAt[oldStart], newStart);

    // change start/end times of words adjacent to adjacent entries
    ChangeStartTimes(wordsStartAt[oldStart], newStart);
    ChangeEndTimes(wordsEndAt[oldEnd], newEnd);

    if (Debug)
    {
        log.Info("succeeded");
    }
    return true;
}