/// <summary>
/// Test helper: aligns <paramref name="query"/> against <paramref name="database"/> and asserts on the outcome.
/// </summary>
/// <remarks>
/// A <c>LongTextAligner</c> is created over <paramref name="database"/> with <c>1</c> as its second
/// constructor argument. The alignment it returns is converted with <c>Utilities.AsList</c>, and the
/// assertion passes only when <c>Helper.Contains</c> accepts that list together with <paramref name="result"/>.
/// </remarks>
/// <param name="database">Words the aligner is built from.</param>
/// <param name="query">Words handed to <c>LongTextAligner.Align</c>.</param>
/// <param name="result">Expected values forwarded to <c>Helper.Contains</c>.</param>
private void Align(List<string> database, List<string> query, params int[] result)
{
    int[] actual = new LongTextAligner(database, 1).Align(query);
    var actualAsList = Utilities.AsList(actual);
    var matches = Helper.Contains(actualAsList, result);

    Assert.IsTrue(matches);
}
/// <summary>
/// Sets up the fixture: reads the words of <c>transcription-small.txt</c> and builds the aligner under test.
/// </summary>
/// <remarks>
/// The file content is split on spaces, line feeds and carriage returns, and empty tokens are dropped.
/// The remaining words are passed to <c>LongTextAligner</c> with <c>2</c> as the second constructor argument.
/// </remarks>
public TextAlignerSmallTest()
{
    var transcription = new URL("transcription-small.txt");
    char[] separators = { ' ', '\n', '\r' };

    // RemoveEmptyEntries discards the zero-length tokens produced by consecutive separators.
    string[] tokens = File.ReadAllText(transcription.Path)
                          .Split(separators, StringSplitOptions.RemoveEmptyEntries);

    _aligner = new LongTextAligner(new List<String>(tokens), 2);
}
/// <summary>
/// Aligns the words of a transcript with the audio stored in <paramref name="audioUrl"/>.
/// </summary>
/// <remarks>
/// Runs four passes over a work queue of (text, time frame, transcript range) triples. In passes 0-2 the
/// polled text is set on the language model and texts shorter than <c>MinLmAlignSize</c> are skipped.
/// Pass 3 switches the decoder's search manager property and sets the polled text on the grammar instead.
/// After every pass <c>ScheduleNextAlignment</c> is called with the transcript, the words aligned so far
/// and the three queues (presumably to queue the work of the next pass; confirm against its implementation).
/// </remarks>
/// <param name="audioUrl">Audio file; it is opened for reading once per decoded queue entry.</param>
/// <param name="sentenceTranscript">Transcript sentences; converted to words with <c>SentenceToWords</c>.</param>
/// <returns>The aligned word results, ordered by their index in the transcript.</returns>
public List<WordResult> Align(FileInfo audioUrl, List<string> sentenceTranscript)
{
    var transcript = SentenceToWords(sentenceTranscript);

    var aligner = new LongTextAligner(transcript, TupleSize);
    // Key: index of the word in the transcript; value: the recognized word aligned to it.
    var alignedWords = new Dictionary<int, WordResult>();
    var ranges = new LinkedList<Range>();
    var texts = new LinkedList<List<string>>();
    var timeFrames = new LinkedList<TimeFrame>();

    // Seed the queues with the whole transcript over an unbounded time frame.
    ranges.AddLast(new Range(0, transcript.Count));
    texts.Offer(transcript);
    TimeFrame totalTimeFrame = TimeFrame.Infinite;
    timeFrames.Offer(totalTimeFrame);
    long lastFrame = TimeFrame.Infinite.End;

    for (int i = 0; i < 4; i++)
    {
        if (i == 3)
        {
            _context.SetLocalProperty("decoder->searchManager", "alignerSearchManager");
        }

        while (texts.Count != 0)
        {
            // The three queues are filled and drained in lock step.
            Debug.Assert(texts.Count == ranges.Count);
            Debug.Assert(texts.Count == timeFrames.Count);

            var text = texts.Poll();
            var frame = timeFrames.Poll();
            var range = ranges.Poll();

            // Check the size of the polled text, not of the queue: the queue has just been
            // polled, so testing its count would always skip the only (or last) pending entry.
            if (i < 3 && text.Count < MinLmAlignSize)
            {
                continue;
            }

            // NOTE(review): List<T> does not override ToString, so a list operand is rendered as
            // its type name in these messages; left unchanged to keep the log format stable.
            this.LogInfo("Aligning frame " + frame + " to text " + text + " range " + range);

            if (i < 3)
            {
                _languageModel.SetText(text);
            }

            _recognizer.Allocate();

            if (i == 3)
            {
                _grammar.SetWords(text);
            }

            var hypothesis = new List<WordResult>();

            // Dispose the file stream once decoding of this entry has finished; it was
            // previously left open for every processed queue entry.
            using (var audioStream = audioUrl.OpenRead())
            {
                _context.SetSpeechSource(audioStream, frame);

                Result speechResult;
                while (null != (speechResult = _recognizer.Recognize()))
                {
                    hypothesis.AddRange(speechResult.GetTimedBestResult(false));
                }
            }

            // The first pass decodes the whole audio, so its last word marks the end of speech.
            if (i == 0 && hypothesis.Count > 0)
            {
                lastFrame = hypothesis[hypothesis.Count - 1].TimeFrame.End;
            }

            var words = new List<string>(hypothesis.Count);
            foreach (WordResult wr in hypothesis)
            {
                words.Add(wr.Word.Spelling);
            }

            int[] alignment = aligner.Align(words, range);

            this.LogInfo("Decoding result is " + hypothesis);

            DumpAlignmentStats(transcript, alignment, hypothesis);

            for (int j = 0; j < alignment.Length; j++)
            {
                if (alignment[j] != -1)
                {
                    // Indexer instead of Add: a transcript index that is aligned again
                    // replaces the earlier entry rather than throwing ArgumentException.
                    alignedWords[alignment[j]] = hypothesis[j];
                }
            }

            _recognizer.Deallocate();
        }

        // NOTE(review): alignedWords is an unordered Dictionary; if ScheduleNextAlignment
        // relies on ascending key order it needs to sort as well. Confirm.
        ScheduleNextAlignment(transcript, alignedWords, ranges, texts, timeFrames, lastFrame);
    }

    // Dictionary enumeration order is unspecified, so order the result by transcript index explicitly.
    var orderedKeys = new List<int>(alignedWords.Keys);
    orderedKeys.Sort();

    var orderedWords = new List<WordResult>(orderedKeys.Count);
    foreach (int key in orderedKeys)
    {
        orderedWords.Add(alignedWords[key]);
    }

    return orderedWords;
}
/// <summary>
/// Demo entry point: aligns a transcript with an audio file and prints how the two line up.
/// </summary>
/// <remarks>
/// With more than one argument, <c>args[0]</c> is the audio file and <c>args[1]</c> the transcript file;
/// otherwise a bundled wave resource and a fixed digit transcript are used. <c>args[2]</c>, <c>args[3]</c>
/// and <c>args[4]</c> optionally override <c>amPath</c>, <c>dictPath</c> and <c>g2pPath</c>.
/// Output, one line per word:
/// "- word" for a transcript word whose alignment entry is -1,
/// "+ word [time frame]" for a recognized word lying between two aligned positions,
/// and " word [time frame]" for the recognized word a transcript word was aligned to.
/// </remarks>
/// <param name="args">Command-line arguments as described above.</param>
public static void main(string[] args)
{
    URL audioUrl;
    string text;
    if (args.Length > 1)
    {
        audioUrl = new File(args[0]).toURI().toURL();
        Scanner scanner = new Scanner(new File(args[1]));
        // "\Z" is the end-of-input anchor, so next() returns the whole file as one token.
        scanner.useDelimiter("\\Z");
        text = scanner.next();
        scanner.close();
    }
    else
    {
        audioUrl = ClassLiteral<AlignerDemo>.Value.getResource("10001-90210-01803.wav");
        text = "one zero zero zero one nine oh two one oh zero one eight zero three";
    }

    string amPath = (args.Length <= 2) ? "resource:/edu/cmu/sphinx/models/en-us/en-us" : args[2];
    string dictPath = (args.Length <= 3) ? "resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict" : args[3];
    string g2pPath = (args.Length <= 4) ? null : args[4];

    SpeechAligner speechAligner = new SpeechAligner(amPath, dictPath, g2pPath);
    List results = speechAligner.align(audioUrl, text);

    // Spellings of the recognized words form the database the transcript is aligned against.
    ArrayList recognizedWords = new ArrayList();
    for (Iterator it = results.iterator(); it.hasNext(); )
    {
        WordResult recognized = (WordResult)it.next();
        recognizedWords.add(recognized.getWord().getSpelling());
    }

    LongTextAligner textAligner = new LongTextAligner(recognizedWords, 2);
    List sentenceTranscript = speechAligner.getTokenizer().expand(text);
    List transcriptWords = speechAligner.sentenceToWords(sentenceTranscript);
    int[] alignment = textAligner.align(transcriptWords);

    // NOTE(review): in this copy the three print call targets had been replaced by an e-mail
    // obfuscation placeholder; they are restored as java.lang.System.@out.format. Confirm
    // against the original source.
    int lastId = -1;
    for (int i = 0; i < alignment.Length; i++)
    {
        if (alignment[i] == -1)
        {
            java.lang.System.@out.format("- %s\n", new object[] { transcriptWords.get(i) });
            continue;
        }

        if (alignment[i] - lastId > 1)
        {
            // Recognized words between the previous and the current aligned position.
            for (Iterator it = results.subList(lastId + 1, alignment[i]).iterator(); it.hasNext(); )
            {
                WordResult inserted = (WordResult)it.next();
                java.lang.System.@out.format("+ %-25s [%s]\n", new object[] { inserted.getWord().getSpelling(), inserted.getTimeFrame() });
            }
        }

        WordResult matched = (WordResult)results.get(alignment[i]);
        java.lang.System.@out.format(" %-25s [%s]\n", new object[] { matched.getWord().getSpelling(), matched.getTimeFrame() });
        lastId = alignment[i];
    }

    // Recognized words after the last aligned position.
    if (lastId >= 0 && results.size() - lastId > 1)
    {
        for (Iterator it = results.subList(lastId + 1, results.size()).iterator(); it.hasNext(); )
        {
            WordResult trailing = (WordResult)it.next();
            java.lang.System.@out.format("+ %-25s [%s]\n", new object[] { trailing.getWord().getSpelling(), trailing.getTimeFrame() });
        }
    }
}
/// <summary>
/// Aligns the words of a transcript with the audio behind <paramref name="audioUrl"/>.
/// </summary>
/// <remarks>
/// The sentence transcript is set on the language model once, then four passes run over a work queue of
/// (text, time frame, transcript range) triples. From pass 1 on the decoder's search manager property is
/// switched and the polled text is set on the grammar before decoding. After each pass
/// <c>scheduleNextAlignment</c> is called with the transcript, the aligned words and the three queues.
/// </remarks>
/// <param name="audioUrl">Audio location; a stream is opened and closed for every queue entry.</param>
/// <param name="sentenceTranscript">Transcript sentences; converted to words with <c>sentenceToWords</c>.</param>
/// <returns>A new list holding the values of the map of aligned words (keyed by transcript index).</returns>
public virtual List align(URL audioUrl, List sentenceTranscript)
{
    List transcript = this.sentenceToWords(sentenceTranscript);
    LongTextAligner aligner = new LongTextAligner(transcript, 3);
    TreeMap alignedWords = new TreeMap();
    LinkedList ranges = new LinkedList();
    ArrayDeque texts = new ArrayDeque();
    ArrayDeque timeFrames = new ArrayDeque();

    // Seed the queues with the whole transcript over the infinite time frame.
    ranges.offer(new Range(0, transcript.size()));
    texts.offer(transcript);
    TimeFrame totalTimeFrame = TimeFrame.__INFINITE;
    timeFrames.offer(totalTimeFrame);
    long lastFrame = TimeFrame.__INFINITE.getEnd();

    this.languageModel.setText(sentenceTranscript);

    for (int pass = 0; pass < 4; pass++)
    {
        if (pass == 1)
        {
            this.context.setLocalProperty("decoder->searchManager", "alignerSearchManager");
        }

        while (!texts.isEmpty())
        {
            // Compiled form of the Java assert statements: the queues must stay in lock step.
            if (!SpeechAligner.assertionsDisabled && texts.size() != ranges.size())
            {
                throw new AssertionError();
            }
            if (!SpeechAligner.assertionsDisabled && texts.size() != timeFrames.size())
            {
                throw new AssertionError();
            }

            List text = (List)texts.poll();
            TimeFrame frame = (TimeFrame)timeFrames.poll();
            Range range = (Range)ranges.poll();

            string aligningMessage = new StringBuilder()
                .append("Aligning frame ").append(frame)
                .append(" to text ").append(text)
                .append(" range ").append(range)
                .toString();
            this.logger.info(aligningMessage);

            this.recognizer.allocate();
            if (pass >= 1)
            {
                this.grammar.setWords(text);
            }

            InputStream stream = audioUrl.openStream();
            this.context.setSpeechSource(stream, frame);

            ArrayList hypothesis = new ArrayList();
            Result result;
            while (null != (result = this.recognizer.recognize()))
            {
                string utteranceMessage = new StringBuilder()
                    .append("Utterance result ").append(result.getTimedBestResult(true))
                    .toString();
                this.logger.info(utteranceMessage);
                hypothesis.addAll(result.getTimedBestResult(false));
            }

            // Only the first pass updates the end time taken from the last recognized word.
            if (pass == 0)
            {
                if (hypothesis.size() > 0)
                {
                    WordResult lastWord = (WordResult)hypothesis.get(hypothesis.size() - 1);
                    lastFrame = lastWord.getTimeFrame().getEnd();
                }
            }

            ArrayList words = new ArrayList();
            for (Iterator it = hypothesis.iterator(); it.hasNext(); )
            {
                WordResult recognized = (WordResult)it.next();
                words.add(recognized.getWord().getSpelling());
            }

            int[] alignment = aligner.align(words, range);

            string decodingMessage = new StringBuilder()
                .append("Decoding result is ").append(hypothesis)
                .toString();
            this.logger.info(decodingMessage);

            this.dumpAlignmentStats(transcript, alignment, hypothesis);

            for (int j = 0; j < alignment.Length; j++)
            {
                if (alignment[j] == -1)
                {
                    continue;
                }
                alignedWords.put(Integer.valueOf(alignment[j]), hypothesis.get(j));
            }

            stream.close();
            this.recognizer.deallocate();
        }

        this.scheduleNextAlignment(transcript, alignedWords, ranges, texts, timeFrames, lastFrame);
    }

    return new ArrayList(alignedWords.values());
}