/// <summary>
/// Aligns <paramref name="query"/> against <paramref name="database"/> with a
/// tuple size of 1 and asserts that <c>Helper.Contains</c> accepts the
/// resulting alignment for the expected <paramref name="result"/> values.
/// </summary>
/// <param name="database">Reference words the aligner is built from.</param>
/// <param name="query">Words to align against the reference.</param>
/// <param name="result">Expected alignment values handed to <c>Helper.Contains</c>.</param>
private void Align(List <string> database, List <string> query, params int[] result)
        {
            int[] alignment = new LongTextAligner(database, 1).Align(query);

            var  alignmentList    = Utilities.AsList(alignment);
            bool containsExpected = Helper.Contains(alignmentList, result);

            Assert.IsTrue(containsExpected);
        }
Example #2
0
        /// <summary>
        /// Reads "transcription-small.txt", splits its content into words on
        /// spaces, line feeds and carriage returns (dropping empty tokens), and
        /// builds the aligner under test with a tuple size of 2.
        /// </summary>
        public TextAlignerSmallTest()
        {
            var url        = new URL("transcription-small.txt");
            var separators = new[] { ' ', '\n', '\r' };

            // RemoveEmptyEntries drops the zero-length tokens produced by
            // consecutive separators (e.g. "\r\n").
            string[] tokens = File.ReadAllText(url.Path)
                              .Split(separators, StringSplitOptions.RemoveEmptyEntries);

            _aligner = new LongTextAligner(new List <String>(tokens), 2);
        }
Example #3
0
        /// <summary>
        /// Aligns the audio file against the transcript in four passes and
        /// returns the recognized words that could be matched to the transcript.
        /// </summary>
        /// <remarks>
        /// Passes 0-2 decode with the language model set to the pending text and
        /// skip texts shorter than <c>MinLmAlignSize</c>. Pass 3 switches the
        /// "decoder->searchManager" property to "alignerSearchManager" and feeds
        /// the pending text to the grammar instead. After every pass
        /// <c>ScheduleNextAlignment</c> refills the work queues.
        /// </remarks>
        /// <param name="audioUrl">Audio file; opened once per decoded text chunk.</param>
        /// <param name="sentenceTranscript">Transcript sentences, converted to words via <c>SentenceToWords</c>.</param>
        /// <returns>Aligned word results, ordered by the index they were aligned to.</returns>
        public List <WordResult> Align(FileInfo audioUrl, List <string> sentenceTranscript)
        {
            var transcript = SentenceToWords(sentenceTranscript);

            var aligner      = new LongTextAligner(transcript, TupleSize);
            var alignedWords = new Dictionary <int, WordResult>();

            // Three parallel work queues: entry k of each describes one pending job.
            var ranges     = new LinkedList <Range>();
            var texts      = new LinkedList <List <string> >();
            var timeFrames = new LinkedList <TimeFrame>();

            ranges.AddLast(new Range(0, transcript.Count));
            texts.Offer(transcript);
            timeFrames.Offer(TimeFrame.Infinite);

            long lastFrame = TimeFrame.Infinite.End;

            for (int i = 0; i < 4; i++)
            {
                if (i == 3)
                {
                    _context.SetLocalProperty("decoder->searchManager", "alignerSearchManager");
                }

                while (texts.Count != 0)
                {
                    Debug.Assert(texts.Count == ranges.Count);
                    Debug.Assert(texts.Count == timeFrames.Count);

                    var text  = texts.Poll();
                    var frame = timeFrames.Poll();
                    var range = ranges.Poll();

                    // Check the size of the polled text itself; the queue
                    // length after Poll() says nothing about the text.
                    if (i < 3 && text.Count < MinLmAlignSize)
                    {
                        continue;
                    }

                    this.LogInfo("Aligning frame " + frame + " to text " + text + " range " + range);

                    if (i < 3)
                    {
                        _languageModel.SetText(text);
                    }

                    _recognizer.Allocate();

                    if (i == 3)
                    {
                        _grammar.SetWords(text);
                    }

                    var hypothesis = new List <WordResult>();

                    // Dispose the audio stream once decoding of this chunk is
                    // finished so file handles are not leaked per iteration.
                    using (var audioStream = audioUrl.OpenRead())
                    {
                        _context.SetSpeechSource(audioStream, frame);

                        Result speechResult;
                        while (null != (speechResult = _recognizer.Recognize()))
                        {
                            hypothesis.AddRange(speechResult.GetTimedBestResult(false));
                        }
                    }

                    // The first pass records where the recognized speech ends.
                    if (i == 0 && hypothesis.Count > 0)
                    {
                        lastFrame = hypothesis[hypothesis.Count - 1].TimeFrame.End;
                    }

                    var words = new List <string>(hypothesis.Count);
                    foreach (WordResult wr in hypothesis)
                    {
                        words.Add(wr.Word.Spelling);
                    }

                    int[]             alignment = aligner.Align(words, range);
                    List <WordResult> results   = hypothesis;
                    this.LogInfo("Decoding result is " + results);

                    DumpAlignmentStats(transcript, alignment, results);

                    for (int j = 0; j < alignment.Length; j++)
                    {
                        if (alignment[j] != -1)
                        {
                            // Indexer instead of Add: a repeated index replaces
                            // the earlier entry rather than throwing.
                            alignedWords[alignment[j]] = hypothesis[j];
                        }
                    }

                    _recognizer.Deallocate();
                }
                ScheduleNextAlignment(transcript, alignedWords, ranges, texts, timeFrames, lastFrame);
            }

            // Dictionary enumeration order is unspecified, so emit the values
            // sorted by their aligned index.
            var orderedKeys = new List <int>(alignedWords.Keys);
            orderedKeys.Sort();

            var ordered = new List <WordResult>(orderedKeys.Count);
            foreach (int key in orderedKeys)
            {
                ordered.Add(alignedWords[key]);
            }
            return(ordered);
        }
        /// <summary>
        /// Demo entry point: aligns an audio file with a text and prints a
        /// diff-style listing of the result.
        /// </summary>
        /// <remarks>
        /// With at least two arguments, args[0] is the audio file and args[1] a
        /// file holding the text; otherwise a bundled resource and a fixed digit
        /// string are used. args[2], args[3] and args[4] optionally override the
        /// acoustic model path, the dictionary path and the G2P path.
        /// Output lines: "- word" for a transcript word with no aligned result,
        /// "+ word [time]" for a recognized word lying between aligned ones, and
        /// "  word [time]" for an aligned word.
        /// NOTE(review): the print calls are written as
        /// java.lang.System.@out.printf, inferred from the printf-style
        /// arguments; confirm against the upstream source.
        /// </remarks>
        /// <param name="args">Command-line arguments as described above.</param>
        public static void main(string[] args)
        {
            URL    audioUrl;
            string text;

            if (args.Length > 1)
            {
                audioUrl = new File(args[0]).toURI().toURL();
                Scanner scanner = new Scanner(new File(args[1]));
                // "\\Z" as delimiter makes next() return the whole file content.
                scanner.useDelimiter("\\Z");
                text = scanner.next();
                scanner.close();
            }
            else
            {
                audioUrl = ClassLiteral <AlignerDemo> .Value.getResource("10001-90210-01803.wav");

                text = "one zero zero zero one nine oh two one oh zero one eight zero three";
            }
            string        amPath        = (args.Length <= 2) ? "resource:/edu/cmu/sphinx/models/en-us/en-us" : args[2];
            string        dictPath      = (args.Length <= 3) ? "resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict" : args[3];
            string        g2pPath       = (args.Length <= 4) ? null : args[4];
            SpeechAligner speechAligner = new SpeechAligner(amPath, dictPath, g2pPath);
            List          list          = speechAligner.align(audioUrl, text);
            ArrayList     arrayList     = new ArrayList();
            Iterator      iterator      = list.iterator();

            // Collect the spellings of the recognized words; they form the
            // database the transcript is aligned against below.
            while (iterator.hasNext())
            {
                WordResult wordResult = (WordResult)iterator.next();
                arrayList.add(wordResult.getWord().getSpelling());
            }
            LongTextAligner longTextAligner    = new LongTextAligner(arrayList, 2);
            List            sentenceTranscript = speechAligner.getTokenizer().expand(text);
            List            list2 = speechAligner.sentenceToWords(sentenceTranscript);

            // array[i] is the index into `list` aligned with transcript word i,
            // or -1 when the word has no counterpart.
            int[] array = longTextAligner.align(list2);
            // Index of the last recognized word already printed.
            int   num   = -1;

            for (int i = 0; i < array.Length; i++)
            {
                if (array[i] == -1)
                {
                    java.lang.System.@out.printf("- %s\n", new object[]
                    {
                        list2.get(i)
                    });
                }
                else
                {
                    // Print recognized words skipped since the last aligned one.
                    if (array[i] - num > 1)
                    {
                        Iterator iterator2 = list.subList(num + 1, array[i]).iterator();
                        while (iterator2.hasNext())
                        {
                            WordResult wordResult2 = (WordResult)iterator2.next();
                            java.lang.System.@out.printf("+ %-25s [%s]\n", new object[]
                            {
                                wordResult2.getWord().getSpelling(),
                                wordResult2.getTimeFrame()
                            });
                        }
                    }
                    java.lang.System.@out.printf("  %-25s [%s]\n", new object[]
                    {
                        ((WordResult)list.get(array[i])).getWord().getSpelling(),
                        ((WordResult)list.get(array[i])).getTimeFrame()
                    });
                    num = array[i];
                }
            }
            // Print recognized words that follow the last aligned one.
            if (num >= 0 && list.size() - num > 1)
            {
                Iterator iterator3 = list.subList(num + 1, list.size()).iterator();
                while (iterator3.hasNext())
                {
                    WordResult wordResult3 = (WordResult)iterator3.next();
                    java.lang.System.@out.printf("+ %-25s [%s]\n", new object[]
                    {
                        wordResult3.getWord().getSpelling(),
                        wordResult3.getTimeFrame()
                    });
                }
            }
        }
Example #5
0
        /// <summary>
        /// Aligns the audio at <paramref name="audioUrl"/> against the transcript
        /// in four passes and returns the aligned word results in key order of
        /// the underlying TreeMap.
        /// </summary>
        /// <remarks>
        /// The language model text is set once from the sentence transcript.
        /// From pass 1 onwards the "decoder->searchManager" property is switched
        /// to "alignerSearchManager" and the pending text is fed to the grammar.
        /// The audio stream and the recognizer are released in finally blocks,
        /// so they are freed even when decoding or alignment throws.
        /// </remarks>
        /// <param name="audioUrl">Location of the audio; opened once per decoded chunk.</param>
        /// <param name="sentenceTranscript">Transcript sentences, converted to words via sentenceToWords.</param>
        /// <returns>A new list holding the values of the alignment map.</returns>
        public virtual List align(URL audioUrl, List sentenceTranscript)
        {
            List            transcript      = this.sentenceToWords(sentenceTranscript);
            LongTextAligner longTextAligner = new LongTextAligner(transcript, 3);
            TreeMap         alignedWords    = new TreeMap();

            // Three parallel work queues: entry k of each describes one pending job.
            LinkedList      ranges          = new LinkedList();
            ArrayDeque      texts           = new ArrayDeque();
            ArrayDeque      timeFrames      = new ArrayDeque();

            ranges.offer(new Range(0, transcript.size()));
            texts.offer(transcript);
            TimeFrame _INFINITE = TimeFrame.__INFINITE;

            timeFrames.offer(_INFINITE);
            long end = TimeFrame.__INFINITE.getEnd();

            this.languageModel.setText(sentenceTranscript);
            for (int i = 0; i < 4; i++)
            {
                if (i == 1)
                {
                    this.context.setLocalProperty("decoder->searchManager", "alignerSearchManager");
                }
                while (!texts.isEmpty())
                {
                    if (!SpeechAligner.assertionsDisabled && texts.size() != ranges.size())
                    {
                        throw new AssertionError();
                    }
                    if (!SpeechAligner.assertionsDisabled && texts.size() != timeFrames.size())
                    {
                        throw new AssertionError();
                    }
                    List      text      = (List)texts.poll();
                    TimeFrame timeFrame = (TimeFrame)timeFrames.poll();
                    Range     range     = (Range)ranges.poll();
                    this.logger.info(new StringBuilder().append("Aligning frame ").append(timeFrame).append(" to text ").append(text).append(" range ").append(range).toString());
                    this.recognizer.allocate();
                    try
                    {
                        if (i >= 1)
                        {
                            this.grammar.setWords(text);
                        }
                        InputStream inputStream = audioUrl.openStream();
                        try
                        {
                            this.context.setSpeechSource(inputStream, timeFrame);
                            ArrayList hypothesis = new ArrayList();
                            Result    result;
                            while (null != (result = this.recognizer.recognize()))
                            {
                                this.logger.info(new StringBuilder().append("Utterance result ").append(result.getTimedBestResult(true)).toString());
                                hypothesis.addAll(result.getTimedBestResult(false));
                            }
                            // The first pass records where the recognized speech ends.
                            if (i == 0 && hypothesis.size() > 0)
                            {
                                end = ((WordResult)hypothesis.get(hypothesis.size() - 1)).getTimeFrame().getEnd();
                            }
                            ArrayList words    = new ArrayList();
                            Iterator  iterator = hypothesis.iterator();
                            while (iterator.hasNext())
                            {
                                WordResult wordResult = (WordResult)iterator.next();
                                words.add(wordResult.getWord().getSpelling());
                            }
                            int[] alignment = longTextAligner.align(words, range);
                            this.logger.info(new StringBuilder().append("Decoding result is ").append(hypothesis).toString());
                            this.dumpAlignmentStats(transcript, alignment, hypothesis);
                            for (int j = 0; j < alignment.Length; j++)
                            {
                                // -1 marks a hypothesis word without a counterpart.
                                if (alignment[j] != -1)
                                {
                                    alignedWords.put(Integer.valueOf(alignment[j]), hypothesis.get(j));
                                }
                            }
                        }
                        finally
                        {
                            // Close the stream before deallocating, as on the
                            // normal path, but also when an exception escapes.
                            inputStream.close();
                        }
                    }
                    finally
                    {
                        this.recognizer.deallocate();
                    }
                }
                this.scheduleNextAlignment(transcript, alignedWords, ranges, texts, timeFrames, end);
            }
            return(new ArrayList(alignedWords.values()));
        }