Exemple #1
0
        /// <summary>
        /// Collapses lattice words that compare equal into a single entry.
        /// </summary>
        /// <remarks>
        /// For each pair where <c>first.Equals(second)</c> holds, the later entry is
        /// merged into the earlier one via <c>Merge</c>, removed from
        /// <c>latticeWords</c>, and removed from <c>wordsStartAt</c>,
        /// <c>wordsEndAt</c> and every <c>wordsAtTime</c> slot from its start node
        /// through its end node.
        /// </remarks>
        /// <returns>The number of entries that were merged away.</returns>
        private int MergeDuplicates()
        {
            int numMerged = 0;

            for (int i = 0; i < latticeWords.Count - 1; i++)
            {
                HTKLatticeReader.LatticeWord first = latticeWords[i];
                for (int j = i + 1; j < latticeWords.Count; j++)
                {
                    HTKLatticeReader.LatticeWord second = latticeWords[j];
                    if (!first.Equals(second))
                    {
                        continue;
                    }
                    if (Debug)
                    {
                        log.Info("removed duplicate");
                    }
                    first.Merge(second);
                    // Remove by position: j is an index, not an element, so RemoveAt is
                    // the correct call (Remove expects the element itself).
                    latticeWords.RemoveAt(j);
                    wordsStartAt[second.startNode].Remove(second);
                    wordsEndAt[second.endNode].Remove(second);
                    for (int k = second.startNode; k <= second.endNode; k++)
                    {
                        wordsAtTime[k].Remove(second);
                    }
                    numMerged++;
                    // The element that followed the removed one now sits at index j;
                    // step back so the loop increment re-examines that position.
                    j--;
                }
            }
            return numMerged;
        }
Exemple #2
0
        /// <summary>
        /// Moves the end node of every word in <paramref name="words"/> to
        /// <paramref name="newEndTime"/> and keeps <c>latticeWords</c>,
        /// <c>wordsStartAt</c>, <c>wordsEndAt</c> and <c>wordsAtTime</c> in step.
        /// </summary>
        /// <remarks>
        /// If a word with the new end node already has an equal entry in
        /// <c>latticeWords</c>, the word is merged into that entry instead of being
        /// re-added. Words that were merged or whose end node actually changed are
        /// removed from <paramref name="words"/> once the iteration has finished.
        /// </remarks>
        /// <param name="words">Words to re-time; entries may be removed from this list.</param>
        /// <param name="newEndTime">The end node to assign.</param>
        private void ChangeEndTimes(IList <HTKLatticeReader.LatticeWord> words, int newEndTime)
        {
            // Removals from 'words' are deferred so the list is not modified while enumerated.
            List<HTKLatticeReader.LatticeWord> pendingRemoval = new List<HTKLatticeReader.LatticeWord>();

            foreach (HTKLatticeReader.LatticeWord lw in words)
            {
                latticeWords.Remove(lw);
                int previousEnd = lw.endNode;
                lw.endNode = newEndTime;

                // Look for an existing entry equal to the re-timed word.
                int twinIndex = latticeWords.IndexOf(lw);
                if (twinIndex >= 0)
                {
                    if (Debug)
                    {
                        log.Info("duplicate found");
                    }
                    HTKLatticeReader.LatticeWord twin = latticeWords[twinIndex];
                    // Restore the original end node before merging and unindexing, so the
                    // wordsAtTime range below covers the slots the word occupied.
                    lw.endNode = previousEnd;
                    twin.Merge(lw);
                    wordsStartAt[lw.startNode].Remove(lw);
                    pendingRemoval.Add(lw);
                    for (int t = lw.startNode; t <= lw.endNode; t++)
                    {
                        wordsAtTime[t].Remove(lw);
                    }
                    continue;
                }

                if (previousEnd > newEndTime)
                {
                    // The word got shorter: drop it from the slots it no longer covers.
                    for (int t = newEndTime + 1; t <= previousEnd; t++)
                    {
                        wordsAtTime[t].Remove(lw);
                    }
                }
                else
                {
                    // The word got longer (or is unchanged): add it to the newly covered slots.
                    for (int t = previousEnd + 1; t <= newEndTime; t++)
                    {
                        wordsAtTime[t].Add(lw);
                    }
                }
                latticeWords.Add(lw);
                if (previousEnd != newEndTime)
                {
                    pendingRemoval.Add(lw);
                    wordsEndAt[newEndTime].Add(lw);
                }
            }
            words.RemoveAll(pendingRemoval);
        }
Exemple #3
0
        /// <summary>
        /// Repeatedly scans every <c>wordsAtTime</c> slot for pairs of entries whose
        /// <c>word</c> fields match ignoring case and hands each such pair to
        /// <c>RemoveRedundentPair</c>, until a full pass makes no change.
        /// </summary>
        private void RemoveRedundency()
        {
            bool changed;

            do
            {
                changed = false;
                foreach (List<HTKLatticeReader.LatticeWord> slot in wordsAtTime)
                {
                    // Slots holding fewer than two words have no pairs; the loop bounds
                    // below simply produce no iterations for them.
                    for (int j = 0; j < slot.Count - 1; j++)
                    {
                        HTKLatticeReader.LatticeWord w1 = slot[j];
                        for (int k = j + 1; k < slot.Count; k++)
                        {
                            HTKLatticeReader.LatticeWord w2 = slot[k];
                            bool sameWord = Sharpen.Runtime.EqualsIgnoreCase(w1.word, w2.word);
                            if (sameWord && RemoveRedundentPair(w1, w2))
                            {
                                // A successful removal ends the search for partners of w1;
                                // the outer loop moves on to the next index.
                                changed = true;
                                break;
                            }
                        }
                    }
                }
            }
            while (changed);
        }
Exemple #4
0
 /// <summary>
 /// Combines the <c>am</c> and <c>lm</c> values of a lattice word into one score.
 /// </summary>
 /// <param name="lw">The lattice word whose values are read.</param>
 /// <returns><c>lw.am</c> multiplied by 100 plus <c>lw.lm</c>.</returns>
 private double GetProb(HTKLatticeReader.LatticeWord lw)
 {
     const double amScale = 100.0;
     return lw.lm + lw.am * amScale;
 }
Exemple #5
0
        /// <summary>
        /// Parses lattice text from <paramref name="in"/> and fills
        /// <c>latticeWords</c>, <c>numStates</c> and <c>nodeTimes</c>.
        /// </summary>
        /// <remarks>
        /// Layout consumed by this method: optional leading lines starting with
        /// '#', then word lines of the form
        /// <c>start end lm=..,am=.. word(pronunciation)</c>, then one line holding
        /// the node count, then one <c>index t=time</c> line per node. Numbers are
        /// parsed with the invariant culture so that '.' is always the decimal
        /// separator. A malformed node-time line is logged and terminates the
        /// process with exit code 1.
        /// </remarks>
        /// <param name="in">Reader supplying the lattice text line by line.</param>
        /// <exception cref="System.IO.EndOfStreamException">
        /// The input ends before the node count or all node-time lines were read.
        /// </exception>
        /// <exception cref="System.Exception"/>
        private void ReadInput(BufferedReader @in)
        {
            // GET RID OF COMMENT LINES
            string line = @in.ReadLine();

            while (line != null && line.Trim().StartsWith("#"))
            {
                line = @in.ReadLine();
            }
            // READ LATTICE
            latticeWords = new List <HTKLatticeReader.LatticeWord>();
            Pattern wordLinePattern = Pattern.Compile("(\\d+)\\s+(\\d+)\\s+lm=(-?\\d+\\.\\d+),am=(-?\\d+\\.\\d+)\\s+([^( ]+)(?:\\((\\d+)\\))?.*");

            while (true)
            {
                if (line == null)
                {
                    // ReadLine yields null at end of input; fail with a clear message
                    // instead of a NullReferenceException further down.
                    throw new System.IO.EndOfStreamException("Lattice input ended before the node count line.");
                }
                Matcher wordLineMatcher = wordLinePattern.Matcher(line);
                if (!wordLineMatcher.Matches())
                {
                    // First non-word line: it is expected to hold the node count.
                    break;
                }
                // Node indices in the file are 1-based; store them 0-based.
                int    startNode = System.Convert.ToInt32(wordLineMatcher.Group(1)) - 1;
                int    endNode   = System.Convert.ToInt32(wordLineMatcher.Group(2)) - 1;
                double lm        = double.Parse(wordLineMatcher.Group(3), System.Globalization.CultureInfo.InvariantCulture);
                double am        = double.Parse(wordLineMatcher.Group(4), System.Globalization.CultureInfo.InvariantCulture);
                string word      = wordLineMatcher.Group(5).ToLower();
                string pronun    = wordLineMatcher.Group(6);

                // "<s>" entries are skipped entirely; "</s>" is mapped to the boundary word.
                if (!Sharpen.Runtime.EqualsIgnoreCase(word, "<s>"))
                {
                    if (Sharpen.Runtime.EqualsIgnoreCase(word, "</s>"))
                    {
                        word = LexiconConstants.Boundary;
                    }
                    // The "(n)" pronunciation suffix is optional; default to 0 when absent.
                    int pronunciation = (pronun == null) ? 0 : System.Convert.ToInt32(pronun);
                    HTKLatticeReader.LatticeWord lw = new HTKLatticeReader.LatticeWord(word, startNode, endNode, lm, am, pronunciation, mergeType);
                    if (Debug)
                    {
                        log.Info(lw);
                    }
                    latticeWords.Add(lw);
                }
                line = @in.ReadLine();
            }
            // GET NUMBER OF NODES
            numStates = System.Convert.ToInt32(line.Trim());
            if (Debug)
            {
                log.Info(numStates);
            }
            // READ NODE TIMES
            nodeTimes = new int[numStates];
            Pattern nodeTimePattern = Pattern.Compile("(\\d+)\\s+t=(\\d+)\\s*");

            for (int i = 0; i < numStates; i++)
            {
                string nodeLine = @in.ReadLine();
                if (nodeLine == null)
                {
                    throw new System.IO.EndOfStreamException("Lattice input ended after " + i + " of " + numStates + " node time lines.");
                }
                Matcher nodeTimeMatcher = nodeTimePattern.Matcher(nodeLine);
                if (!nodeTimeMatcher.Matches())
                {
                    log.Info("Input File Error");
                    System.Environment.Exit(1);
                }
                // assert ((Integer.parseInt(nodeTimeMatcher.group(1))-1) == i) ;
                nodeTimes[i] = System.Convert.ToInt32(nodeTimeMatcher.Group(2));
                if (Debug)
                {
                    log.Info(i + "\tt=" + nodeTimes[i]);
                }
            }
        }
Exemple #6
0
        //return;
        /// <summary>
        /// Tries to bring two lattice words onto a common start and end node by
        /// re-timing the words that start or end at the nodes being abandoned.
        /// </summary>
        /// <remarks>
        /// The common start is the later of the two start nodes and the common end
        /// is the earlier of the two end nodes. Before anything is changed, the
        /// words starting at the abandoned start node and the words ending at the
        /// abandoned end node are checked; if any of them would not fit the new
        /// boundaries, nothing is modified.
        /// </remarks>
        /// <param name="w1">First word of the pair.</param>
        /// <param name="w2">Second word of the pair.</param>
        /// <returns>
        /// <c>true</c> if the start/end times were changed; <c>false</c> if the
        /// legality check rejected the pair.
        /// </returns>
        private bool RemoveRedundentPair(HTKLatticeReader.LatticeWord w1, HTKLatticeReader.LatticeWord w2)
        {
            if (Debug)
            {
                log.Info("trying to remove:");
                log.Info(w1);
                log.Info(w2);
            }

            // We must pick new start and end times that are legal: the later start
            // and the earlier end survive, the other two nodes are abandoned.
            int newStart = System.Math.Max(w1.startNode, w2.startNode);
            int oldStart = System.Math.Min(w1.startNode, w2.startNode);
            int newEnd   = System.Math.Min(w1.endNode, w2.endNode);
            int oldEnd   = System.Math.Max(w1.endNode, w2.endNode);

            // Check legality (passing this check does not guarantee the result is legal).
            bool legal = true;

            foreach (HTKLatticeReader.LatticeWord starter in wordsStartAt[oldStart])
            {
                bool endsBeforeNewStart = starter.endNode < newStart;
                bool endsOnNewStart     = starter.endNode == newStart && starter.endNode != starter.startNode;
                if (endsBeforeNewStart || endsOnNewStart)
                {
                    legal = false;
                    break;
                }
            }
            if (legal)
            {
                foreach (HTKLatticeReader.LatticeWord finisher in wordsEndAt[oldEnd])
                {
                    bool startsAfterNewEnd = finisher.startNode > newEnd;
                    bool startsOnNewEnd    = finisher.startNode == newEnd && finisher.endNode != finisher.startNode;
                    if (startsAfterNewEnd || startsOnNewEnd)
                    {
                        legal = false;
                        break;
                    }
                }
            }
            if (!legal)
            {
                if (Debug)
                {
                    log.Info("failed");
                }
                return false;
            }

            // Change start/end times of adjacent entries.
            ChangeStartTimes(wordsStartAt[oldEnd], newEnd);
            ChangeEndTimes(wordsEndAt[oldStart], newStart);
            // Change start/end times of words adjacent to adjacent entries.
            ChangeStartTimes(wordsStartAt[oldStart], newStart);
            ChangeEndTimes(wordsEndAt[oldEnd], newEnd);
            if (Debug)
            {
                log.Info("succeeded");
            }
            return true;
        }