private WordGraphConfidences ComputeWordGraphConfidences(WordGraph wordGraph)
        {
            double normalizationFactor = LogSpace.Zero;
            var    backwardProbs       = new double[wordGraph.Arcs.Count];

            for (int i = wordGraph.Arcs.Count - 1; i >= 0; i--)
            {
                WordGraphArc arc = wordGraph.Arcs[i];
                double       sum = LogSpace.One;
                if (!wordGraph.FinalStates.Contains(arc.NextState))
                {
                    sum = LogSpace.Zero;
                    foreach (int nextArcIndex in wordGraph.GetNextArcIndices(arc.NextState))
                    {
                        WordGraphArc nextArc = wordGraph.Arcs[nextArcIndex];
                        sum = LogSpace.Add(sum, LogSpace.Multiply(nextArc.Score, backwardProbs[nextArcIndex]));
                    }
                }
                backwardProbs[i] = sum;
                if (arc.PrevState == WordGraph.InitialState)
                {
                    normalizationFactor = LogSpace.Add(normalizationFactor,
                                                       LogSpace.Multiply(arc.Score, backwardProbs[i]));
                }
            }

            var rawWpps      = new Dictionary <string, Dictionary <int, double> >();
            var forwardProbs = new (double Prob, int Index)[wordGraph.Arcs.Count];
Beispiel #2
0
        /// <summary>
        /// Computes, for every state of the word graph, the best score of a path that starts at
        /// <paramref name="state"/> and ends at that state, together with the arc that achieved it.
        /// </summary>
        /// <param name="state">The state from which paths are measured.</param>
        /// <param name="prevScores">
        /// Receives one score per state; states that were never reached keep <c>LogSpace.Zero</c>.
        /// Empty when the graph is empty.
        /// </param>
        /// <param name="stateBestPrevArcs">
        /// Receives, per state, the index of the arc that produced the best score. Entries that were
        /// never updated stay at the array default of 0. Empty when the graph is empty.
        /// </param>
        private void ComputePrevScores(int state, out double[] prevScores, out int[] stateBestPrevArcs)
        {
            if (IsEmpty)
            {
                prevScores = new double[0];
                stateBestPrevArcs = new int[0];
                return;
            }

            // Every state starts out with the "zero" score until an arc proves otherwise.
            prevScores = new double[StateCount];
            for (int i = 0; i < prevScores.Length; i++)
            {
                prevScores[i] = LogSpace.Zero;
            }
            stateBestPrevArcs = new int[StateCount];

            // The start of the search carries the graph's initial score when it is the initial
            // state, and a plain 0 otherwise.
            prevScores[state] = state == InitialState ? InitialStateScore : 0;

            var reachable = new HashSet<int>();
            reachable.Add(state);

            // Single pass over the arcs in index order; an arc is only followed when its source
            // state has already been reached by an earlier arc (or is the start state).
            for (int arcIndex = 0; arcIndex < Arcs.Count; arcIndex++)
            {
                WordGraphArc arc = Arcs[arcIndex];
                int from = arc.PrevState;
                int to = arc.NextState;

                if (!reachable.Contains(from))
                {
                    // Arc starts at an unreached state: make sure its target does not carry a score.
                    if (!reachable.Contains(to))
                    {
                        prevScores[to] = LogSpace.Zero;
                    }
                    continue;
                }

                double candidate = LogSpace.Multiply(arc.Score, prevScores[from]);
                if (candidate > prevScores[to])
                {
                    prevScores[to] = candidate;
                    stateBestPrevArcs[to] = arcIndex;
                }
                reachable.Add(to);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Computes, for every state, the best score obtainable over the arcs leading from that
        /// state to a final state.
        /// </summary>
        /// <returns>
        /// One score per state, indexed by state. Final states are seeded with
        /// <c>InitialStateScore</c>; every other state starts at <c>LogSpace.Zero</c> and is raised
        /// whenever an outgoing arc yields a better score.
        /// </returns>
        public IEnumerable <double> ComputeRestScores()
        {
            var bestToFinal = new double[StateCount];
            for (int s = 0; s < bestToFinal.Length; s++)
            {
                bestToFinal[s] = LogSpace.Zero;
            }

            foreach (int finalState in _finalStates)
            {
                bestToFinal[finalState] = InitialStateScore;
            }

            // Walk the arcs from last to first so that an arc's target score is read before the
            // arc's source score is updated.
            int arcIndex = Arcs.Count;
            while (arcIndex > 0)
            {
                arcIndex--;

                WordGraphArc arc = Arcs[arcIndex];
                double viaArc = LogSpace.Multiply(arc.Score, bestToFinal[arc.NextState]);
                if (viaArc > bestToFinal[arc.PrevState])
                {
                    bestToFinal[arc.PrevState] = viaArc;
                }
            }

            return bestToFinal;
        }
Beispiel #4
0
        /// <summary>
        /// Removes redundant arcs from the word graph.
        /// TODO: This seems to affect the results of an interactive translation session, so don't use it yet.
        /// </summary>
        /// <remarks>
        /// Builds a new graph by processing a queue of "DFA" states, each of which wraps a set of "NFA"
        /// states of this graph. Outgoing arcs of a DFA state are grouped by their next word; for each
        /// group only the highest-scoring path is copied into the result. This graph is not modified.
        /// </remarks>
        /// <returns>The optimized word graph.</returns>
        public WordGraph Optimize()
        {
            var dfaArcs           = new List <WordGraphArc>();
            var dfaStates         = new DfaStateCollection();
            var dfaFinalStates    = new HashSet <int>();
            // Index 0 is taken by the start state enqueued below, so fresh states are numbered from 1.
            int nextDfaStateIndex = 1;
            var unmarkedStates    = new Queue <DfaState>();

            // Seed the work queue with DFA state 0, which wraps NFA state 0.
            unmarkedStates.Enqueue(new DfaState(0, new[] { new NfaState(0) }));

            while (unmarkedStates.Count > 0)
            {
                DfaState dfaState      = unmarkedStates.Dequeue();
                // Candidate outgoing arcs of this DFA state, keyed by the next word.
                var      candidateArcs = new Dictionary <string, DfaArc>();
                // NOTE(review): GetArcIndices is not visible here; presumably it yields the arcs that
                // can be continued from each NFA state contained in dfaState - confirm.
                foreach ((int arcIndex, NfaState nfaState) in GetArcIndices(dfaState))
                {
                    WordGraphArc arc           = Arcs[arcIndex];
                    int          nextWordIndex = nfaState.WordIndex + 1;
                    DfaArc       candidateArc  = candidateArcs.GetOrCreate(arc.Words[nextWordIndex]);
                    if (nextWordIndex == arc.Words.Count - 1)
                    {
                        // This is the last word of the arc, so the arc is fully consumed and the
                        // candidate leads to the arc's real target state.
                        candidateArc.NfaStates.Add(new NfaState(arc.NextState));

                        // Extend the path already recorded for this NFA state index, or start a new
                        // single-arc path at the current DFA state when none was recorded.
                        Path path;
                        if (dfaState.Paths.TryGetValue(nfaState.StateIndex, out Path prevPath))
                        {
                            path = new Path(prevPath.StartState, prevPath.Arcs.Concat(arcIndex),
                                            LogSpace.Multiply(prevPath.Score, arc.Score));
                        }
                        else
                        {
                            path = new Path(dfaState.Index, new[] { arcIndex }, arc.Score);
                        }

                        // Per target state, keep only the highest-scoring path.
                        if (!candidateArc.Paths.TryGetValue(arc.NextState, out Path otherPath) ||
                            path.Score > otherPath.Score)
                        {
                            candidateArc.Paths[arc.NextState] = path;
                        }
                    }
                    else
                    {
                        // More words remain on this arc: record a sub-state identified by the source
                        // state, the arc and the word position reached so far.
                        candidateArc.NfaStates.Add(new NfaState(nfaState.StateIndex, arcIndex, nextWordIndex));
                        candidateArc.IsNextSubState = true;

                        // Carry any path recorded for the source state forward unchanged.
                        if (dfaState.Paths.TryGetValue(nfaState.StateIndex, out Path prevPath))
                        {
                            candidateArc.Paths[nfaState.StateIndex] = prevPath;
                        }
                    }
                }

                foreach (DfaArc candidateArc in candidateArcs.Values)
                {
                    // Look for an already registered DFA state with the same NFA state set.
                    if (!dfaStates.TryGetValue(candidateArc.NfaStates, out DfaState nextDfaState))
                    {
                        // A sub-state target reuses the current DFA state's index; any other target
                        // consumes a fresh index.
                        int stateIndex = candidateArc.IsNextSubState ? dfaState.Index : nextDfaStateIndex++;
                        nextDfaState = new DfaState(stateIndex, candidateArc.NfaStates);
                        if (candidateArc.IsNextSubState)
                        {
                            // Sub-state targets inherit the candidate's paths and are not registered
                            // in dfaStates.
                            foreach (KeyValuePair <int, Path> kvp in candidateArc.Paths)
                            {
                                nextDfaState.Paths.Add(kvp);
                            }
                        }
                        else
                        {
                            dfaStates.Add(nextDfaState);
                        }
                        unmarkedStates.Enqueue(nextDfaState);
                    }

                    // The target is final when any of its non-sub-state NFA states is a final state
                    // of this graph.
                    bool isFinal = nextDfaState.NfaStates.Where(s => !s.IsSubState)
                                   .Any(s => FinalStates.Contains(s.StateIndex));
                    // Arcs are emitted only when the candidate has at least one path and either ends
                    // in a final state or does not lead into a sub-state.
                    if ((isFinal || !candidateArc.IsNextSubState) && candidateArc.Paths.Count > 0)
                    {
                        Path bestPath = candidateArc.Paths.Values.MaxBy(p => p.Score);

                        // Replay the best path, copying each original arc's data onto a chain of
                        // result states that begins at the path's start state.
                        int curState = bestPath.StartState;
                        for (int i = 0; i < bestPath.Arcs.Count; i++)
                        {
                            WordGraphArc nfaArc    = Arcs[bestPath.Arcs[i]];
                            // The last arc of a non-sub-state candidate ends at the target DFA state;
                            // every other arc ends at a freshly numbered state.
                            int          nextState = !candidateArc.IsNextSubState && i == bestPath.Arcs.Count - 1
                                                                ? nextDfaState.Index
                                                                : nextDfaStateIndex++;
                            dfaArcs.Add(new WordGraphArc(curState, nextState, nfaArc.Score, nfaArc.Words,
                                                         nfaArc.Alignment, nfaArc.SourceSegmentRange, nfaArc.IsUnknown, nfaArc.WordConfidences));
                            curState = nextState;
                        }
                        if (isFinal)
                        {
                            // The state where the copied chain ends becomes a final state of the result.
                            dfaFinalStates.Add(curState);
                        }
                    }
                }
            }

            // The result keeps this graph's initial state score.
            return(new WordGraph(dfaArcs, dfaFinalStates, InitialStateScore));
        }