private WordGraphConfidences ComputeWordGraphConfidences(WordGraph wordGraph) { double normalizationFactor = LogSpace.Zero; var backwardProbs = new double[wordGraph.Arcs.Count]; for (int i = wordGraph.Arcs.Count - 1; i >= 0; i--) { WordGraphArc arc = wordGraph.Arcs[i]; double sum = LogSpace.One; if (!wordGraph.FinalStates.Contains(arc.NextState)) { sum = LogSpace.Zero; foreach (int nextArcIndex in wordGraph.GetNextArcIndices(arc.NextState)) { WordGraphArc nextArc = wordGraph.Arcs[nextArcIndex]; sum = LogSpace.Add(sum, LogSpace.Multiply(nextArc.Score, backwardProbs[nextArcIndex])); } } backwardProbs[i] = sum; if (arc.PrevState == WordGraph.InitialState) { normalizationFactor = LogSpace.Add(normalizationFactor, LogSpace.Multiply(arc.Score, backwardProbs[i])); } } var rawWpps = new Dictionary <string, Dictionary <int, double> >(); var forwardProbs = new (double Prob, int Index)[wordGraph.Arcs.Count];
/// <summary>
/// Computes, for each state, the highest score found for reaching it from <paramref name="state"/>,
/// along with the index of the arc that produced that score.
/// </summary>
/// <remarks>
/// The arcs are visited exactly once, in index order. An arc therefore only contributes when its
/// <c>PrevState</c> has already been reached by the time the arc is visited.
/// </remarks>
/// <param name="state">The state from which scores are accumulated.</param>
/// <param name="prevScores">
/// Receives one score per state (empty when the graph is empty). States that are never reached hold
/// <c>LogSpace.Zero</c>.
/// </param>
/// <param name="stateBestPrevArcs">
/// Receives, per state, the index of the best incoming arc found (empty when the graph is empty).
/// Entries that are never written keep the array default of 0.
/// </param>
private void ComputePrevScores(int state, out double[] prevScores, out int[] stateBestPrevArcs)
{
    if (IsEmpty)
    {
        prevScores = new double[0];
        stateBestPrevArcs = new int[0];
        return;
    }

    prevScores = Enumerable.Repeat(LogSpace.Zero, StateCount).ToArray();
    stateBestPrevArcs = new int[StateCount];

    // Seed the start state: the graph's own initial score when starting from the initial state,
    // otherwise 0 (presumably the log-space representation of probability one).
    prevScores[state] = state == InitialState ? InitialStateScore : 0;

    var reachedStates = new HashSet<int> { state };
    for (int i = 0; i < Arcs.Count; i++)
    {
        WordGraphArc current = Arcs[i];
        int source = current.PrevState;
        int target = current.NextState;

        if (!reachedStates.Contains(source))
        {
            // The arc starts at a state that has not been reached; make sure an unreached target
            // carries the zero score.
            if (!reachedStates.Contains(target))
            {
                prevScores[target] = LogSpace.Zero;
            }
            continue;
        }

        double candidate = LogSpace.Multiply(current.Score, prevScores[source]);
        if (prevScores[target] < candidate)
        {
            prevScores[target] = candidate;
            stateBestPrevArcs[target] = i;
        }
        reachedStates.Add(target);
    }
}
/// <summary>
/// Computes, for each state, the highest score found for travelling from that state to a final state.
/// </summary>
/// <remarks>
/// Final states are seeded with <c>InitialStateScore</c>. The arcs are then visited once, from the
/// last index down to the first, propagating the best score from each arc's <c>NextState</c> back
/// to its <c>PrevState</c>.
/// </remarks>
/// <returns>
/// One score per state, indexed by state. States that never receive a score hold <c>LogSpace.Zero</c>.
/// </returns>
public IEnumerable<double> ComputeRestScores()
{
    var bestToFinal = new double[StateCount];
    for (int s = 0; s < bestToFinal.Length; s++)
    {
        bestToFinal[s] = LogSpace.Zero;
    }

    foreach (int finalState in _finalStates)
    {
        bestToFinal[finalState] = InitialStateScore;
    }

    int arcIndex = Arcs.Count;
    while (--arcIndex >= 0)
    {
        WordGraphArc current = Arcs[arcIndex];
        double candidate = LogSpace.Multiply(current.Score, bestToFinal[current.NextState]);
        if (bestToFinal[current.PrevState] < candidate)
        {
            bestToFinal[current.PrevState] = candidate;
        }
    }

    return bestToFinal;
}
/// <summary>
/// Removes redundant arcs from the word graph.
/// TODO: This seems to affect the results of an interactive translation session, so don't use it yet.
/// </summary>
/// <remarks>
/// Works through a queue of pending states, grouping outgoing arcs by their next word. For every
/// group it keeps only the best-scoring path and re-emits that path's arcs with newly assigned
/// state indices. (The NFA/DFA naming suggests a determinization-style construction.)
/// </remarks>
/// <returns>The optimized word graph.</returns>
public WordGraph Optimize()
{
    var emittedArcs = new List<WordGraphArc>();
    var knownStates = new DfaStateCollection();
    var emittedFinalStates = new HashSet<int>();
    // Index 0 is taken by the start state enqueued below; fresh indices are handed out from 1.
    int nextDfaStateIndex = 1;

    var pending = new Queue<DfaState>();
    pending.Enqueue(new DfaState(0, new[] { new NfaState(0) }));

    while (pending.Count > 0)
    {
        DfaState current = pending.Dequeue();

        // Group the arcs leaving the current state by the next word they consume.
        var arcsByWord = new Dictionary<string, DfaArc>();
        foreach ((int arcIndex, NfaState nfaState) in GetArcIndices(current))
        {
            WordGraphArc arc = Arcs[arcIndex];
            int nextWordIndex = nfaState.WordIndex + 1;
            DfaArc group = arcsByWord.GetOrCreate(arc.Words[nextWordIndex]);

            bool consumesLastWord = nextWordIndex == arc.Words.Count - 1;
            if (!consumesLastWord)
            {
                // Still inside a multi-word arc: stay in a sub-state and carry any existing path along.
                group.NfaStates.Add(new NfaState(nfaState.StateIndex, arcIndex, nextWordIndex));
                group.IsNextSubState = true;
                if (current.Paths.TryGetValue(nfaState.StateIndex, out Path carried))
                {
                    group.Paths[nfaState.StateIndex] = carried;
                }
                continue;
            }

            // The arc is fully consumed: extend the path that led here, or start a new one.
            group.NfaStates.Add(new NfaState(arc.NextState));
            Path extended = current.Paths.TryGetValue(nfaState.StateIndex, out Path leadIn)
                ? new Path(leadIn.StartState, leadIn.Arcs.Concat(arcIndex), LogSpace.Multiply(leadIn.Score, arc.Score))
                : new Path(current.Index, new[] { arcIndex }, arc.Score);

            // Keep only the best-scoring path for each target state.
            bool hasIncumbent = group.Paths.TryGetValue(arc.NextState, out Path incumbent);
            if (!hasIncumbent || extended.Score > incumbent.Score)
            {
                group.Paths[arc.NextState] = extended;
            }
        }

        foreach (DfaArc group in arcsByWord.Values)
        {
            bool alreadyKnown = knownStates.TryGetValue(group.NfaStates, out DfaState target);
            if (!alreadyKnown)
            {
                if (group.IsNextSubState)
                {
                    // Sub-states reuse the current state's index, inherit the pending paths, and are
                    // not registered in the known-state collection.
                    target = new DfaState(current.Index, group.NfaStates);
                    foreach (KeyValuePair<int, Path> entry in group.Paths)
                    {
                        target.Paths.Add(entry);
                    }
                }
                else
                {
                    target = new DfaState(nextDfaStateIndex++, group.NfaStates);
                    knownStates.Add(target);
                }
                pending.Enqueue(target);
            }

            bool isFinal = target.NfaStates.Any(s => !s.IsSubState && FinalStates.Contains(s.StateIndex));

            // Arcs are only emitted when there is a path and the target is final or a full state.
            if (group.Paths.Count == 0 || (!isFinal && group.IsNextSubState))
            {
                continue;
            }

            Path best = group.Paths.Values.MaxBy(p => p.Score);
            int fromState = best.StartState;
            int lastStep = best.Arcs.Count - 1;
            for (int step = 0; step <= lastStep; step++)
            {
                WordGraphArc original = Arcs[best.Arcs[step]];

                // The last arc lands on the target state unless the target is a sub-state; every
                // other arc gets a freshly numbered intermediate state.
                int toState;
                if (step == lastStep && !group.IsNextSubState)
                {
                    toState = target.Index;
                }
                else
                {
                    toState = nextDfaStateIndex++;
                }

                emittedArcs.Add(new WordGraphArc(fromState, toState, original.Score, original.Words,
                    original.Alignment, original.SourceSegmentRange, original.IsUnknown,
                    original.WordConfidences));
                fromState = toState;
            }

            if (isFinal)
            {
                emittedFinalStates.Add(fromState);
            }
        }
    }

    return new WordGraph(emittedArcs, emittedFinalStates, InitialStateScore);
}