/// <summary>
/// For every arc of the word graph, builds one ECM score info per word of the arc,
/// stores that list for the arc, updates the best scores of the arc's next state,
/// and records both end states of the arc.
/// </summary>
private void InitArcs()
{
    for (int index = 0; index < _wordGraph.Arcs.Count; index++)
    {
        WordGraphArc currentArc = _wordGraph.Arcs[index];

        // Each word's score info is set up from its predecessor; the first word
        // uses the score info stored for the arc's previous state.
        EcmScoreInfo previous = _stateEcmScoreInfos[currentArc.PrevState];
        var wordInfos = new List<EcmScoreInfo>();
        for (int wordIndex = 0; wordIndex < currentArc.Words.Count; wordIndex++)
        {
            var current = new EcmScoreInfo();
            _ecm.SetupEsi(current, previous, currentArc.Words[wordIndex]);
            wordInfos.Add(current);
            previous = current;
        }
        _arcEcmScoreInfos.Add(wordInfos);

        // A prefix diff size of 0 makes the update consider every score position.
        UpdateStateBestScores(index, 0);

        _statesInvolvedInArcs.Add(currentArc.PrevState);
        _statesInvolvedInArcs.Add(currentArc.NextState);
    }
}
/// <summary>
/// Updates the best scores, best previous arcs, ECM score info and word graph score
/// of the next state of the specified arc.
/// </summary>
/// <param name="arcIndex">Index of the arc in the word graph's arc list.</param>
/// <param name="prefixDiffSize">
/// When 0, every score position is considered; otherwise only the last
/// <paramref name="prefixDiffSize"/> positions are considered.
/// </param>
private void UpdateStateBestScores(int arcIndex, int prefixDiffSize)
{
    WordGraphArc arc = _wordGraph.Arcs[arcIndex];
    List<EcmScoreInfo> wordInfos = _arcEcmScoreInfos[arcIndex];

    // Use the score info of the arc's last word; an arc without any word score
    // infos falls back to the score info of its previous state.
    EcmScoreInfo sourceInfo;
    if (wordInfos.Count == 0)
    {
        sourceInfo = _stateEcmScoreInfos[arc.PrevState];
    }
    else
    {
        sourceInfo = wordInfos[wordInfos.Count - 1];
    }

    double graphScore = _stateWordGraphScores[arc.PrevState] + arc.Score;

    List<double> targetBestScores = _stateBestScores[arc.NextState];
    List<int> targetBestPrevArcs = _stateBestPrevArcs[arc.NextState];

    int firstPosition = 0;
    if (prefixDiffSize != 0)
    {
        firstPosition = sourceInfo.Scores.Count - prefixDiffSize;
    }

    var changedPositions = new List<int>();
    for (int pos = firstPosition; pos < sourceInfo.Scores.Count; pos++)
    {
        double candidate = (EcmWeight * -sourceInfo.Scores[pos]) + (WordGraphWeight * graphScore);

        // Accept the candidate when the position is one past the end of the best
        // score list, or when it is strictly greater than the current best.
        if (pos == targetBestScores.Count || targetBestScores[pos] < candidate)
        {
            AddOrReplace(targetBestScores, pos, candidate);
            changedPositions.Add(pos);
            AddOrReplace(targetBestPrevArcs, pos, arcIndex);
        }
    }

    _stateEcmScoreInfos[arc.NextState].UpdatePositions(sourceInfo, changedPositions);

    // Keep the larger word graph score for the next state.
    if (graphScore > _stateWordGraphScores[arc.NextState])
    {
        _stateWordGraphScores[arc.NextState] = graphScore;
    }
}
/// <summary>
/// Resets <paramref name="initialEsi"/> so that it holds a single score, the
/// segment edit distance computed between two empty segments, and no operations.
/// </summary>
/// <param name="initialEsi">The score info to reset.</param>
public void SetupInitialEsi(EcmScoreInfo initialEsi)
{
    var emptySource = new string[0];
    var emptyTarget = new string[0];
    double emptyDistance = _segmentEditDistance.Compute(emptySource, emptyTarget);

    initialEsi.Scores.Clear();
    initialEsi.Scores.Add(emptyDistance);

    initialEsi.Operations.Clear();
}
/// <summary>
/// Resets <paramref name="esi"/> to a single score and a single
/// <see cref="EditOperation.None"/> operation. The score is the first score of
/// <paramref name="prevEsi"/> plus the segment edit distance computed between
/// the one-word segment containing <paramref name="word"/> and an empty segment.
/// </summary>
/// <param name="esi">The score info to reset.</param>
/// <param name="prevEsi">The score info whose first score is extended.</param>
/// <param name="word">The word whose distance to the empty segment is added.</param>
public void SetupEsi(EcmScoreInfo esi, EcmScoreInfo prevEsi, string word)
{
    var singleWord = new string[] { word };
    var emptySegment = new string[0];
    double wordDistance = _segmentEditDistance.Compute(singleWord, emptySegment);

    // The previous score is read only after the score list has been cleared.
    esi.Scores.Clear();
    esi.Scores.Add(prevEsi.Scores[0] + wordDistance);

    esi.Operations.Clear();
    esi.Operations.Add(EditOperation.None);
}
/// <summary>
/// Runs the incremental prefix computation of the segment edit distance for
/// <paramref name="word"/> and appends every edit operation it returns to the
/// operations of <paramref name="esi"/>.
/// </summary>
/// <param name="esi">The score info whose scores are passed to the computation and whose operations are appended to.</param>
/// <param name="prevEsi">The score info whose scores are passed as the previous scores.</param>
/// <param name="word">The word passed to the computation.</param>
/// <param name="prefixDiff">The prefix difference passed to the computation.</param>
/// <param name="isLastWordComplete">Flag passed through to the computation.</param>
public void ExtendEsi(EcmScoreInfo esi, EcmScoreInfo prevEsi, string word, string[] prefixDiff,
    bool isLastWordComplete)
{
    var currentScores = esi.Scores;
    var previousScores = prevEsi.Scores;

    foreach (EditOperation operation in _segmentEditDistance.IncrComputePrefix(currentScores, previousScores,
        word, prefixDiff, isLastWordComplete))
    {
        esi.Operations.Add(operation);
    }
}
/// <summary>
/// Sets the word graph score of the initial state to the word graph's initial
/// state score and rebuilds the initial state's best scores and best previous
/// arcs from its ECM scores.
/// </summary>
private void UpdateInitialStateBestScores()
{
    EcmScoreInfo initialInfo = _stateEcmScoreInfos[WordGraph.InitialState];

    _stateWordGraphScores[WordGraph.InitialState] = _wordGraph.InitialStateScore;

    List<double> initialBestScores = _stateBestScores[WordGraph.InitialState];
    List<int> initialBestPrevArcs = _stateBestPrevArcs[WordGraph.InitialState];

    initialBestScores.Clear();
    initialBestPrevArcs.Clear();

    foreach (double ecmScore in initialInfo.Scores)
    {
        double ecmTerm = EcmWeight * -ecmScore;
        double graphTerm = WordGraphWeight * _wordGraph.InitialStateScore;
        initialBestScores.Add(ecmTerm + graphTerm);

        // Every entry of the initial state gets int.MaxValue as its previous arc.
        initialBestPrevArcs.Add(int.MaxValue);
    }
}
/// <summary>
/// Enqueues one hypothesis for every word except the last of each arc that has
/// more than one word and is not pruned.
/// </summary>
/// <param name="queue">The queue that receives the hypotheses.</param>
private void GetSubStateHypotheses(PriorityQueue<Hypothesis> queue)
{
    for (int arcIndex = 0; arcIndex < _wordGraph.Arcs.Count; arcIndex++)
    {
        WordGraphArc arc = _wordGraph.Arcs[arcIndex];

        // Skip arcs with at most one word as well as pruned arcs.
        if (arc.Words.Count <= 1 || IsArcPruned(arc))
        {
            continue;
        }

        double graphScore = _stateWordGraphScores[arc.PrevState] + arc.Score;
        int lastWordIndex = arc.Words.Count - 1;

        for (int wordIndex = 0; wordIndex < lastWordIndex; wordIndex++)
        {
            EcmScoreInfo wordInfo = _arcEcmScoreInfos[arcIndex][wordIndex];

            // Terms are computed and summed in a fixed left-to-right order; the ECM
            // term uses the last entry of the word's score list.
            double graphTerm = WordGraphWeight * graphScore;
            double ecmTerm = EcmWeight * -wordInfo.Scores[wordInfo.Scores.Count - 1];
            double restTerm = WordGraphWeight * _restScores[arc.NextState];
            double total = graphTerm + ecmTerm + restTerm;

            queue.Enqueue(new Hypothesis(total, arc.NextState, arcIndex, wordIndex));
        }
    }
}
/// <summary>
/// Copies the scores at the given positions from <paramref name="prevEsi"/> into
/// this instance, together with the operations at those positions where
/// <paramref name="prevEsi"/> has one.
/// </summary>
/// <param name="prevEsi">The score info to copy from.</param>
/// <param name="positions">
/// The positions to copy. Each must be a valid index into the scores of
/// <paramref name="prevEsi"/>; a position beyond the end of its operations is
/// copied for the score only.
/// </param>
public void UpdatePositions(EcmScoreInfo prevEsi, List<int> positions)
{
    // Pad both lists with placeholder entries so they are at least as long as
    // the corresponding lists of the source.
    while (Scores.Count < prevEsi.Scores.Count)
    {
        Scores.Add(0);
    }

    while (Operations.Count < prevEsi.Operations.Count)
    {
        Operations.Add(EditOperation.None);
    }

    foreach (int position in positions)
    {
        Scores[position] = prevEsi.Scores[position];

        // Guard on the position being copied rather than on the loop counter:
        // the operations list of the source can be shorter than its scores list,
        // and indexing it with an unchecked position would throw.
        if (position < prevEsi.Operations.Count)
        {
            Operations[position] = prevEsi.Operations[position];
        }
    }
}
/// <summary>
/// Extends the ECM score infos of the initial state and of every arc of the word
/// graph with the given prefix difference, then updates the best scores of each
/// arc's next state. Does nothing when <paramref name="prefixDiff"/> is empty.
/// </summary>
/// <param name="prefixDiff">The words added to the prefix.</param>
/// <param name="isLastWordComplete">Flag passed through to the ECM when extending each word's score info.</param>
private void ProcessWordGraphForPrefixDiff(string[] prefixDiff, bool isLastWordComplete)
{
    int diffLength = prefixDiff.Length;
    if (diffLength == 0)
    {
        return;
    }

    if (!_wordGraph.IsEmpty)
    {
        // The initial state's score info is passed as both the current and the previous info.
        EcmScoreInfo initialInfo = _stateEcmScoreInfos[WordGraph.InitialState];
        _ecm.ExtendInitialEsi(initialInfo, initialInfo, prefixDiff);
        UpdateInitialStateBestScores();
    }

    for (int arcIndex = 0; arcIndex < _wordGraph.Arcs.Count; arcIndex++)
    {
        WordGraphArc arc = _wordGraph.Arcs[arcIndex];

        EcmScoreInfo previous = _stateEcmScoreInfos[arc.PrevState];
        List<EcmScoreInfo> wordInfos = _arcEcmScoreInfos[arcIndex];

        // Make sure there is one score info per word of the arc.
        for (int missing = arc.Words.Count - wordInfos.Count; missing > 0; missing--)
        {
            wordInfos.Add(new EcmScoreInfo());
        }

        for (int wordIndex = 0; wordIndex < arc.Words.Count; wordIndex++)
        {
            EcmScoreInfo current = wordInfos[wordIndex];

            // Unknown arcs are extended with the empty string instead of their words.
            string word;
            if (arc.IsUnknown)
            {
                word = string.Empty;
            }
            else
            {
                word = arc.Words[wordIndex];
            }

            _ecm.ExtendEsi(current, previous, word, prefixDiff, isLastWordComplete);
            previous = current;
        }

        // Only the positions added by the prefix difference are reconsidered.
        UpdateStateBestScores(arcIndex, diffLength);
    }
}
/// <summary>
/// Runs the first-row incremental prefix computation of the segment edit distance
/// on the scores of <paramref name="initialEsi"/>, using the scores of
/// <paramref name="prevInitialEsi"/> as the previous scores.
/// </summary>
/// <param name="initialEsi">The score info whose scores are passed as the current scores.</param>
/// <param name="prevInitialEsi">The score info whose scores are passed as the previous scores.</param>
/// <param name="prefixDiff">The prefix difference passed to the computation.</param>
public void ExtendInitialEsi(EcmScoreInfo initialEsi, EcmScoreInfo prevInitialEsi, string[] prefixDiff)
{
    var currentScores = initialEsi.Scores;
    var previousScores = prevInitialEsi.Scores;

    _segmentEditDistance.IncrComputePrefixFirstRow(currentScores, previousScores, prefixDiff);
}