예제 #1
0
        /// <summary>
        /// Creates the per-word ECM score infos for every arc of the word graph and initializes the best
        /// scores of each arc's next state.
        /// </summary>
        /// <remarks>
        /// The score-info list of each arc is appended to <c>_arcEcmScoreInfos</c>, while
        /// <c>UpdateStateBestScores</c> looks it up by arc index.
        /// NOTE(review): this presumably relies on <c>_arcEcmScoreInfos</c> being empty on entry so that
        /// list positions line up with arc indices - confirm against the caller.
        /// </remarks>
        private void InitArcs()
        {
            for (int index = 0; index < _wordGraph.Arcs.Count; index++)
            {
                WordGraphArc currentArc = _wordGraph.Arcs[index];

                // Chain one score info per word, starting from the score info of the arc's previous state.
                EcmScoreInfo previous = _stateEcmScoreInfos[currentArc.PrevState];
                var wordInfos = new List<EcmScoreInfo>();
                foreach (string arcWord in currentArc.Words)
                {
                    var current = new EcmScoreInfo();
                    _ecm.SetupEsi(current, previous, arcWord);
                    wordInfos.Add(current);
                    previous = current;
                }

                _arcEcmScoreInfos.Add(wordInfos);

                // A prefix-diff size of 0 makes the update consider every score position.
                UpdateStateBestScores(index, 0);

                // Remember both endpoints of the arc.
                _statesInvolvedInArcs.Add(currentArc.PrevState);
                _statesInvolvedInArcs.Add(currentArc.NextState);
            }
        }
예제 #2
0
        /// <summary>
        /// Re-evaluates the scores reachable through the given arc and updates the best scores, best
        /// previous arcs, ECM score info and word graph score stored for the arc's next state.
        /// </summary>
        /// <param name="arcIndex">Index of the arc in <c>_wordGraph.Arcs</c>.</param>
        /// <param name="prefixDiffSize">
        /// Number of trailing score positions to re-evaluate; <c>0</c> re-evaluates all positions.
        /// </param>
        private void UpdateStateBestScores(int arcIndex, int prefixDiffSize)
        {
            WordGraphArc arc = _wordGraph.Arcs[arcIndex];
            List<EcmScoreInfo> wordEsis = _arcEcmScoreInfos[arcIndex];

            // Use the score info of the arc's last word; an arc without any word score infos falls back
            // to the score info of its previous state.
            EcmScoreInfo lastEsi;
            if (wordEsis.Count == 0)
            {
                lastEsi = _stateEcmScoreInfos[arc.PrevState];
            }
            else
            {
                lastEsi = wordEsis[wordEsis.Count - 1];
            }

            double graphScore = _stateWordGraphScores[arc.PrevState] + arc.Score;

            List<double> bestScores = _stateBestScores[arc.NextState];
            List<int> bestPrevArcs = _stateBestPrevArcs[arc.NextState];

            int firstPos = 0;
            if (prefixDiffSize != 0)
            {
                firstPos = lastEsi.Scores.Count - prefixDiffSize;
            }

            // Positions at which this arc provides a new or strictly better score for the next state.
            var improvedPositions = new List<int>();
            for (int pos = firstPos; pos < lastEsi.Scores.Count; pos++)
            {
                double candidate = (EcmWeight * -lastEsi.Scores[pos]) + (WordGraphWeight * graphScore);

                // The stored score is only read when the position already exists.
                bool isNewPosition = pos == bestScores.Count;
                if (isNewPosition || bestScores[pos] < candidate)
                {
                    AddOrReplace(bestScores, pos, candidate);
                    improvedPositions.Add(pos);
                    AddOrReplace(bestPrevArcs, pos, arcIndex);
                }
            }

            _stateEcmScoreInfos[arc.NextState].UpdatePositions(lastEsi, improvedPositions);

            // Keep the highest word graph score seen for the next state.
            if (graphScore > _stateWordGraphScores[arc.NextState])
            {
                _stateWordGraphScores[arc.NextState] = graphScore;
            }
        }
예제 #3
0
        /// <summary>
        /// Resets the given score info so that it holds a single score - the result of
        /// <c>_segmentEditDistance.Compute</c> on two empty word arrays - and no operations.
        /// </summary>
        /// <param name="initialEsi">The score info to reset; its lists are modified in place.</param>
        public void SetupInitialEsi(EcmScoreInfo initialEsi)
        {
            // Computed before anything is cleared, so a failure here leaves the score info untouched.
            double emptyScore = _segmentEditDistance.Compute(new string[] { }, new string[] { });

            initialEsi.Operations.Clear();

            initialEsi.Scores.Clear();
            initialEsi.Scores.Add(emptyScore);
        }
예제 #4
0
        /// <summary>
        /// Resets the given score info to a single score and a single <see cref="EditOperation.None"/>
        /// operation.
        /// </summary>
        /// <param name="esi">The score info to reset; its lists are modified in place.</param>
        /// <param name="prevEsi">The preceding score info; its first score is the base of the new score.</param>
        /// <param name="word">
        /// The word passed, as a one-element array, to <c>_segmentEditDistance.Compute</c> together with an
        /// empty array.
        /// </param>
        public void SetupEsi(EcmScoreInfo esi, EcmScoreInfo prevEsi, string word)
        {
            double wordScore = _segmentEditDistance.Compute(new[] { word }, new string[] { });

            // The new score is the first score of the previous info plus the score computed for the word.
            esi.Scores.Clear();
            double total = prevEsi.Scores[0] + wordScore;
            esi.Scores.Add(total);

            esi.Operations.Clear();
            esi.Operations.Add(EditOperation.None);
        }
예제 #5
0
        /// <summary>
        /// Runs <c>_segmentEditDistance.IncrComputePrefix</c> for the given word and prefix difference and
        /// appends the edit operations it returns to <paramref name="esi"/>.
        /// </summary>
        /// <param name="esi">The score info to extend; its scores list is handed to the computation.</param>
        /// <param name="prevEsi">The preceding score info whose scores are handed to the computation.</param>
        /// <param name="word">The word passed through to the computation.</param>
        /// <param name="prefixDiff">The prefix words passed through to the computation.</param>
        /// <param name="isLastWordComplete">Passed through to the computation unchanged.</param>
        /// <remarks>
        /// NOTE(review): <c>IncrComputePrefix</c> presumably also extends <c>esi.Scores</c> in place - confirm
        /// against its implementation.
        /// </remarks>
        public void ExtendEsi(EcmScoreInfo esi, EcmScoreInfo prevEsi, string word, string[] prefixDiff,
                              bool isLastWordComplete)
        {
            foreach (EditOperation operation in _segmentEditDistance.IncrComputePrefix(
                         esi.Scores, prevEsi.Scores, word, prefixDiff, isLastWordComplete))
            {
                esi.Operations.Add(operation);
            }
        }
예제 #6
0
        /// <summary>
        /// Rebuilds the word graph score, best scores and best previous arcs of the initial state from the
        /// initial state's ECM score info.
        /// </summary>
        private void UpdateInitialStateBestScores()
        {
            EcmScoreInfo initialEsi = _stateEcmScoreInfos[WordGraph.InitialState];

            double initialScore = _wordGraph.InitialStateScore;
            _stateWordGraphScores[WordGraph.InitialState] = initialScore;

            List<double> scores = _stateBestScores[WordGraph.InitialState];
            List<int> prevArcs = _stateBestPrevArcs[WordGraph.InitialState];
            scores.Clear();
            prevArcs.Clear();

            // One best-score entry per ECM score; int.MaxValue is stored as the previous arc of each entry.
            for (int pos = 0; pos < initialEsi.Scores.Count; pos++)
            {
                double combined = (EcmWeight * -initialEsi.Scores[pos]) + (WordGraphWeight * initialScore);
                scores.Add(combined);
                prevArcs.Add(int.MaxValue);
            }
        }
예제 #7
0
        /// <summary>
        /// Enqueues one hypothesis for every word position except the last of each arc that has more than
        /// one word and is not pruned.
        /// </summary>
        /// <param name="queue">The priority queue that receives the hypotheses.</param>
        private void GetSubStateHypotheses(PriorityQueue<Hypothesis> queue)
        {
            for (int arcIndex = 0; arcIndex < _wordGraph.Arcs.Count; arcIndex++)
            {
                WordGraphArc arc = _wordGraph.Arcs[arcIndex];

                // Single-word (or empty) arcs are skipped before the pruning check is evaluated.
                if (arc.Words.Count <= 1 || IsArcPruned(arc))
                {
                    continue;
                }

                double weightedGraphScore = WordGraphWeight * (_stateWordGraphScores[arc.PrevState] + arc.Score);

                int lastWordIndex = arc.Words.Count - 1;
                for (int wordIndex = 0; wordIndex < lastWordIndex; wordIndex++)
                {
                    EcmScoreInfo wordEsi = _arcEcmScoreInfos[arcIndex][wordIndex];

                    // Uses the last score of the word's score info; terms are summed in the same order
                    // as before: graph score, ECM score, rest score.
                    double ecmTerm = EcmWeight * -wordEsi.Scores[wordEsi.Scores.Count - 1];
                    double restTerm = WordGraphWeight * _restScores[arc.NextState];
                    double total = weightedGraphScore + ecmTerm + restTerm;

                    queue.Enqueue(new Hypothesis(total, arc.NextState, arcIndex, wordIndex));
                }
            }
        }
예제 #8
0
        /// <summary>
        /// Copies the scores and operations at the given positions from <paramref name="prevEsi"/> into
        /// this score info, first padding this instance's lists so they are at least as long as those of
        /// <paramref name="prevEsi"/>.
        /// </summary>
        /// <param name="prevEsi">The score info to copy from.</param>
        /// <param name="positions">
        /// Indices to copy. Each must be a valid index into <c>prevEsi.Scores</c>; a position that has no
        /// corresponding entry in <c>prevEsi.Operations</c> only has its score copied.
        /// </param>
        public void UpdatePositions(EcmScoreInfo prevEsi, List<int> positions)
        {
            // Pad with default entries so every position that exists in prevEsi can be assigned here.
            while (Scores.Count < prevEsi.Scores.Count)
            {
                Scores.Add(0);
            }

            while (Operations.Count < prevEsi.Operations.Count)
            {
                Operations.Add(EditOperation.None);
            }

            foreach (int position in positions)
            {
                Scores[position] = prevEsi.Scores[position];

                // Guard on the position that is actually indexed. The operations list can be shorter
                // than the scores list (e.g. a score info with scores but no operations), and comparing
                // the loop counter instead of the position could read past the end of the list or skip
                // a position that does have an operation.
                if (position < prevEsi.Operations.Count)
                {
                    Operations[position] = prevEsi.Operations[position];
                }
            }
        }
예제 #9
0
        /// <summary>
        /// Extends the ECM score infos of the initial state and of every arc with the given prefix
        /// difference, updating the affected best scores along the way.
        /// </summary>
        /// <param name="prefixDiff">The prefix words to process; nothing happens when it is empty.</param>
        /// <param name="isLastWordComplete">Passed through to <c>_ecm.ExtendEsi</c> unchanged.</param>
        private void ProcessWordGraphForPrefixDiff(string[] prefixDiff, bool isLastWordComplete)
        {
            if (prefixDiff.Length == 0)
            {
                return;
            }

            bool hasGraph = !_wordGraph.IsEmpty;
            if (hasGraph)
            {
                // The initial state's score info serves as both the target and the previous info.
                EcmScoreInfo initialEsi = _stateEcmScoreInfos[WordGraph.InitialState];
                _ecm.ExtendInitialEsi(initialEsi, initialEsi, prefixDiff);
                UpdateInitialStateBestScores();
            }

            for (int index = 0; index < _wordGraph.Arcs.Count; index++)
            {
                WordGraphArc currentArc = _wordGraph.Arcs[index];

                EcmScoreInfo previous = _stateEcmScoreInfos[currentArc.PrevState];
                List<EcmScoreInfo> wordInfos = _arcEcmScoreInfos[index];

                // Make sure there is one score info per word of the arc.
                while (wordInfos.Count < currentArc.Words.Count)
                {
                    wordInfos.Add(new EcmScoreInfo());
                }

                // Extend each word's score info, chaining from the previous state's score info.
                for (int wordIndex = 0; wordIndex < currentArc.Words.Count; wordIndex++)
                {
                    // Unknown arcs are scored against the empty string instead of their actual words.
                    string scoredWord;
                    if (currentArc.IsUnknown)
                    {
                        scoredWord = string.Empty;
                    }
                    else
                    {
                        scoredWord = currentArc.Words[wordIndex];
                    }

                    EcmScoreInfo current = wordInfos[wordIndex];
                    _ecm.ExtendEsi(current, previous, scoredWord, prefixDiff, isLastWordComplete);
                    previous = current;
                }

                // Only the positions added by this prefix difference are re-evaluated.
                UpdateStateBestScores(index, prefixDiff.Length);
            }
        }
예제 #10
0
 /// <summary>
 /// Delegates to <c>_segmentEditDistance.IncrComputePrefixFirstRow</c>, handing it the score lists of
 /// both score infos and the prefix difference.
 /// </summary>
 /// <param name="initialEsi">The initial score info whose scores are passed as the first argument.</param>
 /// <param name="prevInitialEsi">The score info whose scores are passed as the second argument.</param>
 /// <param name="prefixDiff">The prefix words passed through to the computation.</param>
 public void ExtendInitialEsi(EcmScoreInfo initialEsi, EcmScoreInfo prevInitialEsi, string[] prefixDiff)
 {
     var targetScores = initialEsi.Scores;
     var previousScores = prevInitialEsi.Scores;

     _segmentEditDistance.IncrComputePrefixFirstRow(targetScores, previousScores, prefixDiff);
 }