/// <summary>
        /// Recursively builds an answer lattice (Chinese words) from a Viterbi search graph
        /// of binary predictions.
        /// </summary>
        /// <remarks>
        /// This function does a limited amount of post-processing: it preserves white space of the
        /// input, and does not segment between two latin characters or between two digits.
        /// Consequently, the probabilities of all paths in answerLattice may not sum to 1
        /// (they do sum to 1 if no post processing applies).
        /// A path is only extended while its accumulated cost stays below
        /// <c>flags.searchGraphPrune</c>, or when the current character is an ASCII letter.
        /// </remarks>
        /// <param name="tSource">Current node in Viterbi search graph.</param>
        /// <param name="aSource">Current node in answer lattice.</param>
        /// <param name="answer">Partial word starting at aSource. It is copied, never modified in place.</param>
        /// <param name="nodeId">
        /// Mutable counter; it is incremented before each new answer-lattice node is created and
        /// then passed to that node's constructor.
        /// </param>
        /// <param name="pos">Current position in docArray.</param>
        /// <param name="cost">Current cost of answer.</param>
        /// <param name="stateLinks">
        /// Maps nodes of the search graph to nodes in answer lattice
        /// (when paths of the search graph are recombined, paths of the answer lattice should be
        /// recombined as well, if at word boundary).
        /// </param>
        /// <param name="answerLattice">Answer lattice handed to the constructor of every newly created answer node.</param>
        /// <param name="docArray">
        /// Labels of the input; the entry at <paramref name="pos"/> supplies the current character and
        /// its space-before annotation.
        /// </param>
        private void TagLatticeToAnswerLattice(DFSAState <string, int> tSource, DFSAState <string, int> aSource, StringBuilder answer, MutableInteger nodeId, int pos, double cost, IDictionary <DFSAState <string, int>, DFSAState <string, int> > stateLinks,
                                               DFSA <string, int> answerLattice, CoreLabel[] docArray)
        {
            // Add "1" prediction after the end of the sentence, if applicable:
            if (tSource.IsAccepting() && tSource.ContinuingInputs().IsEmpty())
            {
                tSource.AddTransition(new DFSATransition <string, int>(string.Empty, tSource, new DFSAState <string, int>(-1, null), "1", string.Empty, 0));
            }
            // Get current label, character, and prediction:
            CoreLabel curLabel  = (pos < docArray.Length) ? docArray[pos] : null;
            string    curChr    = null;
            string    origSpace = null;

            if (curLabel != null)
            {
                curChr = curLabel.Get(typeof(CoreAnnotations.OriginalCharAnnotation));
                System.Diagnostics.Debug.Assert((curChr.Length == 1));
                origSpace = curLabel.Get(typeof(CoreAnnotations.SpaceBeforeAnnotation));
            }
            // Get set of successors in search graph:
            ICollection <string> inputs = tSource.ContinuingInputs();
            // Only keep most probable transition out of initial state:
            string answerConstraint = null;

            if (pos == 0)
            {
                double minCost = double.PositiveInfinity;
                foreach (string predictSpace in inputs)
                {
                    double transitionCost = tSource.Transition(predictSpace).Score();
                    if (transitionCost < minCost && predictSpace != null)
                    {
                        logger.Info(string.Format("mincost ({0}): {1:e} -> {2:e}", predictSpace, minCost, transitionCost));
                        minCost          = transitionCost;
                        answerConstraint = predictSpace;
                    }
                }
            }
            // The last character of the partial word is the same for every outgoing transition,
            // because each iteration below works on its own copy of 'answer'.
            int    answerLen = answer.Length;
            string prevChr   = (answerLen > 0) ? answer.ToString(answerLen - 1, 1) : null;

            // Follow along each transition:
            foreach (string predictSpace_1 in inputs)
            {
                DFSATransition <string, int> transition = tSource.Transition(predictSpace_1);
                DFSAState <string, int>      tDest      = transition.Target();
                DFSAState <string, int>      newASource = aSource;
                double                       newCost    = cost;

                // At pos 0, ignore every transition except the cheapest one selected above:
                if (answerConstraint != null && !answerConstraint.Equals(predictSpace_1))
                {
                    logger.Info(string.Format("Skipping transition {0} at pos 0.", predictSpace_1));
                    continue;
                }
                // Ignore paths not consistent with input segmentation:
                if (flags.keepAllWhitespaces && "0".Equals(predictSpace_1) && "1".Equals(origSpace))
                {
                    logger.Info(string.Format("Skipping non-boundary at pos {0}, since space in the input.", pos));
                    continue;
                }
                // Ignore paths adding segment boundaries between two latin characters, or between two digits:
                // (unless already present in original input)
                if ("1".Equals(predictSpace_1) && "0".Equals(origSpace) && prevChr != null && curChr != null)
                {
                    char p = prevChr[0];
                    char c = curChr[0];
                    if (ChineseStringUtils.IsLetterASCII(p) && ChineseStringUtils.IsLetterASCII(c))
                    {
                        logger.Info(string.Format("Not hypothesizing a boundary at pos {0}, since between two ASCII letters ({1} and {2}).", pos, prevChr, curChr));
                        continue;
                    }
                    if (ChineseUtils.IsNumber(p) && ChineseUtils.IsNumber(c))
                    {
                        logger.Info(string.Format("Not hypothesizing a boundary at pos {0}, since between two numeral characters ({1} and {2}).", pos, prevChr, curChr));
                        continue;
                    }
                }
                // Private copy of the partial word for this branch of the recursion:
                StringBuilder newAnswer = new StringBuilder(answer.ToString());

                // If predictSpace==1, create a new transition in answer search graph:
                if ("1".Equals(predictSpace_1) && newAnswer.Length > 0)
                {
                    // If answer destination node visited before, create a new edge and leave:
                    DFSAState <string, int> aDest;
                    if (stateLinks.TryGetValue(tSource, out aDest))
                    {
                        newASource.AddTransition(new DFSATransition <string, int>(string.Empty, newASource, aDest, newAnswer.ToString(), string.Empty, newCost));
                        continue;
                    }
                    // If answer destination node not visited before, create it + new edge:
                    nodeId.IncValue(1);
                    aDest = new DFSAState <string, int>(nodeId, answerLattice, 0.0);
                    stateLinks[tSource] = aDest;
                    newASource.AddTransition(new DFSATransition <string, int>(string.Empty, newASource, aDest, newAnswer.ToString(), string.Empty, newCost));
                    // Reached an accepting state:
                    if (tSource.IsAccepting())
                    {
                        aDest.SetAccepting(true);
                        continue;
                    }
                    // Start new answer edge:
                    newASource = aDest;
                    newAnswer  = new StringBuilder();
                    newCost    = 0.0;
                }
                System.Diagnostics.Debug.Assert((curChr != null));
                newAnswer.Append(curChr);
                newCost += transition.Score();
                // Prune expensive paths, but always keep going through runs of ASCII letters:
                if (newCost < flags.searchGraphPrune || ChineseStringUtils.IsLetterASCII(curChr[0]))
                {
                    TagLatticeToAnswerLattice(tDest, newASource, newAnswer, nodeId, pos + 1, newCost, stateLinks, answerLattice, docArray);
                }
            }
        }
// Example #2
// 0
        /// <summary>
        /// Builds a search graph (a <c>DFSA</c>) from the per-position tag options and window scores
        /// of a sequence model.
        /// </summary>
        /// <remarks>
        /// The graph gets a start state, one state per position and tag option, and a single
        /// accepting end state. Each transition is constructed with <c>classIndex.Get(tag)</c> and
        /// the negated window score of the tag assignment it represents.
        /// The method asserts (debug builds only) that the model's right window is 0.
        /// NOTE(review): the indexing of <c>graphStates</c> mixes padded positions, positions shifted
        /// by the left window, product indices and tag values; it is kept exactly as written —
        /// confirm the assumptions about the model's windows and tag values against callers.
        /// </remarks>
        /// <param name="ts">Sequence model supplying length, window sizes, possible values per position, and scores.</param>
        /// <param name="classIndex">Index used to turn a tag id into the string passed to each transition.</param>
        /// <returns>The newly built search graph, with its initial state set.</returns>
        public static DFSA <string, int> GetGraph(ISequenceModel ts, IIndex <string> classIndex)
        {
            DFSA <string, int> viterbiSearchGraph = new DFSA <string, int>(null);
            // Set up tag options
            int length      = ts.Length();
            int leftWindow  = ts.LeftWindow();
            int rightWindow = ts.RightWindow();

            System.Diagnostics.Debug.Assert((rightWindow == 0));
            int padLength = length + leftWindow + rightWindow;

            // NOTE: tags[i][j]  : i is index into pos, and j into product
            int[][] tags   = new int[padLength][];
            int[]   tagNum = new int[padLength];
            for (int pos = 0; pos < padLength; pos++)
            {
                tags[pos]   = ts.GetPossibleValues(pos);
                tagNum[pos] = tags[pos].Length;
            }
            // Set up Viterbi search graph: state ids are handed out consecutively, starting at 0
            // for the start state and ending with the accepting end state.
            int stateId = -1;
            DFSAState <string, int> startState = new DFSAState <string, int>(++stateId, viterbiSearchGraph, 0.0);
            viterbiSearchGraph.SetInitialState(startState);
            DFSAState <string, int>[][] graphStates = new DFSAState <string, int>[length][];
            for (int pos_1 = 0; pos_1 < length; ++pos_1)
            {
                graphStates[pos_1] = new DFSAState <string, int>[tags[pos_1].Length];
                for (int product = 0; product < tags[pos_1].Length; ++product)
                {
                    graphStates[pos_1][product] = new DFSAState <string, int>(++stateId, viterbiSearchGraph);
                }
            }
            // Accepting state:
            DFSAState <string, int> endState = new DFSAState <string, int>(++stateId, viterbiSearchGraph, 0.0);
            endState.SetAccepting(true);

            int[] tempTags = new int[padLength];
            // Set up product space sizes
            int[] productSizes = new int[padLength];
            int   curProduct   = 1;

            for (int i = 0; i < leftWindow; i++)
            {
                curProduct *= tagNum[i];
            }
            for (int pos_2 = leftWindow; pos_2 < padLength; pos_2++)
            {
                if (pos_2 > leftWindow + rightWindow)
                {
                    // shift off
                    curProduct /= tagNum[pos_2 - leftWindow - rightWindow - 1];
                }
                // shift on
                curProduct *= tagNum[pos_2];
                productSizes[pos_2 - rightWindow] = curProduct;
            }
            double[][] windowScore = new double[padLength][];
            // Score all of each window's options
            for (int pos_3 = leftWindow; pos_3 < leftWindow + length; pos_3++)
            {
                windowScore[pos_3] = new double[productSizes[pos_3]];
                Arrays.Fill(tempTags, tags[0][0]);
                for (int product = 0; product < productSizes[pos_3]; product++)
                {
                    // Decode 'product' into one tag per position of the window, last position first.
                    int p     = product;
                    int shift = 1;
                    for (int curPos = pos_3; curPos >= pos_3 - leftWindow; curPos--)
                    {
                        tempTags[curPos] = tags[curPos][p % tagNum[curPos]];
                        p /= tagNum[curPos];
                        // curPos starts at pos_3 and only decreases, so this never fires here
                        // and shift stays 1.
                        if (curPos > pos_3)
                        {
                            shift *= tagNum[curPos];
                        }
                    }
                    // Only query the model once per context: when the current position holds its
                    // first possible tag, fetch the scores of all tags for that context.
                    if (tempTags[pos_3] == tags[pos_3][0])
                    {
                        // get all tags at once
                        double[] scores = ts.ScoresOf(tempTags, pos_3);
                        // fill in the relevant windowScores
                        for (int t = 0; t < tagNum[pos_3]; t++)
                        {
                            windowScore[pos_3][product + t * shift] = scores[t];
                        }
                    }
                }
            }
            // loop over the classification spot
            for (int pos_4 = leftWindow; pos_4 < length + leftWindow; pos_4++)
            {
                // loop over window product types
                for (int product = 0; product < productSizes[pos_4]; product++)
                {
                    if (pos_4 == leftWindow)
                    {
                        // all nodes in the first spot link to startState:
                        int curTag = tags[pos_4][product % tagNum[pos_4]];
                        DFSATransition <string, int> tr = new DFSATransition <string, int>(string.Empty, startState, graphStates[pos_4][product], classIndex.Get(curTag), string.Empty, -windowScore[pos_4][product]);
                        startState.AddTransition(tr);
                    }
                    else
                    {
                        int sharedProduct = product / tagNum[pos_4 + rightWindow];
                        int factor        = productSizes[pos_4] / tagNum[pos_4 + rightWindow];
                        for (int newTagNum = 0; newTagNum < tagNum[pos_4 - leftWindow - 1]; newTagNum++)
                        {
                            int predProduct = newTagNum * factor + sharedProduct;
                            int predTag     = tags[pos_4 - 1][predProduct % tagNum[pos_4 - 1]];
                            int curTag      = tags[pos_4][product % tagNum[pos_4]];
                            DFSAState <string, int>      sourceState = graphStates[pos_4 - leftWindow][predTag];
                            // The last classification spot links to the accepting end state instead of a regular state.
                            DFSAState <string, int>      destState   = (pos_4 - leftWindow + 1 == graphStates.Length) ? endState : graphStates[pos_4 - leftWindow + 1][curTag];
                            DFSATransition <string, int> tr          = new DFSATransition <string, int>(string.Empty, sourceState, destState, classIndex.Get(curTag), string.Empty, -windowScore[pos_4][product]);
                            graphStates[pos_4 - leftWindow][predTag].AddTransition(tr);
                        }
                    }
                }
            }
            return(viterbiSearchGraph);
        }