/// <summary>
 /// Populates the word lattice for <paramref name="s"/>: one edge for every
 /// substring found in the lexicon, and one edge for every single character.
 /// </summary>
 /// <remarks>
 /// Resets <c>edgesNb</c>, <c>len</c>, <c>states</c> and <c>lattice</c>.
 /// State <c>i</c> stands for character offset <c>i</c>; state 0 is the initial
 /// state and state <c>s.Length</c> is accepting. Edges for lexicon words get
 /// cost 1, edges for single characters missing from the lexicon get cost 100.
 /// This function must be run prior to running maxMatchSegmentation.
 /// </remarks>
 /// <param name="s">The string to segment.</param>
 private void BuildSegmentationLattice(string s)
 {
     edgesNb = 0;
     len     = s.Length;

     // One state per character boundary, 0 through len inclusive.
     states  = new List <DFSAState <Word, int> >();
     lattice = new DFSA <Word, int>("wordLattice");
     int boundary = 0;
     while (boundary <= len)
     {
         states.Add(new DFSAState <Word, int>(boundary, lattice));
         ++boundary;
     }

     // Offset 0 starts the lattice; the end-of-string offset accepts.
     lattice.SetInitialState(states[0]);
     states[len].SetAccepting(true);

     // Enumerate every substring, longest first for each start offset.
     for (int begin = 0; begin < len; ++begin)
     {
         DFSAState <Word, int> source = states[begin];
         for (int stop = len; stop > begin; --stop)
         {
             string candidate = Sharpen.Runtime.Substring(s, begin, stop);
             System.Diagnostics.Debug.Assert((candidate.Length > 0));
             bool singleChar = (stop - begin == 1);
             bool inLexicon  = words.Contains(candidate);
             if (!inLexicon && !singleChar)
             {
                 // Multi-character substrings only get an edge when the lexicon knows them.
                 continue;
             }

             // Unknown single characters stay usable but are heavily penalised.
             double cost;
             if (inLexicon)
             {
                 cost = 1;
             }
             else
             {
                 cost = 100;
             }
             DFSATransition <Word, int> edge = new DFSATransition <Word, int>(null, source, states[stop], new Word(candidate), null, cost);
             source.AddTransition(edge);
             ++edgesNb;
         }
     }
 }
        /// <summary>
        /// Converts a tag lattice into an answer lattice and prints the result
        /// to <paramref name="out"/>.
        /// </summary>
        /// <param name="tagLattice">Lattice whose initial state is the starting point of the conversion.</param>
        /// <param name="doc">Document tokens; copied into an array before the conversion runs.</param>
        /// <param name="out">Writer handed to <c>PrintAttFsmFormat</c> on the answer lattice.</param>
        /// <exception cref="Exception">
        /// Wraps an <c>IOException</c> raised while printing; the original exception
        /// is available as the inner exception.
        /// </exception>
        public virtual void PrintLattice(DFSA <string, int> tagLattice, IList <CoreLabel> doc, PrintWriter @out)
        {
            CoreLabel[] docArray = Sharpen.Collections.ToArray(doc, new CoreLabel[doc.Count]);
            // Create answer lattice:
            MutableInteger          nodeId        = new MutableInteger(0);
            DFSA <string, int>      answerLattice = new DFSA <string, int>(null);
            DFSAState <string, int> aInitState    = new DFSAState <string, int>(nodeId, answerLattice);

            answerLattice.SetInitialState(aInitState);
            IDictionary <DFSAState <string, int>, DFSAState <string, int> > stateLinks = Generics.NewHashMap();

            // Convert binary lattice into word lattice:
            TagLatticeToAnswerLattice(tagLattice.InitialState(), aInitState, new StringBuilder(string.Empty), nodeId, 0, 0.0, stateLinks, answerLattice, docArray);
            try
            {
                answerLattice.PrintAttFsmFormat(@out);
            }
            catch (IOException e)
            {
                // System.Exception has no constructor taking only an exception, so pass
                // the message and keep the IOException as the inner exception.
                throw new Exception(e.Message, e);
            }
        }
        // Example no. 3
        // 0
        /// <summary>
        /// Builds a search graph (a <c>DFSA</c>) over the tag choices of a sequence model.
        /// </summary>
        /// <remarks>
        /// The graph contains one start state, one state per tag option for each of the
        /// <c>ts.Length()</c> positions, and one accepting end state. Each transition is
        /// constructed with <c>classIndex.Get(tag)</c> and the negated window score of the
        /// tag product it was derived from. A right window of 0 is required; this is only
        /// checked by a debug assertion.
        /// </remarks>
        /// <param name="ts">Sequence model supplying the length, window sizes, possible values and scores.</param>
        /// <param name="classIndex">Index used to turn a tag id into the value placed on a transition.</param>
        /// <returns>The populated search graph.</returns>
        public static DFSA <string, int> GetGraph(ISequenceModel ts, IIndex <string> classIndex)
        {
            DFSA <string, int> viterbiSearchGraph = new DFSA <string, int>(null);
            // Set up tag options
            int length      = ts.Length();
            int leftWindow  = ts.LeftWindow();
            int rightWindow = ts.RightWindow();

            System.Diagnostics.Debug.Assert((rightWindow == 0));
            int padLength = length + leftWindow + rightWindow;

            // NOTE: tags[i][j]  : i is index into pos, and j into product
            int[][] tags   = new int[padLength][];
            int[]   tagNum = new int[padLength];
            for (int pos = 0; pos < padLength; pos++)
            {
                tags[pos]   = ts.GetPossibleValues(pos);
                tagNum[pos] = tags[pos].Length;
            }
            // Set up Viterbi search graph. State ids are handed out sequentially:
            // start state first, then the per-position states, then the end state.
            int stateId = -1;
            DFSAState <string, int> startState = new DFSAState <string, int>(++stateId, viterbiSearchGraph, 0.0);
            viterbiSearchGraph.SetInitialState(startState);
            DFSAState <string, int>[][] graphStates = new DFSAState <string, int>[length][];
            for (int pos = 0; pos < length; ++pos)
            {
                //System.err.printf("%d states at pos %d\n",tags[pos].length,pos);
                graphStates[pos] = new DFSAState <string, int>[tags[pos].Length];
                for (int product = 0; product < tags[pos].Length; ++product)
                {
                    graphStates[pos][product] = new DFSAState <string, int>(++stateId, viterbiSearchGraph);
                }
            }
            // Accepting state:
            DFSAState <string, int> endState = new DFSAState <string, int>(++stateId, viterbiSearchGraph, 0.0);
            endState.SetAccepting(true);

            int[] tempTags = new int[padLength];
            // Set up product space sizes
            int[] productSizes = new int[padLength];
            int   curProduct   = 1;

            for (int i = 0; i < leftWindow; i++)
            {
                curProduct *= tagNum[i];
            }
            for (int pos = leftWindow; pos < padLength; pos++)
            {
                if (pos > leftWindow + rightWindow)
                {
                    // shift off: drop the position that just left the window
                    curProduct /= tagNum[pos - leftWindow - rightWindow - 1];
                }
                // shift on: include the position that just entered the window
                curProduct *= tagNum[pos];
                productSizes[pos - rightWindow] = curProduct;
            }
            double[][] windowScore = new double[padLength][];
            // Score all of each window's options
            for (int pos = leftWindow; pos < leftWindow + length; pos++)
            {
                windowScore[pos] = new double[productSizes[pos]];
                Arrays.Fill(tempTags, tags[0][0]);
                for (int product = 0; product < productSizes[pos]; product++)
                {
                    // Decode the product number into one tag per window position,
                    // treating it as a mixed-radix number with the current position
                    // as the least significant digit.
                    int p     = product;
                    int shift = 1;
                    for (int curPos = pos; curPos >= pos - leftWindow; curPos--)
                    {
                        tempTags[curPos] = tags[curPos][p % tagNum[curPos]];
                        p /= tagNum[curPos];
                        // curPos starts at pos and only decreases, so this never
                        // fires and shift stays 1.
                        if (curPos > pos)
                        {
                            shift *= tagNum[curPos];
                        }
                    }
                    // Score once per left context: only when the current position
                    // holds its first tag option.
                    if (tempTags[pos] == tags[pos][0])
                    {
                        // get all tags at once
                        double[] scores = ts.ScoresOf(tempTags, pos);
                        // fill in the relevant windowScores
                        for (int t = 0; t < tagNum[pos]; t++)
                        {
                            windowScore[pos][product + t * shift] = scores[t];
                        }
                    }
                }
            }
            // loop over the classification spot
            for (int pos = leftWindow; pos < length + leftWindow; pos++)
            {
                // loop over window product types
                for (int product = 0; product < productSizes[pos]; product++)
                {
                    if (pos == leftWindow)
                    {
                        // all nodes in the first spot link to startState:
                        // NOTE(review): graphStates is indexed here by the padded position
                        // (pos, product) while the branch below uses pos - leftWindow; the
                        // two agree only when leftWindow is 1 - confirm against callers.
                        int curTag = tags[pos][product % tagNum[pos]];
                        //System.err.printf("pos=%d, product=%d, tag=%d score=%.3f\n",pos,product,curTag,windowScore[pos][product]);
                        DFSATransition <string, int> tr = new DFSATransition <string, int>(string.Empty, startState, graphStates[pos][product], classIndex.Get(curTag), string.Empty, -windowScore[pos][product]);
                        startState.AddTransition(tr);
                    }
                    else
                    {
                        int sharedProduct = product / tagNum[pos + rightWindow];
                        int factor        = productSizes[pos] / tagNum[pos + rightWindow];
                        for (int newTagNum = 0; newTagNum < tagNum[pos - leftWindow - 1]; newTagNum++)
                        {
                            int predProduct = newTagNum * factor + sharedProduct;
                            int predTag     = tags[pos - 1][predProduct % tagNum[pos - 1]];
                            int curTag      = tags[pos][product % tagNum[pos]];
                            //log.info("pos: "+pos);
                            //log.info("product: "+product);
                            //System.err.printf("pos=%d-%d, product=%d-%d, tag=%d-%d score=%.3f\n",pos-1,pos,predProduct,product,predTag,curTag,
                            //  windowScore[pos][product]);
                            DFSAState <string, int>      sourceState = graphStates[pos - leftWindow][predTag];
                            // Transitions that would step past the last row of states go to the accepting end state.
                            DFSAState <string, int>      destState   = (pos - leftWindow + 1 == graphStates.Length) ? endState : graphStates[pos - leftWindow + 1][curTag];
                            DFSATransition <string, int> tr          = new DFSATransition <string, int>(string.Empty, sourceState, destState, classIndex.Get(curTag), string.Empty, -windowScore[pos][product]);
                            sourceState.AddTransition(tr);
                        }
                    }
                }
            }
            return(viterbiSearchGraph);
        }