/// <summary>
/// Fills the word lattice with every segmentation of the input that can be
/// assembled from words present in the lexicon.
/// </summary>
/// <remarks>
/// One state is created per character boundary (0 through the length of
/// <paramref name="s"/>); state 0 becomes the initial state and the last state
/// is marked accepting. Each substring contained in <c>words</c> yields an edge
/// of cost 1, and a single character that is not in <c>words</c> still yields
/// an edge, at cost 100. This function must be run prior to running
/// maxMatchSegmentation.
/// </remarks>
/// <param name="s">The string to build the segmentation lattice for.</param>
private void BuildSegmentationLattice(string s)
{
    edgesNb = 0;
    len = s.Length;

    // Initialize word lattice with one state per character boundary.
    states = new List<DFSAState<Word, int>>();
    lattice = new DFSA<Word, int>("wordLattice");
    int boundary = 0;
    while (boundary <= s.Length)
    {
        states.Add(new DFSAState<Word, int>(boundary, lattice));
        boundary++;
    }

    // Start at the first boundary, accept at the last one.
    lattice.SetInitialState(states[0]);
    states[len].SetAccepting(true);

    // Enumerate every substring; for a given start the longest candidate is tried first.
    for (int begin = 0; begin < len; begin++)
    {
        for (int finish = len; finish > begin; finish--)
        {
            string candidate = Sharpen.Runtime.Substring(s, begin, finish);
            System.Diagnostics.Debug.Assert(candidate.Length > 0);

            bool singleChar = (finish - begin == 1);
            bool inLexicon = words.Contains(candidate);
            if (!inLexicon && !singleChar)
            {
                continue;
            }

            // Lexicon words are cheap; unknown single characters are kept but heavily penalized.
            double weight;
            if (inLexicon)
            {
                weight = 1;
            }
            else
            {
                weight = 100;
            }

            DFSATransition<Word, int> edge = new DFSATransition<Word, int>(
                null, states[begin], states[finish], new Word(candidate), null, weight);
            states[begin].AddTransition(edge);
            edgesNb++;
        }
    }
}
/// <summary>
/// Converts a tag lattice into an answer lattice and prints it to
/// <paramref name="out"/>.
/// </summary>
/// <remarks>
/// A new answer lattice with a single initial state is created,
/// <c>TagLatticeToAnswerLattice</c> populates it starting from the initial
/// state of <paramref name="tagLattice"/>, and the result is written with
/// <c>PrintAttFsmFormat</c>.
/// </remarks>
/// <param name="tagLattice">Lattice to convert; traversal starts at its initial state.</param>
/// <param name="doc">Document tokens; copied into an array that is handed to the conversion.</param>
/// <param name="out">Writer that receives the printed answer lattice.</param>
/// <exception cref="System.Exception">
/// Wraps an <c>IOException</c> raised while printing; the original exception
/// is available as <c>InnerException</c>.
/// </exception>
public virtual void PrintLattice(DFSA<string, int> tagLattice, IList<CoreLabel> doc, PrintWriter @out)
{
    CoreLabel[] docArray = Sharpen.Collections.ToArray(doc, new CoreLabel[doc.Count]);

    // Create answer lattice:
    MutableInteger nodeId = new MutableInteger(0);
    DFSA<string, int> answerLattice = new DFSA<string, int>(null);
    DFSAState<string, int> aInitState = new DFSAState<string, int>(nodeId, answerLattice);
    answerLattice.SetInitialState(aInitState);
    IDictionary<DFSAState<string, int>, DFSAState<string, int>> stateLinks = Generics.NewHashMap();

    // Convert binary lattice into word lattice:
    TagLatticeToAnswerLattice(
        tagLattice.InitialState(), aInitState, new StringBuilder(), nodeId, 0, 0.0,
        stateLinks, answerLattice, docArray);

    try
    {
        answerLattice.PrintAttFsmFormat(@out);
    }
    catch (IOException e)
    {
        // The thrown type stays Exception so existing callers are unaffected; the
        // (message, inner) constructor is used because Exception has no constructor
        // that accepts only another exception, and it keeps the cause attached.
        throw new Exception(e.Message, e);
    }
}
/// <summary>
/// Builds a search graph from a sequence model: states stand for the possible
/// tags at each position and transitions carry the negated window scores.
/// </summary>
/// <remarks>
/// <para>
/// State ids are assigned sequentially: the start state first, then one state
/// per entry of <c>GetPossibleValues(pos)</c> for positions 0 to length - 1,
/// and finally a single accepting end state.
/// </para>
/// <para>
/// Each transition is labelled with <c>classIndex.Get(tag)</c> for the tag of
/// its classification position and costs <c>-windowScore</c>. A right window
/// of 0 is asserted (debug builds only).
/// </para>
/// <para>
/// NOTE(review): the first classification position links the start state to
/// <c>graphStates[pos][product]</c>, while later positions index
/// <c>graphStates</c> by <c>pos - leftWindow</c> and by tag value. This is
/// preserved from the original code; confirm it is intended for left windows
/// other than 1.
/// </para>
/// </remarks>
/// <param name="ts">
/// Sequence model supplying the length, window sizes, possible values per
/// position, and the scores of each window.
/// </param>
/// <param name="classIndex">Index used to turn a tag id into the transition's output label.</param>
/// <returns>The constructed search graph.</returns>
public static DFSA<string, int> GetGraph(ISequenceModel ts, IIndex<string> classIndex)
{
    DFSA<string, int> viterbiSearchGraph = new DFSA<string, int>(null);

    // Set up tag options
    int length = ts.Length();
    int leftWindow = ts.LeftWindow();
    int rightWindow = ts.RightWindow();
    System.Diagnostics.Debug.Assert(rightWindow == 0);
    int padLength = length + leftWindow + rightWindow;

    // NOTE: tags[i][j] : i is index into pos, and j into product
    int[][] tags = new int[padLength][];
    int[] tagNum = new int[padLength];
    for (int pos = 0; pos < padLength; pos++)
    {
        tags[pos] = ts.GetPossibleValues(pos);
        tagNum[pos] = tags[pos].Length;
    }

    // Set up Viterbi search graph. The graph was created above, so no null guard is needed.
    int stateId = -1;
    DFSAState<string, int> startState = new DFSAState<string, int>(++stateId, viterbiSearchGraph, 0.0);
    viterbiSearchGraph.SetInitialState(startState);
    DFSAState<string, int>[][] graphStates = new DFSAState<string, int>[length][];
    for (int pos = 0; pos < length; ++pos)
    {
        graphStates[pos] = new DFSAState<string, int>[tags[pos].Length];
        for (int product = 0; product < tags[pos].Length; ++product)
        {
            graphStates[pos][product] = new DFSAState<string, int>(++stateId, viterbiSearchGraph);
        }
    }

    // Accepting state:
    DFSAState<string, int> endState = new DFSAState<string, int>(++stateId, viterbiSearchGraph, 0.0);
    endState.SetAccepting(true);

    // Set up product space sizes: productSizes[pos] is the number of tag
    // combinations inside the window that ends at pos.
    int[] productSizes = new int[padLength];
    int curProduct = 1;
    for (int i = 0; i < leftWindow; i++)
    {
        curProduct *= tagNum[i];
    }
    for (int pos = leftWindow; pos < padLength; pos++)
    {
        if (pos > leftWindow + rightWindow)
        {
            // shift off
            curProduct /= tagNum[pos - leftWindow - rightWindow - 1];
        }
        // shift on
        curProduct *= tagNum[pos];
        productSizes[pos - rightWindow] = curProduct;
    }

    // Score all of each window's options
    double[][] windowScore = new double[padLength][];
    int[] tempTags = new int[padLength];
    for (int pos = leftWindow; pos < leftWindow + length; pos++)
    {
        windowScore[pos] = new double[productSizes[pos]];
        Arrays.Fill(tempTags, tags[0][0]);
        for (int product = 0; product < productSizes[pos]; product++)
        {
            // Decode the product into one tag per window position; the current
            // position is the least-significant digit.
            int p = product;
            for (int curPos = pos; curPos >= pos - leftWindow; curPos--)
            {
                tempTags[curPos] = tags[curPos][p % tagNum[curPos]];
                p /= tagNum[curPos];
            }

            if (tempTags[pos] == tags[pos][0])
            {
                // get all tags at once
                double[] scores = ts.ScoresOf(tempTags, pos);

                // Fill in the relevant windowScores. The decode loop never visits a
                // position to the right of pos, so the stride between the scores of
                // consecutive tags is always 1.
                for (int t = 0; t < tagNum[pos]; t++)
                {
                    windowScore[pos][product + t] = scores[t];
                }
            }
        }
    }

    // loop over the classification spot
    for (int pos = leftWindow; pos < length + leftWindow; pos++)
    {
        // loop over window product types
        for (int product = 0; product < productSizes[pos]; product++)
        {
            int curTag = tags[pos][product % tagNum[pos]];
            double cost = -windowScore[pos][product];

            if (pos == leftWindow)
            {
                // all nodes in the first spot link to startState:
                DFSATransition<string, int> tr = new DFSATransition<string, int>(
                    string.Empty, startState, graphStates[pos][product],
                    classIndex.Get(curTag), string.Empty, cost);
                startState.AddTransition(tr);
            }
            else
            {
                int sharedProduct = product / tagNum[pos + rightWindow];
                int factor = productSizes[pos] / tagNum[pos + rightWindow];
                for (int newTagNum = 0; newTagNum < tagNum[pos - leftWindow - 1]; newTagNum++)
                {
                    int predProduct = newTagNum * factor + sharedProduct;
                    int predTag = tags[pos - 1][predProduct % tagNum[pos - 1]];

                    DFSAState<string, int> sourceState = graphStates[pos - leftWindow][predTag];
                    // The last classification position feeds the accepting end state.
                    DFSAState<string, int> destState = (pos - leftWindow + 1 == graphStates.Length)
                        ? endState
                        : graphStates[pos - leftWindow + 1][curTag];

                    DFSATransition<string, int> tr = new DFSATransition<string, int>(
                        string.Empty, sourceState, destState,
                        classIndex.Get(curTag), string.Empty, cost);
                    sourceState.AddTransition(tr);
                }
            }
        }
    }

    return viterbiSearchGraph;
}