/// <summary>
/// Recursively builds an answer lattice (Chinese words) from a Viterbi search graph
/// of binary predictions.
/// </summary>
/// <remarks>
/// This function does a limited amount of post-processing:
/// preserve white spaces of the input, and not segment between two latin characters or
/// between two digits. Consequently, the probabilities of all paths in answerLattice
/// may not sum to 1 (they do sum to 1 if no post processing applies).
/// </remarks>
/// <param name="tSource">Current node in Viterbi search graph.</param>
/// <param name="aSource">Current node in answer lattice.</param>
/// <param name="answer">Partial word starting at aSource.</param>
/// <param name="nodeId">Currently unused node identifier for answer graph.</param>
/// <param name="pos">Current position in docArray.</param>
/// <param name="cost">Current cost of answer.</param>
/// <param name="stateLinks">
/// Maps nodes of the search graph to nodes in answer lattice
/// (when paths of the search graph are recombined, paths of the answer lattice should be
/// recombined as well, if at word boundary).
/// </param>
/// <param name="answerLattice">Lattice handed to the constructor of every newly created answer state.</param>
/// <param name="docArray">Labels of the input, indexed by <paramref name="pos"/>; supplies the original character and the space-before marker.</param>
private void TagLatticeToAnswerLattice(DFSAState<string, int> tSource, DFSAState<string, int> aSource, StringBuilder answer, MutableInteger nodeId, int pos, double cost, IDictionary<DFSAState<string, int>, DFSAState<string, int>> stateLinks, DFSA<string, int> answerLattice, CoreLabel[] docArray)
{
    // Log messages use composite format items ({0}, {1}, ...): string.Format does not
    // interpret printf-style specifiers such as %s, %d or %n.
    string newLine = System.Environment.NewLine;

    // Add "1" prediction after the end of the sentence, if applicable:
    if (tSource.IsAccepting() && tSource.ContinuingInputs().IsEmpty())
    {
        tSource.AddTransition(new DFSATransition<string, int>(string.Empty, tSource, new DFSAState<string, int>(-1, null), "1", string.Empty, 0));
    }

    // Get current label, character, and prediction:
    CoreLabel curLabel = (pos < docArray.Length) ? docArray[pos] : null;
    string curChr = null;
    string origSpace = null;
    if (curLabel != null)
    {
        curChr = curLabel.Get(typeof(CoreAnnotations.OriginalCharAnnotation));
        System.Diagnostics.Debug.Assert((curChr.Length == 1));
        origSpace = curLabel.Get(typeof(CoreAnnotations.SpaceBeforeAnnotation));
    }

    // Get set of successors in search graph:
    ICollection<string> inputs = tSource.ContinuingInputs();

    // Only keep most probable transition out of initial state:
    string answerConstraint = null;
    if (pos == 0)
    {
        double minCost = double.PositiveInfinity;
        foreach (string predictSpace in inputs)
        {
            DFSATransition<string, int> candidate = tSource.Transition(predictSpace);
            double transitionCost = candidate.Score();
            if (transitionCost < minCost && predictSpace != null)
            {
                logger.Info(string.Format("mincost ({0}): {1:e} -> {2:e}{3}", predictSpace, minCost, transitionCost, newLine));
                minCost = transitionCost;
                answerConstraint = predictSpace;
            }
        }
    }

    // Follow along each transition:
    foreach (string predictSpace_1 in inputs)
    {
        DFSATransition<string, int> transition = tSource.Transition(predictSpace_1);
        DFSAState<string, int> tDest = transition.Target();
        DFSAState<string, int> newASource = aSource;

        // Work on a copy so that sibling transitions each start from the same partial word.
        StringBuilder newAnswer = new StringBuilder(answer.ToString());
        int answerLen = newAnswer.Length;
        // Last character of the partial word, as a one-character string (null if the word is empty).
        string prevChr = (answerLen > 0) ? newAnswer.ToString(answerLen - 1, 1) : null;
        double newCost = cost;

        // Ignore paths starting with zero:
        if (answerConstraint != null && !answerConstraint.Equals(predictSpace_1))
        {
            logger.Info(string.Format("Skipping transition {0} at pos 0.{1}", predictSpace_1, newLine));
            continue;
        }

        // Ignore paths not consistent with input segmentation:
        if (flags.keepAllWhitespaces && "0".Equals(predictSpace_1) && "1".Equals(origSpace))
        {
            logger.Info(string.Format("Skipping non-boundary at pos {0}, since space in the input.{1}", pos, newLine));
            continue;
        }

        // Ignore paths adding segment boundaries between two latin characters, or between two digits:
        // (unless already present in original input)
        if ("1".Equals(predictSpace_1) && "0".Equals(origSpace) && prevChr != null && curChr != null)
        {
            char p = prevChr[0];
            char c = curChr[0];
            if (ChineseStringUtils.IsLetterASCII(p) && ChineseStringUtils.IsLetterASCII(c))
            {
                logger.Info(string.Format("Not hypothesizing a boundary at pos {0}, since between two ASCII letters ({1} and {2}).{3}", pos, prevChr, curChr, newLine));
                continue;
            }
            if (ChineseUtils.IsNumber(p) && ChineseUtils.IsNumber(c))
            {
                logger.Info(string.Format("Not hypothesizing a boundary at pos {0}, since between two numeral characters ({1} and {2}).{3}", pos, prevChr, curChr, newLine));
                continue;
            }
        }

        // If predictSpace==1, create a new transition in answer search graph:
        if ("1".Equals(predictSpace_1) && newAnswer.Length > 0)
        {
            // If answer destination node visited before, create a new edge and leave:
            DFSAState<string, int> aDest;
            if (stateLinks.TryGetValue(tSource, out aDest))
            {
                newASource.AddTransition(new DFSATransition<string, int>(string.Empty, newASource, aDest, newAnswer.ToString(), string.Empty, newCost));
                continue;
            }

            // If answer destination node not visited before, create it + new edge:
            nodeId.IncValue(1);
            aDest = new DFSAState<string, int>(nodeId, answerLattice, 0.0);
            stateLinks[tSource] = aDest;
            newASource.AddTransition(new DFSATransition<string, int>(string.Empty, newASource, aDest, newAnswer.ToString(), string.Empty, newCost));

            // Reached an accepting state:
            if (tSource.IsAccepting())
            {
                aDest.SetAccepting(true);
                continue;
            }

            // Start new answer edge:
            newASource = aDest;
            newAnswer = new StringBuilder();
            newCost = 0.0;
        }

        System.Diagnostics.Debug.Assert((curChr != null));
        newAnswer.Append(curChr);
        newCost += transition.Score();

        // Recurse only while the path stays under the pruning threshold; paths whose
        // current character is an ASCII letter are always followed.
        if (newCost < flags.searchGraphPrune || ChineseStringUtils.IsLetterASCII(curChr[0]))
        {
            TagLatticeToAnswerLattice(tDest, newASource, newAnswer, nodeId, pos + 1, newCost, stateLinks, answerLattice, docArray);
        }
    }
}
/// <summary>
/// Builds a Viterbi search graph for a sequence model: one initial state, one state per
/// (position, possible-value index) pair, and one accepting state, connected by transitions
/// that are labelled via <paramref name="classIndex"/> and scored with the negated window score.
/// </summary>
/// <param name="ts">Sequence model queried for its length, window sizes, possible values per position, and scores.</param>
/// <param name="classIndex">Index used to turn a tag id into the label stored on each transition.</param>
/// <returns>The search graph, with its initial state set.</returns>
public static DFSA<string, int> GetGraph(ISequenceModel ts, IIndex<string> classIndex)
{
    DFSA<string, int> searchGraph = new DFSA<string, int>(null);

    // Sequence and window geometry. The construction below is written for a model
    // without right context, which the assertion checks in debug builds.
    int sequenceLength = ts.Length();
    int left = ts.LeftWindow();
    int right = ts.RightWindow();
    System.Diagnostics.Debug.Assert((right == 0));
    int paddedLength = sequenceLength + left + right;

    // possibleTags[i][j]: i indexes the (padded) position, j the candidate at that position.
    int[][] possibleTags = new int[paddedLength][];
    int[] tagCounts = new int[paddedLength];
    for (int i = 0; i < paddedLength; i++)
    {
        int[] values = ts.GetPossibleValues(i);
        possibleTags[i] = values;
        tagCounts[i] = values.Length;
    }

    // Allocate graph states; ids are handed out sequentially starting at 0:
    // initial state first, then every per-position state, then the accepting state.
    int nextId = 0;
    DFSAState<string, int> initial = new DFSAState<string, int>(nextId++, searchGraph, 0.0);
    searchGraph.SetInitialState(initial);
    DFSAState<string, int>[][] states = new DFSAState[sequenceLength][];
    for (int row = 0; row < sequenceLength; row++)
    {
        int columns = possibleTags[row].Length;
        states[row] = new DFSAState[columns];
        for (int col = 0; col < columns; col++)
        {
            states[row][col] = new DFSAState<string, int>(nextId++, searchGraph);
        }
    }
    DFSAState<string, int> accepting = new DFSAState<string, int>(nextId++, searchGraph, 0.0);
    accepting.SetAccepting(true);

    int[] scratchTags = new int[paddedLength];

    // Size of the tag-product space for the window ending at each position:
    // a running product that drops the count leaving the window and adds the one entering it.
    int[] windowSizes = new int[paddedLength];
    int runningSize = 1;
    for (int i = 0; i < left; i++)
    {
        runningSize *= tagCounts[i];
    }
    for (int pos = left; pos < paddedLength; pos++)
    {
        if (pos > left + right)
        {
            // shift off
            runningSize /= tagCounts[pos - left - right - 1];
        }
        // shift on
        runningSize *= tagCounts[pos];
        windowSizes[pos - right] = runningSize;
    }

    // Score every tag combination of every window.
    double[][] windowScores = new double[paddedLength][];
    for (int pos = left; pos < left + sequenceLength; pos++)
    {
        int size = windowSizes[pos];
        windowScores[pos] = new double[size];
        Arrays.Fill(scratchTags, possibleTags[0][0]);
        for (int product = 0; product < size; product++)
        {
            // Decode the product index into one tag per window position (mixed radix,
            // least-significant digit at the current position).
            int remainder = product;
            int stride = 1;
            for (int cur = pos; cur >= pos - left; cur--)
            {
                int count = tagCounts[cur];
                scratchTags[cur] = possibleTags[cur][remainder % count];
                remainder /= count;
                if (cur > pos)
                {
                    stride *= count;
                }
            }

            // Only query the model once per context: when the current position holds its
            // first candidate, fetch the scores of all candidates at once.
            if (scratchTags[pos] != possibleTags[pos][0])
            {
                continue;
            }
            double[] tagScores = ts.ScoresOf(scratchTags, pos);
            for (int t = 0; t < tagCounts[pos]; t++)
            {
                windowScores[pos][product + t * stride] = tagScores[t];
            }
        }
    }

    // Wire up transitions, position by position and product by product.
    for (int pos = left; pos < sequenceLength + left; pos++)
    {
        for (int product = 0; product < windowSizes[pos]; product++)
        {
            if (pos == left)
            {
                // All nodes in the first spot link to the initial state.
                int firstTag = possibleTags[pos][product % tagCounts[pos]];
                DFSATransition<string, int> opening = new DFSATransition<string, int>(string.Empty, initial, states[pos][product], classIndex.Get(firstTag), string.Empty, -windowScores[pos][product]);
                initial.AddTransition(opening);
                continue;
            }

            int shared = product / tagCounts[pos + right];
            int factor = windowSizes[pos] / tagCounts[pos + right];
            int row = pos - left;
            int predecessorCount = tagCounts[pos - left - 1];
            for (int k = 0; k < predecessorCount; k++)
            {
                int predProduct = k * factor + shared;
                int predTag = possibleTags[pos - 1][predProduct % tagCounts[pos - 1]];
                int curTag = possibleTags[pos][product % tagCounts[pos]];
                DFSAState<string, int> source = states[row][predTag];
                // Past the last row of states, transitions lead to the accepting state.
                DFSAState<string, int> target = (row + 1 == states.Length) ? accepting : states[row + 1][curTag];
                DFSATransition<string, int> edge = new DFSATransition<string, int>(string.Empty, source, target, classIndex.Get(curTag), string.Empty, -windowScores[pos][product]);
                source.AddTransition(edge);
            }
        }
    }

    return searchGraph;
}