/// <summary>
/// Recursively builds an answer lattice (Chinese words) from a Viterbi search graph
/// of binary predictions.
/// </summary>
/// <remarks>
/// This function does a limited amount of post-processing: it preserves white spaces
/// of the input, and does not segment between two latin characters or between two
/// digits. Consequently, the probabilities of all paths in answerLattice may not sum
/// to 1 (they do sum to 1 if no post processing applies).
/// </remarks>
/// <param name="tSource">Current node in Viterbi search graph.</param>
/// <param name="aSource">Current node in answer lattice.</param>
/// <param name="answer">Partial word starting at aSource.</param>
/// <param name="nodeId">
/// Node identifier counter for the answer graph; incremented before each new
/// answer-lattice node is created.
/// </param>
/// <param name="pos">Current position in docArray.</param>
/// <param name="cost">Current cost of answer.</param>
/// <param name="stateLinks">
/// Maps nodes of the search graph to nodes in answer lattice
/// (when paths of the search graph are recombined, paths of the answer lattice should be
/// recombined as well, if at word boundary).
/// </param>
/// <param name="answerLattice">Answer lattice that newly created answer nodes are attached to.</param>
/// <param name="docArray">
/// Labels of the input, indexed by <paramref name="pos"/>; positions at or past the end
/// of the array are treated as having no current character.
/// </param>
private void TagLatticeToAnswerLattice(DFSAState<string, int> tSource, DFSAState<string, int> aSource, StringBuilder answer, MutableInteger nodeId, int pos, double cost, IDictionary<DFSAState<string, int>, DFSAState<string, int>> stateLinks, DFSA<string, int> answerLattice, CoreLabel[] docArray)
{
    // Add "1" prediction after the end of the sentence, if applicable:
    if (tSource.IsAccepting() && tSource.ContinuingInputs().IsEmpty())
    {
        tSource.AddTransition(new DFSATransition<string, int>(string.Empty, tSource, new DFSAState<string, int>(-1, null), "1", string.Empty, 0));
    }

    // Get current label, character, and prediction:
    CoreLabel curLabel = (pos < docArray.Length) ? docArray[pos] : null;
    string curChr = null;
    string origSpace = null;
    if (curLabel != null)
    {
        curChr = curLabel.Get(typeof(CoreAnnotations.OriginalCharAnnotation));
        System.Diagnostics.Debug.Assert((curChr.Length == 1));
        origSpace = curLabel.Get(typeof(CoreAnnotations.SpaceBeforeAnnotation));
    }

    // Get set of successors in search graph:
    ICollection<string> inputs = tSource.ContinuingInputs();

    // Only keep most probable transition out of initial state:
    string answerConstraint = null;
    if (pos == 0)
    {
        double minCost = double.PositiveInfinity;
        foreach (string predictSpace in inputs)
        {
            DFSATransition<string, int> candidate = tSource.Transition(predictSpace);
            double transitionCost = candidate.Score();
            if (transitionCost < minCost && predictSpace != null)
            {
                // .NET composite format items; printf-style "%s"/"%e" would be emitted literally.
                logger.Info(string.Format("mincost ({0}): {1:e} -> {2:e}{3}", predictSpace, minCost, transitionCost, System.Environment.NewLine));
                minCost = transitionCost;
                answerConstraint = predictSpace;
            }
        }
    }

    // Follow along each transition:
    foreach (string predictSpace_1 in inputs)
    {
        DFSATransition<string, int> transition = tSource.Transition(predictSpace_1);
        DFSAState<string, int> tDest = transition.Target();
        DFSAState<string, int> newASource = aSource;
        StringBuilder newAnswer = new StringBuilder(answer.ToString());
        int answerLen = newAnswer.Length;
        // Last character of the partial word, as a one-character string (null if the word is empty).
        string prevChr = (answerLen > 0) ? newAnswer.ToString(answerLen - 1, 1) : null;
        double newCost = cost;

        // Ignore paths starting with zero:
        if (answerConstraint != null && !answerConstraint.Equals(predictSpace_1))
        {
            logger.Info(string.Format("Skipping transition {0} at pos 0.{1}", predictSpace_1, System.Environment.NewLine));
            continue;
        }

        // Ignore paths not consistent with input segmentation:
        if (flags.keepAllWhitespaces && "0".Equals(predictSpace_1) && "1".Equals(origSpace))
        {
            logger.Info(string.Format("Skipping non-boundary at pos {0}, since space in the input.{1}", pos, System.Environment.NewLine));
            continue;
        }

        // Ignore paths adding segment boundaries between two latin characters, or between two digits
        // (unless already present in original input):
        if ("1".Equals(predictSpace_1) && "0".Equals(origSpace) && prevChr != null && curChr != null)
        {
            char p = prevChr[0];
            char c = curChr[0];
            if (ChineseStringUtils.IsLetterASCII(p) && ChineseStringUtils.IsLetterASCII(c))
            {
                logger.Info(string.Format("Not hypothesizing a boundary at pos {0}, since between two ASCII letters ({1} and {2}).{3}", pos, prevChr, curChr, System.Environment.NewLine));
                continue;
            }
            if (ChineseUtils.IsNumber(p) && ChineseUtils.IsNumber(c))
            {
                logger.Info(string.Format("Not hypothesizing a boundary at pos {0}, since between two numeral characters ({1} and {2}).{3}", pos, prevChr, curChr, System.Environment.NewLine));
                continue;
            }
        }

        // If predictSpace==1, create a new transition in answer search graph:
        if ("1".Equals(predictSpace_1) && newAnswer.Length > 0)
        {
            // If answer destination node visited before, create a new edge and leave.
            // TryGetValue performs the key lookup once (IDictionary.Contains expects a key/value pair).
            DFSAState<string, int> aDest;
            if (stateLinks.TryGetValue(tSource, out aDest))
            {
                newASource.AddTransition(new DFSATransition<string, int>(string.Empty, newASource, aDest, newAnswer.ToString(), string.Empty, newCost));
                continue;
            }

            // If answer destination node not visited before, create it + new edge:
            nodeId.IncValue(1);
            aDest = new DFSAState<string, int>(nodeId, answerLattice, 0.0);
            stateLinks[tSource] = aDest;
            newASource.AddTransition(new DFSATransition<string, int>(string.Empty, newASource, aDest, newAnswer.ToString(), string.Empty, newCost));

            // Reached an accepting state:
            if (tSource.IsAccepting())
            {
                aDest.SetAccepting(true);
                continue;
            }

            // Start new answer edge:
            newASource = aDest;
            newAnswer = new StringBuilder();
            newCost = 0.0;
        }

        System.Diagnostics.Debug.Assert((curChr != null));
        newAnswer.Append(curChr);
        newCost += transition.Score();

        // Recurse only while the path is under the pruning threshold, or when the current
        // character is an ASCII letter (such paths are followed regardless of cost).
        if (newCost < flags.searchGraphPrune || ChineseStringUtils.IsLetterASCII(curChr[0]))
        {
            TagLatticeToAnswerLattice(tDest, newASource, newAnswer, nodeId, pos + 1, newCost, stateLinks, answerLattice, docArray);
        }
    }
}