/// <summary>
/// Scores a word/tag pairing as the sum of the log of <c>ProbWordTag</c> and the
/// log of <c>ProbMorphTag</c> for the reduced morphological analysis of the word.
/// </summary>
/// <param name="iTW">The word/tag pair (as integer ids) to score</param>
/// <param name="loc">Position argument forwarded to <c>ProbWordTag</c></param>
/// <param name="word">The word string</param>
/// <param name="featureSpec">Feature string split together with the word into lemma and morphological tag</param>
/// <returns>
/// 0 for the boundary word paired with the boundary tag; otherwise the combined
/// log score, or negative infinity when that score is not greater than -100.
/// </returns>
public override float Score(IntTaggedWord iTW, int loc, string word, string featureSpec)
{
    int wordId = iTW.Word();
    int tagId = iTW.Tag();

    // Force the 1-best path to go through the boundary symbol (deterministic tagging).
    int boundaryWordId = wordIndex.IndexOf(LexiconConstants.Boundary);
    int boundaryTagId = tagIndex.IndexOf(LexiconConstants.BoundaryTag);
    if (wordId == boundaryWordId && tagId == boundaryTagId)
    {
        return 0.0f;
    }

    // Morphological features. The tag string and lemma id are not used below;
    // the lookups are retained so the sequence of calls is unchanged.
    string tagString = tagIndex.Get(iTW.Tag());
    Pair<string, string> lemmaAndMorph = MorphoFeatureSpecification.SplitMorphString(word, featureSpec);
    string lemma = lemmaAndMorph.First();
    int lemmaId = wordIndex.IndexOf(lemma);
    string richMorphTag = lemmaAndMorph.Second();

    string reducedMorphTag = morphoSpec.StrToFeatures(richMorphTag).ToString().Trim();
    if (reducedMorphTag.Length == 0)
    {
        reducedMorphTag = NoMorphAnalysis;
    }
    int morphId = morphIndex.AddToIndex(reducedMorphTag);

    // Score the factors. The lemma factor is currently a constant 0.
    double wordGivenTag = Math.Log(ProbWordTag(word, loc, wordId, tagId));
    double lemmaGivenTag = 0.0;
    double morphGivenTag = Math.Log(ProbMorphTag(tagId, morphId));
    double total = wordGivenTag + lemmaGivenTag + morphGivenTag;

    // Filter low probability taggings.
    if (total > -100.0)
    {
        return (float)total;
    }
    return float.NegativeInfinity;
}
/// <summary>Creates a BinaryRule from String s, assuming it was created using toString().</summary>
/// <param name="s">
/// A String in which the binary rule is represented as parent,
/// left-child, right-child, score, with the items quoted as needed
/// </param>
/// <param name="index">Index used to convert String names to ints</param>
public BinaryRule(string s, IIndex<string> index)
{
    string[] fields = StringUtils.SplitOnCharWithQuoting(s, ' ', '\"', '\\');
    // fields[1] is deliberately not read; presumably it is the separator token
    // written between parent and children -- TODO confirm against ToString().
    this.parent = index.AddToIndex(fields[0]);
    this.leftChild = index.AddToIndex(fields[2]);
    this.rightChild = index.AddToIndex(fields[3]);
    // System.Single has no ParseFloat member. Parse with the invariant culture so the
    // serialized score is read the same way regardless of the machine's locale.
    this.score = float.Parse(fields[4], System.Globalization.CultureInfo.InvariantCulture);
}
/// <summary>Setup the constrained label sets and free bookkeeping resources.</summary>
/// <param name="threshold">Count threshold passed to <c>Counters.RetainAbove</c> to prune the observation counts</param>
/// <param name="labelIndex">Index used to look up the integer id of each allowed label</param>
/// <exception cref="System.InvalidOperationException">If the label dictionary has already been built</exception>
public virtual void Lock(int threshold, IIndex<string> labelIndex)
{
    if (labelDictionary != null)
    {
        throw new InvalidOperationException("Label dictionary is already locked");
    }
    log.Info("Label dictionary enabled");
    // TextWriter has no Printf and does not understand Java "%d%n" format strings.
    System.Console.Error.WriteLine("#observations: " + (int)observationCounts.TotalCount());
    Counters.RetainAbove(observationCounts, threshold);

    ICollection<string> constrainedObservations = observationCounts.KeySet();
    labelDictionary = new int[constrainedObservations.Count][];
    observationIndex = new HashIndex<string>(constrainedObservations.Count);
    foreach (string observation in constrainedObservations)
    {
        int i = observationIndex.AddToIndex(observation);
        System.Diagnostics.Debug.Assert(i < labelDictionary.Length);
        ICollection<string> allowedLabels = observedLabels[observation];
        labelDictionary[i] = new int[allowedLabels.Count];
        int j = 0;
        foreach (string label in allowedLabels)
        {
            labelDictionary[i][j++] = labelIndex.IndexOf(label);
        }
    }
    observationIndex.Lock();
    System.Console.Error.WriteLine("#constraints: " + labelDictionary.Length);

    // Free bookkeeping data structures
    observationCounts = null;
    observedLabels = null;
}
/// <summary>
/// Obtains the id of <paramref name="word"/> from <c>wordIndex.AddToIndex</c>, calls
/// <c>EnsureProbs</c> for that id, and returns the argmax of <c>logProbs</c>.
/// </summary>
/// <param name="word">The word to tag</param>
/// <returns>The key returned by <c>Counters.Argmax(logProbs)</c></returns>
private string GetTag(string word)
{
    int wordId = wordIndex.AddToIndex(word);
    EnsureProbs(wordId, false);
    string bestTag = Counters.Argmax(logProbs);
    return bestTag;
}
/// <summary>
/// Records the rule rooted at <paramref name="lt"/>: a unary rule when the node has
/// exactly one child, otherwise a binary rule built from its first two children.
/// The parent symbol count and the rule count are both incremented by
/// <paramref name="weight"/>, and the rule is added to the matching rule collection.
/// </summary>
/// <param name="lt">The internal tree node to tally</param>
/// <param name="weight">Amount added to the symbol and rule counters</param>
protected internal override void TallyInternalNode(Tree lt, double weight)
{
    bool hasSingleChild = lt.Children().Length == 1;

    // Ids are requested in parent, first-child, (second-child) order.
    int parentId = stateIndex.AddToIndex(lt.Label().Value());
    int firstChildId = stateIndex.AddToIndex(lt.Children()[0].Label().Value());

    if (hasSingleChild)
    {
        UnaryRule unary = new UnaryRule(parentId, firstChildId);
        symbolCounter.IncrementCount(stateIndex.Get(unary.parent), weight);
        unaryRuleCounter.IncrementCount(unary, weight);
        unaryRules.Add(unary);
        return;
    }

    int secondChildId = stateIndex.AddToIndex(lt.Children()[1].Label().Value());
    BinaryRule binary = new BinaryRule(parentId, firstChildId, secondChildId);
    symbolCounter.IncrementCount(stateIndex.Get(binary.parent), weight);
    binaryRuleCounter.IncrementCount(binary, weight);
    binaryRules.Add(binary);
}
/// <summary>
/// Builds an <c>MLEDependencyGrammar</c> and adds every dependency in
/// <c>dependencyCounter</c> to it together with its count. The unknown-word
/// symbol is added to <c>wordIndex</c> before the grammar is constructed.
/// </summary>
/// <returns>The populated dependency grammar</returns>
public override IDependencyGrammar FormResult()
{
    wordIndex.AddToIndex(LexiconConstants.UnknownWord);

    MLEDependencyGrammar grammar = new MLEDependencyGrammar(
        tlpParams,
        directional,
        useDistance,
        useCoarseDistance,
        basicCategoryTagsInDependencyGrammar,
        op,
        wordIndex,
        tagIndex);

    foreach (IntDependency observed in dependencyCounter.KeySet())
    {
        grammar.AddRule(observed, dependencyCounter.GetCount(observed));
    }

    return grammar;
}
/// <summary>
/// Fills <c>tagsToBaseTags</c> so that entry <c>i</c> holds the id that
/// <c>tagIndex.AddToIndex</c> returns for the basic category of tag <c>i</c>.
/// Only the tags present when the method starts are visited.
/// </summary>
/// <param name="tlp">Language pack used to compute the basic category of each tag</param>
private void PopulateTagsToBaseTags(ITreebankLanguagePack tlp)
{
    // Size is read once up front; AddToIndex below is called on the same index.
    int tagCount = tagIndex.Size();
    tagsToBaseTags = new int[tagCount];

    int tagId = 0;
    while (tagId < tagCount)
    {
        string baseTag = tlp.BasicCategory(tagIndex.Get(tagId));
        tagsToBaseTags[tagId] = tagIndex.AddToIndex(baseTag);
        tagId++;
    }
}
/// <summary>
/// Checks construction of a <c>HashIndex</c> from a list containing a duplicate,
/// equality of two indices built from the same list, and that the unmodifiable
/// view answers lookups, returns -1 from <c>AddToIndex</c> for a new item, and
/// throws <c>NotSupportedException</c> from <c>Unlock</c>.
/// </summary>
public virtual void TestUnmodifiableViewEtc()
{
    IList<string> list = new List<string>();
    list.Add("A");
    list.Add("B");
    list.Add("A");
    list.Add("C");

    HashIndex<string> index4 = new HashIndex<string>(list);
    HashIndex<string> index5 = new HashIndex<string>();
    Sharpen.Collections.AddAll(index5, list);
    // NUnit's argument order is (expected, actual, message); the message-first
    // order is the JUnit convention and does not match any NUnit overload.
    NUnit.Framework.Assert.AreEqual(index4, index5, "Equality failure");

    index5.AddToIndex("D");
    index5.AddToIndex("E");
    index5.IndexOf("F");
    Sharpen.Collections.AddAll(index5, list);
    NUnit.Framework.Assert.AreEqual(5, index5.Count);
    NUnit.Framework.Assert.AreEqual(3, index4.Count);
    NUnit.Framework.Assert.IsTrue(index4.Contains("A"));
    NUnit.Framework.Assert.AreEqual(0, index4.IndexOf("A"));
    NUnit.Framework.Assert.AreEqual(1, index4.IndexOf("B"));
    NUnit.Framework.Assert.AreEqual(2, index4.IndexOf("C"));
    NUnit.Framework.Assert.AreEqual("A", index4.Get(0));

    IIndex<string> index4u = index4.UnmodifiableView();
    NUnit.Framework.Assert.AreEqual(3, index4u.Size());
    NUnit.Framework.Assert.IsTrue(index4u.Contains("A"));
    NUnit.Framework.Assert.AreEqual(0, index4u.IndexOf("A"));
    NUnit.Framework.Assert.AreEqual(1, index4u.IndexOf("B"));
    NUnit.Framework.Assert.AreEqual(2, index4u.IndexOf("C"));
    NUnit.Framework.Assert.AreEqual("A", index4u.Get(0));
    NUnit.Framework.Assert.AreEqual(-1, index4u.AddToIndex("D"));

    // Unlock must be rejected on the unmodifiable view. The assertion sits after the
    // try/catch (not in a finally) so an unexpected exception type surfaces as itself
    // instead of being replaced by an assertion failure.
    bool unlockRejected = false;
    try
    {
        index4u.Unlock();
    }
    catch (NotSupportedException)
    {
        unlockRejected = true;
    }
    NUnit.Framework.Assert.IsTrue(unlockRejected);
}
/// <summary>
/// Maps a tag id to the id of its projected tag in <c>smoothTPIndex</c>.
/// <c>smoothTPIndex</c> is created from <c>tagIndex</c> on first use.
/// </summary>
/// <param name="tag">Tag id; negative values are returned unchanged</param>
/// <returns>
/// The id <c>smoothTPIndex.AddToIndex</c> returns for <c>TpPrefix</c> followed by the
/// projection of the tag string, cast to <c>short</c>
/// </returns>
private short TagProject(short tag)
{
    // The index is initialized before the negative-tag check, as a side effect of any call.
    if (smoothTPIndex == null)
    {
        smoothTPIndex = new HashIndex<string>(tagIndex);
    }

    if (tag < 0)
    {
        return tag;
    }

    string projectedName = TpPrefix + smoothTP.Project(smoothTPIndex.Get(tag));
    int projectedId = smoothTPIndex.AddToIndex(projectedName);
    return (short)projectedId;
}
/// <summary>
/// Creates an IntTaggedWord from a word string and a tag string. The special
/// strings <c>Any</c> and <c>Stop</c> map to their reserved ids; any other string
/// is converted with <c>AddToIndex</c> on the corresponding index.
/// </summary>
/// <param name="wordString">The word, or one of the special strings</param>
/// <param name="tagString">The tag, or one of the special strings</param>
/// <param name="wordIndex">Index used to convert ordinary word strings to ints</param>
/// <param name="tagIndex">Index used to convert ordinary tag strings to ints</param>
public IntTaggedWord(string wordString, string tagString, IIndex<string> wordIndex, IIndex<string> tagIndex)
{
    if (wordString == Any)
    {
        word = AnyWordInt;
    }
    else if (wordString == Stop)
    {
        word = StopWordInt;
    }
    else
    {
        word = wordIndex.AddToIndex(wordString);
    }

    if (tagString == Any)
    {
        tag = (short)AnyTagInt;
    }
    else if (tagString == Stop)
    {
        tag = (short)StopTagInt;
    }
    else
    {
        tag = (short)tagIndex.AddToIndex(tagString);
    }
}
// i.e., return -1
/// <summary>
/// Returns the position of <paramref name="o"/>, consulting the backing index first.
/// Items not in the backing index are looked up in the spillover index when this
/// index is locked and added to it otherwise; non-negative spillover positions
/// are offset by <c>backingIndexSize</c>.
/// </summary>
/// <param name="o">The item to look up or add</param>
/// <returns>
/// The item's position, or the negative value reported by the spillover index
/// when the item is not found there
/// </returns>
public virtual int AddToIndex(E o)
{
    int backingPosition = backingIndex.IndexOf(o);
    if (backingPosition >= 0)
    {
        return backingPosition;
    }

    int spilloverPosition = locked
        ? spilloverIndex.IndexOf(o)
        : spilloverIndex.AddToIndex(o);

    return spilloverPosition >= 0
        ? spilloverPosition + backingIndexSize
        : spilloverPosition;
}
/// <summary>Returns the possible POS taggings for a word.</summary>
/// <param name="word">
/// The word as a string; it is converted to an integer id with
/// <c>wordIndex.AddToIndex</c> before delegating to the integer overload.
/// </param>
/// <param name="loc">
/// The position of the word in the sentence (counting from 0).
/// <i>Implementation note: The BaseLexicon class doesn't actually
/// make use of this position information.</i>
/// </param>
/// <returns>
/// An iterator over IntTaggedWords, which pair the word with possible
/// taggings as integer pairs. (Each can be thought of as a
/// <code>tag -&gt; word</code> rule.)
/// </returns>
public virtual IEnumerator<IntTaggedWord> RuleIteratorByWord(string word, int loc)
{
    int wordId = wordIndex.AddToIndex(word);
    return RuleIteratorByWord(wordId, loc, null);
}
/// <summary>Passes <paramref name="tag"/> to <c>index.AddToIndex</c>.</summary>
/// <param name="tag">The tag string</param>
/// <returns>The int returned by <c>index.AddToIndex</c></returns>
protected internal virtual int Add(string tag)
{
    int tagId = index.AddToIndex(tag);
    return tagId;
}
/// <summary>Passes <paramref name="h"/> to <c>idx.AddToIndex</c>.</summary>
/// <param name="h">The history</param>
/// <returns>The int returned by <c>idx.AddToIndex</c></returns>
internal virtual int Add(History h)
{
    int historyId = idx.AddToIndex(h);
    return historyId;
}
/// <summary>
/// Renumbers the given rules against a fresh state index, adds the rules encoded by
/// the arcs of each transducer graph, and packs everything into a unary and a
/// binary grammar. When <c>outputType</c> is <c>RawCounts</c> the rule scores are
/// treated as counts and converted to log relative frequencies per parent symbol.
/// </summary>
/// <param name="graphs">the TransducerGraph objects whose arcs are turned into rules</param>
/// <param name="unaryRules">the UnaryRule objects to include; renumbered in place and extended with new rules</param>
/// <param name="binaryRules">the BinaryRule objects to include; renumbered in place and extended with new rules</param>
/// <returns>a new Pair of UnaryGrammar, BinaryGrammar</returns>
protected internal virtual Pair<UnaryGrammar, BinaryGrammar> ConvertGraphsToGrammar(ICollection<TransducerGraph> graphs, ICollection<UnaryRule> unaryRules, ICollection<BinaryRule> binaryRules)
{
    // Pass 1: renumber every existing rule against the new state index.
    newStateIndex = new HashIndex<string>();
    foreach (UnaryRule existingUnary in unaryRules)
    {
        existingUnary.parent = newStateIndex.AddToIndex(stateIndex.Get(existingUnary.parent));
        existingUnary.child = newStateIndex.AddToIndex(stateIndex.Get(existingUnary.child));
    }
    foreach (BinaryRule existingBinary in binaryRules)
    {
        existingBinary.parent = newStateIndex.AddToIndex(stateIndex.Get(existingBinary.parent));
        existingBinary.leftChild = newStateIndex.AddToIndex(stateIndex.Get(existingBinary.leftChild));
        existingBinary.rightChild = newStateIndex.AddToIndex(stateIndex.Get(existingBinary.rightChild));
    }

    // Pass 2: walk the graphs and turn each arc into a rule.
    foreach (TransducerGraph graph in graphs)
    {
        object startNode = graph.GetStartNode();
        foreach (TransducerGraph.Arc arc in graph.GetArcs())
        {
            // TODO: make sure these are the strings we're looking for
            string source = arc.GetSourceNode().ToString();
            string target = arc.GetTargetNode().ToString();
            object input = arc.GetInput();
            string inputString = input.ToString();
            double output = (double)arc.GetOutput();

            if (source.Equals(startNode))
            {
                // Arc leaving the start node: unary rule target -> input.
                unaryRules.Add(new UnaryRule(newStateIndex.AddToIndex(target), newStateIndex.AddToIndex(inputString), SmartNegate(output)));
                continue;
            }

            if (inputString.Equals(End) || inputString.Equals(Epsilon))
            {
                // End/epsilon arc: unary rule target -> source.
                unaryRules.Add(new UnaryRule(newStateIndex.AddToIndex(target), newStateIndex.AddToIndex(source), SmartNegate(output)));
                continue;
            }

            // Otherwise a binary rule. The last character of the input says
            // whether the input symbol was generated on the left or the right.
            int lastPosition = inputString.Length - 1;
            char sideMarker = inputString[lastPosition];
            string inputSymbol = Sharpen.Runtime.Substring(inputString, 0, lastPosition);
            BinaryRule fromArc;
            switch (sideMarker)
            {
                case '<':
                case '[':
                {
                    fromArc = new BinaryRule(newStateIndex.AddToIndex(target), newStateIndex.AddToIndex(inputSymbol), newStateIndex.AddToIndex(source), SmartNegate(output));
                    break;
                }

                case '>':
                case ']':
                {
                    fromArc = new BinaryRule(newStateIndex.AddToIndex(target), newStateIndex.AddToIndex(source), newStateIndex.AddToIndex(inputSymbol), SmartNegate(output));
                    break;
                }

                default:
                {
                    throw new Exception("Arc input is in unexpected format: " + arc);
                }
            }
            binaryRules.Add(fromArc);
        }
    }

    // By now the rule collections hold both the renumbered old rules and the new scored rules.
    bool scoresAreCounts = outputType == RawCounts;
    ClassicCounter<string> symbolCounter = new ClassicCounter<string>();
    if (scoresAreCounts)
    {
        // The scores are counts, so total them per parent symbol.
        foreach (UnaryRule counted in unaryRules)
        {
            symbolCounter.IncrementCount(newStateIndex.Get(counted.parent), counted.score);
        }
        foreach (BinaryRule counted in binaryRules)
        {
            symbolCounter.IncrementCount(newStateIndex.Get(counted.parent), counted.score);
        }
    }

    // Pass 3: put the rules in the grammars.
    int numStates = newStateIndex.Size();
    // this should be smaller than last one
    int numRules = 0;
    UnaryGrammar ug = new UnaryGrammar(newStateIndex);
    BinaryGrammar bg = new BinaryGrammar(newStateIndex);
    foreach (UnaryRule unary in unaryRules)
    {
        if (scoresAreCounts)
        {
            double parentCount = symbolCounter.GetCount(newStateIndex.Get(unary.parent));
            unary.score = (float)Math.Log(unary.score / parentCount);
        }
        ug.AddRule(unary);
        numRules++;
    }
    foreach (BinaryRule binary in binaryRules)
    {
        if (scoresAreCounts)
        {
            double parentCount = symbolCounter.GetCount(newStateIndex.Get(binary.parent));
            // Binary rule counts are discounted before normalizing.
            binary.score = (float)Math.Log((binary.score - op.trainOptions.ruleDiscount) / parentCount);
        }
        bg.AddRule(binary);
        numRules++;
    }

    if (verbose)
    {
        System.Console.Out.WriteLine("Number of minimized rules: " + numRules);
        System.Console.Out.WriteLine("Number of minimized states: " + newStateIndex.Size());
    }

    ug.PurgeRules();
    bg.SplitRules();
    return new Pair<UnaryGrammar, BinaryGrammar>(ug, bg);
}
/// <summary>Do max language model markov segmentation.</summary>
/// <remarks>
/// Do max language model markov segmentation.
/// Note that this algorithm inherently tags words as it goes, but that
/// we throw away the tags in the final result so that the segmented words
/// are untagged. (Note: for a couple of years till Aug 2007, a tagged
/// result was returned, but this messed up the parser, because it could
/// use no tagging but the given tagging, which often wasn't very good.
/// Or in particular it was a subcategorized tagging which never worked
/// with the current forceTags option which assumes that gold taggings are
/// inherently basic taggings.)
/// </remarks>
/// <param name="s">A String to segment</param>
/// <returns>The list of segmented words; empty when <paramref name="s"/> is empty.</returns>
private List<IHasWord> SegmentWordsWithMarkov(string s)
{
    // Longest candidate word, in characters, considered by the dynamic program.
    const int MaxWordLength = 10;

    // We don't want to accidentally register words that we don't know
    // about in the wordIndex, so we wrap it with a DeltaIndex
    DeltaIndex<string> deltaWordIndex = new DeltaIndex<string>(wordIndex);
    int length = s.Length;
    List<IHasWord> words = new List<IHasWord>();
    if (length == 0)
    {
        // Nothing to segment; the tables below would have no row 0 to read.
        return words;
    }

    int numTags = POSes.Count;
    // score of span with initial word of this tag
    double[][][] scores = new double[length][][];
    // best (length of) first word for this span with this tag
    int[][][] splitBacktrace = new int[length][][];
    // best tag for second word over this span, if first is this tag
    int[][][] POSbacktrace = new int[length][][];
    // Jagged arrays: every inner dimension has to be allocated explicitly,
    // giving tables of shape [length][length + 1][numTags].
    for (int i = 0; i < length; i++)
    {
        scores[i] = new double[length + 1][];
        splitBacktrace[i] = new int[length + 1][];
        POSbacktrace[i] = new int[length + 1][];
        for (int j = 0; j < length + 1; j++)
        {
            scores[i][j] = new double[numTags];
            splitBacktrace[i][j] = new int[numTags];
            POSbacktrace[i][j] = new int[numTags];
            Arrays.Fill(scores[i][j], double.NegativeInfinity);
        }
    }

    // first fill in word probabilities
    for (int diff = 1; diff <= MaxWordLength; diff++)
    {
        for (int start = 0; start + diff <= length; start++)
        {
            int end = start + diff;
            string word = s.Substring(start, diff);
            foreach (string tag in POSes)
            {
                IntTaggedWord itw = new IntTaggedWord(word, tag, deltaWordIndex, tagIndex);
                double score = lex.Score(itw, 0, word, null);
                if (start == 0)
                {
                    score += Math.Log(initialPOSDist.ProbabilityOf(tag));
                }
                scores[start][end][itw.Tag()] = score;
                splitBacktrace[start][end][itw.Tag()] = end;
            }
        }
    }

    // now fill in word combination probabilities
    for (int diff_1 = 2; diff_1 <= length; diff_1++)
    {
        for (int start = 0; start + diff_1 <= length; start++)
        {
            int end = start + diff_1;
            for (int split = start + 1; split < end && split - start <= MaxWordLength; split++)
            {
                foreach (string tag in POSes)
                {
                    int tagNum = tagIndex.AddToIndex(tag);
                    // Only extend spans [start, split) that are a single word with this tag.
                    if (splitBacktrace[start][split][tagNum] != split)
                    {
                        continue;
                    }
                    // NOTE(review): relies on the indexer returning null for a missing
                    // key -- confirm the type of markovPOSDists.
                    Distribution<string> rTagDist = markovPOSDists[tag];
                    if (rTagDist == null)
                    {
                        continue;
                    }
                    // this happens with "*" POS
                    foreach (string rTag in POSes)
                    {
                        int rTagNum = tagIndex.AddToIndex(rTag);
                        double newScore = scores[start][split][tagNum] + scores[split][end][rTagNum] + Math.Log(rTagDist.ProbabilityOf(rTag));
                        if (newScore > scores[start][end][tagNum])
                        {
                            scores[start][end][tagNum] = newScore;
                            splitBacktrace[start][end][tagNum] = split;
                            POSbacktrace[start][end][tagNum] = rTagNum;
                        }
                    }
                }
            }
        }
    }

    // Follow the backtraces from the best tag for the whole string.
    int nextPOS = ArrayMath.Argmax(scores[0][length]);
    int start_1 = 0;
    while (start_1 < length)
    {
        int split = splitBacktrace[start_1][length][nextPOS];
        if (split <= start_1)
        {
            // No split was recorded for this span (the entry is still 0), so following
            // it would never advance. Emit the remainder as one word to guarantee termination.
            words.Add(new Word(s.Substring(start_1)));
            break;
        }
        words.Add(new Word(s.Substring(start_1, split - start_1)));
        if (split < length)
        {
            nextPOS = POSbacktrace[start_1][length][nextPOS];
        }
        start_1 = split;
    }
    return words;
}
/// <summary>
/// Runs the dependency parser over <paramref name="sentence"/>. The method grows the
/// working arrays if needed, seeds the inside scores from the lexicon's taggings,
/// caches head/argument and stop scores, and fills the inside scores
/// (<c>iScoreH</c>). Unless <c>op.doPCFG</c> is false it then fills the outside
/// scores (<c>oScoreH</c>) and the <c>iPossibleBy*</c>/<c>oPossibleBy*</c> filters.
/// </summary>
/// <param name="sentence">
/// The words to parse. Items that implement <c>IHasTag</c> with a non-empty tag
/// restrict the taggings considered; items that implement <c>IHasContext</c>
/// supply a context string passed to the lexicon.
/// </param>
/// <returns>The result of <c>HasParse()</c></returns>
/// <exception cref="System.OutOfMemoryException">
/// If the sentence is longer than the configured maximum, or the working arrays cannot be allocated
/// </exception>
public virtual bool Parse<_T0>(IList<_T0> sentence) where _T0 : IHasWord
{
    if (op.testOptions.verbose)
    {
        Timing.Tick("Starting dependency parse.");
    }
    this.sentence = sentence;
    int length = sentence.Count;
    if (length > arraySize)
    {
        if (length > op.testOptions.maxLength + 1 || length >= myMaxLength)
        {
            throw new OutOfMemoryException("Refusal to create such large arrays.");
        }
        else
        {
            try
            {
                CreateArrays(length + 1);
            }
            catch (OutOfMemoryException)
            {
                // Remember the failing length, try to restore arrays of the
                // previous size, then rethrow the original failure.
                myMaxLength = length;
                if (arraySize > 0)
                {
                    try
                    {
                        CreateArrays(arraySize);
                    }
                    catch (OutOfMemoryException)
                    {
                        throw new Exception("CANNOT EVEN CREATE ARRAYS OF ORIGINAL SIZE!!! " + arraySize);
                    }
                }
                throw;
            }
            arraySize = length + 1;
            if (op.testOptions.verbose)
            {
                log.Info("Created dparser arrays of size " + arraySize);
            }
        }
    }
    if (op.testOptions.verbose)
    {
        log.Info("Initializing...");
    }

    // map to words
    words = new int[length];
    int numTags = dg.NumTagBins();
    bool[][] hasTag = new bool[length][];
    for (int i = 0; i < length; i++)
    {
        // Jagged array: each row must be allocated before hasTag[i][tag] is written below.
        hasTag[i] = new bool[numTags];
        words[i] = wordIndex.AddToIndex(sentence[i].Word());
    }

    for (int head = 0; head < length; head++)
    {
        for (int tag = 0; tag < numTags; tag++)
        {
            Arrays.Fill(iScoreH[head][tag], float.NegativeInfinity);
            Arrays.Fill(oScoreH[head][tag], float.NegativeInfinity);
        }
    }
    for (int head_1 = 0; head_1 < length; head_1++)
    {
        for (int loc = 0; loc <= length; loc++)
        {
            rawDistance[head_1][loc] = (head_1 >= loc ? head_1 - loc : loc - head_1 - 1);
            binDistance[head_1][loc] = dg.DistanceBin(rawDistance[head_1][loc]);
        }
    }
    if (Thread.Interrupted())
    {
        throw new RuntimeInterruptedException();
    }

    // do tags
    for (int start = 0; start + 1 <= length; start++)
    {
        // Force tags
        string trueTagStr = null;
        if (sentence[start] is IHasTag)
        {
            trueTagStr = ((IHasTag)sentence[start]).Tag();
            if (string.Empty.Equals(trueTagStr))
            {
                trueTagStr = null;
            }
        }
        // Word context (e.g., morphosyntactic info)
        string wordContextStr = null;
        if (sentence[start] is IHasContext)
        {
            wordContextStr = ((IHasContext)sentence[start]).OriginalText();
            if (string.Empty.Equals(wordContextStr))
            {
                wordContextStr = null;
            }
        }
        int word = words[start];
        for (IEnumerator<IntTaggedWord> taggingI = lex.RuleIteratorByWord(word, start, wordContextStr); taggingI.MoveNext();)
        {
            IntTaggedWord tagging = taggingI.Current;
            if (trueTagStr != null)
            {
                if (!tlp.BasicCategory(tagging.TagString(tagIndex)).Equals(trueTagStr))
                {
                    continue;
                }
            }
            float score = lex.Score(tagging, start, wordIndex.Get(tagging.word), wordContextStr);
            // Any tagging with a finite lexicon score is admitted with inside score 0.
            if (score > float.NegativeInfinity)
            {
                int tag = tagging.tag;
                iScoreH[start][dg.TagBin(tag)][start] = 0.0f;
                iScoreH[start][dg.TagBin(tag)][start + 1] = 0.0f;
            }
        }
    }

    for (int hWord = 0; hWord < length; hWord++)
    {
        for (int hTag = 0; hTag < numTags; hTag++)
        {
            hasTag[hWord][hTag] = (iScoreH[hWord][hTag][hWord] + iScoreH[hWord][hTag][hWord + 1] > float.NegativeInfinity);
            Arrays.Fill(headStop[hWord][hTag], float.NegativeInfinity);
            for (int aWord = 0; aWord < length; aWord++)
            {
                for (int dist = 0; dist < dg.NumDistBins(); dist++)
                {
                    Arrays.Fill(headScore[dist][hWord][hTag][aWord], float.NegativeInfinity);
                }
            }
        }
    }

    // score and cache all pairs -- headScores and stops
    for (int hWord_1 = 0; hWord_1 < length; hWord_1++)
    {
        for (int hTag = 0; hTag < numTags; hTag++)
        {
            if (!hasTag[hWord_1][hTag])
            {
                continue;
            }
            for (int split = 0; split <= length; split++)
            {
                if (split <= hWord_1)
                {
                    headStop[hWord_1][hTag][split] = (float)dg.ScoreTB(words[hWord_1], hTag, -2, -2, false, hWord_1 - split);
                }
                else
                {
                    headStop[hWord_1][hTag][split] = (float)dg.ScoreTB(words[hWord_1], hTag, -2, -2, true, split - hWord_1 - 1);
                }
            }
            for (int aWord = 0; aWord < length; aWord++)
            {
                if (aWord == hWord_1)
                {
                    continue;
                }
                // can't be argument of yourself
                bool leftHeaded = hWord_1 < aWord;
                int start_1;
                int end;
                if (leftHeaded)
                {
                    start_1 = hWord_1 + 1;
                    end = aWord + 1;
                }
                else
                {
                    start_1 = aWord + 1;
                    end = hWord_1 + 1;
                }
                for (int aTag = 0; aTag < numTags; aTag++)
                {
                    if (!hasTag[aWord][aTag])
                    {
                        continue;
                    }
                    // if leftHeaded, go from hWord+1 to aWord
                    // else go from aWord+1 to hWord
                    for (int split_1 = start_1; split_1 < end; split_1++)
                    {
                        int headDistance = rawDistance[hWord_1][split_1];
                        int binDist = binDistance[hWord_1][split_1];
                        headScore[binDist][hWord_1][hTag][aWord][aTag] = (float)dg.ScoreTB(words[hWord_1], hTag, words[aWord], aTag, leftHeaded, headDistance);
                        // skip other splits with same binDist
                        while (split_1 + 1 < end && binDistance[hWord_1][split_1 + 1] == binDist)
                        {
                            split_1++;
                        }
                    }
                }
            }
        }
    }

    if (op.testOptions.verbose)
    {
        Timing.Tick("done.");
        log.Info("Starting insides...");
    }

    // do larger spans
    for (int diff = 2; diff <= length; diff++)
    {
        if (Thread.Interrupted())
        {
            throw new RuntimeInterruptedException();
        }
        for (int start_1 = 0; start_1 + diff <= length; start_1++)
        {
            int end = start_1 + diff;

            // left extension
            int endHead = end - 1;
            for (int endTag = 0; endTag < numTags; endTag++)
            {
                if (!hasTag[endHead][endTag])
                {
                    continue;
                }
                // bestScore is max for iScoreH
                float bestScore = float.NegativeInfinity;
                for (int argHead = start_1; argHead < endHead; argHead++)
                {
                    for (int argTag = 0; argTag < numTags; argTag++)
                    {
                        if (!hasTag[argHead][argTag])
                        {
                            continue;
                        }
                        float argLeftScore = iScoreH[argHead][argTag][start_1];
                        if (argLeftScore == float.NegativeInfinity)
                        {
                            continue;
                        }
                        float stopLeftScore = headStop[argHead][argTag][start_1];
                        if (stopLeftScore == float.NegativeInfinity)
                        {
                            continue;
                        }
                        for (int split = argHead + 1; split < end; split++)
                        {
                            // short circuit if dependency is impossible
                            float depScore = headScore[binDistance[endHead][split]][endHead][endTag][argHead][argTag];
                            if (depScore == float.NegativeInfinity)
                            {
                                continue;
                            }
                            float score = iScoreH[endHead][endTag][split] + argLeftScore + iScoreH[argHead][argTag][split] + depScore + stopLeftScore + headStop[argHead][argTag][split];
                            if (score > bestScore)
                            {
                                bestScore = score;
                            }
                        }
                    }
                }
                iScoreH[endHead][endTag][start_1] = bestScore;
            }

            // right extension
            int startHead = start_1;
            for (int startTag = 0; startTag < numTags; startTag++)
            {
                if (!hasTag[startHead][startTag])
                {
                    continue;
                }
                // bestScore is max for iScoreH
                float bestScore = float.NegativeInfinity;
                for (int argHead = start_1 + 1; argHead < end; argHead++)
                {
                    for (int argTag = 0; argTag < numTags; argTag++)
                    {
                        if (!hasTag[argHead][argTag])
                        {
                            continue;
                        }
                        float argRightScore = iScoreH[argHead][argTag][end];
                        if (argRightScore == float.NegativeInfinity)
                        {
                            continue;
                        }
                        float stopRightScore = headStop[argHead][argTag][end];
                        if (stopRightScore == float.NegativeInfinity)
                        {
                            continue;
                        }
                        for (int split = start_1 + 1; split <= argHead; split++)
                        {
                            // short circuit if dependency is impossible
                            float depScore = headScore[binDistance[startHead][split]][startHead][startTag][argHead][argTag];
                            if (depScore == float.NegativeInfinity)
                            {
                                continue;
                            }
                            float score = iScoreH[startHead][startTag][split] + iScoreH[argHead][argTag][split] + argRightScore + depScore + stopRightScore + headStop[argHead][argTag][split];
                            if (score > bestScore)
                            {
                                bestScore = score;
                            }
                        }
                    }
                }
                iScoreH[startHead][startTag][end] = bestScore;
            }
        }
    }

    int goalTag = dg.TagBin(tagIndex.IndexOf(LexiconConstants.BoundaryTag));
    if (op.testOptions.verbose)
    {
        Timing.Tick("done.");
        log.Info("Dep parsing " + length + " words (incl. stop): insideScore " + (iScoreH[length - 1][goalTag][0] + iScoreH[length - 1][goalTag][length]));
    }
    if (!op.doPCFG)
    {
        // Outside scores and filters are only computed when op.doPCFG is set.
        return HasParse();
    }
    if (op.testOptions.verbose)
    {
        log.Info("Starting outsides...");
    }

    oScoreH[length - 1][goalTag][0] = 0.0f;
    oScoreH[length - 1][goalTag][length] = 0.0f;
    for (int diff_1 = length; diff_1 > 1; diff_1--)
    {
        if (Thread.Interrupted())
        {
            throw new RuntimeInterruptedException();
        }
        for (int start_1 = 0; start_1 + diff_1 <= length; start_1++)
        {
            int end = start_1 + diff_1;

            // left half
            int endHead = end - 1;
            for (int endTag = 0; endTag < numTags; endTag++)
            {
                if (!hasTag[endHead][endTag])
                {
                    continue;
                }
                for (int argHead = start_1; argHead < endHead; argHead++)
                {
                    for (int argTag = 0; argTag < numTags; argTag++)
                    {
                        if (!hasTag[argHead][argTag])
                        {
                            continue;
                        }
                        for (int split = argHead; split <= endHead; split++)
                        {
                            float subScore = (oScoreH[endHead][endTag][start_1] + headScore[binDistance[endHead][split]][endHead][endTag][argHead][argTag] + headStop[argHead][argTag][start_1] + headStop[argHead][argTag][split]);
                            float scoreRight = (subScore + iScoreH[argHead][argTag][start_1] + iScoreH[argHead][argTag][split]);
                            float scoreMid = (subScore + iScoreH[argHead][argTag][start_1] + iScoreH[endHead][endTag][split]);
                            float scoreLeft = (subScore + iScoreH[argHead][argTag][split] + iScoreH[endHead][endTag][split]);
                            if (scoreRight > oScoreH[endHead][endTag][split])
                            {
                                oScoreH[endHead][endTag][split] = scoreRight;
                            }
                            if (scoreMid > oScoreH[argHead][argTag][split])
                            {
                                oScoreH[argHead][argTag][split] = scoreMid;
                            }
                            if (scoreLeft > oScoreH[argHead][argTag][start_1])
                            {
                                oScoreH[argHead][argTag][start_1] = scoreLeft;
                            }
                        }
                    }
                }
            }

            // right half
            int startHead = start_1;
            for (int startTag = 0; startTag < numTags; startTag++)
            {
                if (!hasTag[startHead][startTag])
                {
                    continue;
                }
                for (int argHead = startHead + 1; argHead < end; argHead++)
                {
                    for (int argTag = 0; argTag < numTags; argTag++)
                    {
                        if (!hasTag[argHead][argTag])
                        {
                            continue;
                        }
                        for (int split = startHead + 1; split <= argHead; split++)
                        {
                            float subScore = (oScoreH[startHead][startTag][end] + headScore[binDistance[startHead][split]][startHead][startTag][argHead][argTag] + headStop[argHead][argTag][split] + headStop[argHead][argTag][end]);
                            float scoreLeft = (subScore + iScoreH[argHead][argTag][split] + iScoreH[argHead][argTag][end]);
                            float scoreMid = (subScore + iScoreH[startHead][startTag][split] + iScoreH[argHead][argTag][end]);
                            float scoreRight = (subScore + iScoreH[startHead][startTag][split] + iScoreH[argHead][argTag][split]);
                            if (scoreLeft > oScoreH[startHead][startTag][split])
                            {
                                oScoreH[startHead][startTag][split] = scoreLeft;
                            }
                            if (scoreMid > oScoreH[argHead][argTag][split])
                            {
                                oScoreH[argHead][argTag][split] = scoreMid;
                            }
                            if (scoreRight > oScoreH[argHead][argTag][end])
                            {
                                oScoreH[argHead][argTag][end] = scoreRight;
                            }
                        }
                    }
                }
            }
        }
    }

    if (op.testOptions.verbose)
    {
        Timing.Tick("done.");
        log.Info("Starting half-filters...");
    }
    for (int loc_1 = 0; loc_1 <= length; loc_1++)
    {
        for (int head_2 = 0; head_2 < length; head_2++)
        {
            Arrays.Fill(iPossibleByL[loc_1][head_2], false);
            Arrays.Fill(iPossibleByR[loc_1][head_2], false);
            Arrays.Fill(oPossibleByL[loc_1][head_2], false);
            Arrays.Fill(oPossibleByR[loc_1][head_2], false);
        }
    }
    if (Thread.Interrupted())
    {
        throw new RuntimeInterruptedException();
    }
    for (int head_3 = 0; head_3 < length; head_3++)
    {
        for (int tag = 0; tag < numTags; tag++)
        {
            if (!hasTag[head_3][tag])
            {
                continue;
            }
            for (int start_1 = 0; start_1 <= head_3; start_1++)
            {
                for (int end = head_3 + 1; end <= length; end++)
                {
                    // A (start, end) pair is possible only if both the inside and
                    // the outside scores on each side are finite.
                    if (iScoreH[head_3][tag][start_1] + iScoreH[head_3][tag][end] > float.NegativeInfinity && oScoreH[head_3][tag][start_1] + oScoreH[head_3][tag][end] > float.NegativeInfinity)
                    {
                        iPossibleByR[end][head_3][tag] = true;
                        iPossibleByL[start_1][head_3][tag] = true;
                        oPossibleByR[end][head_3][tag] = true;
                        oPossibleByL[start_1][head_3][tag] = true;
                    }
                }
            }
        }
    }
    if (op.testOptions.verbose)
    {
        Timing.Tick("done.");
    }
    return HasParse();
}