// IsAcronymImpl: tests whether `str` is an acronym for `tokens` -- each character of the
// (cleaned) candidate must match the first letter of the corresponding token,
// case-insensitively. `discardPattern` first strips ignorable characters from the
// candidate; if lengths still disagree, stop-words are filtered out of the token list.
// NOTE(review): `Filter(null)` below is a Sharpen (Java->C#) conversion artifact -- the
// original stop-word predicate lambda was lost. Likewise `char.ToUpperCase` and
// `IsEmpty()` are Java API names relying on conversion shims. Confirm against the
// original Java source before relying on this code.
// Public static utility methods public static bool IsAcronymImpl(string str, IList <string> tokens) { // Remove some words from the candidate acronym str = discardPattern.Matcher(str).ReplaceAll(string.Empty); // Remove stopwords if we need to if (str.Length != tokens.Count) { tokens = tokens.Stream().Filter(null).Collect(Collectors.ToList()); } // Run the matcher if (str.Length == tokens.Count) { for (int i = 0; i < str.Length; i++) { char ch = char.ToUpperCase(str[i]); if (!tokens[i].IsEmpty() && char.ToUpperCase(tokens[i][0]) != ch) { return(false); } } return(true); } else { return(false); } }
// PrintConllOutput: renders a document's mention clusters (gold or predicted, chosen by
// the `gold` flag) in CoNLL format via CorefPrinter, optionally filtering singleton
// mentions first.
// NOTE(review): `Map(null)` is a Sharpen conversion artifact -- the singleton-filtering
// lambda was lost in translation; confirm against the original Java source.
public static string PrintConllOutput(Document document, bool gold, bool filterSingletons) { IList <IList <Mention> > orderedMentions = gold ? document.goldMentions : document.predictedMentions; if (filterSingletons) { orderedMentions = orderedMentions.Stream().Map(null).Collect(Collectors.ToList()); } return(CorefPrinter.PrintConllOutput(document, orderedMentions, gold)); }
/// <summary>Builds a map from each subset-factor assignment to all matching superset-factor assignments.</summary>
/// <remarks>
/// Builds a map from each subset-factor assignment to all matching superset-factor assignments.
/// Every subset assignment will typically map to a large number of superset assignments.
/// </remarks>
/// <param name="superset">the superset factor whose assignments are enumerated as candidates</param>
/// <param name="subset">the subset factor whose assignments form the map keys</param>
/// <returns>a map from subset assignment (as a boxed int list) to the list of matching superset assignments</returns>
private IDictionary<IList<int>, IList<int[]>> SubsetToSupersetAssignments(TableFactor superset, TableFactor subset)
{
    IDictionary<IList<int>, IList<int[]>> result = new Dictionary<IList<int>, IList<int[]>>();
    foreach (int[] subAssignment in subset)
    {
        // Box the primitive assignment into a List<int> so it can serve as a dictionary key.
        IList<int> key = new List<int>();
        foreach (int component in subAssignment)
        {
            key.Add(component);
        }
        // Collect every superset assignment whose projection onto the subset's variables matches.
        IList<int[]> matches = new List<int[]>();
        foreach (int[] superAssignment in superset)
        {
            if (Arrays.Equals(subAssignment, SubsetAssignment(superAssignment, superset, subset)))
            {
                matches.Add(superAssignment);
            }
        }
        result[key] = matches;
    }
    return result;
}
// Verifies that iterating the NDArray yields exactly the set of assignments recorded in
// the gold map: every iterated assignment must appear in the gold key set, and every
// gold key must be produced exactly once (the set is drained and must end empty).
public virtual void TestAssignmentsIterator(NDArrayTest.NDArrayWithGold <double> testPair)
{
    // Box every gold assignment into a List<int> so membership can be checked structurally.
    ICollection<IList<int>> expected = new HashSet<IList<int>>();
    foreach (int[] goldAssignment in testPair.gold.Keys)
    {
        IList<int> boxedGold = new List<int>();
        foreach (int component in goldAssignment)
        {
            boxedGold.Add(component);
        }
        expected.Add(boxedGold);
    }
    // Each assignment produced by the iterator must be expected, and is consumed once.
    foreach (int[] produced in testPair.array)
    {
        IList<int> boxed = new List<int>();
        foreach (int component in produced)
        {
            boxed.Add(component);
        }
        NUnit.Framework.Assert.IsTrue(expected.Contains(boxed));
        expected.Remove(boxed);
    }
    // Nothing expected may remain unproduced.
    NUnit.Framework.Assert.IsTrue(expected.IsEmpty());
}
// Greedily clusters entity mentions: each mention joins the first existing cluster
// containing a mention judged to be the same entity (via SameEntityWithoutLinking);
// otherwise it starts a new cluster. Returns the clusters converted back to ICoreMap lists.
public virtual IList <IList <ICoreMap> > ClusterEntityMentions(IList <ICoreMap> entityMentions)
{
    IList<CoreEntityMention> wrapped = WrapEntityMentions(entityMentions);
    List<List<CoreEntityMention>> clusters = new List<List<CoreEntityMention>>();
    foreach (CoreEntityMention candidate in wrapped)
    {
        // Find the first cluster holding a matching mention, if any.
        List<CoreEntityMention> home = null;
        foreach (List<CoreEntityMention> cluster in clusters)
        {
            foreach (CoreEntityMention member in cluster)
            {
                if (SameEntityWithoutLinking(candidate, member))
                {
                    home = cluster;
                    break;
                }
            }
            if (home != null)
            {
                break;
            }
        }
        if (home != null)
        {
            home.Add(candidate);
        }
        else
        {
            // No match anywhere: the mention seeds a brand-new cluster.
            List<CoreEntityMention> fresh = new List<CoreEntityMention>();
            fresh.Add(candidate);
            clusters.Add(fresh);
        }
    }
    // Convert each cluster of CoreEntityMention back into a list of ICoreMap.
    // NOTE(review): Map(null) is a Sharpen conversion artifact (the mapping lambda was lost).
    IList<IList<ICoreMap>> coreMapClusters = new List<IList<ICoreMap>>();
    foreach (List<CoreEntityMention> cluster in clusters)
    {
        IList<ICoreMap> coreMapCluster = cluster.Stream().Map(null).Collect(Collectors.ToList());
        coreMapClusters.Add(coreMapCluster);
    }
    return coreMapClusters;
}
// NOTE(review): Map(null) is a Sharpen conversion artifact; presumably the lost lambda
// extracted the NER tag from each token in Tokens() -- confirm against the Java source.
/// <summary>list of ner tags</summary> public virtual IList <string> NerTags() { return(Tokens().Stream().Map(null).Collect(Collectors.ToList())); }
// PaintComponent: renders the enclosing confusion table as a colored grid. Row 0 and
// column 0 hold the label headers; interior cell (row, col) shows the count for the
// (labels[row-1], labels[col-1]) pair. Diagonal cells are shaded relative to the maximum
// diagonal count and off-diagonal cells relative to the maximum off-diagonal count, via
// RGB->HSB conversion. Finally the currently selected cell (if any) gets a mouse-over
// callback before the temporary Graphics2D is disposed.
// NOTE(review): `if (count == null)` on an `int` local is a Java Integer-unboxing
// artifact from the Sharpen conversion -- this cannot compile as plain C#; the intent
// (default a missing table entry to 0) should be confirmed against the Java source.
protected override void PaintComponent(Graphics g) { base.PaintComponent(g); // Dimensions Graphics2D g2d = (Graphics2D)g.Create(); g.SetFont(new Font("Arial", Font.Plain, 10)); int width = this.GetWidth(); int height = this.GetHeight(); int cellWidth = width / this.columnCount; int cellHeight = height / this.rowCount; int xOffset = (width - (this.columnCount * cellWidth)) / 2; int yOffset = (height - (this.rowCount * cellHeight)) / 2; // Get label index IList <U> labels = this._enclosing._enclosing.UniqueLabels().Stream().Collect(Collectors.ToList()); // Get color gradient int maxDiag = 0; int maxOffdiag = 0; foreach (KeyValuePair <Pair <U, U>, int> entry in this._enclosing._enclosing.confTable) { if (entry.Key.first == entry.Key.second) { maxDiag = Math.Max(maxDiag, entry.Value); } else { maxOffdiag = Math.Max(maxOffdiag, entry.Value); } } // Render the grid float[] hsb = new float[3]; for (int row = 0; row < this.rowCount; row++) { for (int col = 0; col < this.columnCount; col++) { // Position int x = xOffset + (col * cellWidth); int y = yOffset + (row * cellHeight); float xCenter = xOffset + (col * cellWidth) + cellWidth / 3.0f; float yCenter = yOffset + (row * cellHeight) + cellHeight / 2.0f; // Get text + Color string text; Color bg = Color.White; if (row == 0 && col == 0) { text = "V guess | gold >"; } else { if (row == 0) { text = labels[col - 1].ToString(); } else { if (col == 0) { text = labels[row - 1].ToString(); } else { // Set value int count = this._enclosing._enclosing.confTable[Pair.MakePair(labels[row - 1], labels[col - 1])]; if (count == null) { count = 0; } text = string.Empty + count; // Get color if (row == col) { double percentGood = ((double)count) / ((double)maxDiag); hsb = Color.RGBtoHSB((int)(255 - (255.0 * percentGood)), (int)(255 - (255.0 * percentGood / 2.0)), (int)(255 - (255.0 * percentGood)), hsb); bg = Color.GetHSBColor(hsb[0], hsb[1], hsb[2]); } else { double percentBad = ((double)count) / ((double)maxOffdiag); hsb = 
Color.RGBtoHSB((int)(255 - (255.0 * percentBad / 2.0)), (int)(255 - (255.0 * percentBad)), (int)(255 - (255.0 * percentBad)), hsb); bg = Color.GetHSBColor(hsb[0], hsb[1], hsb[2]); } } } } // Draw Rectangle cell = new Rectangle(x, y, cellWidth, cellHeight); g2d.SetColor(bg); g2d.Fill(cell); g2d.SetColor(Color.Black); g2d.DrawString(text, xCenter, yCenter); this.cells.Add(cell); } } // Mouse over if (this.selectedCell != null && this.selectedCell.x > 0 && this.selectedCell.y > 0) { int index = this.selectedCell.x + (this.selectedCell.y * this.columnCount); Rectangle cell = this.cells[index]; this.OnMouseOver(g2d, cell, labels[this.selectedCell.y - 1], labels[this.selectedCell.x - 1]); } // Clean up g2d.Dispose(); }
// MockLabels: splits a whitespace-separated string and converts each token to a
// CoreLabel (test helper).
// NOTE(review): Map(null) is a Sharpen conversion artifact -- the token-to-CoreLabel
// lambda was lost; confirm against the original Java source.
private IList <CoreLabel> MockLabels(string input) { return(Arrays.Stream(input.Split(" ")).Map(null).Collect(Collectors.ToList())); }
// NOTE(review): the Map(null)/Filter(null)/Map(null) chain is a Sharpen conversion
// artifact -- all three lambdas (fragment -> triple extraction, null-filtering, and the
// final mapping) were lost in translation; confirm against the Java source.
/// <summary>Returns a list of OpenIE relations from the given set of sentence fragments.</summary> /// <param name="fragments">The sentence fragments to extract relations from.</param> /// <param name="sentence">The containing sentence that these fragments were extracted from.</param> /// <returns>A list of OpenIE triples, corresponding to all the triples that could be extracted from the given fragments.</returns> private IList <RelationTriple> RelationsInFragments(ICollection <SentenceFragment> fragments, ICoreMap sentence) { return(fragments.Stream().Map(null).Filter(null).Map(null).Collect(Collectors.ToList())); }
// Convenience overload of the two-argument RelationsInFragments without a containing
// sentence. NOTE(review): the Map(null)/Filter(null)/Map(null) chain is a Sharpen
// conversion artifact -- the lambdas were lost; confirm against the Java source.
/// <summary>Returns the possible relation triple in this set of sentence fragments.</summary> /// <seealso cref="RelationsInFragments(System.Collections.Generic.ICollection{E}, Edu.Stanford.Nlp.Util.ICoreMap)"/> public virtual IList <RelationTriple> RelationsInFragments(ICollection <SentenceFragment> fragments) { return(fragments.Stream().Map(null).Filter(null).Map(null).Collect(Collectors.ToList())); }
// Implementation notes (review): after collecting forward entailments (bounded by
// entailmentsPerSentence) and the clause itself, a semgrex pattern harvests
// adjective-copula entailments ("X is ADJ [prep Y]") as small synthetic SemanticGraphs;
// a privative adjective on the object (e.g. "fake") aborts that match via the
// OUTER_continue goto-label, and both the adjective's and copula's polarity must be
// upwards-monotone for the fragment to be kept.
// NOTE(review): Map(null) in the forward-entailment collection is a Sharpen conversion
// artifact (the lost lambda presumably wrapped search results as SentenceFragments) --
// confirm against the Java source. The goto OUTER_continue / OUTER_continue: labels are
// Sharpen's encoding of Java's labeled continue.
/// <summary>Returns all of the entailed shortened clauses (as per natural logic) from the given clause.</summary> /// <remarks> /// Returns all of the entailed shortened clauses (as per natural logic) from the given clause. /// This runs the forward entailment component of the OpenIE system only. /// It is usually chained together with the clause splitting component: /// <see cref="ClausesInSentence(Edu.Stanford.Nlp.Util.ICoreMap)"/> /// . /// </remarks> /// <param name="clause">The premise clause, as a sentence fragment in itself.</param> /// <returns>A list of entailed clauses.</returns> public virtual IList <SentenceFragment> EntailmentsFromClause(SentenceFragment clause) { if (clause.parseTree.IsEmpty()) { return(Java.Util.Collections.EmptyList()); } else { // Get the forward entailments IList <SentenceFragment> list = new List <SentenceFragment>(); if (entailmentsPerSentence > 0) { Sharpen.Collections.AddAll(list, forwardEntailer.Apply(clause.parseTree, true).Search().Stream().Map(null).Collect(Collectors.ToList())); } list.Add(clause); // A special case for adjective entailments IList <SentenceFragment> adjFragments = new List <SentenceFragment>(); SemgrexMatcher matcher = adjectivePattern.Matcher(clause.parseTree); while (matcher.Find()) { // (get nodes) IndexedWord subj = matcher.GetNode("subj"); IndexedWord be = matcher.GetNode("be"); IndexedWord adj = matcher.GetNode("adj"); IndexedWord obj = matcher.GetNode("obj"); IndexedWord pobj = matcher.GetNode("pobj"); string prep = matcher.GetRelnString("prep"); // (if the adjective, or any earlier adjective, is privative, then all bets are off) foreach (SemanticGraphEdge edge in clause.parseTree.OutgoingEdgeIterable(obj)) { if ("amod".Equals(edge.GetRelation().ToString()) && edge.GetDependent().Index() <= adj.Index() && Edu.Stanford.Nlp.Naturalli.Util.PrivativeAdjectives.Contains(edge.GetDependent().Word().ToLower())) { goto OUTER_continue; } } // (create the core tree) SemanticGraph tree = new SemanticGraph(); 
tree.AddRoot(adj); tree.AddVertex(subj); tree.AddVertex(be); tree.AddEdge(adj, be, GrammaticalRelation.ValueOf(Language.English, "cop"), double.NegativeInfinity, false); tree.AddEdge(adj, subj, GrammaticalRelation.ValueOf(Language.English, "nsubj"), double.NegativeInfinity, false); // (add pp attachment, if it existed) if (pobj != null) { System.Diagnostics.Debug.Assert(prep != null); tree.AddEdge(adj, pobj, GrammaticalRelation.ValueOf(Language.English, prep), double.NegativeInfinity, false); } // (check for monotonicity) if (adj.Get(typeof(NaturalLogicAnnotations.PolarityAnnotation)).IsUpwards() && be.Get(typeof(NaturalLogicAnnotations.PolarityAnnotation)).IsUpwards()) { // (add tree) adjFragments.Add(new SentenceFragment(tree, clause.assumedTruth, false)); } OUTER_continue :; } OUTER_break :; Sharpen.Collections.AddAll(list, adjFragments); return(list); } }
// NOTE(review): the `toString` parameter is never referenced in the body -- the
// span-to-string lambda that should use it was lost in the Sharpen conversion
// (Map(null)); confirm against the Java source before fixing.
/// <summary>Get the keyphrases of the sentence as a list of Strings.</summary> /// <param name="toString">The function to use to convert a span to a string. The canonical case is Sentence::words</param> /// <returns>A list of keyphrases, as Strings.</returns> /// <seealso cref="KeyphraseSpans()"/> public virtual IList <string> Keyphrases(IFunction <Sentence, IList <string> > toString) { return(KeyphraseSpans().Stream().Map(null).Collect(Collectors.ToList())); }
// Implementation notes (review): logs dataset size and per-label counts, then
// precision/recall/F1 for the ClauseSplit and ClauseInterm labels (F1 computed as
// 2pr/(p+r)). NOTE(review): both Filter(null) calls are Sharpen conversion artifacts --
// the label-selecting predicates were lost; as written the logged "split count" and
// "interm count" cannot differ. Confirm against the Java source.
/// <summary>A helper function for dumping the accuracy of the trained classifier.</summary> /// <param name="classifier">The classifier to evaluate.</param> /// <param name="dataset">The dataset to evaluate the classifier on.</param> public static void DumpAccuracy(IClassifier <ClauseSplitter.ClauseClassifierLabel, string> classifier, GeneralDataset <ClauseSplitter.ClauseClassifierLabel, string> dataset) { DecimalFormat df = new DecimalFormat("0.00%"); Redwood.Log("size: " + dataset.Size()); Redwood.Log("split count: " + StreamSupport.Stream(dataset.Spliterator(), false).Filter(null).Collect(Collectors.ToList()).Count); Redwood.Log("interm count: " + StreamSupport.Stream(dataset.Spliterator(), false).Filter(null).Collect(Collectors.ToList()).Count); Pair <double, double> pr = classifier.EvaluatePrecisionAndRecall(dataset, ClauseSplitter.ClauseClassifierLabel.ClauseSplit); Redwood.Log("p (split): " + df.Format(pr.first)); Redwood.Log("r (split): " + df.Format(pr.second)); Redwood.Log("f1 (split): " + df.Format(2 * pr.first * pr.second / (pr.first + pr.second))); pr = classifier.EvaluatePrecisionAndRecall(dataset, ClauseSplitter.ClauseClassifierLabel.ClauseInterm); Redwood.Log("p (interm): " + df.Format(pr.first)); Redwood.Log("r (interm): " + df.Format(pr.second)); Redwood.Log("f1 (interm): " + df.Format(2 * pr.first * pr.second / (pr.first + pr.second))); }
// LoadDocuments: deserializes the labeled pairs, mention types, gold clusters, and three
// sets of model scores (classification, ranking, anaphoricity) from the statistical
// coref trainer's files, re-keys the anaphoricity scores per mention, and assembles up
// to maxDocs ClustererDoc objects (sorted by document id).
// NOTE(review): ForEach(null) and Map(null) are Sharpen conversion artifacts -- the
// lambdas that copy anaphoricity entries into `scores` and that build each ClustererDoc
// were lost; confirm against the Java source.
/// <exception cref="System.Exception"/> public static IList <ClustererDataLoader.ClustererDoc> LoadDocuments(int maxDocs) { IDictionary <int, IDictionary <Pair <int, int>, bool> > labeledPairs = IOUtils.ReadObjectFromFile(StatisticalCorefTrainer.datasetFile); IDictionary <int, IDictionary <int, string> > mentionTypes = IOUtils.ReadObjectFromFile(StatisticalCorefTrainer.mentionTypesFile); IDictionary <int, IList <IList <int> > > goldClusters = IOUtils.ReadObjectFromFile(StatisticalCorefTrainer.goldClustersFile); IDictionary <int, ICounter <Pair <int, int> > > classificationScores = IOUtils.ReadObjectFromFile(StatisticalCorefTrainer.pairwiseModelsPath + StatisticalCorefTrainer.ClassificationModel + "/" + StatisticalCorefTrainer.predictionsName + ".ser"); IDictionary <int, ICounter <Pair <int, int> > > rankingScores = IOUtils.ReadObjectFromFile(StatisticalCorefTrainer.pairwiseModelsPath + StatisticalCorefTrainer.RankingModel + "/" + StatisticalCorefTrainer.predictionsName + ".ser"); IDictionary <int, ICounter <Pair <int, int> > > anaphoricityScoresLoaded = IOUtils.ReadObjectFromFile(StatisticalCorefTrainer.pairwiseModelsPath + StatisticalCorefTrainer.AnaphoricityModel + "/" + StatisticalCorefTrainer.predictionsName + ".ser"); IDictionary <int, ICounter <int> > anaphoricityScores = new Dictionary <int, ICounter <int> >(); foreach (KeyValuePair <int, ICounter <Pair <int, int> > > e in anaphoricityScoresLoaded) { ICounter <int> scores = new ClassicCounter <int>(); e.Value.EntrySet().ForEach(null); anaphoricityScores[e.Key] = scores; } return(labeledPairs.Keys.Stream().Sorted().Limit(maxDocs).Map(null).Collect(Collectors.ToList())); }
// WrapEntityMentions: converts each ICoreMap entity mention into a CoreEntityMention
// wrapper. NOTE(review): Map(null) is a Sharpen conversion artifact -- the wrapping
// lambda was lost; confirm against the Java source.
public virtual IList <CoreEntityMention> WrapEntityMentions(IList <ICoreMap> entityMentions) { return(entityMentions.Stream().Map(null).Collect(Collectors.ToList())); }
// { Cats eat _some_ mice,
//   Cats eat _most_ mice }
/// <summary>
/// A helper method for
/// <see cref="GetModifierSubtreeSpan(Edu.Stanford.Nlp.Semgraph.SemanticGraph, Edu.Stanford.Nlp.Ling.IndexedWord)"/>
/// and
/// <see cref="GetSubtreeSpan(Edu.Stanford.Nlp.Semgraph.SemanticGraph, Edu.Stanford.Nlp.Ling.IndexedWord)"/>
/// : computes the [min, max+1) token-index span covered by the subtree under
/// <paramref name="root"/>, following only arcs in <paramref name="validArcs"/>
/// (or all arcs when null) and never crossing punctuation arcs out of the root.
/// </summary>
private static Pair <int, int> GetGeneralizedSubtreeSpan(SemanticGraph tree, IndexedWord root, ICollection <string> validArcs)
{
    int lo = root.Index();
    int hi = root.Index();
    // Breadth-first frontier seeded with the root's admissible children.
    IQueue<IndexedWord> frontier = new LinkedList<IndexedWord>();
    foreach (SemanticGraphEdge arc in tree.OutgoingEdgeIterable(root))
    {
        string shortName = arc.GetRelation().GetShortName();
        if ("punct".Equals(shortName))
        {
            // Punctuation arcs from the root are never followed.
            continue;
        }
        if (validArcs == null || validArcs.Contains(shortName))
        {
            frontier.Add(arc.GetDependent());
        }
    }
    // Expand the frontier, widening the span as nodes are visited.
    while (!frontier.IsEmpty())
    {
        IndexedWord current = frontier.Poll();
        lo = Math.Min(current.Index(), lo);
        hi = Math.Max(current.Index(), hi);
        // ignore punctuation
        // NOTE(review): Filter(null)/Map(null) are Sharpen conversion artifacts (lambdas lost).
        Sharpen.Collections.AddAll(frontier, tree.GetOutEdgesSorted(current).Stream().Filter(null).Map(null).Collect(Collectors.ToList()));
    }
    // Span is end-exclusive, hence max + 1.
    return Pair.MakePair(lo, hi + 1);
}
// Implementation notes (review): the method resolves `crfNameOrIndex` to a CRF in two
// steps (integer index into baseClassifiers, else loadPath name lookup), then dispatches
// on exactly one test flag in priority order: searchGraphPrefix, printFirstOrderProbs,
// printFactorTable, printProbs, useKBest, printLabelValue, and finally a default
// classify-and-write-answers with a warning. When only testFiles is set, the files are
// split on commas and either printProbs or the default path runs.
// run a particular CRF of this ClassifierCombiner on a testFile // user can say -crfToExamine 0 to get 1st element or -crfToExamine /edu/stanford/models/muc7.crf.ser.gz // this does not currently support drill down on CMM's /// <exception cref="System.Exception"/> public static void ExamineCRF(Edu.Stanford.Nlp.IE.ClassifierCombiner cc, string crfNameOrIndex, SeqClassifierFlags flags, string testFile, string testFiles, IDocumentReaderAndWriter <CoreLabel> readerAndWriter) { CRFClassifier <CoreLabel> crf; // potential index into baseClassifiers int ci; // set ci with the following rules // 1. first see if ci is an index into baseClassifiers // 2. if its not an integer or wrong size, see if its a file name of a loadPath try { ci = System.Convert.ToInt32(crfNameOrIndex); if (ci < 0 || ci >= cc.baseClassifiers.Count) { // ci is not an int corresponding to an element in baseClassifiers, see if name of a crf loadPath ci = cc.initLoadPaths.IndexOf(crfNameOrIndex); } } catch (NumberFormatException) { // cannot interpret crfNameOrIndex as an integer, see if name of a crf loadPath ci = cc.initLoadPaths.IndexOf(crfNameOrIndex); } // if ci corresponds to an index in baseClassifiers, get the crf at that index, otherwise set crf to null if (ci >= 0 && ci < cc.baseClassifiers.Count) { // TODO: this will break if baseClassifiers contains something that is not a CRF crf = (CRFClassifier <CoreLabel>)cc.baseClassifiers[ci]; } else { crf = null; } // if you can get a specific crf, generate the appropriate report, if null do nothing if (crf != null) { // if there is a crf and testFile was set , do the crf stuff for a single testFile if (testFile != null) { if (flags.searchGraphPrefix != null) { crf.ClassifyAndWriteViterbiSearchGraph(testFile, flags.searchGraphPrefix, crf.MakeReaderAndWriter()); } else { if (flags.printFirstOrderProbs) { crf.PrintFirstOrderProbs(testFile, readerAndWriter); } else { if (flags.printFactorTable) { crf.PrintFactorTable(testFile, readerAndWriter); } else { 
// NOTE(review): Map(null) in the testFiles branch below is a Sharpen conversion
// artifact -- the string-to-File lambda was lost; confirm against the Java source.
if (flags.printProbs) { crf.PrintProbs(testFile, readerAndWriter); } else { if (flags.useKBest) { // TO DO: handle if user doesn't provide kBest int k = flags.kBest; crf.ClassifyAndWriteAnswersKBest(testFile, k, readerAndWriter); } else { if (flags.printLabelValue) { crf.PrintLabelInformation(testFile, readerAndWriter); } else { // no crf test flag provided log.Info("Warning: no crf test flag was provided, running classify and write answers"); crf.ClassifyAndWriteAnswers(testFile, readerAndWriter, true); } } } } } } } else { if (testFiles != null) { // if there is a crf and testFiles was set , do the crf stuff for testFiles // if testFile was set as well, testFile overrides IList <File> files = Arrays.Stream(testFiles.Split(",")).Map(null).Collect(Collectors.ToList()); if (flags.printProbs) { // there is a crf and printProbs crf.PrintProbs(files, crf.DefaultReaderAndWriter()); } else { log.Info("Warning: no crf test flag was provided, running classify files and write answers"); crf.ClassifyFilesAndWriteAnswers(files, crf.DefaultReaderAndWriter(), true); } } } } }
// Implementation notes (review): command-line driver for NERClassifierCombiner.
// Property handling order: loadClassifier (deserialize, commandline props override) or
// build fresh; serializeTo; textFile / comma-separated textFiles classification; then
// testFile/testFiles evaluation, delegating to ClassifierCombiner.ExamineCRF when
// crfToExamine is set; showNCCInfo; and finally stdin classification if -readStdin.
/// <summary>The main method.</summary> /// <exception cref="System.Exception"/> public static void Main(string[] args) { StringUtils.LogInvocationString(log, args); Properties props = StringUtils.ArgsToProperties(args); SeqClassifierFlags flags = new SeqClassifierFlags(props, false); // false for print probs as printed in next code block string loadPath = props.GetProperty("loadClassifier"); NERClassifierCombiner ncc; if (loadPath != null) { // note that when loading a serialized classifier, the philosophy is override // any settings in props with those given in the commandline // so if you dumped it with useSUTime = false, and you say -useSUTime at // the commandline, the commandline takes precedence ncc = ((NERClassifierCombiner)GetClassifier(loadPath, props)); } else { // pass null for passDownProperties to let all props go through ncc = CreateNERClassifierCombiner("ner", null, props); } // write the NERClassifierCombiner to the given path on disk string serializeTo = props.GetProperty("serializeTo"); if (serializeTo != null) { ncc.SerializeClassifier(serializeTo); } string textFile = props.GetProperty("textFile"); if (textFile != null) { ncc.ClassifyAndWriteAnswers(textFile); } // run on multiple textFiles , based off CRFClassifier code string textFiles = props.GetProperty("textFiles"); if (textFiles != null) { IList <File> files = new List <File>(); foreach (string filename in textFiles.Split(",")) { files.Add(new File(filename)); } ncc.ClassifyFilesAndWriteAnswers(files); } // options for run the NERClassifierCombiner on a testFile or testFiles string testFile = props.GetProperty("testFile"); string testFiles = props.GetProperty("testFiles"); string crfToExamine = props.GetProperty("crfToExamine"); IDocumentReaderAndWriter <CoreLabel> readerAndWriter = ncc.DefaultReaderAndWriter(); if (testFile != null || testFiles != null) { // check if there is not a crf specific request if (crfToExamine == null) { // in this case there is no crfToExamine if (testFile != 
// NOTE(review): Map(null) in the testFiles branch below is a Sharpen conversion
// artifact -- the string-to-File lambda was lost (the textFiles branch above shows the
// intended explicit-loop equivalent); confirm against the Java source.
null) { ncc.ClassifyAndWriteAnswers(testFile, readerAndWriter, true); } else { IList <File> files = Arrays.Stream(testFiles.Split(",")).Map(null).Collect(Collectors.ToList()); ncc.ClassifyFilesAndWriteAnswers(files, ncc.DefaultReaderAndWriter(), true); } } else { ClassifierCombiner.ExamineCRF(ncc, crfToExamine, flags, testFile, testFiles, readerAndWriter); } } // option for showing info about the NERClassifierCombiner string showNCCInfo = props.GetProperty("showNCCInfo"); if (showNCCInfo != null) { ShowNCCInfo(ncc); } // option for reading in from stdin if (flags.readStdin) { ncc.ClassifyStdin(); } }
/// <summary>Add NER tags to a tree's leaves, if the reflective NER tagger is available.</summary>
private static void AddNERTags(Tree tree)
{
    // Lazily initialize the reflective tagger handles if either is missing.
    if (NerTagger == null || NerClassifyMethod == null)
    {
        SetupNERTagger();
    }
    if (NerTagger == null || NerClassifyMethod == null)
    {
        // Setup did not succeed; silently skip tagging.
        return;
    }
    try
    {
        // Convert the tree's yield to CoreLabels and classify them in place.
        // NOTE(review): Map(null) is a Sharpen conversion artifact (the lambda was lost).
        IList<CoreLabel> labels = tree.Yield().Stream().Map(null).Collect(Collectors.ToList());
        NerClassifyMethod.Invoke(NerTagger, labels);
    }
    catch (Exception)
    {
        // Tagging is best-effort: log and leave the tree untagged on any failure.
        log.Warn("Error running " + NerCombinerName + " on Tree! Not applying NER tags!");
    }
}
// GetMatchedTokensIndex: runs a semgrex pattern over a sentence's dependency graph via
// ExtractPhraseFromPattern.GetSemGrexPatternNodes and returns the extracted phrases.
// `outputIndices`/`outputPhrases` are populated by that call but only `extractedPhrases`
// is returned. NOTE(review): Map(null) is a Sharpen conversion artifact -- the
// CoreLabel-to-string lambda was lost; the _IFunction_206 feature extractor is a
// Sharpen-generated anonymous-class stub. Confirm both against the Java source.
private ICollection <ExtractedPhrase> GetMatchedTokensIndex(SemanticGraph graph, SemgrexPattern pattern, DataInstance sent, string label) { //TODO: look at the ignoreCommonTags flag ExtractPhraseFromPattern extract = new ExtractPhraseFromPattern(false, PatternFactory.numWordsCompoundMapped[label]); ICollection <IntPair> outputIndices = new List <IntPair>(); bool findSubTrees = true; IList <CoreLabel> tokensC = sent.GetTokens(); //TODO: see if you can get rid of this (only used for matchedGraphs) IList <string> tokens = tokensC.Stream().Map(null).Collect(Collectors.ToList()); IList <string> outputPhrases = new List <string>(); IList <ExtractedPhrase> extractedPhrases = new List <ExtractedPhrase>(); Func <Pair <IndexedWord, SemanticGraph>, ICounter <string> > extractFeatures = new _IFunction_206(); //TODO: make features; extract.GetSemGrexPatternNodes(graph, tokens, outputPhrases, outputIndices, pattern, findSubTrees, extractedPhrases, constVars.matchLowerCaseContext, matchingWordRestriction); /* * //TODO: probably a bad idea to add ALL ngrams * Collection<ExtractedPhrase> outputIndicesMaxPhraseLen = new ArrayList<ExtractedPhrase>(); * for(IntPair o: outputIndices){ * int min = o.get(0); * int max = o.get(1); * * for (int i = min; i <= max ; i++) { * * CoreLabel t = tokensC.get(i); * String phrase = t.word(); * if(!matchedRestriction(t, label)) * continue; * for (int ngramSize = 1; ngramSize < PatternFactory.numWordsCompound; ++ngramSize) { * int j = i + ngramSize - 1; * if(j > max) * break; * * CoreLabel tokenj = tokensC.get(j); * * if(ngramSize > 1) * phrase += " " + tokenj.word(); * * if (matchedRestriction(tokenj, label)) { * outputIndicesMaxPhraseLen.add(new ExtractedPhrase(i, j, phrase)); * //outputIndicesMaxPhraseLen.add(new IntPair(i, j)); * } * } * } * }*/ //System.out.println("extracted phrases are " + extractedPhrases + " and output indices are " + outputIndices); return(extractedPhrases); }
// NOTE(review): the Map(null).Filter(null) chain is a Sharpen conversion artifact --
// the post-processing and filtering lambdas applied to SearchImplementation() results
// were lost; confirm against the Java source.
/// <summary>Run a search from this entailer.</summary> /// <remarks> /// Run a search from this entailer. This will return a list of sentence fragments /// that are entailed by the original sentence / fragment. /// </remarks> /// <returns>A list of entailed fragments.</returns> public virtual IList <SentenceFragment> Search() { return(SearchImplementation().Stream().Map(null).Filter(null).Collect(Collectors.ToList())); }
// NOTE(review): Map(null) and ForEach(null) are Sharpen conversion artifacts -- the
// CoreMap-to-CoreSentence wrapping lambda and the per-sentence initialization action
// were lost; confirm against the Java source.
/// <summary>create list of CoreSentence's based on the Annotation's sentences</summary> private void WrapSentences() { sentences = this.annotationDocument.Get(typeof(CoreAnnotations.SentencesAnnotation)).Stream().Map(null).Collect(Collectors.ToList()); sentences.ForEach(null); }
// GetMainStrs: returns the subset of `tokens` passing some predicate, in a list
// presized to the input count. NOTE(review): Filter(null) is a Sharpen conversion
// artifact -- the selection predicate was lost; confirm against the Java source.
public static IList <string> GetMainStrs(IList <string> tokens) { IList <string> mainTokenStrs = new List <string>(tokens.Count); Sharpen.Collections.AddAll(mainTokenStrs, tokens.Stream().Filter(null).Collect(Collectors.ToList())); return(mainTokenStrs); }
// NOTE(review): FlatMap(null) is a Sharpen conversion artifact -- the lambda flattening
// each sentence's entity mentions was lost; confirm against the Java source.
/// <summary>build a list of all entity mentions in the document from the sentences</summary> private void BuildDocumentEntityMentionsList() { entityMentions = sentences.Stream().FlatMap(null).Collect(Collectors.ToList()); }
// Guards against sentences with no MentionsAnnotation (leaves entityMentions unset in
// that case). NOTE(review): Map(null) is a Sharpen conversion artifact -- the
// CoreMap-to-CoreEntityMention wrapping lambda was lost; confirm against the Java source.
/// <summary>create list of CoreEntityMention's based on the CoreMap's entity mentions</summary> public virtual void WrapEntityMentions() { if (this.sentenceCoreMap.Get(typeof(CoreAnnotations.MentionsAnnotation)) != null) { entityMentions = this.sentenceCoreMap.Get(typeof(CoreAnnotations.MentionsAnnotation)).Stream().Map(null).Collect(Collectors.ToList()); } }
// Builds the document's quotes list from QuoteAnnotator.GatherQuotes.
// NOTE(review): Map(null) is a Sharpen conversion artifact -- the quote-wrapping lambda
// was lost; confirm against the Java source.
private void BuildDocumentQuotesList() { this.quotes = QuoteAnnotator.GatherQuotes(this.annotationDocument).Stream().Map(null).Collect(Collectors.ToList()); }
// RouteObject: recursive JSON serialization dispatcher. Routes `value` by runtime type:
// strings are escaped and quoted; collections become indented arrays (recursing per
// element); enums serialize as quoted names; Pairs as two-element arrays; Spans as
// [start,end]; IConsumer delegates to Object(); IStream is collected to a list and
// re-routed; primitive arrays are boxed element-by-element into lists and re-routed;
// object arrays go through Arrays.AsList; primitives write their literal form (floats
// and doubles through fixed DecimalFormat patterns); anything else throws.
// NOTE(review): the trailing IsAssignableFrom chains (int/short/byte/...) mirror Java's
// boxed Integer/Short/... re-routing and use Java-style ValueOf/Parse calls -- these
// are Sharpen conversion artifacts and appear unreachable after the preceding `is`
// checks; confirm against the Java source before cleanup.
private void RouteObject(int indent, object value) { if (value is string) { // Case: simple string (this is easy!) writer.Write("\""); writer.Write(StringUtils.EscapeJsonString(value.ToString())); writer.Write("\""); } else { if (value is ICollection) { // Case: collection writer.Write("["); Newline(); IEnumerator <object> elems = ((ICollection <object>)value).GetEnumerator(); while (elems.MoveNext()) { Indent(indent + 1); RouteObject(indent + 1, elems.Current); if (elems.MoveNext()) { writer.Write(","); } Newline(); } Indent(indent); writer.Write("]"); } else { if (value is Enum) { // Case: enumeration constant writer.Write("\""); writer.Write(StringUtils.EscapeJsonString(((Enum)value).Name())); writer.Write("\""); } else { if (value is Pair) { RouteObject(indent, Arrays.AsList(((Pair)value).first, ((Pair)value).second)); } else { if (value is Span) { writer.Write("["); writer.Write(int.ToString(((Span)value).Start())); writer.Write(","); Space(); writer.Write(int.ToString(((Span)value).End())); writer.Write("]"); } else { if (value is IConsumer) { Object(indent, (IConsumer <JSONOutputter.IWriter>)value); } else { if (value is IStream) { RouteObject(indent, ((IStream)value).Collect(Collectors.ToList())); } else { if (value.GetType().IsArray) { // Arrays make life miserable in Java Type componentType = value.GetType().GetElementType(); if (componentType.IsPrimitive) { if (typeof(int).IsAssignableFrom(componentType)) { List <int> lst = new List <int>(); //noinspection Convert2streamapi foreach (int elem in ((int[])value)) { lst.Add(elem); } RouteObject(indent, lst); } else { if (typeof(short).IsAssignableFrom(componentType)) { List <short> lst = new List <short>(); foreach (short elem in ((short[])value)) { lst.Add(elem); } RouteObject(indent, lst); } else { if (typeof(byte).IsAssignableFrom(componentType)) { List <byte> lst = new List <byte>(); foreach (byte elem in ((byte[])value)) { lst.Add(elem); } RouteObject(indent, lst); } else { if 
(typeof(long).IsAssignableFrom(componentType)) { List <long> lst = new List <long>(); //noinspection Convert2streamapi foreach (long elem in ((long[])value)) { lst.Add(elem); } RouteObject(indent, lst); } else { if (typeof(char).IsAssignableFrom(componentType)) { List <char> lst = new List <char>(); foreach (char elem in ((char[])value)) { lst.Add(elem); } RouteObject(indent, lst); } else { if (typeof(float).IsAssignableFrom(componentType)) { List <float> lst = new List <float>(); foreach (float elem in ((float[])value)) { lst.Add(elem); } RouteObject(indent, lst); } else { if (typeof(double).IsAssignableFrom(componentType)) { List <double> lst = new List <double>(); //noinspection Convert2streamapi foreach (double elem in ((double[])value)) { lst.Add(elem); } RouteObject(indent, lst); } else { if (typeof(bool).IsAssignableFrom(componentType)) { List <bool> lst = new List <bool>(); foreach (bool elem in ((bool[])value)) { lst.Add(elem); } RouteObject(indent, lst); } else { throw new InvalidOperationException("Unhandled primitive type in array: " + componentType); } } } } } } } } } else { RouteObject(indent, Arrays.AsList((object[])value)); } } else { if (value is int) { writer.Write(int.ToString((int)value)); } else { if (value is short) { writer.Write(short.ToString((short)value)); } else { if (value is byte) { writer.Write(byte.ToString((byte)value)); } else { if (value is long) { writer.Write(System.Convert.ToString((long)value)); } else { if (value is char) { writer.Write(char.ToString((char)(char)value)); } else { if (value is float) { writer.Write(new DecimalFormat("0.#######").Format(value)); } else { if (value is double) { writer.Write(new DecimalFormat("0.##############").Format(value)); } else { if (value is bool) { writer.Write(bool.ToString((bool)value)); } else { if (typeof(int).IsAssignableFrom(value.GetType())) { RouteObject(indent, int.Parse((int)value)); } else { if (typeof(short).IsAssignableFrom(value.GetType())) { RouteObject(indent, 
short.ValueOf((short)value)); } else { if (typeof(byte).IsAssignableFrom(value.GetType())) { RouteObject(indent, byte.ValueOf(unchecked ((byte)value))); } else { if (typeof(long).IsAssignableFrom(value.GetType())) { RouteObject(indent, long.ValueOf((long)value)); } else { if (typeof(char).IsAssignableFrom(value.GetType())) { RouteObject(indent, char.ValueOf((char)value)); } else { if (typeof(float).IsAssignableFrom(value.GetType())) { RouteObject(indent, float.ValueOf((float)value)); } else { if (typeof(double).IsAssignableFrom(value.GetType())) { RouteObject(indent, double.ValueOf((double)value)); } else { if (typeof(bool).IsAssignableFrom(value.GetType())) { RouteObject(indent, bool.ValueOf((bool)value)); } else { throw new Exception("Unknown object to serialize: " + value); } } } } } } } } } } } } } } } } } } } } } } } } }
// Implementation notes (review): iterates .mrg files recursively, reads Penn Treebank
// trees, indexes spans, converts leaves to tokens, parses to a dependency graph, finds
// trace targets/sources, runs natural-logic annotation, extracts subject/object span
// pairs, and logs progress every 100 trees. Per-tree exceptions are caught and printed
// so one bad tree does not abort the directory.
// NOTE(review): Map(null) in the leaves-to-tokens conversion is a Sharpen conversion
// artifact -- the leaf-to-CoreLabel lambda (and the commented-out trace filter) was
// lost; _ArrayCoreMap_325 is a Sharpen-generated anonymous-class stub. Confirm both
// against the Java source.
/// <summary>Process all the trees in the given directory.</summary> /// <remarks>Process all the trees in the given directory. For example, the WSJ section of the Penn Treebank.</remarks> /// <param name="name">The name of the directory we are processing.</param> /// <param name="directory">The directory we are processing.</param> /// <returns> /// A dataset of subject/object pairs in the trees in the directory. /// This is a list of sentences, such that each sentence has a collection of pairs of spans. /// Each pair of spans is a subject/object span pair that constitutes a valid extraction. /// </returns> /// <exception cref="System.IO.IOException"/> private static IList <Pair <ICoreMap, ICollection <Pair <Span, Span> > > > ProcessDirectory(string name, File directory) { Redwood.Util.ForceTrack("Processing " + name); // Prepare the files to iterate over IEnumerable <File> files = IOUtils.IterFilesRecursive(directory, "mrg"); int numTreesProcessed = 0; IList <Pair <ICoreMap, ICollection <Pair <Span, Span> > > > trainingData = new List <Pair <ICoreMap, ICollection <Pair <Span, Span> > > >(1024); // Iterate over the files foreach (File file in files) { // log(file); ITreeReader reader = new PennTreeReader(IOUtils.ReaderFromFile(file)); Tree tree; while ((tree = reader.ReadTree()) != null) { try { // Prepare the tree tree.IndexSpans(); tree.SetSpans(); // Get relevant information from sentence IList <CoreLabel> tokens = tree.GetLeaves().Stream().Map(null).Collect(Collectors.ToList()); // .filter(leaf -> !TRACE_SOURCE_PATTERN.matcher(leaf.word()).matches() && !leaf.tag().equals("-NONE-")) SemanticGraph graph = Parse(tree); IDictionary <int, Span> targets = FindTraceTargets(tree); IDictionary <int, int> sources = FindTraceSources(tree); // Create a sentence object ICoreMap sentence = new _ArrayCoreMap_325(tokens, graph, 4); natlog.DoOneSentence(null, sentence); // Generate training data ICollection <Pair <Span, Span> > trainingDataFromSentence = 
SubjectObjectPairs(graph, tokens, targets, sources); trainingData.Add(Pair.MakePair(sentence, trainingDataFromSentence)); // Debug print numTreesProcessed += 1; if (numTreesProcessed % 100 == 0) { Redwood.Util.Log("[" + new DecimalFormat("00000").Format(numTreesProcessed) + "] " + CountDatums(trainingData) + " known extractions"); } } catch (Exception t) { Sharpen.Runtime.PrintStackTrace(t); } } } // End Redwood.Util.Log(string.Empty + numTreesProcessed + " trees processed yielding " + CountDatums(trainingData) + " known extractions"); Redwood.Util.EndTrack("Processing " + name); return(trainingData); }
// NOTE(review): Map(null) is a Sharpen conversion artifact -- the lambda that applied
// ProcessPattern(matchPattern, p, tree) to each input tree was lost; confirm against
// the Java source.
/// <summary>Applies {#processPattern} to a collection of trees.</summary> /// <param name="matchPattern"> /// A /// <see cref="Edu.Stanford.Nlp.Trees.Tregex.TregexPattern"/> /// to be matched against a /// <see cref="Edu.Stanford.Nlp.Trees.Tree"/> /// . /// </param> /// <param name="p"> /// A /// <see cref="TsurgeonPattern"/> /// to apply. /// </param> /// <param name="inputTrees">The input trees to be processed</param> /// <returns>A List of the transformed trees</returns> public static IList <Tree> ProcessPatternOnTrees(TregexPattern matchPattern, TsurgeonPattern p, ICollection <Tree> inputTrees) { IList <Tree> result = inputTrees.Stream().Map(null).Collect(Collectors.ToList()); return(result); }
// Update: accumulates precision (system clusters scored against mentionToGold) and
// recall (gold clusters scored against the system's mention map) numerators and
// denominators for a coreference evaluation metric.
// NOTE(review): Map(null) and ToMap(null, null) are Sharpen conversion artifacts -- the
// cluster-to-list and key/value selector lambdas were lost; confirm against the Java
// source.
public virtual void Update(IList <IList <int> > gold, IList <Clusterer.Cluster> clusters, IDictionary <int, IList <int> > mentionToGold, IDictionary <int, Clusterer.Cluster> mentionToSystem) { IList <IList <int> > clustersAsList = clusters.Stream().Map(null).Collect(Collectors.ToList()); IDictionary <int, IList <int> > mentionToSystemLists = mentionToSystem.Stream().Collect(Collectors.ToMap(null, null)); Pair <double, double> prec = GetScore(clustersAsList, mentionToGold); Pair <double, double> rec = GetScore(gold, mentionToSystemLists); pNum += prec.first; pDen += prec.second; rNum += rec.first; rDen += rec.second; }