// Public static utility methods
 /// <summary>
 /// Decides whether <paramref name="str"/> can be read as an acronym of <paramref name="tokens"/>.
 /// After everything matched by the discard pattern is stripped from the candidate, each remaining
 /// character must equal, ignoring case, the first character of the token at the same position.
 /// An empty token is accepted at any position.
 /// </summary>
 /// <param name="str">the candidate acronym</param>
 /// <param name="tokens">the tokens of the candidate expansion</param>
 /// <returns>true when the lengths agree and every position matches; false otherwise</returns>
 public static bool IsAcronymImpl(string str, IList <string> tokens)
 {
     // Strip the discardable parts from the candidate acronym
     string candidate = discardPattern.Matcher(str).ReplaceAll(string.Empty);
     IList <string> words = tokens;

     // Lengths disagree: filter the tokens (stopword removal, per the original comment) and try again.
     // NOTE(review): the filter predicate is null here; presumably the stopword test was lost -- confirm
     if (candidate.Length != words.Count)
     {
         words = words.Stream().Filter(null).Collect(Collectors.ToList());
     }
     if (candidate.Length != words.Count)
     {
         return false;
     }
     // Compare position by position
     int position = 0;
     while (position < candidate.Length)
     {
         char expected = char.ToUpperCase(candidate[position]);
         string word = words[position];
         bool mismatch = !word.IsEmpty() && char.ToUpperCase(word[0]) != expected;
         if (mismatch)
         {
             return false;
         }
         position++;
     }
     return true;
 }
示例#2
0
        /// <summary>Prints the document's mentions by delegating to <c>CorefPrinter.PrintConllOutput</c>.</summary>
        /// <param name="document">the document whose mentions are printed</param>
        /// <param name="gold">true to print <c>document.goldMentions</c>, false to print <c>document.predictedMentions</c></param>
        /// <param name="filterSingletons">when true, the mention lists are passed through a stream mapping step first</param>
        /// <returns>the string returned by <c>CorefPrinter.PrintConllOutput</c></returns>
        public static string PrintConllOutput(Document document, bool gold, bool filterSingletons)
        {
            IList <IList <Mention> > mentions;

            if (gold)
            {
                mentions = document.goldMentions;
            }
            else
            {
                mentions = document.predictedMentions;
            }
            if (filterSingletons)
            {
                // NOTE(review): the mapper is null; presumably it should drop singleton mentions -- confirm
                mentions = mentions.Stream().Map(null).Collect(Collectors.ToList());
            }
            return CorefPrinter.PrintConllOutput(document, mentions, gold);
        }
示例#3
0
		/// <summary>Convenience function to construct a subset to superset assignment map.</summary>
		/// <remarks>
		/// Each subset assignment is mapped to every superset assignment whose projection onto the subset,
		/// as computed by <c>SubsetAssignment</c>, equals it, so one subset assignment will usually map to a
		/// large number of superset assignments. The returned dictionary compares its keys by content, so a
		/// newly built list holding the same values finds the entry.
		/// </remarks>
		/// <param name="superset">the superset factor to map to</param>
		/// <param name="subset">the subset factor to map from</param>
		/// <returns>a map from subset assignment to list of superset assignment</returns>
		private IDictionary<IList<int>, IList<int[]>> SubsetToSupersetAssignments(TableFactor superset, TableFactor subset)
		{
			// List<int> only offers reference equality, so without an explicit comparer a lookup with an
			// equal-content list would never find its entry.
			IDictionary<IList<int>, IList<int[]>> subsetToSupersets = new Dictionary<IList<int>, IList<int[]>>(new AssignmentListComparer());
			foreach (int[] assignment in subset)
			{
				IList<int> subsetAssignmentList = new List<int>(assignment);
				IList<int[]> supersetAssignments = new List<int[]>();
				foreach (int[] supersetAssignment in superset)
				{
					if (Arrays.Equals(assignment, SubsetAssignment(supersetAssignment, superset, subset)))
					{
						supersetAssignments.Add(supersetAssignment);
					}
				}
				subsetToSupersets[subsetAssignmentList] = supersetAssignments;
			}
			return subsetToSupersets;
		}

		/// <summary>Compares assignment lists element by element instead of by reference.</summary>
		private sealed class AssignmentListComparer : IEqualityComparer<IList<int>>
		{
			/// <summary>True when both lists are null, or both hold the same values in the same order.</summary>
			public bool Equals(IList<int> x, IList<int> y)
			{
				if (object.ReferenceEquals(x, y))
				{
					return true;
				}
				if (x == null || y == null || x.Count != y.Count)
				{
					return false;
				}
				for (int i = 0; i < x.Count; i++)
				{
					if (x[i] != y[i])
					{
						return false;
					}
				}
				return true;
			}

			/// <summary>Order-sensitive hash over the elements; equal lists produce equal hashes.</summary>
			public int GetHashCode(IList<int> obj)
			{
				if (obj == null)
				{
					return 0;
				}
				int hash = 1;
				unchecked
				{
					foreach (int value in obj)
					{
						hash = 31 * hash + value;
					}
				}
				return hash;
			}
		}
示例#4
0
        /// <summary>
        /// Checks that iterating <c>testPair.array</c> visits exactly the assignments that are keys of
        /// <c>testPair.gold</c>: every visited assignment must be an expected one that has not been visited
        /// before, and no expected assignment may be left over at the end.
        /// </summary>
        /// <param name="testPair">the array under test together with its gold assignments</param>
        public virtual void TestAssignmentsIterator(NDArrayTest.NDArrayWithGold <double> testPair)
        {
            // Assignments are keyed by their comma-joined values. Arrays and lists compare by reference, so a
            // set of lists could never report that it contains a newly built copy of one of its members.
            ICollection <string> expected = new HashSet <string>();

            foreach (int[] assignment in testPair.gold.Keys)
            {
                expected.Add(string.Join(",", assignment));
            }
            foreach (int[] assignment_1 in testPair.array)
            {
                // Remove reports whether the key was present, so it doubles as the membership check
                NUnit.Framework.Assert.IsTrue(expected.Remove(string.Join(",", assignment_1)));
            }
            NUnit.Framework.Assert.IsTrue(expected.Count == 0);
        }
        /// <summary>
        /// Groups entity mentions into clusters. Each mention joins the first existing cluster that contains a
        /// mention for which <c>SameEntityWithoutLinking</c> returns true; when there is none it starts a new cluster.
        /// </summary>
        /// <param name="entityMentions">the mentions to cluster</param>
        /// <returns>one list per cluster</returns>
        public virtual IList <IList <ICoreMap> > ClusterEntityMentions(IList <ICoreMap> entityMentions)
        {
            List <List <CoreEntityMention> > clusters = new List <List <CoreEntityMention> >();

            foreach (CoreEntityMention candidate in WrapEntityMentions(entityMentions))
            {
                // Search the clusters in creation order and stop at the first member that matches
                List <CoreEntityMention> home = null;
                for (int c = 0; c < clusters.Count && home == null; c++)
                {
                    List <CoreEntityMention> members = clusters[c];
                    for (int m = 0; m < members.Count; m++)
                    {
                        if (SameEntityWithoutLinking(candidate, members[m]))
                        {
                            home = members;
                            break;
                        }
                    }
                }
                // No match anywhere: the mention opens a cluster of its own
                if (home == null)
                {
                    home = new List <CoreEntityMention>();
                    clusters.Add(home);
                }
                home.Add(candidate);
            }

            IList <IList <ICoreMap> > result = new List <IList <ICoreMap> >();

            foreach (List <CoreEntityMention> finished in clusters)
            {
                // NOTE(review): the mapper is null; presumably each wrapper should be mapped back to its ICoreMap -- confirm
                IList <ICoreMap> asCoreMaps = finished.Stream().Map(null).Collect(Collectors.ToList());
                result.Add(asCoreMaps);
            }
            return result;
        }
示例#6
0
 /// <summary>The list of NER tags, collected from the stream over <c>Tokens()</c>.</summary>
 /// <returns>the collected list</returns>
 public virtual IList <string> NerTags()
 {
     // NOTE(review): the mapper is null; presumably it should read the NER tag of each token -- confirm
     IList <string> tags = Tokens().Stream().Map(null).Collect(Collectors.ToList());
     return tags;
 }
                /// <summary>
                /// Paints the confusion table as a grid of cells. Row 0 and column 0 show the label names; every
                /// other cell shows the count stored for its (row label, column label) pair, or 0 when the table
                /// has no entry for that pair. Diagonal cells are tinted green and the others red, with the tint
                /// growing with the count relative to the largest diagonal / off-diagonal count in the table.
                /// Finally the mouse-over decoration is drawn for the selected cell, if there is one outside
                /// the header row and column.
                /// </summary>
                /// <param name="g">the graphics context to paint on</param>
                protected override void PaintComponent(Graphics g)
                {
                    base.PaintComponent(g);
                    // Dimensions
                    Graphics2D g2d = (Graphics2D)g.Create();

                    // NOTE(review): the font is set on g after g2d was created from it, while all drawing below
                    // goes through g2d -- confirm the font is meant to apply to the cell text
                    g.SetFont(new Font("Arial", Font.Plain, 10));
                    int width      = this.GetWidth();
                    int height     = this.GetHeight();
                    int cellWidth  = width / this.columnCount;
                    int cellHeight = height / this.rowCount;
                    // Integer division leaves spare pixels; split them evenly so the grid is centred
                    int xOffset    = (width - (this.columnCount * cellWidth)) / 2;
                    int yOffset    = (height - (this.rowCount * cellHeight)) / 2;
                    // Get label index
                    IList <U> labels = this._enclosing._enclosing.UniqueLabels().Stream().Collect(Collectors.ToList());
                    // Get color gradient: the largest count on and off the diagonal
                    int maxDiag    = 0;
                    int maxOffdiag = 0;

                    foreach (KeyValuePair <Pair <U, U>, int> entry in this._enclosing._enclosing.confTable)
                    {
                        if (entry.Key.first == entry.Key.second)
                        {
                            maxDiag = Math.Max(maxDiag, entry.Value);
                        }
                        else
                        {
                            maxOffdiag = Math.Max(maxOffdiag, entry.Value);
                        }
                    }
                    // Render the grid
                    float[] hsb = new float[3];
                    for (int row = 0; row < this.rowCount; row++)
                    {
                        for (int col = 0; col < this.columnCount; col++)
                        {
                            // Position
                            int   x       = xOffset + (col * cellWidth);
                            int   y       = yOffset + (row * cellHeight);
                            float xCenter = xOffset + (col * cellWidth) + cellWidth / 3.0f;
                            float yCenter = yOffset + (row * cellHeight) + cellHeight / 2.0f;
                            // Get text + Color
                            string text;
                            Color  bg = Color.White;
                            if (row == 0 && col == 0)
                            {
                                // Corner cell: legend for the two axes
                                text = "V guess | gold >";
                            }
                            else if (row == 0)
                            {
                                // Header row
                                text = labels[col - 1].ToString();
                            }
                            else if (col == 0)
                            {
                                // Header column
                                text = labels[row - 1].ToString();
                            }
                            else
                            {
                                // Set value. An int can never be null and the indexer would throw for a pair
                                // that was never recorded, so look the pair up explicitly and fall back to 0.
                                int count;
                                if (!this._enclosing._enclosing.confTable.TryGetValue(Pair.MakePair(labels[row - 1], labels[col - 1]), out count))
                                {
                                    count = 0;
                                }
                                text = string.Empty + count;
                                // Get color
                                if (row == col)
                                {
                                    // Diagonal: green channel fades half as fast as red and blue
                                    double percentGood = ((double)count) / ((double)maxDiag);
                                    hsb = Color.RGBtoHSB((int)(255 - (255.0 * percentGood)), (int)(255 - (255.0 * percentGood / 2.0)), (int)(255 - (255.0 * percentGood)), hsb);
                                    bg  = Color.GetHSBColor(hsb[0], hsb[1], hsb[2]);
                                }
                                else
                                {
                                    // Off-diagonal: red channel fades half as fast as green and blue
                                    double percentBad = ((double)count) / ((double)maxOffdiag);
                                    hsb = Color.RGBtoHSB((int)(255 - (255.0 * percentBad / 2.0)), (int)(255 - (255.0 * percentBad)), (int)(255 - (255.0 * percentBad)), hsb);
                                    bg  = Color.GetHSBColor(hsb[0], hsb[1], hsb[2]);
                                }
                            }
                            // Draw
                            Rectangle cell = new Rectangle(x, y, cellWidth, cellHeight);
                            g2d.SetColor(bg);
                            g2d.Fill(cell);
                            g2d.SetColor(Color.Black);
                            g2d.DrawString(text, xCenter, yCenter);
                            // NOTE(review): cells is appended to on every paint and is not cleared in this
                            // method -- confirm it is reset elsewhere
                            this.cells.Add(cell);
                        }
                    }
                    // Mouse over (only for cells outside the header row and column)
                    if (this.selectedCell != null && this.selectedCell.x > 0 && this.selectedCell.y > 0)
                    {
                        int       index = this.selectedCell.x + (this.selectedCell.y * this.columnCount);
                        Rectangle cell  = this.cells[index];
                        this.OnMouseOver(g2d, cell, labels[this.selectedCell.y - 1], labels[this.selectedCell.x - 1]);
                    }
                    // Clean up
                    g2d.Dispose();
                }
 /// <summary>Builds a list of labels from the pieces of <paramref name="input"/> separated by single spaces.</summary>
 /// <param name="input">the text to split</param>
 /// <returns>the list collected from the mapped pieces</returns>
 private IList <CoreLabel> MockLabels(string input)
 {
     string[] pieces = input.Split(" ");
     // NOTE(review): the mapper is null; presumably each piece should be turned into a CoreLabel -- confirm
     IList <CoreLabel> labels = Arrays.Stream(pieces).Map(null).Collect(Collectors.ToList());
     return labels;
 }
示例#9
0
 /// <summary>Extracts OpenIE relation triples from a set of sentence fragments.</summary>
 /// <param name="fragments">The sentence fragments to extract relations from.</param>
 /// <param name="sentence">
 /// The containing sentence that these fragments were extracted from. It is not referenced by the
 /// body as written.
 /// </param>
 /// <returns>A list of OpenIE triples, collected from the fragments after a map, a filter and a second map step.</returns>
 private IList <RelationTriple> RelationsInFragments(ICollection <SentenceFragment> fragments, ICoreMap sentence)
 {
     // NOTE(review): all three stream stages receive null; presumably the extraction lambdas (which would
     // use the sentence) were lost -- confirm
     IList <RelationTriple> triples = fragments.Stream().Map(null).Filter(null).Map(null).Collect(Collectors.ToList());
     return triples;
 }
示例#10
0
 /// <summary>Extracts the possible relation triples from this set of sentence fragments.</summary>
 /// <param name="fragments">The sentence fragments to extract relations from.</param>
 /// <returns>The list collected from the fragments after a map, a filter and a second map step.</returns>
 /// <seealso cref="RelationsInFragments(System.Collections.Generic.ICollection{E}, Edu.Stanford.Nlp.Util.ICoreMap)"/>
 public virtual IList <RelationTriple> RelationsInFragments(ICollection <SentenceFragment> fragments)
 {
     // NOTE(review): all three stream stages receive null; presumably the extraction lambdas were lost -- confirm
     IList <RelationTriple> triples = fragments.Stream().Map(null).Filter(null).Map(null).Collect(Collectors.ToList());
     return triples;
 }
示例#11
0
 /// <summary>Returns all of the entailed shortened clauses (as per natural logic) from the given clause.</summary>
 /// <remarks>
 /// This runs the forward entailment component of the OpenIE system only.
 /// It is usually chained together with the clause splitting component:
 /// <see cref="ClausesInSentence(Edu.Stanford.Nlp.Util.ICoreMap)"/>
 /// .
 /// The result holds the forward entailments (only when <c>entailmentsPerSentence</c> is positive), then
 /// the clause itself, then one fragment per accepted match of the adjective pattern.
 /// </remarks>
 /// <param name="clause">The premise clause, as a sentence fragment in itself.</param>
 /// <returns>A list of entailed clauses; an empty list when the clause has an empty parse tree.</returns>
 public virtual IList <SentenceFragment> EntailmentsFromClause(SentenceFragment clause)
 {
     // Nothing can be entailed from an empty tree
     if (clause.parseTree.IsEmpty())
     {
         return Java.Util.Collections.EmptyList();
     }

     // Forward entailments first, followed by the clause itself
     IList <SentenceFragment> entailed = new List <SentenceFragment>();
     if (entailmentsPerSentence > 0)
     {
         // NOTE(review): the mapper is null; presumably it should turn each search result into a fragment -- confirm
         Sharpen.Collections.AddAll(entailed, forwardEntailer.Apply(clause.parseTree, true).Search().Stream().Map(null).Collect(Collectors.ToList()));
     }
     entailed.Add(clause);

     // Special case: adjective entailments
     IList <SentenceFragment> fromAdjectives = new List <SentenceFragment>();
     SemgrexMatcher           adjMatcher     = adjectivePattern.Matcher(clause.parseTree);
     while (adjMatcher.Find())
     {
         // Nodes and relation captured by this match
         IndexedWord subj = adjMatcher.GetNode("subj");
         IndexedWord be   = adjMatcher.GetNode("be");
         IndexedWord adj  = adjMatcher.GetNode("adj");
         IndexedWord obj  = adjMatcher.GetNode("obj");
         IndexedWord pobj = adjMatcher.GetNode("pobj");
         string      prep = adjMatcher.GetRelnString("prep");

         // If the adjective, or any earlier adjective on the object, is privative, skip this match entirely
         bool privative = false;
         foreach (SemanticGraphEdge edge in clause.parseTree.OutgoingEdgeIterable(obj))
         {
             if ("amod".Equals(edge.GetRelation().ToString()) && edge.GetDependent().Index() <= adj.Index() && Edu.Stanford.Nlp.Naturalli.Util.PrivativeAdjectives.Contains(edge.GetDependent().Word().ToLower()))
             {
                 privative = true;
                 break;
             }
         }
         if (privative)
         {
             continue;
         }

         // Core tree: the adjective as root, with copula and subject attached
         SemanticGraph adjTree = new SemanticGraph();
         adjTree.AddRoot(adj);
         adjTree.AddVertex(subj);
         adjTree.AddVertex(be);
         adjTree.AddEdge(adj, be, GrammaticalRelation.ValueOf(Language.English, "cop"), double.NegativeInfinity, false);
         adjTree.AddEdge(adj, subj, GrammaticalRelation.ValueOf(Language.English, "nsubj"), double.NegativeInfinity, false);
         // Prepositional attachment, when the match captured one
         if (pobj != null)
         {
             System.Diagnostics.Debug.Assert(prep != null);
             adjTree.AddEdge(adj, pobj, GrammaticalRelation.ValueOf(Language.English, prep), double.NegativeInfinity, false);
         }
         // Monotonicity: keep the fragment only when both adjective and copula have upward polarity
         bool upward = adj.Get(typeof(NaturalLogicAnnotations.PolarityAnnotation)).IsUpwards() && be.Get(typeof(NaturalLogicAnnotations.PolarityAnnotation)).IsUpwards();
         if (upward)
         {
             fromAdjectives.Add(new SentenceFragment(adjTree, clause.assumedTruth, false));
         }
     }
     Sharpen.Collections.AddAll(entailed, fromAdjectives);
     return entailed;
 }
示例#12
0
 /// <summary>Get the keyphrases of the sentence as a list of Strings.</summary>
 /// <param name="toString">
 /// The function to use to convert a span to a string. The canonical case is Sentence::words.
 /// It is not referenced by the body as written.
 /// </param>
 /// <returns>A list of keyphrases, as Strings, collected from the stream over <c>KeyphraseSpans()</c>.</returns>
 /// <seealso cref="KeyphraseSpans()"/>
 public virtual IList <string> Keyphrases(IFunction <Sentence, IList <string> > toString)
 {
     // NOTE(review): the mapper is null; presumably it should render each span using toString -- confirm
     IList <string> phrases = KeyphraseSpans().Stream().Map(null).Collect(Collectors.ToList());
     return phrases;
 }
示例#13
0
        /// <summary>
        /// Logs the accuracy of a trained classifier: the dataset size, two datum counts, and precision,
        /// recall and F1 (formatted as percentages) for the <c>ClauseSplit</c> and <c>ClauseInterm</c> labels.
        /// </summary>
        /// <param name="classifier">The classifier to evaluate.</param>
        /// <param name="dataset">The dataset to evaluate the classifier on.</param>
        public static void DumpAccuracy(IClassifier <ClauseSplitter.ClauseClassifierLabel, string> classifier, GeneralDataset <ClauseSplitter.ClauseClassifierLabel, string> dataset)
        {
            DecimalFormat percent = new DecimalFormat("0.00%");

            Redwood.Log("size:         " + dataset.Size());
            // NOTE(review): both counts use a null filter and are therefore the same expression; presumably
            // each should select the data carrying its own label -- confirm
            string splitCountLine = "split count:  " + StreamSupport.Stream(dataset.Spliterator(), false).Filter(null).Collect(Collectors.ToList()).Count;
            Redwood.Log(splitCountLine);
            string intermCountLine = "interm count: " + StreamSupport.Stream(dataset.Spliterator(), false).Filter(null).Collect(Collectors.ToList()).Count;
            Redwood.Log(intermCountLine);

            // Clause-split label
            Pair <double, double> splitPR = classifier.EvaluatePrecisionAndRecall(dataset, ClauseSplitter.ClauseClassifierLabel.ClauseSplit);
            double splitF1 = 2 * splitPR.first * splitPR.second / (splitPR.first + splitPR.second);
            Redwood.Log("p  (split):   " + percent.Format(splitPR.first));
            Redwood.Log("r  (split):   " + percent.Format(splitPR.second));
            Redwood.Log("f1 (split):   " + percent.Format(splitF1));

            // Clause-intermediate label
            Pair <double, double> intermPR = classifier.EvaluatePrecisionAndRecall(dataset, ClauseSplitter.ClauseClassifierLabel.ClauseInterm);
            double intermF1 = 2 * intermPR.first * intermPR.second / (intermPR.first + intermPR.second);
            Redwood.Log("p  (interm):  " + percent.Format(intermPR.first));
            Redwood.Log("r  (interm):  " + percent.Format(intermPR.second));
            Redwood.Log("f1 (interm):  " + percent.Format(intermF1));
        }
示例#14
0
        /// <summary>
        /// Reads the serialized training data named by <c>StatisticalCorefTrainer</c> (labeled pairs, mention
        /// types, gold clusters and the three pairwise models' prediction files) and builds the document list
        /// from the sorted keys of the labeled pairs, limited to <paramref name="maxDocs"/>.
        /// </summary>
        /// <param name="maxDocs">the limit applied to the sorted stream of document keys</param>
        /// <returns>the list collected from the mapped document keys</returns>
        /// <exception cref="System.Exception"/>
        public static IList <ClustererDataLoader.ClustererDoc> LoadDocuments(int maxDocs)
        {
            IDictionary <int, IDictionary <Pair <int, int>, bool> > pairLabels      = IOUtils.ReadObjectFromFile(StatisticalCorefTrainer.datasetFile);
            IDictionary <int, IDictionary <int, string> >           typesByMention  = IOUtils.ReadObjectFromFile(StatisticalCorefTrainer.mentionTypesFile);
            IDictionary <int, IList <IList <int> > >                clustersByDoc   = IOUtils.ReadObjectFromFile(StatisticalCorefTrainer.goldClustersFile);

            // Prediction files of the three pairwise models
            string classificationPath = StatisticalCorefTrainer.pairwiseModelsPath + StatisticalCorefTrainer.ClassificationModel + "/" + StatisticalCorefTrainer.predictionsName + ".ser";
            IDictionary <int, ICounter <Pair <int, int> > > classification = IOUtils.ReadObjectFromFile(classificationPath);
            string rankingPath = StatisticalCorefTrainer.pairwiseModelsPath + StatisticalCorefTrainer.RankingModel + "/" + StatisticalCorefTrainer.predictionsName + ".ser";
            IDictionary <int, ICounter <Pair <int, int> > > ranking = IOUtils.ReadObjectFromFile(rankingPath);
            string anaphoricityPath = StatisticalCorefTrainer.pairwiseModelsPath + StatisticalCorefTrainer.AnaphoricityModel + "/" + StatisticalCorefTrainer.predictionsName + ".ser";
            IDictionary <int, ICounter <Pair <int, int> > > rawAnaphoricity = IOUtils.ReadObjectFromFile(anaphoricityPath);

            // One counter per document for the anaphoricity scores
            IDictionary <int, ICounter <int> > anaphoricity = new Dictionary <int, ICounter <int> >();
            foreach (KeyValuePair <int, ICounter <Pair <int, int> > > loaded in rawAnaphoricity)
            {
                ICounter <int> perDocument = new ClassicCounter <int>();
                // NOTE(review): the action is null, so perDocument stays empty; presumably each entry should be
                // copied into it -- confirm
                loaded.Value.EntrySet().ForEach(null);
                anaphoricity[loaded.Key] = perDocument;
            }

            // NOTE(review): the mapper is null; presumably it should build a ClustererDoc from the data loaded
            // above, most of which is otherwise unused here -- confirm
            IList <ClustererDataLoader.ClustererDoc> documents = pairLabels.Keys.Stream().Sorted().Limit(maxDocs).Map(null).Collect(Collectors.ToList());
            return documents;
        }
 /// <summary>Produces a list of <c>CoreEntityMention</c> from the given entity mentions.</summary>
 /// <param name="entityMentions">the mentions to wrap</param>
 /// <returns>the list collected from the mapped mentions</returns>
 public virtual IList <CoreEntityMention> WrapEntityMentions(IList <ICoreMap> entityMentions)
 {
     // NOTE(review): the mapper is null; presumably it should construct a wrapper around each mention -- confirm
     IList <CoreEntityMention> wrapped = entityMentions.Stream().Map(null).Collect(Collectors.ToList());
     return wrapped;
 }
        // { Cats eat _some_ mice,
        //   Cats eat _most_ mice }
        /// <summary>
        /// A helper method for
        /// <see cref="GetModifierSubtreeSpan(Edu.Stanford.Nlp.Semgraph.SemanticGraph, Edu.Stanford.Nlp.Ling.IndexedWord)"/>
        /// and
        /// <see cref="GetSubtreeSpan(Edu.Stanford.Nlp.Semgraph.SemanticGraph, Edu.Stanford.Nlp.Ling.IndexedWord)"/>
        /// . It walks outward from <paramref name="root"/> and tracks the smallest and largest word index seen.
        /// </summary>
        /// <param name="tree">the graph to walk</param>
        /// <param name="root">the word the span is computed for</param>
        /// <param name="validArcs">
        /// when not null, only outgoing edges of <paramref name="root"/> whose relation short name is in this
        /// collection are followed; "punct" edges are never followed
        /// </param>
        /// <returns>the pair (smallest index, largest index + 1)</returns>
        private static Pair <int, int> GetGeneralizedSubtreeSpan(SemanticGraph tree, IndexedWord root, ICollection <string> validArcs)
        {
            int lowest  = root.Index();
            int highest = root.Index();
            IQueue <IndexedWord> pending = new LinkedList <IndexedWord>();

            // Seed the queue with the admissible direct dependents of the root
            foreach (SemanticGraphEdge outgoing in tree.OutgoingEdgeIterable(root))
            {
                string label      = outgoing.GetRelation().GetShortName();
                bool   arcAllowed = validArcs == null || validArcs.Contains(label);
                if (!arcAllowed || "punct".Equals(label))
                {
                    continue;
                }
                pending.Add(outgoing.GetDependent());
            }
            // Breadth-first walk, widening the span with every word reached
            while (!pending.IsEmpty())
            {
                IndexedWord current = pending.Poll();
                lowest  = Math.Min(lowest, current.Index());
                highest = Math.Max(highest, current.Index());
                // ignore punctuation
                // NOTE(review): filter and mapper are null; presumably they drop "punct" edges and take each
                // edge's dependent -- confirm
                Sharpen.Collections.AddAll(pending, tree.GetOutEdgesSorted(current).Stream().Filter(null).Map(null).Collect(Collectors.ToList()));
            }
            return Pair.MakePair(lowest, highest + 1);
        }
        // run a particular CRF of this ClassifierCombiner on a testFile
        // user can say -crfToExamine 0 to get 1st element or -crfToExamine /edu/stanford/models/muc7.crf.ser.gz
        // this does not currently support drill down on CMM's
        /// <summary>
        /// Runs one base classifier of the combiner, selected by index or by load path, on a single test file
        /// or on a comma-separated list of test files. Nothing is done when no classifier can be selected or
        /// when neither <paramref name="testFile"/> nor <paramref name="testFiles"/> is given.
        /// </summary>
        /// <param name="cc">the combiner whose <c>baseClassifiers</c> and <c>initLoadPaths</c> are consulted</param>
        /// <param name="crfNameOrIndex">an index into <c>cc.baseClassifiers</c>, or an entry of <c>cc.initLoadPaths</c></param>
        /// <param name="flags">the flags that choose which report is produced</param>
        /// <param name="testFile">a single test file; when not null it takes precedence over <paramref name="testFiles"/></param>
        /// <param name="testFiles">comma-separated test files; only used when <paramref name="testFile"/> is null</param>
        /// <param name="readerAndWriter">the reader/writer handed to the single-file reports</param>
        /// <exception cref="System.Exception"/>
        public static void ExamineCRF(Edu.Stanford.Nlp.IE.ClassifierCombiner cc, string crfNameOrIndex, SeqClassifierFlags flags, string testFile, string testFiles, IDocumentReaderAndWriter <CoreLabel> readerAndWriter)
        {
            // potential index into baseClassifiers, set with the following rules
            // 1. first see if crfNameOrIndex is an integer index into baseClassifiers
            // 2. if it is not an integer or is out of range, see if it is the name of a crf loadPath
            // TryParse is used so that a path name (or a null / overflowing value) falls through to rule 2
            // instead of raising a parse exception.
            int ci;
            if (!int.TryParse(crfNameOrIndex, out ci) || ci < 0 || ci >= cc.baseClassifiers.Count)
            {
                ci = cc.initLoadPaths.IndexOf(crfNameOrIndex);
            }
            // if ci does not correspond to an index in baseClassifiers there is no crf to examine
            if (ci < 0 || ci >= cc.baseClassifiers.Count)
            {
                return;
            }
            // TODO: this will break if baseClassifiers contains something that is not a CRF
            CRFClassifier <CoreLabel> crf = (CRFClassifier <CoreLabel>)cc.baseClassifiers[ci];
            if (crf == null)
            {
                return;
            }

            if (testFile != null)
            {
                // there is a crf and testFile was set: produce the report for the first flag that is set
                if (flags.searchGraphPrefix != null)
                {
                    crf.ClassifyAndWriteViterbiSearchGraph(testFile, flags.searchGraphPrefix, crf.MakeReaderAndWriter());
                }
                else if (flags.printFirstOrderProbs)
                {
                    crf.PrintFirstOrderProbs(testFile, readerAndWriter);
                }
                else if (flags.printFactorTable)
                {
                    crf.PrintFactorTable(testFile, readerAndWriter);
                }
                else if (flags.printProbs)
                {
                    crf.PrintProbs(testFile, readerAndWriter);
                }
                else if (flags.useKBest)
                {
                    // TO DO: handle if user doesn't provide kBest
                    int k = flags.kBest;
                    crf.ClassifyAndWriteAnswersKBest(testFile, k, readerAndWriter);
                }
                else if (flags.printLabelValue)
                {
                    crf.PrintLabelInformation(testFile, readerAndWriter);
                }
                else
                {
                    // no crf test flag provided
                    log.Info("Warning: no crf test flag was provided, running classify and write answers");
                    crf.ClassifyAndWriteAnswers(testFile, readerAndWriter, true);
                }
            }
            else if (testFiles != null)
            {
                // there is a crf and testFiles was set (testFile, had it been set, would have overridden it)
                IList <File> files = new List <File>();
                foreach (string filename in testFiles.Split(","))
                {
                    files.Add(new File(filename));
                }
                if (flags.printProbs)
                {
                    // there is a crf and printProbs
                    crf.PrintProbs(files, crf.DefaultReaderAndWriter());
                }
                else
                {
                    log.Info("Warning: no crf test flag was provided, running classify files and write answers");
                    crf.ClassifyFilesAndWriteAnswers(files, crf.DefaultReaderAndWriter(), true);
                }
            }
        }
示例#18
0
        /// <summary>
        /// The main method. Builds or loads an <c>NERClassifierCombiner</c> from the command-line properties
        /// and then performs every action that was requested.
        /// </summary>
        /// <remarks>
        /// Properties read: <c>loadClassifier</c>, <c>serializeTo</c>, <c>textFile</c>, <c>textFiles</c>,
        /// <c>testFile</c>, <c>testFiles</c>, <c>crfToExamine</c> and <c>showNCCInfo</c>; the
        /// <c>readStdin</c> flag is read from the parsed flags. <c>textFiles</c> and <c>testFiles</c> are
        /// comma-separated lists of file names.
        /// </remarks>
        /// <param name="args">the command-line arguments, converted to properties</param>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            StringUtils.LogInvocationString(log, args);
            Properties         props = StringUtils.ArgsToProperties(args);
            SeqClassifierFlags flags = new SeqClassifierFlags(props, false);
            // false for print probs as printed in next code block
            string loadPath = props.GetProperty("loadClassifier");
            NERClassifierCombiner ncc;

            if (loadPath != null)
            {
                // note that when loading a serialized classifier, the philosophy is override
                // any settings in props with those given in the commandline
                // so if you dumped it with useSUTime = false, and you say -useSUTime at
                // the commandline, the commandline takes precedence
                ncc = ((NERClassifierCombiner)GetClassifier(loadPath, props));
            }
            else
            {
                // pass null for passDownProperties to let all props go through
                ncc = CreateNERClassifierCombiner("ner", null, props);
            }
            // write the NERClassifierCombiner to the given path on disk
            string serializeTo = props.GetProperty("serializeTo");

            if (serializeTo != null)
            {
                ncc.SerializeClassifier(serializeTo);
            }
            string textFile = props.GetProperty("textFile");

            if (textFile != null)
            {
                ncc.ClassifyAndWriteAnswers(textFile);
            }
            // run on multiple textFiles , based off CRFClassifier code
            string textFiles = props.GetProperty("textFiles");

            if (textFiles != null)
            {
                IList <File> files = new List <File>();
                foreach (string filename in textFiles.Split(","))
                {
                    files.Add(new File(filename));
                }
                ncc.ClassifyFilesAndWriteAnswers(files);
            }
            // options for run the NERClassifierCombiner on a testFile or testFiles
            string testFile     = props.GetProperty("testFile");
            string testFiles    = props.GetProperty("testFiles");
            string crfToExamine = props.GetProperty("crfToExamine");
            IDocumentReaderAndWriter <CoreLabel> readerAndWriter = ncc.DefaultReaderAndWriter();

            if (testFile != null || testFiles != null)
            {
                // check if there is not a crf specific request
                if (crfToExamine == null)
                {
                    // in this case there is no crfToExamine
                    if (testFile != null)
                    {
                        ncc.ClassifyAndWriteAnswers(testFile, readerAndWriter, true);
                    }
                    else
                    {
                        // Build the file list the same way as for textFiles above; a null mapper cannot
                        // turn the names into File objects.
                        IList <File> files = new List <File>();
                        foreach (string filename in testFiles.Split(","))
                        {
                            files.Add(new File(filename));
                        }
                        ncc.ClassifyFilesAndWriteAnswers(files, ncc.DefaultReaderAndWriter(), true);
                    }
                }
                else
                {
                    ClassifierCombiner.ExamineCRF(ncc, crfToExamine, flags, testFile, testFiles, readerAndWriter);
                }
            }
            // option for showing info about the NERClassifierCombiner
            string showNCCInfo = props.GetProperty("showNCCInfo");

            if (showNCCInfo != null)
            {
                ShowNCCInfo(ncc);
            }
            // option for reading in from stdin
            if (flags.readStdin)
            {
                ncc.ClassifyStdin();
            }
        }
 /// <summary>Add NER tags to a tree.</summary>
 /// <remarks>
 /// The tagger is set up on demand. When setup leaves either the tagger or its classify method
 /// unavailable, or when classification throws, the tree is left alone; in the latter case a warning is logged.
 /// </remarks>
 /// <param name="tree">the tree whose yield is handed to the tagger</param>
 private static void AddNERTags(Tree tree)
 {
     // set up tagger if necessary
     bool ready = NerTagger != null && NerClassifyMethod != null;
     if (!ready)
     {
         SetupNERTagger();
     }
     // without both the tagger and its classify method there is nothing we can do
     if (NerTagger == null || NerClassifyMethod == null)
     {
         return;
     }
     try
     {
         // classify
         // NOTE(review): the mapper is null; presumably it should convert each yield element to a CoreLabel -- confirm
         IList <CoreLabel> leafLabels = tree.Yield().Stream().Map(null).Collect(Collectors.ToList());
         NerClassifyMethod.Invoke(NerTagger, leafLabels);
     }
     catch (Exception)
     {
         log.Warn("Error running " + NerCombinerName + " on Tree!  Not applying NER tags!");
     }
 }
        /// <summary>
        /// Runs <paramref name="pattern"/> over <paramref name="graph"/> through
        /// <c>ExtractPhraseFromPattern.GetSemGrexPatternNodes</c> and returns the phrases it collected.
        /// </summary>
        /// <param name="graph">The semantic graph to match against.</param>
        /// <param name="pattern">The Semgrex pattern to apply.</param>
        /// <param name="sent">The sentence whose tokens are passed to the extractor.</param>
        /// <param name="label">Key used to look up the entry in <c>PatternFactory.numWordsCompoundMapped</c>.</param>
        /// <returns>The list of extracted phrases filled in by the extractor.</returns>
        private ICollection <ExtractedPhrase> GetMatchedTokensIndex(SemanticGraph graph, SemgrexPattern pattern, DataInstance sent, string label)
        {
            //TODO: look at the ignoreCommonTags flag
            ExtractPhraseFromPattern extractor   = new ExtractPhraseFromPattern(false, PatternFactory.numWordsCompoundMapped[label]);
            ICollection <IntPair>    matchedSpans = new List <IntPair>();
            IList <CoreLabel>        coreLabels   = sent.GetTokens();

            //TODO: see if you can get rid of this (only used for matchedGraphs)
            // NOTE(review): the null mapper looks like a lambda lost in automated translation -- confirm against upstream.
            IList <string>          words       = coreLabels.Stream().Map(null).Collect(Collectors.ToList());
            IList <string>          phraseTexts = new List <string>();
            IList <ExtractedPhrase> phrases     = new List <ExtractedPhrase>();

            // Constructed but not consumed anywhere in this method.
            //TODO: make features;
            Func <Pair <IndexedWord, SemanticGraph>, ICounter <string> > featureExtractor = new _IFunction_206();

            // The literal 'true' is the findSubTrees argument.
            extractor.GetSemGrexPatternNodes(graph, words, phraseTexts, matchedSpans, pattern, true, phrases, constVars.matchLowerCaseContext, matchingWordRestriction);

            // A disabled experiment used to follow here (TODO in the original: "probably a bad idea to add ALL ngrams").
            // For every IntPair in the output indices it walked the tokens from min to max, skipped tokens failing
            // matchedRestriction(t, label), built n-grams of size 1 up to (but excluding) PatternFactory.numWordsCompound
            // that stayed within max, and added an ExtractedPhrase(i, j, phrase) whenever the n-gram's last token
            // passed matchedRestriction.
            return phrases;
        }
示例#21
0
 /// <summary>Run a search from this entailer.</summary>
 /// <remarks>
 /// The fragments produced by <c>SearchImplementation</c> are passed through a map step and a
 /// filter step and then collected into a list. The result is the list of sentence fragments
 /// entailed by the original sentence / fragment.
 /// </remarks>
 /// <returns>A list of entailed fragments.</returns>
 public virtual IList <SentenceFragment> Search()
 {
     // NOTE(review): both null arguments look like lambdas lost in automated translation -- confirm against upstream.
     var fragments = SearchImplementation().Stream();
     var mapped    = fragments.Map(null);
     var kept      = mapped.Filter(null);
     return kept.Collect(Collectors.ToList());
 }
示例#22
0
 /// <summary>Create the list of CoreSentence's based on the Annotation's sentences.</summary>
 /// <remarks>
 /// Reads the <c>CoreAnnotations.SentencesAnnotation</c> value from <c>annotationDocument</c>, maps it into
 /// the <c>sentences</c> field, then runs a per-element action over the new list.
 /// </remarks>
 private void WrapSentences()
 {
     var annotatedSentences = this.annotationDocument.Get(typeof(CoreAnnotations.SentencesAnnotation));

     // NOTE(review): the null mapper and the null ForEach action look like lambdas lost in automated translation -- confirm against upstream.
     sentences = annotatedSentences.Stream().Map(null).Collect(Collectors.ToList());
     sentences.ForEach(null);
 }
        /// <summary>Returns a new list holding the tokens that pass the filter step.</summary>
        /// <param name="tokens">The candidate tokens; its count is used as the initial capacity of the result.</param>
        /// <returns>A freshly allocated list containing the filtered tokens.</returns>
        public static IList <string> GetMainStrs(IList <string> tokens)
        {
            IList <string> result = new List <string>(tokens.Count);

            // NOTE(review): the null predicate looks like a lambda lost in automated translation -- confirm against upstream.
            var filtered = tokens.Stream().Filter(null).Collect(Collectors.ToList());
            Sharpen.Collections.AddAll(result, filtered);
            return result;
        }
示例#24
0
 /// <summary>Build a list of all entity mentions in the document from the sentences.</summary>
 /// <remarks>Flat-maps the <c>sentences</c> field and stores the collected list in <c>entityMentions</c>.</remarks>
 private void BuildDocumentEntityMentionsList()
 {
     // NOTE(review): the null flat-map function looks like a lambda lost in automated translation -- confirm against upstream.
     var flattened = sentences.Stream().FlatMap(null);
     entityMentions = flattened.Collect(Collectors.ToList());
 }
示例#25
0
 /// <summary>Create the list of CoreEntityMention's based on the CoreMap's entity mentions.</summary>
 /// <remarks>
 /// Does nothing when <c>sentenceCoreMap</c> has no <c>CoreAnnotations.MentionsAnnotation</c> value;
 /// otherwise maps that value into the <c>entityMentions</c> field.
 /// </remarks>
 public virtual void WrapEntityMentions()
 {
     // No mentions annotation on this sentence: leave entityMentions untouched.
     if (this.sentenceCoreMap.Get(typeof(CoreAnnotations.MentionsAnnotation)) == null)
     {
         return;
     }

     // NOTE(review): the null mapper looks like a lambda lost in automated translation -- confirm against upstream.
     var mentionStream = this.sentenceCoreMap.Get(typeof(CoreAnnotations.MentionsAnnotation)).Stream();
     entityMentions = mentionStream.Map(null).Collect(Collectors.ToList());
 }
示例#26
0
 /// <summary>Fills the <c>quotes</c> field from the quotes gathered out of <c>annotationDocument</c>.</summary>
 /// <remarks>The quotes come from <c>QuoteAnnotator.GatherQuotes</c> and are passed through a map step before being collected.</remarks>
 private void BuildDocumentQuotesList()
 {
     var gathered = QuoteAnnotator.GatherQuotes(this.annotationDocument);

     // NOTE(review): the null mapper looks like a lambda lost in automated translation -- confirm against upstream.
     this.quotes = gathered.Stream().Map(null).Collect(Collectors.ToList());
 }
示例#27
0
 /// <summary>Writes a single value to <c>writer</c>, choosing the output form from the value's runtime type.</summary>
 /// <remarks>
 /// Strings and enum constants are written as quoted strings escaped with <c>StringUtils.EscapeJsonString</c>.
 /// Collections are written as a bracketed, comma-separated list with one recursively routed element per line.
 /// Pairs, streams and arrays are converted to a list and routed again. Spans are written as
 /// <c>[start, end]</c>. Consumers are delegated to <c>Object</c>. Numbers, characters and booleans
 /// are written as bare literals.
 /// </remarks>
 /// <param name="indent">The indentation level handed to <c>Indent</c>; nested elements use <c>indent + 1</c>.</param>
 /// <param name="value">The value to write. A null value reaches <c>value.GetType()</c> and is not handled specially.</param>
 /// <exception cref="System.InvalidOperationException">If the value is an array of a primitive type that has no branch below.</exception>
 /// <exception cref="System.Exception">If the value's type matches none of the handled cases.</exception>
 private void RouteObject(int indent, object value)
 {
     if (value is string)
     {
         // Case: simple string (this is easy!)
         writer.Write("\"");
         writer.Write(StringUtils.EscapeJsonString(value.ToString()));
         writer.Write("\"");
     }
     else if (value is ICollection)
     {
         // Case: collection
         writer.Write("[");
         Newline();
         // Walk the elements through the non-generic enumerator: a cast to ICollection<object> fails for
         // collections of value types, such as the List<int> built for int[] further down.
         System.Collections.IEnumerator elems = ((System.Collections.IEnumerable)value).GetEnumerator();
         // MoveNext both tests for and advances to the next element, so it must be called exactly once
         // per element; its result is remembered to decide whether a separating comma is needed.
         bool hasElement = elems.MoveNext();
         while (hasElement)
         {
             Indent(indent + 1);
             RouteObject(indent + 1, elems.Current);
             hasElement = elems.MoveNext();
             if (hasElement)
             {
                 writer.Write(",");
             }
             Newline();
         }
         Indent(indent);
         writer.Write("]");
     }
     else if (value is Enum)
     {
         // Case: enumeration constant
         writer.Write("\"");
         writer.Write(StringUtils.EscapeJsonString(((Enum)value).Name()));
         writer.Write("\"");
     }
     else if (value is Pair)
     {
         // Case: pair, written as a two-element list
         RouteObject(indent, Arrays.AsList(((Pair)value).first, ((Pair)value).second));
     }
     else if (value is Span)
     {
         // Case: span, written inline as [start, end]
         writer.Write("[");
         writer.Write(int.ToString(((Span)value).Start()));
         writer.Write(",");
         Space();
         writer.Write(int.ToString(((Span)value).End()));
         writer.Write("]");
     }
     else if (value is IConsumer)
     {
         // Case: nested object described by a callback
         Object(indent, (IConsumer <JSONOutputter.IWriter>)value);
     }
     else if (value is IStream)
     {
         // Case: stream, materialised into a list first
         RouteObject(indent, ((IStream)value).Collect(Collectors.ToList()));
     }
     else if (value.GetType().IsArray)
     {
         // Case: array. Primitive arrays are copied element by element into a list of the same element type.
         // NOTE(review): if ICollection above is System.Collections.ICollection, arrays already match that
         // branch and never reach this one -- confirm which ICollection is in scope.
         Type componentType = value.GetType().GetElementType();
         if (componentType.IsPrimitive)
         {
             if (typeof(int).IsAssignableFrom(componentType))
             {
                 List <int> lst = new List <int>();
                 foreach (int elem in ((int[])value))
                 {
                     lst.Add(elem);
                 }
                 RouteObject(indent, lst);
             }
             else if (typeof(short).IsAssignableFrom(componentType))
             {
                 List <short> lst = new List <short>();
                 foreach (short elem in ((short[])value))
                 {
                     lst.Add(elem);
                 }
                 RouteObject(indent, lst);
             }
             else if (typeof(byte).IsAssignableFrom(componentType))
             {
                 List <byte> lst = new List <byte>();
                 foreach (byte elem in ((byte[])value))
                 {
                     lst.Add(elem);
                 }
                 RouteObject(indent, lst);
             }
             else if (typeof(long).IsAssignableFrom(componentType))
             {
                 List <long> lst = new List <long>();
                 foreach (long elem in ((long[])value))
                 {
                     lst.Add(elem);
                 }
                 RouteObject(indent, lst);
             }
             else if (typeof(char).IsAssignableFrom(componentType))
             {
                 List <char> lst = new List <char>();
                 foreach (char elem in ((char[])value))
                 {
                     lst.Add(elem);
                 }
                 RouteObject(indent, lst);
             }
             else if (typeof(float).IsAssignableFrom(componentType))
             {
                 List <float> lst = new List <float>();
                 foreach (float elem in ((float[])value))
                 {
                     lst.Add(elem);
                 }
                 RouteObject(indent, lst);
             }
             else if (typeof(double).IsAssignableFrom(componentType))
             {
                 List <double> lst = new List <double>();
                 foreach (double elem in ((double[])value))
                 {
                     lst.Add(elem);
                 }
                 RouteObject(indent, lst);
             }
             else if (typeof(bool).IsAssignableFrom(componentType))
             {
                 List <bool> lst = new List <bool>();
                 foreach (bool elem in ((bool[])value))
                 {
                     lst.Add(elem);
                 }
                 RouteObject(indent, lst);
             }
             else
             {
                 throw new InvalidOperationException("Unhandled primitive type in array: " + componentType);
             }
         }
         else
         {
             RouteObject(indent, Arrays.AsList((object[])value));
         }
     }
     else if (value is int)
     {
         writer.Write(int.ToString((int)value));
     }
     else if (value is short)
     {
         writer.Write(short.ToString((short)value));
     }
     else if (value is byte)
     {
         writer.Write(byte.ToString((byte)value));
     }
     else if (value is long)
     {
         writer.Write(System.Convert.ToString((long)value));
     }
     else if (value is char)
     {
         writer.Write(char.ToString((char)(char)value));
     }
     else if (value is float)
     {
         writer.Write(new DecimalFormat("0.#######").Format(value));
     }
     else if (value is double)
     {
         writer.Write(new DecimalFormat("0.##############").Format(value));
     }
     else if (value is bool)
     {
         writer.Write(bool.ToString((bool)value));
     }
     else
     {
         // The former typeof(int/short/byte/long/char/float/double/bool).IsAssignableFrom(value.GetType())
         // fallbacks were removed: each can only be true for a value already matched by the corresponding
         // 'is' test above, so they were unreachable.
         throw new Exception("Unknown object to serialize: " + value);
     }
 }
        /// <summary>Process all the trees in the given directory.</summary>
        /// <remarks>
        /// Process all the trees in the given directory. For example, the WSJ section of the Penn Treebank.
        /// Every file found by <c>IOUtils.IterFilesRecursive(directory, "mrg")</c> is read tree by tree. A tree
        /// that raises an exception while being processed has its stack trace printed and is skipped.
        /// </remarks>
        /// <param name="name">The name of the directory we are processing; used in the tracking label.</param>
        /// <param name="directory">The directory we are processing.</param>
        /// <returns>
        /// A dataset of subject/object pairs in the trees in the directory.
        /// This is a list of sentences, such that each sentence has a collection of pairs of spans.
        /// Each pair of spans is a subject/object span pair that constitutes a valid extraction.
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        private static IList <Pair <ICoreMap, ICollection <Pair <Span, Span> > > > ProcessDirectory(string name, File directory)
        {
            Redwood.Util.ForceTrack("Processing " + name);

            // Prepare the files to iterate over
            IEnumerable <File> mrgFiles = IOUtils.IterFilesRecursive(directory, "mrg");
            int treeCount = 0;
            IList <Pair <ICoreMap, ICollection <Pair <Span, Span> > > > dataset = new List <Pair <ICoreMap, ICollection <Pair <Span, Span> > > >(1024);

            foreach (File mrgFile in mrgFiles)
            {
                // NOTE(review): the reader is not closed anywhere in this method -- confirm whether ITreeReader needs explicit disposal.
                ITreeReader treeReader = new PennTreeReader(IOUtils.ReaderFromFile(mrgFile));
                for (Tree tree = treeReader.ReadTree(); tree != null; tree = treeReader.ReadTree())
                {
                    try
                    {
                        // Prepare the tree
                        tree.IndexSpans();
                        tree.SetSpans();

                        // Get relevant information from sentence
                        // NOTE(review): the null mapper looks like a lambda lost in automated translation -- confirm against upstream.
                        // A filter dropping trace-source leaves and "-NONE-" tags was commented out in the original:
                        //            .filter(leaf -> !TRACE_SOURCE_PATTERN.matcher(leaf.word()).matches() && !leaf.tag().equals("-NONE-"))
                        IList <CoreLabel>       leafLabels   = tree.GetLeaves().Stream().Map(null).Collect(Collectors.ToList());
                        SemanticGraph           dependencies = Parse(tree);
                        IDictionary <int, Span> traceTargets = FindTraceTargets(tree);
                        IDictionary <int, int>  traceSources = FindTraceSources(tree);

                        // Create a sentence object
                        ICoreMap sentence = new _ArrayCoreMap_325(leafLabels, dependencies, 4);
                        natlog.DoOneSentence(null, sentence);

                        // Generate training data
                        ICollection <Pair <Span, Span> > sentencePairs = SubjectObjectPairs(dependencies, leafLabels, traceTargets, traceSources);
                        dataset.Add(Pair.MakePair(sentence, sentencePairs));

                        // Progress report every 100 trees
                        treeCount++;
                        if (treeCount % 100 == 0)
                        {
                            var paddedCount = new DecimalFormat("00000").Format(treeCount);
                            Redwood.Util.Log("[" + paddedCount + "] " + CountDatums(dataset) + " known extractions");
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: report the failure and carry on with the next tree.
                        Sharpen.Runtime.PrintStackTrace(ex);
                    }
                }
            }

            // End
            Redwood.Util.Log(treeCount + " trees processed yielding " + CountDatums(dataset) + " known extractions");
            Redwood.Util.EndTrack("Processing " + name);
            return dataset;
        }
示例#29
0
        /// <summary>Applies the pattern processing to a collection of trees.</summary>
        /// <remarks>
        /// NOTE(review): in the visible code neither <paramref name="matchPattern"/> nor <paramref name="p"/> is used;
        /// the null mapper looks like a lambda lost in automated translation -- confirm against upstream.
        /// </remarks>
        /// <param name="matchPattern">
        /// A
        /// <see cref="Edu.Stanford.Nlp.Trees.Tregex.TregexPattern"/>
        /// to be matched against a
        /// <see cref="Edu.Stanford.Nlp.Trees.Tree"/>
        /// .
        /// </param>
        /// <param name="p">
        /// A
        /// <see cref="TsurgeonPattern"/>
        /// to apply.
        /// </param>
        /// <param name="inputTrees">The input trees to be processed</param>
        /// <returns>A List of the transformed trees</returns>
        public static IList <Tree> ProcessPatternOnTrees(TregexPattern matchPattern, TsurgeonPattern p, ICollection <Tree> inputTrees)
        {
            var mappedTrees = inputTrees.Stream().Map(null);
            IList <Tree> transformed = mappedTrees.Collect(Collectors.ToList());
            return transformed;
        }
            /// <summary>Adds the precision and recall contributions of one clustering to the running totals.</summary>
            /// <remarks>
            /// Precision is scored with <c>GetScore</c> on the system clusters against <paramref name="mentionToGold"/>;
            /// recall is scored on <paramref name="gold"/> against the system mapping. The first and second
            /// components of each score are added to <c>pNum</c>/<c>pDen</c> and <c>rNum</c>/<c>rDen</c>.
            /// </remarks>
            /// <param name="gold">The gold clusters.</param>
            /// <param name="clusters">The system clusters.</param>
            /// <param name="mentionToGold">Map from a mention to its gold cluster.</param>
            /// <param name="mentionToSystem">Map from a mention to its system cluster.</param>
            public virtual void Update(IList <IList <int> > gold, IList <Clusterer.Cluster> clusters, IDictionary <int, IList <int> > mentionToGold, IDictionary <int, Clusterer.Cluster> mentionToSystem)
            {
                // NOTE(review): the null arguments to Map and ToMap look like lambdas lost in automated translation -- confirm against upstream.
                IList <IList <int> >            systemClusters  = clusters.Stream().Map(null).Collect(Collectors.ToList());
                IDictionary <int, IList <int> > mentionToSysIds = mentionToSystem.Stream().Collect(Collectors.ToMap(null, null));

                Pair <double, double> precision = GetScore(systemClusters, mentionToGold);
                Pair <double, double> recall    = GetScore(gold, mentionToSysIds);

                // Accumulate numerators and denominators separately.
                pNum = pNum + precision.first;
                pDen = pDen + precision.second;
                rNum = rNum + recall.first;
                rDen = rDen + recall.second;
            }