/// <summary>
/// Scores a candidate phrase as the (optionally weighted) number of patterns that extracted it,
/// divided by the phrase's count in <c>Data.processedDataFreq</c>.
/// </summary>
/// <param name="word">Phrase being scored.</param>
/// <param name="patsThatExtractedThis">
/// Patterns that extracted the phrase. Patterns found to have a zero weight are handed to
/// <c>Counters.RemoveKeys</c> together with this counter.
/// </param>
/// <param name="allSelectedPatterns">Counter consulted for each pattern's weight when <c>usePatternWeights</c> is set.</param>
/// <returns>0 when the processed-corpus count of the phrase is zero; otherwise the weight total divided by that count.</returns>
internal virtual double GetPatTFIDFScore(CandidatePhrase word, ICounter <E> patsThatExtractedThis, ICounter <E> allSelectedPatterns)
 {
     // Without a processed-corpus count there is nothing to normalise by: warn and bail out.
     if (Data.processedDataFreq.GetCount(word) == 0.0)
     {
         Redwood.Log(Redwood.Warn, "How come the processed corpus freq has count of " + word + " 0. The count in raw freq is " + Data.rawFreq.GetCount(word) + " and the Data.rawFreq size is " + Data.rawFreq.Size());
         return 0;
     }

     ICollection <E> zeroWeighted = new HashSet <E>();
     double          weightSum    = 0;
     foreach (KeyValuePair <E, double> entry in patsThatExtractedThis.EntrySet())
     {
         E pattern = entry.Key;
         if (!usePatternWeights)
         {
             // Unweighted mode: every extracting pattern counts once.
             weightSum += 1.0;
             continue;
         }
         double patternWeight = allSelectedPatterns.GetCount(pattern);
         if (patternWeight == 0)
         {
             Redwood.Log(Redwood.Force, "Warning: Weight zero for " + pattern + ". May be pattern was removed when choosing other patterns (if subsumed by another pattern).");
             zeroWeighted.Add(pattern);
         }
         weightSum += patternWeight;
     }
     // Removal is deferred until after the loop so the counter is not modified while being enumerated.
     Counters.RemoveKeys(patsThatExtractedThis, zeroWeighted);
     return weightSum / Data.processedDataFreq.GetCount(word);
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Looks for the leaf of <paramref name="root"/> whose value equals <paramref name="token"/> and whose
        /// zero-based index lies in <c>[index, index + approximateness]</c>.
        /// </summary>
        /// <remarks>
        /// Fallbacks, in order: the first leaf whose value equals the token regardless of position, then the
        /// second-to-last leaf (or the first leaf when there are fewer than two). Both fallbacks are logged.
        /// </remarks>
        /// <param name="root">Tree whose leaves are searched.</param>
        /// <param name="token">Leaf value to look for.</param>
        /// <param name="index">Lowest acceptable zero-based token index.</param>
        /// <param name="approximateness">How far past <paramref name="index"/> a match may lie.</param>
        /// <returns>The matching leaf, or one of the fallback leaves described above.</returns>
        private static Tree FunkyFindLeafWithApproximateSpan(Tree root, string token, int index, int approximateness)
        {
            IList <Tree> leaves = root.GetLeaves();

            foreach (Tree leaf in leaves)
            {
                CoreLabel label = (CoreLabel)leaf.Label();
                // Nullable on purpose: a plain int can never equal null, which made the
                // "no index annotation" check below dead code.
                int? indexInteger = label.Get(typeof(CoreAnnotations.IndexAnnotation));
                if (indexInteger == null)
                {
                    continue;
                }
                // The annotation is reduced by one before being compared with the zero-based index.
                int ind = indexInteger.Value - 1;
                if (token.Equals(leaf.Value()) && ind >= index && ind <= index + approximateness)
                {
                    return(leaf);
                }
            }
            // This shouldn't happen; instead of throwing, fall back to progressively weaker matches.
            Redwood.Log("RuleBasedCorefMentionFinder: Failed to find head token:\n" + "Tree is: " + root + "\n" + "token = |" + token + "|" + index + "|, approx=" + approximateness);
            foreach (Tree leaf_1 in leaves)
            {
                if (token.Equals(leaf_1.Value()))
                {
                    return(leaf_1);
                }
            }
            int fallback = Math.Max(0, leaves.Count - 2);

            Redwood.Log("RuleBasedCorefMentionFinder: Last resort: returning as head: " + leaves[fallback]);
            return(leaves[fallback]);
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Creates the hash index <c>{tableName}_index</c> on the <c>sentid</c> column of <c>tableName</c>
 /// unless a relation with that name can already be resolved.
 /// </summary>
 /// <remarks>
 /// Existence is probed with a <c>::regclass</c> cast; an <c>SQLException</c> from that probe is taken to
 /// mean the index is missing. The statements and the connection opened here are released before returning.
 /// </remarks>
 /// <exception cref="System.Exception">Wraps any <c>SQLException</c> raised while connecting or creating the index.</exception>
 public override void CreateIndexIfUsingDBAndNotExists()
 {
     IConnection conn       = null;
     IStatement  existsStmt = null;
     IStatement  createStmt = null;
     try
     {
         Redwood.Log(Redwood.Dbg, "Creating index for " + tableName);
         conn = SQLConnection.GetConnection();
         bool doesnotexist = false;
         //check if the index already exists
         try
         {
             existsStmt = conn.CreateStatement();
             string query = "SELECT '" + tableName + "_index'::regclass";
             existsStmt.Execute(query);
         }
         catch (SQLException)
         {
             doesnotexist = true;
         }
         if (doesnotexist)
         {
             createStmt = conn.CreateStatement();
             string indexquery = "create index CONCURRENTLY " + tableName + "_index on " + tableName + " using hash(\"sentid\") ";
             createStmt.Execute(indexquery);
             Redwood.Log(Redwood.Dbg, "Done creating index for " + tableName);
         }
     }
     catch (SQLException e)
     {
         // Keep the original exception as the cause so the database error is not lost.
         throw new Exception(e.Message, e);
     }
     finally
     {
         // Best-effort cleanup: a failure while closing must not mask the primary outcome.
         // NOTE(review): assumes SQLConnection.GetConnection() hands out a connection owned by the caller — confirm.
         try
         {
             try
             {
                 if (existsStmt != null)
                 {
                     existsStmt.Close();
                 }
                 if (createStmt != null)
                 {
                     createStmt.Close();
                 }
             }
             finally
             {
                 if (conn != null)
                 {
                     conn.Close();
                 }
             }
         }
         catch (SQLException closeError)
         {
             Redwood.Log(Redwood.Warn, "Could not release database resources for " + tableName + ": " + closeError.Message);
         }
     }
 }
Ejemplo n.º 4
0
        //Redwood.log("debug-cluster", "merged clusters: "+toID+" += "+from.clusterID);
        //to.printCorefCluster();
        //from.printCorefCluster();
        /// <summary>
        /// Logs this cluster's attributes on the "debug-cluster" channel, followed by one line per mention
        /// in ascending mention-ID order. The representative mention is prefixed with "*".
        /// </summary>
        public virtual void PrintCorefCluster()
        {
            string channel = "debug-cluster";

            Redwood.Log(channel, "Cluster ID: " + clusterID + "\tNumbers: " + numbers + "\tGenders: " + genders + "\tanimacies: " + animacies);
            Redwood.Log(channel, "NE: " + nerStrings + "\tfirst Mention's ID: " + firstMention.mentionID + "\tHeads: " + heads + "\twords: " + words);

            // Key by mention ID so the enumeration below comes out sorted.
            SortedDictionary <int, Mention> byMentionId = new SortedDictionary <int, Mention>();
            foreach (Mention member in this.corefMentions)
            {
                byMentionId[member.mentionID] = member;
            }

            foreach (KeyValuePair <int, Mention> pair in byMentionId)
            {
                Mention current  = pair.Value;
                string  marker   = (representative == current) ? "*" : string.Empty;
                string  head     = marker + "mention-> id:" + current.mentionID;
                string  position = "\t" + current.SpanToString() + "\tsentNum: " + current.sentNum + "\tstartIndex: " + current.startIndex;
                string  tail     = "\tType: " + current.mentionType + "\tNER: " + current.nerString;

                string line;
                if (current.goldCorefClusterID == -1)
                {
                    // No gold cluster: the original reference is printed right after the ID.
                    line = head + "\toriginalRef: " + current.originalRef + position + tail;
                }
                else
                {
                    line = head + "\toriginalClusterID: " + current.goldCorefClusterID + position + "\toriginalRef: " + current.originalRef + tail;
                }
                Redwood.Log(channel, line);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Assigns <c>headIndex</c>, <c>headWord</c> and <c>headString</c> to every mention of a sentence.
        /// </summary>
        /// <remarks>
        /// Chinese mentions are delegated to <c>FindHeadChinese</c>; all others use the syntactic head found in
        /// the sentence's parse tree. When the resulting head falls outside the mention's original span, the
        /// head is reset to the mention start and the head string becomes a rendering of the whole span.
        /// </remarks>
        /// <param name="s">Sentence supplying the parse tree and the token list.</param>
        /// <param name="mentions">Mentions to update in place.</param>
        public virtual void FindHead(ICoreMap s, IList <Mention> mentions)
        {
            Tree tree = s.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
            IList <CoreLabel> sent = s.Get(typeof(CoreAnnotations.TokensAnnotation));

            tree.IndexSpans(0);
            foreach (Mention m in mentions)
            {
                if (lang == Locale.Chinese)
                {
                    FindHeadChinese(sent, m);
                }
                else
                {
                    CoreLabel head = (CoreLabel)FindSyntacticHead(m, tree, sent).Label();
                    // The index annotation is reduced by one before being used to index into sent.
                    m.headIndex  = head.Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
                    m.headWord   = sent[m.headIndex];
                    m.headString = m.headWord.Get(typeof(CoreAnnotations.TextAnnotation)).ToLower(Locale.English);
                }
                // Position of the head relative to the start of the mention.
                int start = m.headIndex - m.startIndex;
                if (start < 0 || start >= m.originalSpan.Count)
                {
                    Redwood.Log("Invalid index for head " + start + "=" + m.headIndex + "-" + m.startIndex + ": originalSpan=[" + StringUtils.JoinWords(m.originalSpan, " ") + "], head=" + m.headWord);
                    Redwood.Log("Setting head string to entire mention");
                    m.headIndex = m.startIndex;
                    m.headWord  = m.originalSpan.Count > 0 ? m.originalSpan[0] : sent[m.startIndex];
                    // ToString() on a .NET list returns the type name rather than its elements, so the
                    // span is rendered explicitly as "[a, b, c]" (the Java list format this port derives from).
                    m.headString = "[" + string.Join(", ", m.originalSpan) + "]";
                }
            }
        }
Ejemplo n.º 6
0
 /// <summary>Builds a pattern token from its tag, NER and parse-tag restrictions.</summary>
 /// <param name="tag">Value stored in <c>tag</c>.</param>
 /// <param name="useTag">Value stored in <c>useTag</c>.</param>
 /// <param name="getCompoundPhrases">When false, <c>numWordsCompound</c> is forced to 1.</param>
 /// <param name="numWordsCompound">Compound length stored when <paramref name="getCompoundPhrases"/> is true.</param>
 /// <param name="nerTag">NER tag; must be non-null when <paramref name="useNER"/> is true.</param>
 /// <param name="useNER">Value stored in <c>useNER</c>.</param>
 /// <param name="useTargetParserParentRestriction">When true, the grandparent parse tag is recorded as well.</param>
 /// <param name="grandparentParseTag">Grandparent parse tag; a null value is stored as the string "null".</param>
 /// <exception cref="System.Exception">NER restriction requested but <paramref name="nerTag"/> is null.</exception>
 public PatternToken(string tag, bool useTag, bool getCompoundPhrases, int numWordsCompound, string nerTag, bool useNER, bool useTargetParserParentRestriction, string grandparentParseTag)
 {
     bool nerTagMissing = nerTag == null;
     if (useNER && nerTagMissing)
     {
         throw new Exception("NER tag is null and using NER restriction is true. Check your data.");
     }

     this.tag              = tag;
     this.useTag           = useTag;
     this.numWordsCompound = getCompoundPhrases ? numWordsCompound : 1;
     this.nerTag           = nerTag;
     this.useNER           = useNER;
     this.useTargetParserParentRestriction = useTargetParserParentRestriction;

     // The grandparent tag is only recorded when the parent restriction is active.
     if (!useTargetParserParentRestriction)
     {
         return;
     }
     if (grandparentParseTag != null)
     {
         this.grandparentParseTag = grandparentParseTag;
         return;
     }
     Redwood.Log(ConstantsAndVariables.extremedebug, "Grand parent parse tag null ");
     this.grandparentParseTag = "null";
 }
Ejemplo n.º 7
0
 /// <summary>
 /// Appends the text of <paramref name="message"/> to <c>buffer</c>, emitting the buffered text as one
 /// Redwood log line each time a newline character is encountered.
 /// </summary>
 /// <param name="message">Object whose <c>ToString()</c> text is buffered.</param>
 private void LogB(object message)
 {
     lock (this)
     {
         string text = message.ToString();
         for (int i = 0; i < text.Length; i++)
         {
             char ch = text[i];
             if (ch != '\n')
             {
                 buffer.Append(ch.ToString());
                 continue;
             }

             // Newline: flush what has accumulated (the newline itself is not logged) and start over.
             string line = buffer.ToString();
             if (tag == null)
             {
                 Redwood.Log(line);
             }
             else
             {
                 Redwood.Log(tag, line);
             }
             buffer = new StringBuilder();
         }
     }
 }
Ejemplo n.º 8
0
 /// <summary>
 /// Increments <c>Data.rawFreq</c> for every n-gram of 1 to <paramref name="numWordsCompound"/> tokens
 /// in the given sentences, then refreshes the Google/domain n-gram ratios when those are in use.
 /// </summary>
 /// <param name="sents">Sentences whose tokens are counted; only the dictionary values are read.</param>
 /// <param name="numWordsCompound">Maximum n-gram length.</param>
 public static void ComputeRawFreqIfNull(IDictionary <string, DataInstance> sents, int numWordsCompound)
 {
     Redwood.Log(Redwood.Dbg, "Computing raw freq for every 1-" + numWordsCompound + " consecutive words");
     foreach (DataInstance sentence in sents.Values)
     {
         foreach (IList <CoreLabel> gram in CollectionUtils.GetNGrams(sentence.GetTokens(), 1, numWordsCompound))
         {
             // Space-join the words of the n-gram; the leading separator is trimmed afterwards.
             string phrase = string.Empty;
             foreach (CoreLabel token in gram)
             {
                 phrase = phrase + " " + token.Word();
             }
             phrase = phrase.Trim();
             if (phrase.IsEmpty())
             {
                 continue;
             }
             Data.rawFreq.IncrementCount(CandidatePhrase.CreateOrGet(phrase));
         }
     }

     if (usingGoogleNgram)
     {
         SetRatioGoogleNgramFreqWithDataFreq();
     }

     bool haveDomainCounts = domainNGramRawFreq != null && domainNGramRawFreq.Size() > 0;
     if (haveDomainCounts)
     {
         ratioDomainNgramFreqWithDataFreq = domainNGramRawFreq.TotalCount() / Data.rawFreq.TotalCount();
     }
 }
            /// <summary>Forwards a java.util.logging record to Redwood.</summary>
            /// <remarks>
            /// Level mapping: WARNING becomes <c>Redwood.Warn</c>, SEVERE becomes <c>Redwood.Err</c>, and any
            /// level at or below FINE becomes <c>Redwood.Dbg</c>. Every other level (e.g. CONFIG, INFO) is
            /// logged without a tag.
            /// </remarks>
            /// <param name="record">Record whose message and level are forwarded.</param>
            public override void Publish(LogRecord record)
            {
                string text     = record.GetMessage();
                Level  severity = record.GetLevel();

                object channel;
                if (severity == Level.Warning)
                {
                    channel = Redwood.Warn;
                }
                else if (severity == Level.Severe)
                {
                    channel = Redwood.Err;
                }
                else if (severity.IntValue() <= Level.Fine.IntValue())
                {
                    channel = Redwood.Dbg;
                }
                else
                {
                    channel = null;
                }

                if (channel != null)
                {
                    Redwood.Log(channel, text);
                    return;
                }
                Redwood.Log(text);
            }
 /// <summary>
 /// Configures the Lucene-backed pattern store from <paramref name="props"/>, choosing a temporary index
 /// location when <c>allPatternsDir</c> is unset, and optionally loads an initial set of patterns.
 /// </summary>
 /// <param name="props">Options applied to this instance through <c>ArgumentParser.FillOptions</c>.</param>
 /// <param name="pats">Patterns passed to <c>AddPatterns</c>; skipped when null.</param>
 /// <exception cref="System.Exception">Wraps an <c>IOException</c> raised while creating the temporary file.</exception>
 public PatternsForEachTokenLucene(Properties props, IDictionary <string, IDictionary <int, ICollection <E> > > pats)
 {
     ArgumentParser.FillOptions(this, props);

     if (allPatternsDir == null)
     {
         // No location configured: fall back to a temp file path that is removed on exit.
         File tempIndex;
         try
         {
             tempIndex = File.CreateTempFile("allpatterns", "index");
             System.Console.Out.WriteLine("No directory provided for creating patternsForEachToken lucene index. Making it at " + tempIndex.GetAbsolutePath());
         }
         catch (IOException e)
         {
             throw new Exception(e);
         }
         tempIndex.DeleteOnExit();
         allPatternsDir = tempIndex.GetAbsolutePath();
     }

     File indexLocation = new File(allPatternsDir);
     if (createPatLuceneIndex)
     {
         Redwood.Log("Deleting any exising index at " + allPatternsDir);
         IOUtils.DeleteDirRecursively(indexLocation);
     }
     indexDir = indexLocation;

     if (pats == null)
     {
         return;
     }
     AddPatterns(pats);
 }
 /// <summary>Main entry point of the coreference system: runs every sieve over the document.</summary>
 /// <param name="document">Input document (annotation and optional information).</param>
 /// <param name="output">
 /// Receives the run's output and must have room for four entries: [0] gold CoNLL output, [1] CoNLL output
 /// before coref, [2] CoNLL output after coref, [3] the concatenated sieve logs. Entries 0-2 are only
 /// written when scoring is enabled.
 /// </param>
 /// <returns>Map of coref chain ID to the corresponding chain.</returns>
 /// <exception cref="System.Exception"/>
 public virtual IDictionary <int, CorefChain> Coref(Document document, StringBuilder[] output)
 {
     if (HybridCorefProperties.PrintMDLog(props))
     {
         Redwood.Log(HybridCorefPrinter.PrintMentionDetectionLog(document));
     }

     if (HybridCorefProperties.DoScore(props))
     {
         // Snapshot the gold and the pre-coref state before any sieve runs.
         StringBuilder gold = new StringBuilder();
         output[0] = gold.Append(CorefPrinter.PrintConllOutput(document, true));

         StringBuilder beforeCoref = new StringBuilder();
         output[1] = beforeCoref.Append(CorefPrinter.PrintConllOutput(document, false));
     }

     // Collect the log produced by each sieve, in order.
     StringBuilder sieveLog = new StringBuilder();
     output[3] = sieveLog;
     foreach (Edu.Stanford.Nlp.Coref.Hybrid.Sieve.Sieve sieve in sieves)
     {
         CorefUtils.CheckForInterrupt();
         sieveLog.Append(sieve.ResolveMention(document, dictionaries, props));
     }

     if (HybridCorefProperties.DoPostProcessing(props))
     {
         PostProcessing(document);
     }

     if (HybridCorefProperties.DoScore(props))
     {
         StringBuilder afterCoref = new StringBuilder();
         output[2] = afterCoref.Append(CorefPrinter.PrintConllOutput(document, false, true));
     }

     return MakeCorefOutput(document);
 }
Ejemplo n.º 12
0
        /// <summary>
        /// Scores the extracted test examples with <paramref name="model"/>, writing predictions next to the
        /// model's default output path and serialising the collected scores to a ".ser" file.
        /// </summary>
        /// <remarks>
        /// For a <c>MaxMarginMentionRanker</c> a second pass over the anaphoricity examples is written to a
        /// separate "_anaphoricity" predictions file. The active writer is closed even when scoring fails.
        /// </remarks>
        /// <param name="model">Model to evaluate.</param>
        /// <param name="predictionsName">File-name suffix appended to the model's default output path.</param>
        /// <param name="anaphoricityModel">When true, the first pass uses anaphoricity examples instead of pairwise ones.</param>
        /// <exception cref="System.Exception"/>
        public static void Test(PairwiseModel model, string predictionsName, bool anaphoricityModel)
        {
            Redwood.Log("scoref-train", "Reading compression...");
            Compressor <string> compressor = IOUtils.ReadObjectFromFile(StatisticalCorefTrainer.compressorFile);

            Redwood.Log("scoref-train", "Reading test data...");
            IList <DocumentExamples> testDocuments = IOUtils.ReadObjectFromFile(StatisticalCorefTrainer.extractedFeaturesFile);

            Redwood.Log("scoref-train", "Building test set...");
            IList <Pair <Example, IDictionary <int, CompressedFeatureVector> > > allExamples = anaphoricityModel ? GetAnaphoricityExamples(testDocuments) : GetExamples(testDocuments);

            Redwood.Log("scoref-train", "Testing...");
            PrintWriter writer = new PrintWriter(model.GetDefaultOutputPath() + predictionsName);
            try
            {
                IDictionary <int, ICounter <Pair <int, int> > > scores = new Dictionary <int, ICounter <Pair <int, int> > >();

                WriteScores(allExamples, compressor, model, writer, scores);
                if (model is MaxMarginMentionRanker)
                {
                    writer.Close();
                    // Cleared so the finally block does not close the first writer a second time
                    // should opening the replacement fail.
                    writer        = null;
                    writer        = new PrintWriter(model.GetDefaultOutputPath() + predictionsName + "_anaphoricity");
                    testDocuments = IOUtils.ReadObjectFromFile(StatisticalCorefTrainer.extractedFeaturesFile);
                    allExamples   = GetAnaphoricityExamples(testDocuments);
                    WriteScores(allExamples, compressor, model, writer, scores);
                }
                IOUtils.WriteObjectToFile(scores, model.GetDefaultOutputPath() + predictionsName + ".ser");
            }
            finally
            {
                // Release the predictions file on both the success and the failure path.
                if (writer != null)
                {
                    writer.Close();
                }
            }
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Computes the final CoNLL score for the coref output files found in a directory.
        /// </summary>
        /// <remarks>
        /// The common base name is taken from the first file listed in the directory (everything before its
        /// first '.'); "&lt;base&gt;.gold.txt" and "&lt;base&gt;.coref.predicted.txt" are then passed to the scorer.
        /// </remarks>
        /// <param name="corefOutputDir">Directory holding the coref output files.</param>
        /// <param name="scorerPath">Path handed to <c>CorefScorer.GetEvalSummary</c>.</param>
        /// <returns>The final CoNLL score, or -1 when the scorer fails with an <c>IOException</c>.</returns>
        public static double GetFinalConllScoreFromOutputDir(string corefOutputDir, string scorerPath)
        {
            File baseFolder = new File(corefOutputDir);

            File[] filesInBaseFolder = baseFolder.ListFiles();
            string baseName          = corefOutputDir;

            // Only the first listed file is used to derive the base name.
            if (filesInBaseFolder.Length > 0)
            {
                string outputFileName = filesInBaseFolder[0].GetName();
                // Split on a literal dot. The previous "\\." argument is a regular expression in Java but a
                // plain two-character separator in .NET, so the extension was never stripped.
                baseName = baseName + "/" + outputFileName.Split('.')[0];
            }
            string goldOutput       = baseName + ".gold.txt";
            string afterCorefOutput = baseName + ".coref.predicted.txt";

            try
            {
                string summary    = CorefScorer.GetEvalSummary(scorerPath, goldOutput, afterCorefOutput);
                double finalScore = GetFinalConllScore(summary);
                return(finalScore);
            }
            catch (IOException)
            {
                Redwood.Log("Error: failed to get coref score from directory");
                return(-1);
            }
        }
Ejemplo n.º 14
0
 /// <summary>Logs the relevant lines of a scorer summary through Redwood.</summary>
 /// <remarks>
 /// Before post-processing only the first line starting with "Identification of Mentions" is logged.
 /// After post-processing, every "METRIC" line and every "Recall" line that is not a mention-identification
 /// line are concatenated and logged as a single message.
 /// </remarks>
 /// <param name="summary">Scorer output, one entry per line.</param>
 /// <param name="logger">Not used by this implementation.</param>
 /// <param name="afterPostProcessing">Selects which of the two reports is produced.</param>
 public static void PrintScoreSummary(string summary, Logger logger, bool afterPostProcessing)
 {
     string[] lines = summary.Split("\n");

     if (afterPostProcessing)
     {
         StringBuilder report = new StringBuilder();
         foreach (string row in lines)
         {
             if (row.StartsWith("METRIC"))
             {
                 // Metric headers are appended without a trailing newline.
                 report.Append(row);
             }
             bool isMentionRow = row.StartsWith("Identification of Mentions");
             if (!isMentionRow && row.Contains("Recall"))
             {
                 report.Append(row).Append("\n");
             }
         }
         Redwood.Log(report.ToString());
         return;
     }

     foreach (string row in lines)
     {
         if (!row.StartsWith("Identification of Mentions"))
         {
             continue;
         }
         Redwood.Log(row);
         return;
     }
 }
Ejemplo n.º 15
0
        /// <summary>
        /// Selects up to <c>constVars.numWordsToAdd</c> phrases from <paramref name="newdt"/>, visiting them in
        /// the order produced by <c>Counters.ToPriorityQueue</c>.
        /// </summary>
        /// <remarks>
        /// Selection stops at the first phrase scoring below <paramref name="thresholdWordExtract"/>. A phrase
        /// is skipped when it is subject to the pattern-count threshold and has too few non-redundant
        /// patterns, or when it fuzzily matches an ignored word (in which case it is added to
        /// <paramref name="ignoreWords"/>). The next ten unvisited phrases are logged for debugging.
        /// </remarks>
        /// <param name="newdt">Candidate phrases with their scores.</param>
        /// <param name="terms">Phrase-by-pattern counts, used for the non-redundant pattern check and its log message.</param>
        /// <param name="useThresholdNumPatternsForTheseWords">Phrases to which the pattern-count threshold applies.</param>
        /// <param name="ignoreWords">Words to fuzzy-match against; may be null, otherwise it may be added to.</param>
        /// <param name="thresholdWordExtract">Minimum score a phrase needs to be considered.</param>
        /// <returns>A counter holding the selected phrases with their scores from <paramref name="newdt"/>.</returns>
        public virtual ICounter <CandidatePhrase> ChooseTopWords(ICounter <CandidatePhrase> newdt, TwoDimensionalCounter <CandidatePhrase, E> terms, ICounter <CandidatePhrase> useThresholdNumPatternsForTheseWords, ICollection <CandidatePhrase> ignoreWords
                                                                 , double thresholdWordExtract)
        {
            IEnumerator <CandidatePhrase> termIter   = Counters.ToPriorityQueue(newdt).GetEnumerator();
            ICounter <CandidatePhrase>    finalwords = new ClassicCounter <CandidatePhrase>();

            // The size cap is tested BEFORE advancing the enumerator. Checking it after MoveNext() would
            // silently consume one phrase, which would then be missing from the "next ten" log below.
            while (finalwords.Size() < constVars.numWordsToAdd && termIter.MoveNext())
            {
                CandidatePhrase w = termIter.Current;
                if (newdt.GetCount(w) < thresholdWordExtract)
                {
                    Redwood.Log(ConstantsAndVariables.extremedebug, "not adding word " + w + " and any later words because the score " + newdt.GetCount(w) + " is less than the threshold of  " + thresholdWordExtract);
                    break;
                }
                System.Diagnostics.Debug.Assert((newdt.GetCount(w) != double.PositiveInfinity));
                if (useThresholdNumPatternsForTheseWords.ContainsKey(w) && NumNonRedundantPatterns(terms, w) < constVars.thresholdNumPatternsApplied)
                {
                    Redwood.Log("extremePatDebug", "Not adding " + w + " because the number of non redundant patterns are below threshold of " + constVars.thresholdNumPatternsApplied + ":" + terms.GetCounter(w).KeySet());
                    continue;
                }
                // Fuzzy matching only runs when a minimum length is configured and there are words to match.
                CandidatePhrase matchedFuzzy = null;
                if (constVars.minLen4FuzzyForPattern > 0 && ignoreWords != null)
                {
                    matchedFuzzy = ConstantsAndVariables.ContainsFuzzy(ignoreWords, w, constVars.minLen4FuzzyForPattern);
                }
                if (matchedFuzzy == null)
                {
                    Redwood.Log("extremePatDebug", "adding word " + w);
                    finalwords.SetCount(w, newdt.GetCount(w));
                }
                else
                {
                    Redwood.Log("extremePatDebug", "not adding " + w + " because it matched " + matchedFuzzy + " in common English word");
                    // matchedFuzzy is only ever set when ignoreWords is non-null, so this cannot dereference null.
                    ignoreWords.Add(w);
                }
            }
            string nextTen = string.Empty;
            int    n       = 0;

            // Debug aid: list up to ten phrases that follow the point where selection stopped.
            while (n < 10 && termIter.MoveNext())
            {
                n++;
                CandidatePhrase w = termIter.Current;
                nextTen += ";\t" + w + ":" + newdt.GetCount(w);
            }
            Redwood.Log(Redwood.Dbg, "Next ten phrases were " + nextTen);
            return(finalwords);
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Maps each pattern to the sentence IDs that <c>GetFileSentIds</c> returns for the pattern's relevant words.
        /// </summary>
        /// <param name="pats">Patterns to look up.</param>
        /// <returns>A dictionary with one entry per pattern.</returns>
        public virtual IDictionary <E, ICollection <string> > GetFileSentIdsFromPats(ICollection <E> pats)
        {
            IDictionary <E, ICollection <string> > idsByPattern = new Dictionary <E, ICollection <string> >();

            foreach (E pattern in pats)
            {
                ICollection <string> sentenceIds = GetFileSentIds(pattern.GetRelevantWords());
                string trace = "For pattern with index " + pattern + " extracted the following sentences from the index " + sentenceIds;

                Redwood.Log(ConstantsAndVariables.extremedebug, trace);
                idsByPattern[pattern] = sentenceIds;
            }
            return idsByPattern;
        }
Ejemplo n.º 17
0
 //return ratioGoogleNgramFreqWithDataFreq;
 //  public static void loadGoogleNGrams() {
 //    if (googleNGram == null || googleNGram.size() == 0) {
 //      for (String line : IOUtils.readLines(googleNGramsFile)) {
 //        String[] t = line.split("\t");
 //        googleNGram.setCount(t[0], Double.valueOf(t[1]));
 //      }
 //      Redwood.log(ConstantsAndVariables.minimaldebug, "Data", "loading freq from google ngram file " + googleNGramsFile);
 //    }
 //  }
 /// <summary>
 /// Fills <c>domainNGramRawFreq</c> from the tab-separated file <c>domainNGramsFile</c> (phrase in the
 /// first column, count in the second) unless the counter already holds entries.
 /// </summary>
 public static void LoadDomainNGrams()
 {
     System.Diagnostics.Debug.Assert((domainNGramsFile != null));

     // Already populated: nothing to load.
     bool alreadyLoaded = domainNGramRawFreq != null && domainNGramRawFreq.Size() != 0;
     if (alreadyLoaded)
     {
         return;
     }

     foreach (string row in IOUtils.ReadLines(domainNGramsFile))
     {
         string[] columns = row.Split("\t");
         domainNGramRawFreq.SetCount(columns[0], double.ValueOf(columns[1]));
     }
     Redwood.Log(ConstantsAndVariables.minimaldebug, "Data", "loading freq from domain ngram file " + domainNGramsFile);
 }
Ejemplo n.º 18
0
            /// <summary>
            /// Writes the gold and pre-coref CoNLL output for a document, runs the coref algorithm on it,
            /// removes singleton clusters and writes the post-coref CoNLL output.
            /// </summary>
            /// <param name="id">Not used by this implementation.</param>
            /// <param name="document">Document to resolve and print.</param>
            public void Process(int id, Document document)
            {
                writerGold.Print(CorefPrinter.PrintConllOutput(document, true));
                writerBeforeCoref.Print(CorefPrinter.PrintConllOutput(document, false));

                long startedAt = Runtime.CurrentTimeMillis();
                this._enclosing.corefAlgorithm.RunCoref(document);

                if (this._enclosing.verbose)
                {
                    // Elapsed wall-clock time of the coref run, in seconds.
                    double elapsedSeconds = (Runtime.CurrentTimeMillis() - startedAt) / 1000.0;
                    Redwood.Log(this.GetName(), "Coref took " + elapsedSeconds + "s");
                }

                CorefUtils.RemoveSingletonClusters(document);
                writerAfterCoref.Print(CorefPrinter.PrintConllOutput(document, false, true));
            }
        /// <summary>
        /// Trains on the given examples for <c>NumEpochs</c> shuffled passes and logs the resulting training cost.
        /// </summary>
        /// <param name="examples">Groups of action pairs; intended to be flattened into a single list before training.</param>
        private void TrainPolicy(IList <IList <Pair <Clusterer.CandidateAction, Clusterer.CandidateAction> > > examples)
        {
            IList <Pair <Clusterer.CandidateAction, Clusterer.CandidateAction> > flattenedExamples = new List <Pair <Clusterer.CandidateAction, Clusterer.CandidateAction> >();

            // NOTE(review): the callbacks passed to ForEach/MapToDouble below are null in this port, so nothing
            // visible here ever fills flattenedExamples or performs a learning step — restore them from the
            // upstream implementation.
            examples.Stream().ForEach(null);
            for (int epoch = 0; epoch < NumEpochs; epoch++)
            {
                // Visit the examples in a fresh random order on every epoch.
                Java.Util.Collections.Shuffle(flattenedExamples, random);
                flattenedExamples.ForEach(null);
            }
            double totalCost = flattenedExamples.Stream().MapToDouble(null).Sum();

            // .NET composite formatting: "{0:F4}" prints four decimals. The previous "%.4f" is a Java
            // specifier that string.Format emits verbatim, without the value.
            Redwood.Log("scoref.train", string.Format("Training cost: {0:F4}", 100 * totalCost / flattenedExamples.Count));
        }
Ejemplo n.º 20
0
 /// <summary>Serialises <c>patternsForEachToken</c> to "allpatterns.ser" inside <paramref name="dir"/>.</summary>
 /// <param name="dir">Target directory; passed to <c>IOUtils.EnsureDir</c> before writing.</param>
 /// <returns>Always true when no exception is thrown.</returns>
 /// <exception cref="System.Exception">Wraps an <c>IOException</c> raised while preparing the directory or writing.</exception>
 public override bool Save(string dir)
 {
     string target = dir + "/allpatterns.ser";
     try
     {
         IOUtils.EnsureDir(new File(dir));
         IOUtils.WriteObjectToFile(this.patternsForEachToken, target);
         Redwood.Log(Redwood.Dbg, "Saving the patterns to " + target);
         return true;
     }
     catch (IOException e)
     {
         throw new Exception(e);
     }
 }
Ejemplo n.º 21
0
        /// <summary>
        /// Emits every entry of the weight vector as "name => value", one per line.
        /// </summary>
        /// <param name="writer">Destination for the lines; when null they go to the "scoref.train" Redwood channel instead.</param>
        public virtual void PrintWeightVector(PrintWriter writer)
        {
            foreach (KeyValuePair <string, double> weight in GetWeightVector())
            {
                string line = weight.Key + " => " + weight.Value;
                if (writer != null)
                {
                    writer.Println(line);
                    continue;
                }
                Redwood.Log("scoref.train", line);
            }
        }
 /// <summary>
 /// Opens the Lucene directory at <c>indexDir</c> and creates <c>indexWriter</c> on it, unless a writer
 /// already exists.
 /// </summary>
 /// <exception cref="System.Exception">Wraps an <c>IOException</c> raised while opening the directory or the writer.</exception>
 internal virtual void SetIndexWriter()
 {
     // A writer is created at most once; later calls are no-ops.
     if (indexWriter != null)
     {
         return;
     }

     try
     {
         dir = FSDirectory.Open(indexDir);
         Redwood.Log(Redwood.Dbg, "Updating lucene index at " + indexDir);
         indexWriter = new IndexWriter(dir, iwc);
     }
     catch (IOException e)
     {
         throw new Exception(e);
     }
 }
 /// <summary>
 /// Runs the configured extractor over an annotation and stores the matched expressions under
 /// <c>options.matchedExpressionsAnnotationKey</c>.
 /// </summary>
 /// <remarks>
 /// When the annotation carries sentences, extraction runs per sentence: each sentence receives its own
 /// matches, every match is stamped with that sentence's index, and the matches are pooled for the
 /// document. Otherwise extraction runs once over the whole annotation. Nothing is extracted when no
 /// extractor is configured.
 /// </remarks>
 /// <param name="annotation">Annotation to process and update in place.</param>
 public virtual void Annotate(Annotation annotation)
 {
     if (verbose)
     {
         Redwood.Log(Redwood.Dbg, "Adding TokensRegexAnnotator annotation...");
     }
     if (options.setTokenOffsets)
     {
         AddTokenOffsets(annotation);
     }

     if (extractor != null)
     {
         IList <ICoreMap> pooled;
         if (!annotation.ContainsKey(typeof(CoreAnnotations.SentencesAnnotation)))
         {
             // No sentence split available: treat the whole annotation as one unit.
             pooled = Extract(annotation);
         }
         else
         {
             pooled = new List <ICoreMap>();
             IList <ICoreMap> sentenceList = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
             foreach (ICoreMap current in sentenceList)
             {
                 IList <ICoreMap> found = Extract(current);
                 if (found == null || options.matchedExpressionsAnnotationKey == null)
                 {
                     continue;
                 }
                 Sharpen.Collections.AddAll(pooled, found);
                 current.Set(options.matchedExpressionsAnnotationKey, found);
                 foreach (ICoreMap expression in found)
                 {
                     expression.Set(typeof(CoreAnnotations.SentenceIndexAnnotation), current.Get(typeof(CoreAnnotations.SentenceIndexAnnotation)));
                 }
             }
         }

         if (options.matchedExpressionsAnnotationKey != null)
         {
             annotation.Set(options.matchedExpressionsAnnotationKey, pooled);
         }
     }

     if (verbose)
     {
         Redwood.Log(Redwood.Dbg, "done.");
     }
 }
Ejemplo n.º 24
0
        /// <summary>
        /// Logs dataset size, per-label counts, and precision / recall / F1 of a trained classifier for the
        /// ClauseSplit and ClauseInterm labels. Ratios are formatted as percentages with two decimals.
        /// </summary>
        /// <param name="classifier">The classifier to evaluate.</param>
        /// <param name="dataset">The dataset to evaluate the classifier on.</param>
        public static void DumpAccuracy(IClassifier <ClauseSplitter.ClauseClassifierLabel, string> classifier, GeneralDataset <ClauseSplitter.ClauseClassifierLabel, string> dataset)
        {
            DecimalFormat percent = new DecimalFormat("0.00%");

            Redwood.Log("size:         " + dataset.Size());
            Redwood.Log("split count:  " + StreamSupport.Stream(dataset.Spliterator(), false).Filter(null).Collect(Collectors.ToList()).Count);
            Redwood.Log("interm count: " + StreamSupport.Stream(dataset.Spliterator(), false).Filter(null).Collect(Collectors.ToList()).Count);

            // ClauseSplit label. F1 is computed as 2pr / (p + r).
            Pair <double, double> splitPR = classifier.EvaluatePrecisionAndRecall(dataset, ClauseSplitter.ClauseClassifierLabel.ClauseSplit);
            double splitF1 = 2 * splitPR.first * splitPR.second / (splitPR.first + splitPR.second);

            Redwood.Log("p  (split):   " + percent.Format(splitPR.first));
            Redwood.Log("r  (split):   " + percent.Format(splitPR.second));
            Redwood.Log("f1 (split):   " + percent.Format(splitF1));

            // ClauseInterm label, same computation.
            Pair <double, double> intermPR = classifier.EvaluatePrecisionAndRecall(dataset, ClauseSplitter.ClauseClassifierLabel.ClauseInterm);
            double intermF1 = 2 * intermPR.first * intermPR.second / (intermPR.first + intermPR.second);

            Redwood.Log("p  (interm):  " + percent.Format(intermPR.first));
            Redwood.Log("r  (interm):  " + percent.Format(intermPR.second));
            Redwood.Log("f1 (interm):  " + percent.Format(intermF1));
        }
Ejemplo n.º 25
0
        /// <summary>
        /// Applies each pattern to the sentences that the inverted index returns for it, collecting the
        /// extracted phrase/pattern pairs and matched tokens into the supplied accumulators.
        /// </summary>
        /// <param name="patterns">Patterns to apply; only the key set is read.</param>
        /// <param name="label">Label passed through to <c>RunParallelApplyPats</c>.</param>
        /// <param name="wordsandLemmaPatExtracted">Accumulator passed through to <c>RunParallelApplyPats</c>; its size is logged at the end.</param>
        /// <param name="matchedTokensByPat">Accumulator passed through to <c>RunParallelApplyPats</c>.</param>
        /// <param name="alreadyLabeledWords">Passed through to <c>RunParallelApplyPats</c>.</param>
        public virtual void ApplyPats(ICounter <E> patterns, string label, TwoDimensionalCounter <CandidatePhrase, E> wordsandLemmaPatExtracted, CollectionValuedMap <E, Triple <string, int, int> > matchedTokensByPat, ICollection <CandidatePhrase> alreadyLabeledWords
                                      )
        {
            // Copy the global environment's variables into every per-label environment first.
            foreach (KeyValuePair <string, Env> labelEnv in constVars.env)
            {
                Env env = labelEnv.Value;
                env.GetVariables().PutAll(ConstantsAndVariables.globalEnv.GetVariables());
            }

            IDictionary <E, IDictionary <string, DataInstance> > sentencesForPatterns = GetSentences(constVars.invertedIndex.QueryIndex(patterns.KeySet()));

            foreach (KeyValuePair <E, IDictionary <string, DataInstance> > perPattern in sentencesForPatterns)
            {
                E pattern = perPattern.Key;
                IDictionary <string, DataInstance> matchingSentences = perPattern.Value;
                RunParallelApplyPats(matchingSentences, label, pattern, wordsandLemmaPatExtracted, matchedTokensByPat, alreadyLabeledWords);
            }
            Redwood.Log(Redwood.Dbg, "# words/lemma and pattern pairs are " + wordsandLemmaPatExtracted.Size());
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Creates a mention for every token range of the sentence that is delimited by start/end
        /// <c>MentionTokenAnnotation</c> markers.
        /// </summary>
        /// <remarks>
        /// An end marker with no preceding start marker is logged and skipped. When the sentence has no
        /// enhanced dependencies, the basic dependencies are used in their place.
        /// </remarks>
        /// <param name="s">Sentence supplying tokens, dependencies and text.</param>
        /// <param name="mentions">Receives the created mentions.</param>
        /// <param name="mentionSpanSet">Receives the (begin, end) span of each created mention.</param>
        /// <param name="namedEntitySpanSet">Not used by this implementation.</param>
        protected internal static void ExtractPremarkedEntityMentions(ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
        {
            IList <CoreLabel> tokens       = s.Get(typeof(CoreAnnotations.TokensAnnotation));
            SemanticGraph     basicDeps    = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            SemanticGraph     enhancedDeps = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));

            if (enhancedDeps == null)
            {
                enhancedDeps = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            }

            // Zero-based start of the mention currently open; -1 while no start marker is pending.
            int openStart = -1;

            foreach (CoreLabel token in tokens)
            {
                MultiTokenTag marker = token.Get(typeof(CoreAnnotations.MentionTokenAnnotation));
                if (marker == null)
                {
                    continue;
                }
                if (marker.IsStart())
                {
                    openStart = token.Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
                }
                if (!marker.IsEnd())
                {
                    continue;
                }

                // The end marker's index annotation is used unchanged as the span end.
                int spanEnd = token.Get(typeof(CoreAnnotations.IndexAnnotation));
                if (openStart < 0)
                {
                    Redwood.Log("Start of marked mention not found in sentence: " + marker + " at tokenIndex=" + (token.Get(typeof(CoreAnnotations.IndexAnnotation)) - 1) + " for " + s.Get(typeof(CoreAnnotations.TextAnnotation)));
                    continue;
                }

                IntPair span           = new IntPair(openStart, spanEnd);
                int     dummyMentionId = -1;
                Mention created        = new Mention(dummyMentionId, openStart, spanEnd, tokens, basicDeps, enhancedDeps, new List <CoreLabel>(tokens.SubList(openStart, spanEnd)));
                mentions.Add(created);
                mentionSpanSet.Add(span);
                openStart = -1;
            }
        }
 /// <summary>
 /// Manual smoke test for the logging adaptor.
 /// </summary>
 /// <remarks>
 /// When the first argument is <c>redwood</c>, messages are written through the global
 /// logger before and after <c>JavaUtilLoggingAdaptor.Adapt()</c> is called. Otherwise the
 /// reverse direction is exercised: Redwood output is attached to the global logger through a
 /// <c>RedirectOutputHandler</c>.
 /// </remarks>
 /// <param name="args">Command-line arguments; only <c>args[0]</c> is inspected.</param>
 public static void Main(string[] args)
 {
     bool redwoodControlled = args.Length > 0 && args[0].Equals("redwood");

     if (!redwoodControlled)
     {
         // Reverse mapping
         Logger globalLogger = Logger.GetLogger(Logger.GlobalLoggerName);
         // Can be Logger.getGlobal() in jdk1.7
         // globalLogger.addHandler(new ConsoleHandler());
         Logger classLogger = Logger.GetLogger(typeof(JavaUtilLoggingAdaptor).FullName);

         globalLogger.Info("Starting test");
         classLogger.Log(Level.Info, "Hello from the class logger");
         Redwood.Log("Hello from Redwood!");

         // From here on Redwood's root handler also feeds the global logger.
         Redwood.RootHandler().AddChild(RedirectOutputHandler.FromJavaUtilLogging(globalLogger));
         Redwood.Log("Hello from Redwood -> Java!");
         Redwood.Log("Hello from Redwood -> Java again!");
         classLogger.Log(Level.Info, "Hello again from the class logger");

         Redwood.StartTrack("a track");
         Redwood.Log("Inside a track");
         classLogger.Log(Level.Info, "Hello a third time from the class logger");
         Redwood.EndTrack("a track");

         classLogger.Log(Level.Info, "Hello a fourth time from the class logger");
         return;
     }

     Redwood.Log(Redwood.Dbg, "at the top");
     Redwood.StartTrack("Adaptor test controlled by redwood");

     Logger rootLogger = Logger.GetLogger(Logger.GlobalLoggerName);
     rootLogger.Warning("I'm warning you!");
     rootLogger.Severe("Now I'm using my severe voice.");
     rootLogger.Info("FYI");

     Redwood.Log(Redwood.Dbg, "adapting");
     JavaUtilLoggingAdaptor.Adapt();
     rootLogger.Warning("I'm warning you in Redwood!");

     // should be safe to call this twice
     JavaUtilLoggingAdaptor.Adapt();
     rootLogger.Severe("Now I'm using my severe voice in Redwood!");
     rootLogger.Info("FYI: Redwood rocks");

     // make sure original java.util.logging levels are respected
     rootLogger.SetLevel(Level.Off);
     rootLogger.Severe("We shouldn't see this message.");

     Redwood.Log(Redwood.Dbg, "at the bottom");
     Redwood.EndTrack("Adaptor test controlled by redwood");
 }
Ejemplo n.º 28
0
        /// <summary>
        /// Builds the anaphoricity training examples for a list of documents.
        /// </summary>
        /// <remarks>
        /// For each document, every distinct <c>mentionId2</c> is marked anaphoric when at least one
        /// of the document's examples with that id has <c>label == 1</c>, and non-anaphoric otherwise.
        /// One output example is then emitted per distinct <c>mentionId2</c>, built from the first
        /// example in <c>doc.examples</c> that carries the id and paired with the document's
        /// <c>mentionFeatures</c>.
        /// <para>
        /// The input list is consumed: documents are removed from its end one at a time, so it is
        /// empty when the method returns.
        /// </para>
        /// </remarks>
        /// <param name="documents">Documents to convert; emptied by this call.</param>
        /// <returns>The generated examples, each paired with its document's mention features.</returns>
        public static IList <Pair <Example, IDictionary <int, CompressedFeatureVector> > > GetAnaphoricityExamples(IList <DocumentExamples> documents)
        {
            int p = 0;
            int t = 0;
            IList <Pair <Example, IDictionary <int, CompressedFeatureVector> > > examples = new List <Pair <Example, IDictionary <int, CompressedFeatureVector> > >();

            while (documents.Count > 0)
            {
                // Take the last document and drop it from the caller's list.
                int              lastIndex = documents.Count - 1;
                DocumentExamples doc       = documents[lastIndex];
                documents.RemoveAt(lastIndex);

                IDictionary <int, bool> areAnaphoric = new Dictionary <int, bool>();
                foreach (Example e in doc.examples)
                {
                    if (e.label == 1)
                    {
                        areAnaphoric[e.mentionId2] = true;
                    }
                    else if (!areAnaphoric.ContainsKey(e.mentionId2))
                    {
                        // First sighting of this mention: record it without overwriting a
                        // previously stored 'true'. The dictionary indexer cannot be used to
                        // probe for absence because it throws on a missing key.
                        areAnaphoric[e.mentionId2] = false;
                    }
                }

                // Statistics for the log line below: p = anaphoric mentions, t = all mentions.
                foreach (KeyValuePair <int, bool> e_1 in areAnaphoric)
                {
                    if (e_1.Value)
                    {
                        p++;
                    }
                    t++;
                }

                foreach (Example e_2 in doc.examples)
                {
                    bool isAnaphoric;
                    if (areAnaphoric.TryGetValue(e_2.mentionId2, out isAnaphoric))
                    {
                        // Removing the key ensures each mention yields exactly one example;
                        // later examples with the same id fail the lookup and are skipped.
                        areAnaphoric.Remove(e_2.mentionId2);
                        examples.Add(new Pair <Example, IDictionary <int, CompressedFeatureVector> >(new Example(e_2, isAnaphoric), doc.mentionFeatures));
                    }
                }
            }
            Redwood.Log("scoref-train", "Num anaphoricity examples " + p + " positive, " + t + " total");
            return(examples);
        }
        /// <summary>
        /// Runs the current classifier greedily over every document and reports the resulting
        /// B3 F1 score.
        /// </summary>
        /// <remarks>
        /// <c>isTraining</c> is set to 0 for the duration of the evaluation and to 1 afterwards.
        /// Each document's state is advanced with <c>DoBestAction</c> until it reports completion,
        /// then folded into a shared <c>B3Evaluator</c>.
        /// </remarks>
        /// <param name="docs">Documents to evaluate.</param>
        /// <param name="training">
        /// Only affects the log message: <c>true</c> labels the score "train", <c>false</c> "validate".
        /// </param>
        /// <returns>The F1 value reported by the evaluator.</returns>
        private double EvaluatePolicy(IList <ClustererDataLoader.ClustererDoc> docs, bool training)
        {
            isTraining = 0;
            EvalUtils.B3Evaluator evaluator = new EvalUtils.B3Evaluator();
            foreach (ClustererDataLoader.ClustererDoc doc in docs)
            {
                Clusterer.State currentState = new Clusterer.State(doc);
                while (!currentState.IsComplete())
                {
                    currentState.DoBestAction(classifier);
                }
                currentState.UpdateEvaluator(evaluator);
            }
            isTraining = 1;
            double score = evaluator.GetF1();

            // .NET composite formatting uses {index[:format]} items; printf-style %s / %.4f
            // would be emitted verbatim and the arguments dropped.
            Redwood.Log("scoref.train", string.Format("B3 F1 score on {0}: {1:F4}", training ? "train" : "validate", score));
            return(score);
        }
 //
 //  @Override
 //  public void finishUpdating() {
 //    if(indexWriter != null){
 //      try {
 //        indexWriter.commit();
 //      } catch (IOException e) {
 //        throw new RuntimeException(e);
 //      }
 //    }
 //    closeIndexWriter();
 //  }
 //
 //  @Override
 //  public void update(List<CoreLabel> tokens, String sentid) {
 //    try {
 //      setIndexWriter();
 //      indexWriter.deleteDocuments(new TermQuery(new Term("sentid",sentid)));
 //      add(tokens, sentid);
 //    } catch (IOException e) {
 //      throw new RuntimeException(e);
 //    }
 //
 //  }
 /// <summary>
 /// Opens the shared Lucene index writer if it is not already open.
 /// </summary>
 /// <remarks>
 /// When <c>openIndexWriter</c> is unset, the directory at <c>indexDir</c> is opened, a new
 /// <c>IndexWriter</c> is created from it and <c>iwc</c>, and the flag is set. When the flag is
 /// already set the call does nothing. The whole check-and-open sequence runs while holding a
 /// lock on the <c>PatternsForEachTokenLucene</c> type.
 /// </remarks>
 /// <exception cref="Exception">
 /// Wraps any <c>IOException</c> raised while opening the directory or creating the writer;
 /// the original exception is available as <c>InnerException</c>.
 /// </exception>
 internal static void SetIndexWriter()
 {
     // NOTE(review): locking on a Type object is generally discouraged, but other members of
     // this class may synchronize on the same object, so the lock target is left unchanged.
     lock (typeof(PatternsForEachTokenLucene))
     {
         try
         {
             if (!openIndexWriter.Get())
             {
                 dir = FSDirectory.Open(indexDir);
                 Redwood.Log(Redwood.Dbg, "Updating lucene index at " + indexDir);
                 indexWriter = new IndexWriter(dir, iwc);
                 openIndexWriter.Set(true);
             }
         }
         catch (IOException e)
         {
             // System.Exception has no (Exception) constructor; pass the message and keep the
             // original failure as the inner exception so its stack trace is not lost.
             throw new Exception(e.Message, e);
         }
     }
 }