//line is a jsonstring of map of label to array of strings; ex: {"name":["Bush","Carter","Obama"]}
        /// <summary>
        /// Adds caller-supplied seed phrases for each label found in <paramref name="line"/>: the phrases are
        /// merged into <c>seedWords</c>, registered on a fresh <c>ConstantsAndVariables</c>, and handed to
        /// <c>GetPatternsFromDataMultiClass.RunLabelSeedWords</c> together with <c>Data.sents</c>.
        /// </summary>
        /// <param name="line">
        /// JSON object mapping each label to an array of phrase strings,
        /// e.g. {"name":["Bush","Carter","Obama"]}.
        /// </param>
        /// <returns>The fixed status string "SUCCESS added new phrases".</returns>
        /// <exception cref="System.Exception"/>
        public virtual string DoNewPhrases(string line)
        {
            System.Console.Out.WriteLine("adding new phrases");
            ConstantsAndVariables constVars  = new ConstantsAndVariables(props, humanLabelClasses.Keys, humanLabelClasses);
            IJsonReader           jsonReader = Javax.Json.Json.CreateReader(new StringReader(line));
            IJsonObject           objarr;

            try
            {
                objarr = jsonReader.ReadObject();
            }
            finally
            {
                // Release the reader even when the payload cannot be parsed.
                jsonReader.Close();
            }

            foreach (KeyValuePair <string, IJsonValue> o in objarr)
            {
                string label = o.Key;
                ICollection <CandidatePhrase> seed = new HashSet <CandidatePhrase>();
                IJsonArray arr = objarr.GetJsonArray(label);
                for (int i = 0; i < arr.Count; i++)
                {
                    string seedw = arr.GetString(i);
                    System.Console.Out.WriteLine("adding " + seedw + " to seed ");
                    seed.Add(CandidatePhrase.CreateOrGet(seedw));
                }
                // The label must already exist in seedWords and humanLabelClasses; both are indexed directly.
                Sharpen.Collections.AddAll(seedWords[label], seed);
                constVars.AddSeedWords(label, seed);
                GetPatternsFromDataMultiClass.RunLabelSeedWords(Data.sents, humanLabelClasses[label], label, seed, constVars, false);
            }
            return("SUCCESS added new phrases");
        }
Пример #2
0
        /// <summary>Parse a JSON formatted tree into a SemanticGraph.</summary>
        /// <param name="jsonString">
        /// The JSON string tree to parse, e.g:
        /// "[{\"\"dependent\"\": 7, \"\"dep\"\": \"\"root\"\", \"\"governorgloss\"\": \"\"root\"\", \"\"governor\"\": 0, \"\"dependentgloss\"\": \"\"sport\"\"}, {\"\"dependent\"\": 1, \"\"dep\"\": \"\"nsubj\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"chess\"\"}, {\"\"dependent\"\": 2, \"\"dep\"\": \"\"cop\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"is\"\"}, {\"\"dependent\"\": 3, \"\"dep\"\": \"\"neg\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"not\"\"}, {\"\"dependent\"\": 4, \"\"dep\"\": \"\"det\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"a\"\"}, {\"\"dependent\"\": 5, \"\"dep\"\": \"\"advmod\"\", \"\"governorgloss\"\": \"\"physical\"\", \"\"governor\"\": 6, \"\"dependentgloss\"\": \"\"predominantly\"\"}, {\"\"dependent\"\": 6, \"\"dep\"\": \"\"amod\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"physical\"\"}, {\"\"dependent\"\": 9, \"\"dep\"\": \"\"advmod\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"yet\"\"}, {\"\"dependent\"\": 10, \"\"dep\"\": \"\"nsubj\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"neither\"\"}, {\"\"dependent\"\": 11, \"\"dep\"\": \"\"cop\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"are\"\"}, {\"\"dependent\"\": 12, \"\"dep\"\": \"\"parataxis\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"shooting\"\"}, {\"\"dependent\"\": 13, \"\"dep\"\": \"\"cc\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"and\"\"}, {\"\"dependent\"\": 14, \"\"dep\"\": \"\"parataxis\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"curling\"\"}, 
{\"\"dependent\"\": 14, \"\"dep\"\": \"\"conj:and\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"curling\"\"}, {\"\"dependent\"\": 16, \"\"dep\"\": \"\"nsubjpass\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"which\"\"}, {\"\"dependent\"\": 18, \"\"dep\"\": \"\"case\"\", \"\"governorgloss\"\": \"\"fact\"\", \"\"governor\"\": 19, \"\"dependentgloss\"\": \"\"in\"\"}, {\"\"dependent\"\": 19, \"\"dep\"\": \"\"nmod:in\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"fact\"\"}, {\"\"dependent\"\": 21, \"\"dep\"\": \"\"aux\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"has\"\"}, {\"\"dependent\"\": 22, \"\"dep\"\": \"\"auxpass\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"been\"\"}, {\"\"dependent\"\": 23, \"\"dep\"\": \"\"dep\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"nicknamed\"\"}, {\"\"dependent\"\": 25, \"\"dep\"\": \"\"dobj\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"chess\"\"}, {\"\"dependent\"\": 26, \"\"dep\"\": \"\"case\"\", \"\"governorgloss\"\": \"\"ice\"\", \"\"governor\"\": 27, \"\"dependentgloss\"\": \"\"on\"\"}, {\"\"dependent\"\": 27, \"\"dep\"\": \"\"nmod:on\"\", \"\"governorgloss\"\": \"\"chess\"\", \"\"governor\"\": 25, \"\"dependentgloss\"\": \"\"ice\"\"}, {\"\"dependent\"\": 29, \"\"dep\"\": \"\"amod\"\", \"\"governorgloss\"\": \"\"chess\"\", \"\"governor\"\": 25, \"\"dependentgloss\"\": \"\"5\"\"}]");
        /// </param>
        /// <param name="tokens">The tokens of the sentence, to form the backing labels of the tree.</param>
        /// <returns>A semantic graph of the sentence, according to the given tree.</returns>
        /// <remarks>
        /// An empty graph is returned when the JSON array is null or empty, or when a row's
        /// "dependent" or "governor" index does not fit the token list. Rows whose "dep" is "ref"
        /// add their vertices but no edge.
        /// </remarks>
        public static SemanticGraph ParseJsonTree(string jsonString, IList <CoreLabel> tokens)
        {
            IJsonReader   json = Javax.Json.Json.CreateReader(new StringReader(jsonString));
            SemanticGraph tree = new SemanticGraph();
            IJsonArray    array;

            try
            {
                array = json.ReadArray();
            }
            finally
            {
                // Release the reader whether or not parsing succeeded.
                json.Close();
            }

            if (array == null || array.IsEmpty())
            {
                return(tree);
            }
            // Indexed by the 1-based token position used in the JSON; slot 0 (governor of the root) is never filled.
            IndexedWord[] vertices = new IndexedWord[tokens.Count + 2];
            // Add edges
            for (int i = 0; i < array.Count; i++)
            {
                IJsonObject entry = array.GetJsonObject(i);
                // Parse row
                int dependentIndex = entry.GetInt("dependent");
                // Validate before touching vertices/tokens so a bad index yields an empty graph, not an exception.
                if (dependentIndex < 1 || dependentIndex > tokens.Count)
                {
                    // Bizarre mismatch in sizes; the malt parser seems to do this often
                    return(new SemanticGraph());
                }
                if (vertices[dependentIndex] == null)
                {
                    vertices[dependentIndex] = new IndexedWord(tokens[dependentIndex - 1]);
                }
                IndexedWord dependent     = vertices[dependentIndex];
                int         governorIndex = entry.GetInt("governor");
                if (governorIndex < 0 || governorIndex > tokens.Count)
                {
                    // Bizarre mismatch in sizes; the malt parser seems to do this often
                    return(new SemanticGraph());
                }
                if (vertices[governorIndex] == null && governorIndex > 0)
                {
                    vertices[governorIndex] = new IndexedWord(tokens[governorIndex - 1]);
                }
                IndexedWord governor = vertices[governorIndex];
                string      relation = entry.GetString("dep");
                // Process row
                if (governorIndex == 0)
                {
                    // Governor 0 marks the dependent as a root of the graph.
                    tree.AddRoot(dependent);
                }
                else
                {
                    tree.AddVertex(dependent);
                    if (!tree.ContainsVertex(governor))
                    {
                        tree.AddVertex(governor);
                    }
                    if (!"ref".Equals(relation))
                    {
                        tree.AddEdge(governor, dependent, GrammaticalRelation.ValueOf(Language.English, relation), double.NegativeInfinity, false);
                    }
                }
            }
            return(tree);
        }
        /// <summary>
        /// Builds a <see cref="DataTable"/> with one column per distinct key found across the JSON
        /// objects in <paramref name="array"/> and one row per object.
        /// </summary>
        /// <param name="array">JSON array to convert; elements that are not <c>IJsonObject</c> are skipped.</param>
        /// <returns>The populated table. Entries for keys an object lacks stay null in that row's value array.</returns>
        private DataTable ConvertJsonArrayToDataTable(IJsonArray array)
        {
            var jsonObjects = array.OfType<IJsonObject>();

            // A column's ordinal is the order in which its key is first encountered.
            var ordinalByKey = jsonObjects
                .SelectMany(obj => obj.Keys)
                .Distinct()
                .Select((name, ordinal) => new { Name = name, Ordinal = ordinal })
                .ToDictionary(c => c.Name, c => c.Ordinal);

            var table = new DataTable();
            foreach (var column in ordinalByKey.OrderBy(entry => entry.Value))
            {
                table.Columns.Add(new DataColumn(column.Key));
            }

            foreach (var jsonObject in jsonObjects)
            {
                var cells = new object[ordinalByKey.Count];
                foreach (var member in jsonObject)
                {
                    int ordinal = ordinalByKey[member.Key];
                    cells[ordinal] = member.Value.ToObject();
                }
                table.Rows.Add(cells);
            }
            return table;
        }
Пример #4
0
 /// <summary>
 /// Returns true when every element of <c>_value</c>, wrapped in a <c>JsonFilter</c>,
 /// matches at least one element of <paramref name="value"/>; an empty <c>_value</c> always passes.
 /// </summary>
 public override bool VisitArray(IJsonArray value)
 {
     bool nothingRequired = _value.Count == 0;
     return nothingRequired
         || _value.Select(required => new JsonFilter(required))
                  .All(requiredFilter => value.Any(requiredFilter.Matches));
 }
Пример #5
0
        /// <summary>Parses the PersonInfo.json fixture and checks four values reached through objects and an array.</summary>
        public void GetValuesTest()
        {
            string    fixtureText = File.ReadAllText("TestJsonParser/Files/PersonInfo.json");
            IJsonItem root        = new JsonParser(LanguageMatchEngine).Parse(fixtureText);

            // Walk down to the nested containers first.
            IJsonObject person      = (IJsonObject)root;
            IJsonObject home        = (IJsonObject)person["address"];
            IJsonArray  phones      = (IJsonArray)person["phoneNumber"];
            IJsonObject secondPhone = (IJsonObject)phones[1];

            IJsonValue lastName = (IJsonValue)person["lastName"];
            Assert.Equal("Smith", lastName.Value);

            IJsonValue age = (IJsonValue)person["age"];
            Assert.Equal(25d, age.Value);

            IJsonValue isOwner = (IJsonValue)home["owner"];
            Assert.True((bool)isOwner.Value);

            IJsonValue secondNumber = (IJsonValue)secondPhone["number"];
            Assert.Equal("646 555-4567", secondNumber.Value);
        }
        /// <summary>
        /// Applies or removes human annotations on tokens in <c>Data.sents</c>.
        /// </summary>
        /// <param name="line">
        /// JSON object of the form {label: {sentenceId: [tokenIndex, ...]}}, as implied by the nested
        /// <c>GetJsonObject</c>/<c>GetJsonArray</c> reads below.
        /// </param>
        /// <param name="remove">
        /// When true each listed token is set to <c>backgroundSymbol</c>; otherwise it is set to the label.
        /// </param>
        /// <returns>The number of token entries processed.</returns>
        private int ChangeAnnotation(string line, bool remove)
        {
            int         tokensNum  = 0;
            IJsonReader jsonReader = Javax.Json.Json.CreateReader(new StringReader(line));
            IJsonObject objarr;

            try
            {
                objarr = jsonReader.ReadObject();
            }
            finally
            {
                // Release the reader even when the payload cannot be parsed.
                jsonReader.Close();
            }

            foreach (string label in objarr.Keys)
            {
                IJsonObject obj4label = objarr.GetJsonObject(label);
                foreach (string sentid in obj4label.Keys)
                {
                    IJsonArray tokenArry = obj4label.GetJsonArray(sentid);
                    foreach (IJsonValue tokenid in tokenArry)
                    {
                        tokensNum++;
                        // Token ids are machine-written JSON numbers; parse them culture-independently.
                        int tokenIndex = System.Convert.ToInt32(tokenid.ToString(), System.Globalization.CultureInfo.InvariantCulture);
                        Data.sents[sentid].GetTokens()[tokenIndex].Set(humanLabelClasses[label], remove ? backgroundSymbol : label);
                    }
                }
            }
            return(tokensNum);
        }
        /// <summary>
        /// Reads a JSON configuration object, fills <c>seedWords</c> and a new <c>Properties</c> instance from it,
        /// resolves the "file" property, and stores the answer class types for each seed label.
        /// </summary>
        /// <param name="line">
        /// JSON object; its "seedWords" member maps labels to string arrays, every other member is copied
        /// as a string property.
        /// </param>
        /// <param name="readFile">
        /// When true the JSON "input" member becomes the "file" property; otherwise the "input" property
        /// text is written to a temporary file whose path becomes "file".
        /// </param>
        /// <param name="writeOutputToFile">
        /// Only consulted when <paramref name="readFile"/> is true: sets "columnOutputFile" to the
        /// "_processed" path unless the properties already contain it.
        /// </param>
        /// <param name="additionalSeedWordsFiles">
        /// Optional; when non-empty it is passed to <c>GetPatternsFromDataMultiClass.ReadSeedWords</c> and the
        /// result is merged into labels already present in <c>seedWords</c>.
        /// </param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        internal virtual void SetUpProperties(string line, bool readFile, bool writeOutputToFile, string additionalSeedWordsFiles)
        {
            IJsonReader reader = Javax.Json.Json.CreateReader(new StringReader(line));
            IJsonObject config = reader.ReadObject();
            reader.Close();

            Properties settings = new Properties();
            foreach (string key in config.Keys)
            {
                if (!key.Equals("seedWords"))
                {
                    // Every member other than "seedWords" is copied as a plain string property.
                    settings.SetProperty(key, config.GetString(key));
                    continue;
                }
                IJsonObject seedsByLabel = config.GetJsonObject(key);
                foreach (string seedLabel in seedsByLabel.Keys)
                {
                    // Any seeds previously stored under this label are replaced.
                    seedWords[seedLabel] = new HashSet <CandidatePhrase>();
                    IJsonArray phrases = seedsByLabel.GetJsonArray(seedLabel);
                    for (int idx = 0; idx < phrases.Count; idx++)
                    {
                        string phrase = phrases.GetString(idx);
                        seedWords[seedLabel].Add(CandidatePhrase.CreateOrGet(phrase));
                        System.Console.Out.WriteLine("adding " + phrase + " for label " + seedLabel);
                    }
                }
            }
            System.Console.Out.WriteLine("seedwords are " + seedWords);

            bool hasExtraSeedFiles = additionalSeedWordsFiles != null && !additionalSeedWordsFiles.IsEmpty();
            if (hasExtraSeedFiles)
            {
                IDictionary <string, ICollection <CandidatePhrase> > extraSeeds = GetPatternsFromDataMultiClass.ReadSeedWords(additionalSeedWordsFiles);
                logger.Info("additional seed words are " + extraSeeds);
                foreach (string knownLabel in seedWords.Keys)
                {
                    // Labels that only appear in the additional files are ignored.
                    if (!extraSeeds.Contains(knownLabel))
                    {
                        continue;
                    }
                    Sharpen.Collections.AddAll(seedWords[knownLabel], extraSeeds[knownLabel]);
                }
            }

            outputFile = null;
            if (!readFile)
            {
                // The "input" property holds the text itself: spill it to a temp file and point "file" there.
                string tempDir   = Runtime.GetProperty("java.io.tmpdir");
                File   sentsFile = File.CreateTempFile("sents", ".tmp", new File(tempDir));
                sentsFile.DeleteOnExit();
                IOUtils.WriteStringToFile(settings.GetProperty("input"), sentsFile.GetPath(), "utf8");
                settings.SetProperty("file", sentsFile.GetAbsolutePath());
            }
            else
            {
                string inputPath = config.GetString("input");
                System.Console.Out.WriteLine("input value is " + inputPath);
                outputFile = settings.GetProperty("input") + "_processed";
                settings.SetProperty("file", inputPath);
                if (writeOutputToFile && !settings.Contains("columnOutputFile"))
                {
                    settings.SetProperty("columnOutputFile", outputFile);
                }
            }
            SetProperties(settings);
            this.props = settings;

            // Labels are numbered from 1 in the enumeration order of seedWords.Keys.
            int classNumber = 1;
            foreach (string classLabel in seedWords.Keys)
            {
                string machineClassName = "edu.stanford.nlp.patterns.PatternsAnnotations$PatternLabel" + classNumber;
                Type   machineClass     = (Type)Sharpen.Runtime.GetType(machineClassName);
                machineAnswerClasses[classLabel] = machineClass;

                string humanClassName = "edu.stanford.nlp.patterns.PatternsAnnotations$PatternHumanLabel" + classNumber;
                humanLabelClasses[classLabel] = (Type)Sharpen.Runtime.GetType(humanClassName);

                classNumber++;
            }
        }
Пример #8
0
 /// <summary>Wraps <paramref name="value"/> in an <c>ArrayMatcher</c> that uses this instance as its matcher factory.</summary>
 public Matcher VisitArray(IJsonArray value) => new ArrayMatcher(value, this);
Пример #9
0
 /// <summary>
 /// Compares <c>Value</c> with <paramref name="value"/> position by position: the counts must be equal and,
 /// for each pair, visiting the candidate element with the visitor obtained from
 /// <c>Value</c>'s element and <c>_matcherFactory</c> must return true.
 /// </summary>
 public override bool VisitArray(IJsonArray value)
 {
     if (Value.Count != value.Count)
     {
         return false;
     }

     var pairResults = Value.Zip(value, (expected, actual) => actual.Visit(expected.Visit(_matcherFactory)));
     return pairResults.All(matched => matched);
 }
Пример #10
0
 /// <summary>Creates a proxy around <paramref name="jsonArray"/>.</summary>
 /// <exception cref="ArgumentNullException"><paramref name="jsonArray"/> is null.</exception>
 public ArrayProxy(IJsonArray jsonArray)
 {
     if (jsonArray is null)
     {
         throw new ArgumentNullException(nameof(jsonArray));
     }

     JsonArray = jsonArray;
 }
Пример #11
0
 /// <summary>Stores <paramref name="value"/> in <c>_value</c> for later use by this visitor.</summary>
 public ArrayComparerVisitor(IJsonArray value) => _value = value;
Пример #12
0
 /// <summary>Creates a matcher holding the given array and matcher factory.</summary>
 /// <param name="value">Array stored in <c>Value</c>.</param>
 /// <param name="matcherFactory">Factory stored in <c>_matcherFactory</c>.</param>
 public ArrayMatcher(IJsonArray value, MatcherFactory matcherFactory)
 {
     this._matcherFactory = matcherFactory;
     this.Value = value;
 }
Пример #13
0
 /// <summary>
 /// Returns true when <paramref name="value"/> has as many elements as <c>Value</c> and every positional pair
 /// matches, where a pair matches if the candidate element's <c>Visit</c> returns true for the visitor produced
 /// by the stored element's <c>Visit(_matcherFactory)</c>.
 /// </summary>
 public override bool VisitArray(IJsonArray value)
 {
     bool sameLength = Value.Count == value.Count;

     // Pairs are only evaluated when the lengths agree.
     return sameLength
         && Value.Zip(value, (mine, theirs) => theirs.Visit(mine.Visit(_matcherFactory)))
                 .All(isMatch => isMatch);
 }
Пример #14
0
 /// <summary>Returns true when at least one element of <paramref name="value"/> returns true on being visited by this instance.</summary>
 public virtual bool VisitArray(IJsonArray value) => value.Any(element => element.Visit(this));
Пример #15
0
 /// <summary>
 /// Visits every element of <paramref name="value"/> with this instance and concatenates the
 /// resulting index values into one lazily evaluated sequence.
 /// </summary>
 public IEnumerable <IndexValue> VisitArray(IJsonArray value)
 {
     return value.SelectMany(element => element.Visit(this));
 }
Пример #16
0
        /// <summary>
        /// Selects new phrases for <paramref name="label"/> from the patterns learned in this iteration.
        /// Depending on <c>constVars.wordScoring</c> it either scores all extracted phrases and keeps the top ones
        /// (<c>Weightednorm</c>) or picks the single phrase backed by the highest-weighted pattern (<c>Bpb</c>).
        /// </summary>
        /// <param name="label">Label whose phrases are being learned.</param>
        /// <param name="patternsForEachToken">Forwarded to <c>StatsWithoutApplyingPatterns</c> when patterns are not applied.</param>
        /// <param name="patternsLearnedThisIter">Patterns (with weights) learned in the current iteration.</param>
        /// <param name="allSelectedPatterns">Forwarded to the phrase scorer.</param>
        /// <param name="alreadyIdentifiedWords">Forwarded to the phrase scorer; removed from the candidates in <c>Bpb</c> mode.</param>
        /// <param name="matchedTokensByPat">Forwarded to <c>ApplyPats</c>.</param>
        /// <param name="scoreForAllWordsThisIteration">Cleared and refilled with all phrase scores (<c>Weightednorm</c> mode only).</param>
        /// <param name="terms">Filled with the candidate phrases and the patterns that extracted them.</param>
        /// <param name="wordsPatExtracted">Phrase-to-pattern counts; populated by the apply/stats step and read afterwards.</param>
        /// <param name="patternsAndWords4Label">Pattern-to-phrase counts used to build the "reasonwords" of the justification file.</param>
        /// <param name="identifier">Sub-directory name under <c>constVars.outDir</c> for the justification output.</param>
        /// <param name="ignoreWords">Optional extra phrases passed to <c>ChooseTopWords</c> as words to ignore.</param>
        /// <param name="computeProcDataFreq">When true, <c>Data.processedDataFreq</c> is recomputed from <c>Data.rawFreq</c>.</param>
        /// <returns>Counter of the phrases selected in this call; empty in <c>Bpb</c> mode when there is no candidate.</returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        private ICounter <CandidatePhrase> LearnNewPhrasesPrivate(
            string label,
            PatternsForEachToken patternsForEachToken,
            ICounter <E> patternsLearnedThisIter,
            ICounter <E> allSelectedPatterns,
            ICollection <CandidatePhrase> alreadyIdentifiedWords,
            CollectionValuedMap <E, Triple <string, int, int> > matchedTokensByPat,
            ICounter <CandidatePhrase> scoreForAllWordsThisIteration,
            TwoDimensionalCounter <CandidatePhrase, E> terms,
            TwoDimensionalCounter <CandidatePhrase, E> wordsPatExtracted,
            TwoDimensionalCounter <E, CandidatePhrase> patternsAndWords4Label,
            string identifier,
            ICollection <CandidatePhrase> ignoreWords,
            bool computeProcDataFreq)
        {
            ICollection <CandidatePhrase> alreadyLabeledWords = new HashSet <CandidatePhrase>();

            if (constVars.doNotApplyPatterns)
            {
                // if want to get the stats by the lossy way of just counting without
                // applying the patterns
                ConstantsAndVariables.DataSentsIterator sentsIter = new ConstantsAndVariables.DataSentsIterator(constVars.batchProcessSents);
                while (sentsIter.MoveNext())
                {
                    Pair <IDictionary <string, DataInstance>, File> sentsf = sentsIter.Current;
                    this.StatsWithoutApplyingPatterns(sentsf.First(), patternsForEachToken, patternsLearnedThisIter, wordsPatExtracted);
                }
            }
            else
            {
                if (patternsLearnedThisIter.Size() > 0)
                {
                    this.ApplyPats(patternsLearnedThisIter, label, wordsPatExtracted, matchedTokensByPat, alreadyLabeledWords);
                }
            }
            if (computeProcDataFreq)
            {
                if (!phraseScorer.wordFreqNorm.Equals(PhraseScorer.Normalization.None))
                {
                    Redwood.Log(Redwood.Dbg, "computing processed freq");
                    foreach (KeyValuePair <CandidatePhrase, double> fq in Data.rawFreq.EntrySet())
                    {
                        double @in = fq.Value;
                        if (phraseScorer.wordFreqNorm.Equals(PhraseScorer.Normalization.Sqrt))
                        {
                            @in = Math.Sqrt(@in);
                        }
                        else
                        {
                            if (phraseScorer.wordFreqNorm.Equals(PhraseScorer.Normalization.Log))
                            {
                                @in = 1 + Math.Log(@in);
                            }
                            else
                            {
                                throw new Exception("can't understand the normalization");
                            }
                        }
                        System.Diagnostics.Debug.Assert(!double.IsNaN(@in), "Why is processed freq nan when rawfreq is " + @in);
                        Data.processedDataFreq.SetCount(fq.Key, @in);
                    }
                }
                else
                {
                    // No normalization requested: the processed counter is the raw counter itself (shared, not copied).
                    Data.processedDataFreq = Data.rawFreq;
                }
            }
            if (constVars.wordScoring.Equals(GetPatternsFromDataMultiClass.WordScoring.Weightednorm))
            {
                // Candidate terms: extracted phrases that are neither other-class words (by phrase or lemma) nor already labeled.
                foreach (CandidatePhrase en in wordsPatExtracted.FirstKeySet())
                {
                    if (!constVars.GetOtherSemanticClassesWords().Contains(en) && (en.GetPhraseLemma() == null || !constVars.GetOtherSemanticClassesWords().Contains(CandidatePhrase.CreateOrGet(en.GetPhraseLemma()))) && !alreadyLabeledWords.Contains(en))
                    {
                        terms.AddAll(en, wordsPatExtracted.GetCounter(en));
                    }
                }
                RemoveKeys(terms, ConstantsAndVariables.GetStopWords());
                ICounter <CandidatePhrase> phraseScores = phraseScorer.ScorePhrases(label, terms, wordsPatExtracted, allSelectedPatterns, alreadyIdentifiedWords, false);
                // NOTE(review): this and the later "U.S." print look like leftover debugging output; kept to preserve console output.
                System.Console.Out.WriteLine("count for word U.S. is " + phraseScores.GetCount(CandidatePhrase.CreateOrGet("U.S.")));
                ICollection <CandidatePhrase> ignoreWordsAll;
                if (ignoreWords != null && !ignoreWords.IsEmpty())
                {
                    ignoreWordsAll = CollectionUtils.UnionAsSet(ignoreWords, constVars.GetOtherSemanticClassesWords());
                }
                else
                {
                    ignoreWordsAll = new HashSet <CandidatePhrase>(constVars.GetOtherSemanticClassesWords());
                }
                Sharpen.Collections.AddAll(ignoreWordsAll, constVars.GetSeedLabelDictionary()[label]);
                Sharpen.Collections.AddAll(ignoreWordsAll, constVars.GetLearnedWords(label).KeySet());
                System.Console.Out.WriteLine("ignoreWordsAll contains word U.S. is " + ignoreWordsAll.Contains(CandidatePhrase.CreateOrGet("U.S.")));
                ICounter <CandidatePhrase> finalwords = ChooseTopWords(phraseScores, terms, phraseScores, ignoreWordsAll, constVars.thresholdWordExtract);
                phraseScorer.PrintReasonForChoosing(finalwords);
                scoreForAllWordsThisIteration.Clear();
                Counters.AddInPlace(scoreForAllWordsThisIteration, phraseScores);
                Redwood.Log(ConstantsAndVariables.minimaldebug, "\n\n## Selected Words for " + label + " : " + Counters.ToSortedString(finalwords, finalwords.Size(), "%1$s:%2$.2f", "\t"));
                if (constVars.goldEntities != null)
                {
                    IDictionary <string, bool> goldEntities4Label = constVars.goldEntities[label];
                    if (goldEntities4Label != null)
                    {
                        StringBuilder s = new StringBuilder();
                        // The translated code passed null to ForEach here, so nothing was ever appended to s.
                        // NOTE(review): the per-word line format below is a reconstruction - confirm against the upstream source.
                        foreach (CandidatePhrase selected in finalwords.KeySet())
                        {
                            string phrase = selected.GetPhrase();
                            bool   isGold;
                            s.Append(phrase);
                            s.Append(goldEntities4Label.TryGetValue(phrase, out isGold) ? ":" + isGold : ":UNKNOWN");
                            s.Append("\n");
                        }
                        Redwood.Log(ConstantsAndVariables.minimaldebug, "\n\n## Gold labels for selected words for label " + label + " : " + s.ToString());
                    }
                    else
                    {
                        Redwood.Log(Redwood.Dbg, "No gold entities provided for label " + label);
                    }
                }
                if (constVars.outDir != null && !constVars.outDir.IsEmpty())
                {
                    string outputdir = constVars.outDir + "/" + identifier + "/" + label;
                    IOUtils.EnsureDir(new File(outputdir));
                    // For each selected word, count the other words extracted by the same patterns.
                    TwoDimensionalCounter <CandidatePhrase, CandidatePhrase> reasonForWords = new TwoDimensionalCounter <CandidatePhrase, CandidatePhrase>();
                    foreach (CandidatePhrase word in finalwords.KeySet())
                    {
                        foreach (E l in wordsPatExtracted.GetCounter(word).KeySet())
                        {
                            foreach (CandidatePhrase w2 in patternsAndWords4Label.GetCounter(l))
                            {
                                reasonForWords.IncrementCount(word, w2);
                            }
                        }
                    }
                    Redwood.Log(ConstantsAndVariables.minimaldebug, "Saving output in " + outputdir);
                    string filename = outputdir + "/words.json";
                    // the json object is an array corresponding to each iteration - of list
                    // of objects,
                    // each of which is a bean of entity and reasons
                    IJsonArrayBuilder obj = Javax.Json.Json.CreateArrayBuilder();
                    if (writtenInJustification.Contains(label) && writtenInJustification[label])
                    {
                        // A file was already written for this label: carry the earlier iterations over.
                        IJsonReader jsonReader = Javax.Json.Json.CreateReader(new BufferedInputStream(new FileInputStream(filename)));
                        IJsonArray  objarr     = jsonReader.ReadArray();
                        foreach (IJsonValue o in objarr)
                        {
                            obj.Add(o);
                        }
                        jsonReader.Close();
                    }
                    IJsonArrayBuilder objThisIter = Javax.Json.Json.CreateArrayBuilder();
                    foreach (CandidatePhrase w in reasonForWords.FirstKeySet())
                    {
                        IJsonObjectBuilder objinner = Javax.Json.Json.CreateObjectBuilder();
                        IJsonArrayBuilder  l        = Javax.Json.Json.CreateArrayBuilder();
                        foreach (CandidatePhrase w2 in reasonForWords.GetCounter(w).KeySet())
                        {
                            l.Add(w2.GetPhrase());
                        }
                        IJsonArrayBuilder pats = Javax.Json.Json.CreateArrayBuilder();
                        foreach (E p in wordsPatExtracted.GetCounter(w))
                        {
                            pats.Add(p.ToStringSimple());
                        }
                        objinner.Add("reasonwords", l);
                        objinner.Add("patterns", pats);
                        objinner.Add("score", finalwords.GetCount(w));
                        objinner.Add("entity", w.GetPhrase());
                        objThisIter.Add(objinner.Build());
                    }
                    obj.Add(objThisIter);
                    // Redwood.log(ConstantsAndVariables.minimaldebug, channelNameLogger,
                    // "Writing justification at " + filename);
                    IOUtils.WriteStringToFile(StringUtils.Normalize(StringUtils.ToAscii(obj.Build().ToString())), filename, "ASCII");
                    writtenInJustification[label] = true;
                }
                if (constVars.justify)
                {
                    Redwood.Log(Redwood.Dbg, "\nJustification for phrases:\n");
                    foreach (CandidatePhrase word in finalwords.KeySet())
                    {
                        Redwood.Log(Redwood.Dbg, "Phrase " + word + " extracted because of patterns: \t" + Counters.ToSortedString(wordsPatExtracted.GetCounter(word), wordsPatExtracted.GetCounter(word).Size(), "%1$s:%2$f", "\n"));
                    }
                }
                return(finalwords);
            }
            else
            {
                if (constVars.wordScoring.Equals(GetPatternsFromDataMultiClass.WordScoring.Bpb))
                {
                    Counters.AddInPlace(terms, wordsPatExtracted);
                    // For every phrase, remember the weight of its best pattern and which pattern that was.
                    ICounter <CandidatePhrase>       maxPatWeightTerms = new ClassicCounter <CandidatePhrase>();
                    IDictionary <CandidatePhrase, E> wordMaxPat        = new Dictionary <CandidatePhrase, E>();
                    foreach (KeyValuePair <CandidatePhrase, ClassicCounter <E> > en in terms.EntrySet())
                    {
                        ICounter <E> weights = new ClassicCounter <E>();
                        foreach (E k in en.Value.KeySet())
                        {
                            weights.SetCount(k, patternsLearnedThisIter.GetCount(k));
                        }
                        maxPatWeightTerms.SetCount(en.Key, Counters.Max(weights));
                        wordMaxPat[en.Key] = Counters.Argmax(weights);
                    }
                    Counters.RemoveKeys(maxPatWeightTerms, alreadyIdentifiedWords);
                    double maxvalue = Counters.Max(maxPatWeightTerms);
                    // Phrases within 1e-10 of the best weight count as tied.
                    ICollection <CandidatePhrase> words = Counters.KeysAbove(maxPatWeightTerms, maxvalue - 1e-10);
                    CandidatePhrase bestw = null;
                    if (words.Count > 1)
                    {
                        // Break the tie by how often the phrase was extracted by its best pattern.
                        double max = double.NegativeInfinity;
                        foreach (CandidatePhrase w in words)
                        {
                            if (terms.GetCount(w, wordMaxPat[w]) > max)
                            {
                                max   = terms.GetCount(w, wordMaxPat[w]);
                                bestw = w;
                            }
                        }
                    }
                    else
                    {
                        if (words.Count == 1)
                        {
                            // Take the single element. Reading Current on a fresh enumerator without
                            // MoveNext() does not yield it, so iterate instead.
                            foreach (CandidatePhrase only in words)
                            {
                                bestw = only;
                                break;
                            }
                        }
                        else
                        {
                            return(new ClassicCounter <CandidatePhrase>());
                        }
                    }
                    Redwood.Log(ConstantsAndVariables.minimaldebug, "Selected Words: " + bestw);
                    return(Counters.AsCounter(Arrays.AsList(bestw)));
                }
                else
                {
                    throw new Exception("wordscoring " + constVars.wordScoring + " not identified");
                }
            }
        }
Пример #17
0
 /// <summary>
 /// Initializes the matcher with the array it holds in <c>Value</c> and the factory it
 /// keeps in <c>_matcherFactory</c>.
 /// </summary>
 public ArrayMatcher(IJsonArray value, MatcherFactory matcherFactory)
 {
     Value           = value;
     _matcherFactory = matcherFactory;
 }