/// <summary>
/// Adds new seed phrases supplied as a JSON object that maps each label to an
/// array of phrase strings, for example {"name":["Bush","Carter","Obama"]}.
/// </summary>
/// <remarks>
/// For every label in the object, each phrase is converted with
/// CandidatePhrase.CreateOrGet, the resulting set is added to seedWords[label]
/// and registered on a ConstantsAndVariables instance built for this call, and
/// GetPatternsFromDataMultiClass.RunLabelSeedWords is then invoked over Data.sents
/// with only the newly supplied phrases.
/// NOTE(review): seedWords and humanLabelClasses are both indexed by the label
/// taken from the JSON, so a label absent from either presumably fails the
/// lookup - confirm that callers only send known labels.
/// </remarks>
/// <param name="line">JSON text holding a single object of label to string array.</param>
/// <returns>The literal string "SUCCESS added new phrases".</returns>
/// <exception cref="System.Exception"/>
public virtual string DoNewPhrases(string line)
{
    System.Console.Out.WriteLine("adding new phrases");

    // Built before the JSON is parsed, matching the original statement order.
    ConstantsAndVariables constants = new ConstantsAndVariables(props, humanLabelClasses.Keys, humanLabelClasses);

    IJsonReader reader = Javax.Json.Json.CreateReader(new StringReader(line));
    IJsonObject labelToPhrases = reader.ReadObject();

    foreach (KeyValuePair<string, IJsonValue> entry in labelToPhrases)
    {
        string label = entry.Key;
        IJsonArray phraseArray = labelToPhrases.GetJsonArray(label);

        // Collect the phrases for this label into a set of candidate phrases.
        ICollection<CandidatePhrase> newSeeds = new HashSet<CandidatePhrase>();
        for (int index = 0; index < phraseArray.Count; index++)
        {
            string phraseText = phraseArray.GetString(index);
            System.Console.Out.WriteLine("adding " + phraseText + " to seed ");
            newSeeds.Add(CandidatePhrase.CreateOrGet(phraseText));
        }

        // Record the phrases in the instance-level seed map and in the per-call
        // constants, then run the labelling step with just the new phrases.
        Sharpen.Collections.AddAll(seedWords[label], newSeeds);
        constants.AddSeedWords(label, newSeeds);
        GetPatternsFromDataMultiClass.RunLabelSeedWords(
            Data.sents,
            humanLabelClasses[label],
            label,
            newSeeds,
            constants,
            false);
    }

    return "SUCCESS added new phrases";
}
/// <summary>
/// Processes the configured sentences for every label in seedWords and,
/// when requested, writes the column output to outputFile.
/// </summary>
/// <remarks>
/// Sentences are obtained from GetPatternsFromDataMultiClass.ProcessSents using
/// props and the seedWords labels; only the first element of the returned pair
/// is kept and stored in Data.sents. RunLabelSeedWords is then called once per
/// label with that label's seed words.
/// NOTE(review): an earlier comment on this method described a JSON line input
/// with required keys "input" and "seedWords", e.g.
/// {"input":"presidents.txt","seedWords":{"name":["Obama"],"place":["Chicago"]}}.
/// This method takes no such argument; presumably that format is parsed
/// elsewhere before this method is called - confirm against the caller.
/// NOTE(review): ConstantsAndVariables is built with machineAnswerClasses while
/// the per-label calls and the output writer use humanLabelClasses - confirm
/// this is intentional.
/// </remarks>
/// <param name="writeOutputToFile">
/// When true, WriteColumnOutput is called for outputFile and a confirmation
/// line is printed to standard output.
/// </param>
/// <returns>The literal string "SUCCESS".</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Java.Lang.InstantiationException"/>
/// <exception cref="System.Reflection.TargetInvocationException"/>
/// <exception cref="Java.Util.Concurrent.ExecutionException"/>
/// <exception cref="Java.Sql.SQLException"/>
/// <exception cref="System.Exception"/>
/// <exception cref="System.MemberAccessException"/>
/// <exception cref="System.TypeLoadException"/>
/// <exception cref="System.MissingMethodException"/>
public virtual string ProcessText(bool writeOutputToFile)
{
    logger.Info("Starting to process text");
    logger.Info("all seed words are " + seedWords);

    // Only the first map of the returned pair is used; the second is ignored here.
    Pair<IDictionary<string, DataInstance>, IDictionary<string, DataInstance>> processed =
        GetPatternsFromDataMultiClass.ProcessSents(props, seedWords.Keys);
    Data.sents = processed.First();

    ConstantsAndVariables constants = new ConstantsAndVariables(props, seedWords.Keys, machineAnswerClasses);

    foreach (string labelName in seedWords.Keys)
    {
        GetPatternsFromDataMultiClass.RunLabelSeedWords(
            Data.sents,
            humanLabelClasses[labelName],
            labelName,
            seedWords[labelName],
            constants,
            true);
    }

    if (writeOutputToFile)
    {
        GetPatternsFromDataMultiClass.WriteColumnOutput(outputFile, false, humanLabelClasses);
        System.Console.Out.WriteLine("written the output to " + outputFile);
    }

    logger.Info("Finished processing text");
    return "SUCCESS";
}