/// <summary>
/// Returns the embeddings for the tokens of <paramref name="sentences"/>, using
/// <paramref name="cacheFilename"/> as a gzip-compressed, serialized cache.
/// </summary>
/// <remarks>
/// If the cache file does not exist, the full embedding set is loaded from
/// "../google-300.txt", reduced to the tokens that occur in the sentences, and written to the
/// cache file. If the cache file exists, its contents are deserialized and returned unchanged.
/// </remarks>
/// <param name="cacheFilename">path of the cache file to read, or to create when missing</param>
/// <param name="sentences">sentences whose tokens select which embeddings are kept</param>
/// <returns>a map from token to its embedding vector</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.TypeLoadException"/>
public virtual IDictionary<string, double[]> GetEmbeddings(string cacheFilename, IList<CoNLLBenchmark.CoNLLSentence> sentences)
{
    File f = new File(cacheFilename);
    IDictionary<string, double[]> trimmedSet;
    if (!f.Exists())
    {
        trimmedSet = new Dictionary<string, double[]>();
        IDictionary<string, double[]> massiveSet = LoadEmbeddingsFromFile("../google-300.txt");
        log.Info("Got massive embedding set size " + massiveSet.Count);
        foreach (CoNLLBenchmark.CoNLLSentence sentence in sentences)
        {
            foreach (string token in sentence.token)
            {
                // Single key lookup instead of contains-then-index.
                double[] embedding;
                if (massiveSet.TryGetValue(token, out embedding))
                {
                    trimmedSet[token] = embedding;
                }
            }
        }
        log.Info("Got trimmed embedding set size " + trimmedSet.Count);
        f.CreateNewFile();
        ObjectOutputStream oos = new ObjectOutputStream(new GZIPOutputStream(new FileOutputStream(cacheFilename)));
        try
        {
            oos.WriteObject(trimmedSet);
        }
        finally
        {
            // Always release the file handle, even if serialization fails.
            oos.Close();
        }
        log.Info("Wrote trimmed set to file");
    }
    else
    {
        ObjectInputStream ois = new ObjectInputStream(new GZIPInputStream(new FileInputStream(cacheFilename)));
        try
        {
            trimmedSet = (IDictionary<string, double[]>)ois.ReadObject();
        }
        finally
        {
            ois.Close();
        }
    }
    return trimmedSet;
}
/// <summary>
/// Trains a <c>ColumnDataClassifier</c>, round-trips it through in-memory serialization, and
/// prints the prediction of the original and of the deserialized classifier for every line of
/// the test file.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.TypeLoadException"/>
private static void DemonstrateSerializationColumnDataClassifier()
{
    System.Console.Out.WriteLine();
    System.Console.Out.WriteLine("Demonstrating working with a serialized classifier using serializeTo");
    ColumnDataClassifier cdc = new ColumnDataClassifier(where + "examples/cheese2007.prop");
    cdc.TrainClassifier(where + "examples/cheeseDisease.train");

    // Exhibit serialization and deserialization working. Serialized to bytes in memory for simplicity
    System.Console.Out.WriteLine();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ObjectOutputStream oos = new ObjectOutputStream(baos);
    try
    {
        cdc.SerializeClassifier(oos);
    }
    finally
    {
        oos.Close();
    }
    byte[] @object = baos.ToByteArray();

    ColumnDataClassifier cdc2;
    ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(@object));
    try
    {
        cdc2 = ColumnDataClassifier.GetClassifier(ois);
    }
    finally
    {
        ois.Close();
    }

    // We compare the output of the deserialized classifier cdc2 versus the original one cdc.
    // Each classifier converts the text line to its own datum and scores that datum.
    System.Console.Out.WriteLine("Making predictions with both classifiers");
    foreach (string line in ObjectBank.GetLineIterator(where + "examples/cheeseDisease.test", "utf-8"))
    {
        IDatum<string, string> d = cdc.MakeDatumFromLine(line);
        IDatum<string, string> d2 = cdc2.MakeDatumFromLine(line);
        System.Console.Out.Printf("%s =origi=> %s (%.4f)%n", line, cdc.ClassOf(d), cdc.ScoresOf(d).GetCount(cdc.ClassOf(d)));
        // Score the deserialized classifier with its own datum (d2), not with d.
        System.Console.Out.Printf("%s =deser=> %s (%.4f)%n", line, cdc2.ClassOf(d2), cdc2.ScoresOf(d2).GetCount(cdc2.ClassOf(d2)));
    }
}
/// <summary>
/// Saves the CRF classifier to <paramref name="path"/> and the additional settings
/// (gazetteer location, annotations to skip, sub-type and BIO flags) to
/// <paramref name="path"/> + ".extra".
/// </summary>
/// <param name="path">target path of the serialized classifier</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Save(string path)
{
    // save the CRF
    this.classifier.SerializeClassifier(path);

    // save the additional arguments
    ObjectOutputStream @out = new ObjectOutputStream(new FileOutputStream(path + ".extra"));
    try
    {
        @out.WriteObject(this.gazetteerLocation);
        @out.WriteObject(this.annotationsToSkip);
        @out.WriteObject(this.useSubTypes);
        @out.WriteObject(this.useBIO);
    }
    finally
    {
        // Release the file handle even when one of the writes fails.
        @out.Close();
    }
}
/// <summary>
/// Writes the given segmenter to <paramref name="filename"/> in serialized format.
/// An <c>IOException</c> is not propagated; its stack trace is printed instead.
/// </summary>
/// <param name="cs">the segmenter to serialize</param>
/// <param name="filename">the file to write to</param>
internal static void SaveSegmenterDataToSerialized(Edu.Stanford.Nlp.Parser.Lexparser.ChineseLexiconAndWordSegmenter cs, string filename)
{
    try
    {
        log.Info("Writing segmenter in serialized format to file " + filename + " ");
        ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
        try
        {
            @out.WriteObject(cs);
        }
        finally
        {
            // Close the stream even when the write fails.
            @out.Close();
        }
        log.Info("done.");
    }
    catch (IOException ioe)
    {
        Sharpen.Runtime.PrintStackTrace(ioe);
    }
}
/// <summary>Saves the singleton predictor model to the given filename.</summary>
/// <remarks>If there is an error, a RuntimeIOException is thrown.</remarks>
/// <param name="predictor">the model to serialize</param>
/// <param name="filename">the file to write to</param>
private static void SaveToSerialized(LogisticClassifier<string, string> predictor, string filename)
{
    try
    {
        log.Info("Writing singleton predictor in serialized format to file " + filename + ' ');
        ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
        try
        {
            @out.WriteObject(predictor);
        }
        finally
        {
            // Close the stream even when the write fails.
            @out.Close();
        }
        log.Info("done.");
    }
    catch (IOException ioe)
    {
        throw new RuntimeIOException(ioe);
    }
}
/// <summary>
/// Writes an integer and a string through <c>IOUtils.WriteStreamFromString</c> and checks
/// that <c>IOUtils.ReadStreamFromString</c> reads the same two objects back in order.
/// </summary>
public virtual void TestReadWriteStreamFromString()
{
    ObjectOutputStream oos = IOUtils.WriteStreamFromString(dirPath + "/objs.obj");
    try
    {
        // The literal is boxed on the call; int.Parse has no overload taking an int.
        oos.WriteObject(42);
        oos.WriteObject("forty two");
    }
    finally
    {
        oos.Close();
    }

    ObjectInputStream ois = IOUtils.ReadStreamFromString(dirPath + "/objs.obj");
    try
    {
        object i = ois.ReadObject();
        object s = ois.ReadObject();
        NUnit.Framework.Assert.AreEqual(42, i);
        NUnit.Framework.Assert.AreEqual("forty two", s);
    }
    finally
    {
        ois.Close();
    }
}
/// <summary>
/// Constructs a SignedObject from any Serializable object.
/// The given object is signed with the given signing key, using the
/// designated signature engine.
/// </summary>
/// <param name="object"> the object to be signed. </param>
/// <param name="signingKey"> the private key for signing. </param>
/// <param name="signingEngine"> the signature signing engine. </param>
/// <exception cref="IOException"> if an error occurs during serialization </exception>
/// <exception cref="InvalidKeyException"> if the key is invalid. </exception>
/// <exception cref="SignatureException"> if signing fails. </exception>
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public SignedObject(Serializable object, PrivateKey signingKey, Signature signingEngine) throws IOException, InvalidKeyException, SignatureException
public SignedObject(Serializable @object, PrivateKey signingKey, Signature signingEngine)
{
    // creating a stream pipe-line, from a to b
    ByteArrayOutputStream b = new ByteArrayOutputStream();
    ObjectOutput a = new ObjectOutputStream(b);
    try
    {
        // write and flush the object content to byte array
        a.WriteObject(@object);
        a.Flush();
    }
    finally
    {
        // Close the object stream even when serialization fails.
        a.Close();
    }
    this.Content = b.ToByteArray();
    b.Close();

    // now sign the encapsulated object
    this.Sign(signingKey, signingEngine);
}
// todo [2017]: This should be redone sometime to not have such a hardcoded upper limit.
// = null;
/// <summary>
/// Serializes the word dictionary (<c>words_</c>) to <paramref name="serializePath"/>.
/// Any failure is logged and rethrown wrapped in a <c>RuntimeIOException</c>.
/// </summary>
/// <param name="serializePath">the file to write the dictionary to</param>
private void SerializeDictionary(string serializePath)
{
    logger.Info("Serializing dictionaries to " + serializePath + " ... ");
    try
    {
        ObjectOutputStream oos = IOUtils.WriteStreamFromString(serializePath);
        try
        {
            //oos.writeObject(MAX_LEXICON_LENGTH);
            oos.WriteObject(words_);
            //oos.writeObject(cdtos_);
        }
        finally
        {
            // Close the stream even when the write fails.
            oos.Close();
        }
        logger.Info("done.");
    }
    catch (Exception e)
    {
        logger.Error("Failed", e);
        throw new RuntimeIOException(e);
    }
}
/// <summary>
/// Serializes this object to <paramref name="modelpath"/>, creating the parent directory
/// first when it does not exist.
/// </summary>
/// <param name="modelpath">the file to write the model to</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Save(string modelpath)
{
    // make sure modelpath directory exists
    int lastSlash = modelpath.LastIndexOf(File.separator);
    if (lastSlash > 0)
    {
        string path = Sharpen.Runtime.Substring(modelpath, 0, lastSlash);
        File f = new File(path);
        if (!f.Exists())
        {
            f.Mkdirs();
        }
    }

    ObjectOutputStream @out = new ObjectOutputStream(new FileOutputStream(modelpath));
    try
    {
        @out.WriteObject(this);
    }
    finally
    {
        // Release the file handle even when serialization fails.
        @out.Close();
    }
}
/// <summary>
/// Serializes <paramref name="clf"/> to <paramref name="fileName"/>. I/O failures (including
/// a file that cannot be opened) are not propagated; their stack trace is printed instead.
/// </summary>
/// <param name="fileName">the file to write the classifier to</param>
/// <param name="clf">the classifier to serialize</param>
public static void OutputModel(string fileName, IClassifier<string, string> clf)
{
    ObjectOutputStream so = null;
    try
    {
        so = new ObjectOutputStream(new FileOutputStream(fileName));
        so.WriteObject(clf);
        so.Flush();
    }
    catch (IOException e)
    {
        // FileNotFoundException is an IOException, so one handler covers both cases.
        Sharpen.Runtime.PrintStackTrace(e);
    }
    finally
    {
        if (so != null)
        {
            try
            {
                so.Close();
            }
            catch (IOException closeError)
            {
                Sharpen.Runtime.PrintStackTrace(closeError);
            }
        }
    }
}
/// <summary>
/// Writes the classifier state (weights, feature index, label index, in that order) to
/// <paramref name="path"/>, creating the parent directory first when it does not exist.
/// </summary>
/// <param name="path">the file to write the classifier to</param>
/// <exception cref="System.IO.IOException"/>
private void Save(string path)
{
    System.Console.Out.Write("Saving classifier to " + path + "... ");

    // make sure the directory specified by path exists
    int lastSlash = path.LastIndexOf(File.separator);
    if (lastSlash > 0)
    {
        File dir = new File(Sharpen.Runtime.Substring(path, 0, lastSlash));
        if (!dir.Exists())
        {
            dir.Mkdirs();
        }
    }

    ObjectOutputStream @out = new ObjectOutputStream(new FileOutputStream(path));
    try
    {
        @out.WriteObject(weights);
        @out.WriteObject(featureIndex);
        @out.WriteObject(labelIndex);
    }
    finally
    {
        // Release the file handle even when one of the writes fails.
        @out.Close();
    }
    System.Console.Out.WriteLine("done.");
}
/// <summary>
/// Serializes <paramref name="obj"/>, Base64-encodes the bytes, and stores the resulting
/// string under <paramref name="key"/> through the editor, committing immediately.
/// An <c>IOException</c> is not propagated; its stack trace is printed instead.
/// </summary>
/// <param name="key">the key to store the encoded object under</param>
/// <param name="obj">the object to serialize</param>
public void PutObject(string key, Java.Lang.Object obj)
{
    System.IO.MemoryStream baos = new System.IO.MemoryStream();
    ObjectOutputStream outStream = null;
    try
    {
        outStream = new ObjectOutputStream(baos);
        outStream.WriteObject(obj);
        // Make sure every serialized byte has reached the memory stream before reading it.
        outStream.Flush();
        string objectVal = ASCIIEncoding.ASCII.GetString(Base64.Encode(baos.ToArray(), Base64Flags.Default));
        editor.PutString(key, objectVal);
        editor.Commit();
    }
    catch (IOException e)
    {
        e.PrintStackTrace();
    }
    finally
    {
        // Close the wrapper first: it writes into baos, so baos must still be open.
        try
        {
            if (outStream != null)
            {
                outStream.Close();
            }
        }
        catch (IOException e)
        {
            e.PrintStackTrace();
        }
        baos.Close();
    }
}
/// <summary>
/// Converts a list of trees to a gzip-compressed byte array. The trees are first mapped
/// through <c>treeBasicCategories</c> and filtered by <c>treeFilter</c>; the number of remaining
/// trees is written first, followed by the string form of each tree.
/// </summary>
/// <param name="input">the trees to convert</param>
/// <returns>the compressed, serialized bytes</returns>
/// <remarks>An <c>IOException</c> is rethrown as a <c>RuntimeIOException</c>.</remarks>
public virtual byte[] ConvertToBytes(IList<Tree> input)
{
    try
    {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        ObjectOutputStream oos = new ObjectOutputStream(new GZIPOutputStream(bos));
        try
        {
            IList<Tree> transformed = CollectionUtils.TransformAsList(input, treeBasicCategories);
            IList<Tree> filtered = CollectionUtils.FilterAsList(transformed, treeFilter);
            oos.WriteObject(filtered.Count);
            foreach (Tree tree in filtered)
            {
                oos.WriteObject(tree.ToString());
            }
        }
        finally
        {
            // Closing the outermost stream finishes the gzip data and closes the whole chain,
            // also when one of the writes fails.
            oos.Close();
        }
        return bos.ToByteArray();
    }
    catch (IOException e)
    {
        throw new RuntimeIOException(e);
    }
}
/// <summary>
/// Exercises the counter under test (<c>c</c>) through a fixed sequence of updates and checks
/// totals, min/max/argmin/argmax, key ordering, the in-place arithmetic helpers of
/// <c>Counters</c>, and finally a serialization round trip.
/// </summary>
/// <remarks>
/// The assertions depend on the exact order of the preceding mutations, so statements must not
/// be reordered. The arithmetic section only runs when <c>integral</c> is false.
/// NOTE(review): several assertions pass (actual, expected) rather than NUnit's
/// (expected, actual) order; this only affects failure messages.
/// </remarks>
public virtual void TestClassicCounterHistoricalMain()
{
    c.SetCount("p", 0);
    c.SetCount("q", 2);
    // Taken while p=0, q=2; presumably a copy of c at this point (used again near the end).
    ClassicCounter <string> small_c = new ClassicCounter <string>(c);
    ICounter <string> c7 = c.GetFactory().Create();
    c7.AddAll(c);
    NUnit.Framework.Assert.AreEqual(c.TotalCount(), 2.0);
    c.IncrementCount("p");
    NUnit.Framework.Assert.AreEqual(c.TotalCount(), 3.0);
    c.IncrementCount("p", 2.0);
    NUnit.Framework.Assert.AreEqual(Counters.Min(c), 2.0);
    NUnit.Framework.Assert.AreEqual(Counters.Argmin(c), "q");
    // Now p is p=3.0, q=2.0
    c.SetCount("w", -5.0);
    c.SetCount("x", -4.5);
    IList <string> biggestKeys = new List <string>(c.KeySet());
    NUnit.Framework.Assert.AreEqual(biggestKeys.Count, 4);
    // The assertions below expect the keys ordered w, x, p, q after this sort.
    biggestKeys.Sort(Counters.ToComparator(c, false, true));
    NUnit.Framework.Assert.AreEqual("w", biggestKeys[0]);
    NUnit.Framework.Assert.AreEqual("x", biggestKeys[1]);
    NUnit.Framework.Assert.AreEqual("p", biggestKeys[2]);
    NUnit.Framework.Assert.AreEqual("q", biggestKeys[3]);
    NUnit.Framework.Assert.AreEqual(Counters.Min(c), -5.0, Tolerance);
    NUnit.Framework.Assert.AreEqual(Counters.Argmin(c), "w");
    NUnit.Framework.Assert.AreEqual(Counters.Max(c), 3.0, Tolerance);
    NUnit.Framework.Assert.AreEqual(Counters.Argmax(c), "p");
    if (integral)
    {
        NUnit.Framework.Assert.AreEqual(Counters.Mean(c), -1.0);
    }
    else
    {
        NUnit.Framework.Assert.AreEqual(Counters.Mean(c), -1.125, Tolerance);
    }
    if (!integral)
    {
        // only do this for floating point counters. Too much bother to rewrite
        c.SetCount("x", -2.5);
        ClassicCounter <string> c2 = new ClassicCounter <string>(c);
        NUnit.Framework.Assert.AreEqual(3.0, c2.GetCount("p"));
        NUnit.Framework.Assert.AreEqual(2.0, c2.GetCount("q"));
        NUnit.Framework.Assert.AreEqual(-5.0, c2.GetCount("w"));
        NUnit.Framework.Assert.AreEqual(-2.5, c2.GetCount("x"));
        ICounter <string> c3 = c.GetFactory().Create();
        // c3 ends up with a count of 1 for every key of c2.
        foreach (string str in c2.KeySet())
        {
            c3.IncrementCount(str);
        }
        NUnit.Framework.Assert.AreEqual(1.0, c3.GetCount("p"));
        NUnit.Framework.Assert.AreEqual(1.0, c3.GetCount("q"));
        NUnit.Framework.Assert.AreEqual(1.0, c3.GetCount("w"));
        NUnit.Framework.Assert.AreEqual(1.0, c3.GetCount("x"));
        // The assertions below expect c2 += 10 * c3.
        Counters.AddInPlace(c2, c3, 10.0);
        NUnit.Framework.Assert.AreEqual(13.0, c2.GetCount("p"));
        NUnit.Framework.Assert.AreEqual(12.0, c2.GetCount("q"));
        NUnit.Framework.Assert.AreEqual(5.0, c2.GetCount("w"));
        NUnit.Framework.Assert.AreEqual(7.5, c2.GetCount("x"));
        c3.AddAll(c);
        NUnit.Framework.Assert.AreEqual(4.0, c3.GetCount("p"));
        NUnit.Framework.Assert.AreEqual(3.0, c3.GetCount("q"));
        NUnit.Framework.Assert.AreEqual(-4.0, c3.GetCount("w"));
        NUnit.Framework.Assert.AreEqual(-1.5, c3.GetCount("x"));
        Counters.SubtractInPlace(c3, c);
        NUnit.Framework.Assert.AreEqual(1.0, c3.GetCount("p"));
        NUnit.Framework.Assert.AreEqual(1.0, c3.GetCount("q"));
        NUnit.Framework.Assert.AreEqual(1.0, c3.GetCount("w"));
        NUnit.Framework.Assert.AreEqual(1.0, c3.GetCount("x"));
        foreach (string str_1 in c.KeySet())
        {
            c3.IncrementCount(str_1);
        }
        NUnit.Framework.Assert.AreEqual(2.0, c3.GetCount("p"));
        NUnit.Framework.Assert.AreEqual(2.0, c3.GetCount("q"));
        NUnit.Framework.Assert.AreEqual(2.0, c3.GetCount("w"));
        NUnit.Framework.Assert.AreEqual(2.0, c3.GetCount("x"));
        // Division by another counter: every count of c2 is halved (c3 is 2 everywhere).
        Counters.DivideInPlace(c2, c3);
        NUnit.Framework.Assert.AreEqual(6.5, c2.GetCount("p"));
        NUnit.Framework.Assert.AreEqual(6.0, c2.GetCount("q"));
        NUnit.Framework.Assert.AreEqual(2.5, c2.GetCount("w"));
        NUnit.Framework.Assert.AreEqual(3.75, c2.GetCount("x"));
        // Division and multiplication by scalars.
        Counters.DivideInPlace(c2, 0.5);
        NUnit.Framework.Assert.AreEqual(13.0, c2.GetCount("p"));
        NUnit.Framework.Assert.AreEqual(12.0, c2.GetCount("q"));
        NUnit.Framework.Assert.AreEqual(5.0, c2.GetCount("w"));
        NUnit.Framework.Assert.AreEqual(7.5, c2.GetCount("x"));
        Counters.MultiplyInPlace(c2, 2.0);
        NUnit.Framework.Assert.AreEqual(26.0, c2.GetCount("p"));
        NUnit.Framework.Assert.AreEqual(24.0, c2.GetCount("q"));
        NUnit.Framework.Assert.AreEqual(10.0, c2.GetCount("w"));
        NUnit.Framework.Assert.AreEqual(15.0, c2.GetCount("x"));
        Counters.DivideInPlace(c2, 2.0);
        NUnit.Framework.Assert.AreEqual(13.0, c2.GetCount("p"));
        NUnit.Framework.Assert.AreEqual(12.0, c2.GetCount("q"));
        NUnit.Framework.Assert.AreEqual(5.0, c2.GetCount("w"));
        NUnit.Framework.Assert.AreEqual(7.5, c2.GetCount("x"));
        // Increments c2 while iterating over its own key set.
        foreach (string str_2 in c2.KeySet())
        {
            c2.IncrementCount(str_2);
        }
        NUnit.Framework.Assert.AreEqual(14.0, c2.GetCount("p"));
        NUnit.Framework.Assert.AreEqual(13.0, c2.GetCount("q"));
        NUnit.Framework.Assert.AreEqual(6.0, c2.GetCount("w"));
        NUnit.Framework.Assert.AreEqual(8.5, c2.GetCount("x"));
        foreach (string str_3 in c.KeySet())
        {
            c2.IncrementCount(str_3);
        }
        NUnit.Framework.Assert.AreEqual(15.0, c2.GetCount("p"));
        NUnit.Framework.Assert.AreEqual(14.0, c2.GetCount("q"));
        NUnit.Framework.Assert.AreEqual(7.0, c2.GetCount("w"));
        NUnit.Framework.Assert.AreEqual(9.5, c2.GetCount("x"));
        // small_c was created when p=0, q=2, so the assertions expect only q to change.
        c2.AddAll(small_c);
        NUnit.Framework.Assert.AreEqual(15.0, c2.GetCount("p"));
        NUnit.Framework.Assert.AreEqual(16.0, c2.GetCount("q"));
        NUnit.Framework.Assert.AreEqual(7.0, c2.GetCount("w"));
        NUnit.Framework.Assert.AreEqual(9.5, c2.GetCount("x"));
        // The expected sets show the thresholds are inclusive: p=15 and q=16 are "above 14",
        // x=9.5 and w=7 are "below 9.5".
        NUnit.Framework.Assert.AreEqual(new HashSet <string>(Arrays.AsList("p", "q")), Counters.KeysAbove(c2, 14));
        NUnit.Framework.Assert.AreEqual(new HashSet <string>(Arrays.AsList("q")), Counters.KeysAt(c2, 16));
        NUnit.Framework.Assert.AreEqual(new HashSet <string>(Arrays.AsList("x", "w")), Counters.KeysBelow(c2, 9.5));
        Counters.AddInPlace(c2, small_c, -6);
        NUnit.Framework.Assert.AreEqual(15.0, c2.GetCount("p"));
        NUnit.Framework.Assert.AreEqual(4.0, c2.GetCount("q"));
        NUnit.Framework.Assert.AreEqual(7.0, c2.GetCount("w"));
        NUnit.Framework.Assert.AreEqual(9.5, c2.GetCount("x"));
        // Two subtractions of small_c bring q from 4 to 0, so RetainNonZeros is expected to drop it.
        Counters.SubtractInPlace(c2, small_c);
        Counters.SubtractInPlace(c2, small_c);
        Counters.RetainNonZeros(c2);
        NUnit.Framework.Assert.AreEqual(15.0, c2.GetCount("p"));
        NUnit.Framework.Assert.IsFalse(c2.ContainsKey("q"));
        NUnit.Framework.Assert.AreEqual(7.0, c2.GetCount("w"));
        NUnit.Framework.Assert.AreEqual(9.5, c2.GetCount("x"));
    }
    // serialize to Stream
    if (c is ISerializable)
    {
        try
        {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            ObjectOutputStream @out = new ObjectOutputStream(new BufferedOutputStream(baos));
            @out.WriteObject(c);
            @out.Close();
            // reconstitute
            byte[] bytes = baos.ToByteArray();
            ObjectInputStream @in = new ObjectInputStream(new BufferedInputStream(new ByteArrayInputStream(bytes)));
            // c is replaced by the deserialized copy; the checks below run against that copy.
            c = IOUtils.ReadObjectFromObjectStream(@in);
            @in.Close();
            if (!this.integral)
            {
                // p=3.0, q=2.0, w=-5.0, x=-2.5 at this point, hence the total of -2.5.
                NUnit.Framework.Assert.AreEqual(-2.5, c.TotalCount());
                NUnit.Framework.Assert.AreEqual(-5.0, Counters.Min(c));
                NUnit.Framework.Assert.AreEqual("w", Counters.Argmin(c));
            }
            c.Clear();
            if (!this.integral)
            {
                NUnit.Framework.Assert.AreEqual(0.0, c.TotalCount());
            }
        }
        catch (IOException ioe)
        {
            Fail("IOException: " + ioe);
        }
        catch (TypeLoadException cce)
        {
            Fail("ClassNotFoundException: " + cce);
        }
    }
}
/// <summary>for testing -- CURRENTLY BROKEN!!!</summary>
/// <remarks>
/// Trains a parser from the treebank (or, if training raises an <c>ArgumentException</c>, loads a
/// model from <c>args[1]</c>), writes it to "chineseCharTagPCFG.ser.gz", then parses the test
/// trees and writes gold and parsed output to "out.chi" in GB18030 encoding.
/// </remarks>
/// <param name="args">exactly three values: treebankPath trainNums testNums</param>
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    if (args.Length != 3)
    {
        throw new Exception("args: treebankPath trainNums testNums");
    }
    ChineseTreebankParserParams ctpp = new ChineseTreebankParserParams();
    ctpp.charTags = true;
    // TODO: these options are getting clobbered by reading in the
    // parser object (unless it's a text file parser?)
    Options op = new Options(ctpp);
    op.doDep = false;
    op.testOptions.maxLength = 90;
    LexicalizedParser lp;
    try
    {
        IFileFilter trainFilt = new NumberRangesFileFilter(args[1], false);
        lp = LexicalizedParser.TrainFromTreebank(args[0], trainFilt, op);
        try
        {
            string filename = "chineseCharTagPCFG.ser.gz";
            log.Info("Writing parser in serialized format to file " + filename + " ");
            System.Console.Error.Flush();
            ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
            try
            {
                @out.WriteObject(lp);
            }
            finally
            {
                // Close the stream even when the write fails.
                @out.Close();
            }
            log.Info("done.");
        }
        catch (IOException ioe)
        {
            Sharpen.Runtime.PrintStackTrace(ioe);
        }
    }
    catch (ArgumentException)
    {
        lp = LexicalizedParser.LoadModel(args[1], op);
    }
    IFileFilter testFilt = new NumberRangesFileFilter(args[2], false);
    MemoryTreebank testTreebank = ctpp.MemoryTreebank();
    testTreebank.LoadPath(new File(args[0]), testFilt);
    PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream("out.chi"), "GB18030"), true);
    try
    {
        WordCatEquivalenceClasser eqclass = new WordCatEquivalenceClasser();
        WordCatEqualityChecker eqcheck = new WordCatEqualityChecker();
        EquivalenceClassEval eval = new EquivalenceClassEval(eqclass, eqcheck);
        // System.out.println("Preterminals:" + preterminals);
        System.Console.Out.WriteLine("Testing...");
        foreach (Tree goldTop in testTreebank)
        {
            Tree tree;
            try
            {
                tree = lp.ParseTree(goldTop.YieldHasWord());
                if (tree == null)
                {
                    System.Console.Out.WriteLine("Failed to parse " + goldTop.YieldHasWord());
                    continue;
                }
            }
            catch (Exception e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
                continue;
            }
            // A foreach iteration variable cannot be reassigned in C#, so the first child
            // goes into its own local.
            Tree gold = goldTop.FirstChild();
            pw.Println(SentenceUtils.ListToString(gold.PreTerminalYield()));
            pw.Println(SentenceUtils.ListToString(gold.Yield()));
            gold.PennPrint(pw);
            pw.Println(tree.PreTerminalYield());
            pw.Println(tree.Yield());
            tree.PennPrint(pw);
            // Collection allBrackets = WordCatConstituent.allBrackets(tree);
            // Collection goldBrackets = WordCatConstituent.allBrackets(gold);
            // eval.eval(allBrackets, goldBrackets);
            eval.DisplayLast();
        }
        System.Console.Out.WriteLine();
        System.Console.Out.WriteLine();
        eval.Display();
    }
    finally
    {
        // Flush and release "out.chi" no matter how the evaluation loop ends.
        pw.Close();
    }
}
/// <summary>
/// Command-line driver for the character-based Chinese lexicon: optionally prints treebank
/// statistics, trains or loads a parser and/or lexicon, and runs segmentation/parsing over a
/// test treebank with optional evaluation.
/// </summary>
/// <remarks>
/// Flags with arguments (maximum argument count in parentheses): <c>-parser</c> (3),
/// <c>-lex</c> (3), <c>-test</c> (2), <c>-out</c> (1), <c>-lengthPenalty</c> (1),
/// <c>-penaltyType</c> (1), <c>-maxLength</c> (1), <c>-stats</c> (2).
/// Presence-only flags read by this method: <c>-eval</c>, <c>-annotate</c>, <c>-norm</c>,
/// <c>-combo</c>, <c>-rad</c>. Every argument is also offered to
/// <c>ChineseTreebankParserParams.SetOptionFlag</c>.
/// With <c>-stats</c> the process exits after printing the statistics.
/// </remarks>
/// <param name="args">the command-line arguments described above</param>
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    IDictionary<string, int> flagsToNumArgs = Generics.NewHashMap();
    flagsToNumArgs["-parser"] = 3;
    flagsToNumArgs["-lex"] = 3;
    flagsToNumArgs["-test"] = 2;
    flagsToNumArgs["-out"] = 1;
    flagsToNumArgs["-lengthPenalty"] = 1;
    flagsToNumArgs["-penaltyType"] = 1;
    flagsToNumArgs["-maxLength"] = 1;
    flagsToNumArgs["-stats"] = 2;
    IDictionary<string, string[]> argMap = StringUtils.ArgsToMap(args, flagsToNumArgs);
    bool eval = argMap.ContainsKey("-eval");
    PrintWriter pw = null;
    if (argMap.ContainsKey("-out"))
    {
        pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream((argMap["-out"])[0]), "GB18030"), true);
    }
    log.Info("ChineseCharacterBasedLexicon called with args:");
    ChineseTreebankParserParams ctpp = new ChineseTreebankParserParams();
    for (int i = 0; i < args.Length; i++)
    {
        ctpp.SetOptionFlag(args, i);
        log.Info(" " + args[i]);
    }
    log.Info();
    Options op = new Options(ctpp);

    // -stats: print statistics over the (optionally annotated) training trees, then exit.
    if (argMap.ContainsKey("-stats"))
    {
        string[] statArgs = (argMap["-stats"]);
        MemoryTreebank rawTrainTreebank = op.tlpParams.MemoryTreebank();
        IFileFilter trainFilt = new NumberRangesFileFilter(statArgs[1], false);
        rawTrainTreebank.LoadPath(new File(statArgs[0]), trainFilt);
        log.Info("Done reading trees.");
        MemoryTreebank trainTreebank;
        if (argMap.ContainsKey("-annotate"))
        {
            trainTreebank = new MemoryTreebank();
            TreeAnnotator annotator = new TreeAnnotator(ctpp.HeadFinder(), ctpp, op);
            foreach (Tree tree in rawTrainTreebank)
            {
                trainTreebank.Add(annotator.TransformTree(tree));
            }
            log.Info("Done annotating trees.");
        }
        else
        {
            trainTreebank = rawTrainTreebank;
        }
        PrintStats(trainTreebank, pw);
        System.Environment.Exit(0);
    }

    int maxLength = 1000000;
    // Test.verbose = true;
    if (argMap.ContainsKey("-norm"))
    {
        op.testOptions.lengthNormalization = true;
    }
    if (argMap.ContainsKey("-maxLength"))
    {
        maxLength = System.Convert.ToInt32((argMap["-maxLength"])[0]);
    }
    op.testOptions.maxLength = 120;
    bool combo = argMap.ContainsKey("-combo");
    if (combo)
    {
        ctpp.useCharacterBasedLexicon = true;
        op.testOptions.maxSpanForTags = 10;
        op.doDep = false;
        op.dcTags = false;
    }

    LexicalizedParser lp = null;
    ILexicon lex = null;

    // -parser: train from a treebank (and optionally save), or load a serialized parser.
    if (argMap.ContainsKey("-parser"))
    {
        string[] parserArgs = (argMap["-parser"]);
        if (parserArgs.Length > 1)
        {
            IFileFilter trainFilt = new NumberRangesFileFilter(parserArgs[1], false);
            lp = LexicalizedParser.TrainFromTreebank(parserArgs[0], trainFilt, op);
            if (parserArgs.Length == 3)
            {
                string filename = parserArgs[2];
                log.Info("Writing parser in serialized format to file " + filename + " ");
                System.Console.Error.Flush();
                ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
                try
                {
                    @out.WriteObject(lp);
                }
                finally
                {
                    // Close the stream even when the write fails.
                    @out.Close();
                }
                log.Info("done.");
            }
        }
        else
        {
            string parserFile = parserArgs[0];
            lp = LexicalizedParser.LoadModel(parserFile, op);
        }
        // The parser's own lexicon and options replace the ones built above.
        lex = lp.GetLexicon();
        op = lp.GetOp();
        ctpp = (ChineseTreebankParserParams)op.tlpParams;
    }
    if (argMap.ContainsKey("-rad"))
    {
        ctpp.useUnknownCharacterModel = true;
    }
    if (argMap.ContainsKey("-lengthPenalty"))
    {
        ctpp.lengthPenalty = double.Parse((argMap["-lengthPenalty"])[0]);
    }
    if (argMap.ContainsKey("-penaltyType"))
    {
        ctpp.penaltyType = System.Convert.ToInt32((argMap["-penaltyType"])[0]);
    }

    // -lex: train a lexicon from a treebank (and optionally save), or load a serialized one.
    if (argMap.ContainsKey("-lex"))
    {
        string[] lexArgs = (argMap["-lex"]);
        if (lexArgs.Length > 1)
        {
            IIndex<string> wordIndex = new HashIndex<string>();
            IIndex<string> tagIndex = new HashIndex<string>();
            lex = ctpp.Lex(op, wordIndex, tagIndex);
            MemoryTreebank rawTrainTreebank = op.tlpParams.MemoryTreebank();
            IFileFilter trainFilt = new NumberRangesFileFilter(lexArgs[1], false);
            rawTrainTreebank.LoadPath(new File(lexArgs[0]), trainFilt);
            log.Info("Done reading trees.");
            MemoryTreebank trainTreebank;
            if (argMap.ContainsKey("-annotate"))
            {
                trainTreebank = new MemoryTreebank();
                TreeAnnotator annotator = new TreeAnnotator(ctpp.HeadFinder(), ctpp, op);
                foreach (Tree tree in rawTrainTreebank)
                {
                    // A foreach iteration variable cannot be reassigned in C#, so the
                    // transformed tree is added directly.
                    trainTreebank.Add(annotator.TransformTree(tree));
                }
                log.Info("Done annotating trees.");
            }
            else
            {
                trainTreebank = rawTrainTreebank;
            }
            lex.InitializeTraining(trainTreebank.Count);
            lex.Train(trainTreebank);
            lex.FinishTraining();
            log.Info("Done training lexicon.");
            if (lexArgs.Length == 3)
            {
                string filename = lexArgs[2];
                log.Info("Writing lexicon in serialized format to file " + filename + " ");
                System.Console.Error.Flush();
                ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
                try
                {
                    @out.WriteObject(lex);
                }
                finally
                {
                    @out.Close();
                }
                log.Info("done.");
            }
        }
        else
        {
            string lexFile = lexArgs.Length == 1 ? lexArgs[0] : "parsers/chineseCharLex.ser.gz";
            log.Info("Reading Lexicon from file " + lexFile);
            ObjectInputStream @in = IOUtils.ReadStreamFromString(lexFile);
            try
            {
                lex = (ILexicon)@in.ReadObject();
            }
            catch (TypeLoadException)
            {
                throw new Exception("Bad serialized file: " + lexFile);
            }
            finally
            {
                // Close the stream on success and on a bad file alike.
                @in.Close();
            }
        }
    }

    // -test: segment and/or parse every test sentence, print results, optionally evaluate.
    if (argMap.ContainsKey("-test"))
    {
        bool segmentWords = ctpp.segment;
        bool parse = lp != null;
        System.Diagnostics.Debug.Assert((parse || segmentWords));
        // WordCatConstituent.collinizeWords = argMap.containsKey("-collinizeWords");
        // WordCatConstituent.collinizeTags = argMap.containsKey("-collinizeTags");
        IWordSegmenter seg = null;
        if (segmentWords)
        {
            seg = (IWordSegmenter)lex;
        }
        string[] testArgs = (argMap["-test"]);
        MemoryTreebank testTreebank = op.tlpParams.MemoryTreebank();
        IFileFilter testFilt = new NumberRangesFileFilter(testArgs[1], false);
        testTreebank.LoadPath(new File(testArgs[0]), testFilt);
        ITreeTransformer subcategoryStripper = op.tlpParams.SubcategoryStripper();
        ITreeTransformer collinizer = ctpp.Collinizer();
        WordCatEquivalenceClasser eqclass = new WordCatEquivalenceClasser();
        WordCatEqualityChecker eqcheck = new WordCatEqualityChecker();
        EquivalenceClassEval basicEval = new EquivalenceClassEval(eqclass, eqcheck, "basic");
        EquivalenceClassEval collinsEval = new EquivalenceClassEval(eqclass, eqcheck, "collinized");
        IList<string> evalTypes = new List<string>(3);
        bool goodPOS = false;
        if (segmentWords)
        {
            evalTypes.Add(WordCatConstituent.wordType);
            if (ctpp.segmentMarkov && !parse)
            {
                evalTypes.Add(WordCatConstituent.tagType);
                goodPOS = true;
            }
        }
        if (parse)
        {
            evalTypes.Add(WordCatConstituent.tagType);
            evalTypes.Add(WordCatConstituent.catType);
            if (combo)
            {
                evalTypes.Add(WordCatConstituent.wordType);
                goodPOS = true;
            }
        }
        TreeToBracketProcessor proc = new TreeToBracketProcessor(evalTypes);
        log.Info("Testing...");
        foreach (Tree goldTop in testTreebank)
        {
            Tree gold = goldTop.FirstChild();
            IList<IHasWord> goldSentence = gold.YieldHasWord();
            if (goldSentence.Count > maxLength)
            {
                log.Info("Skipping sentence; too long: " + goldSentence.Count);
                continue;
            }
            else
            {
                log.Info("Processing sentence; length: " + goldSentence.Count);
            }
            IList<IHasWord> s;
            if (segmentWords)
            {
                // Concatenate the gold words into one character string and re-segment it.
                StringBuilder goldCharBuf = new StringBuilder();
                foreach (IHasWord aGoldSentence in goldSentence)
                {
                    StringLabel word = (StringLabel)aGoldSentence;
                    goldCharBuf.Append(word.Value());
                }
                string goldChars = goldCharBuf.ToString();
                s = seg.Segment(goldChars);
            }
            else
            {
                s = goldSentence;
            }
            Tree tree;
            if (parse)
            {
                tree = lp.ParseTree(s);
                if (tree == null)
                {
                    throw new Exception("PARSER RETURNED NULL!!!");
                }
            }
            else
            {
                tree = Edu.Stanford.Nlp.Trees.Trees.ToFlatTree(s);
                tree = subcategoryStripper.TransformTree(tree);
            }
            if (pw != null)
            {
                if (parse)
                {
                    tree.PennPrint(pw);
                }
                else
                {
                    // Print the words separated by single spaces. IEnumerator.Current must
                    // not be read before MoveNext, so iterate with foreach.
                    bool firstWord = true;
                    foreach (IHasWord item in s)
                    {
                        if (!firstWord)
                        {
                            pw.Print(" ");
                        }
                        Word word = (Word)item;
                        pw.Print(word.Word());
                        firstWord = false;
                    }
                }
                pw.Println();
            }
            if (eval)
            {
                ICollection ourBrackets;
                ICollection goldBrackets;
                ourBrackets = proc.AllBrackets(tree);
                goldBrackets = proc.AllBrackets(gold);
                if (goodPOS)
                {
                    Sharpen.Collections.AddAll(ourBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(tree, gold));
                    Sharpen.Collections.AddAll(goldBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(gold, tree));
                }
                basicEval.Eval(ourBrackets, goldBrackets);
                System.Console.Out.WriteLine("\nScores:");
                basicEval.DisplayLast();
                Tree collinsTree = collinizer.TransformTree(tree);
                Tree collinsGold = collinizer.TransformTree(gold);
                ourBrackets = proc.AllBrackets(collinsTree);
                goldBrackets = proc.AllBrackets(collinsGold);
                if (goodPOS)
                {
                    Sharpen.Collections.AddAll(ourBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(collinsTree, collinsGold));
                    Sharpen.Collections.AddAll(goldBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(collinsGold, collinsTree));
                }
                collinsEval.Eval(ourBrackets, goldBrackets);
                System.Console.Out.WriteLine("\nCollinized scores:");
                collinsEval.DisplayLast();
                System.Console.Out.WriteLine();
            }
        }
        if (eval)
        {
            basicEval.Display();
            System.Console.Out.WriteLine();
            collinsEval.Display();
        }
    }

    // Flush and release the -out file, if one was opened.
    if (pw != null)
    {
        pw.Close();
    }
}
/// <summary>Closes the underlying object output stream (<c>oos</c>).</summary>
/// <exception cref="System.IO.IOException"/>
public virtual void Close() => oos.Close();