/// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        /// <summary>
        /// Returns the embeddings for the tokens occurring in <paramref name="sentences"/>, reading
        /// them from a gzip-compressed serialized cache file when it exists and building that cache
        /// otherwise.
        /// </summary>
        /// <remarks>
        /// When the cache file is missing, the full embedding set is loaded from "../google-300.txt",
        /// restricted to the tokens found in the sentences, and written to the cache file.
        /// </remarks>
        /// <param name="cacheFilename">Path of the cache file to read or create.</param>
        /// <param name="sentences">Sentences whose tokens select which embeddings are kept.</param>
        /// <returns>Map from token to its embedding vector.</returns>
        public virtual IDictionary <string, double[]> GetEmbeddings(string cacheFilename, IList <CoNLLBenchmark.CoNLLSentence> sentences)
        {
            File f = new File(cacheFilename);

            if (f.Exists())
            {
                // Cache hit: deserialize the previously trimmed set.
                ObjectInputStream ois = new ObjectInputStream(new GZIPInputStream(new FileInputStream(cacheFilename)));
                try
                {
                    return((IDictionary <string, double[]>)ois.ReadObject());
                }
                finally
                {
                    // Release the file handle even when deserialization fails.
                    ois.Close();
                }
            }

            // Cache miss: build the trimmed set from the full embedding file.
            IDictionary <string, double[]> trimmedSet = new Dictionary <string, double[]>();
            IDictionary <string, double[]> massiveSet = LoadEmbeddingsFromFile("../google-300.txt");
            log.Info("Got massive embedding set size " + massiveSet.Count);
            foreach (CoNLLBenchmark.CoNLLSentence sentence in sentences)
            {
                foreach (string token in sentence.token)
                {
                    if (massiveSet.Contains(token))
                    {
                        trimmedSet[token] = massiveSet[token];
                    }
                }
            }
            log.Info("Got trimmed embedding set size " + trimmedSet.Count);
            f.CreateNewFile();
            ObjectOutputStream oos = new ObjectOutputStream(new GZIPOutputStream(new FileOutputStream(cacheFilename)));
            try
            {
                oos.WriteObject(trimmedSet);
            }
            finally
            {
                // Close (and thereby flush) the stream even when serialization fails.
                oos.Close();
            }
            log.Info("Wrote trimmed set to file");
            return(trimmedSet);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Trains a ColumnDataClassifier, round-trips it through in-memory serialization, and prints
        /// the predictions of the original and the deserialized classifier for each test line.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        private static void DemonstrateSerializationColumnDataClassifier()
        {
            System.Console.Out.WriteLine();
            System.Console.Out.WriteLine("Demonstrating working with a serialized classifier using serializeTo");
            ColumnDataClassifier cdc = new ColumnDataClassifier(where + "examples/cheese2007.prop");

            cdc.TrainClassifier(where + "examples/cheeseDisease.train");
            // Exhibit serialization and deserialization working. Serialized to bytes in memory for simplicity
            System.Console.Out.WriteLine();
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            ObjectOutputStream    oos  = new ObjectOutputStream(baos);

            cdc.SerializeClassifier(oos);
            oos.Close();
            byte[] @object            = baos.ToByteArray();
            ByteArrayInputStream bais = new ByteArrayInputStream(@object);
            ObjectInputStream    ois  = new ObjectInputStream(bais);
            ColumnDataClassifier cdc2 = ColumnDataClassifier.GetClassifier(ois);

            ois.Close();
            // We compare the output of the deserialized classifier cdc2 versus the original one cdc.
            // Each classifier converts the text line to its own datum and scores that datum.
            System.Console.Out.WriteLine("Making predictions with both classifiers");
            foreach (string line in ObjectBank.GetLineIterator(where + "examples/cheeseDisease.test", "utf-8"))
            {
                IDatum <string, string> d  = cdc.MakeDatumFromLine(line);
                IDatum <string, string> d2 = cdc2.MakeDatumFromLine(line);
                System.Console.Out.Printf("%s  =origi=>  %s (%.4f)%n", line, cdc.ClassOf(d), cdc.ScoresOf(d).GetCount(cdc.ClassOf(d)));
                // Score d2 (built by cdc2) so the deserialized classifier is exercised end to end.
                System.Console.Out.Printf("%s  =deser=>  %s (%.4f)%n", line, cdc2.ClassOf(d2), cdc2.ScoresOf(d2).GetCount(cdc2.ClassOf(d2)));
            }
        }
        /// <summary>
        /// Saves the classifier to <paramref name="path"/> and the additional settings
        /// (gazetteer location, annotations to skip, sub-type and BIO flags) to
        /// <paramref name="path"/> + ".extra".
        /// </summary>
        /// <param name="path">Destination path of the serialized classifier.</param>
        /// <exception cref="System.IO.IOException"/>
        public virtual void Save(string path)
        {
            // save the CRF
            this.classifier.SerializeClassifier(path);
            // save the additional arguments
            ObjectOutputStream @out = new ObjectOutputStream(new FileOutputStream(path + ".extra"));

            try
            {
                // The write order defines the file layout; keep it unchanged.
                @out.WriteObject(this.gazetteerLocation);
                @out.WriteObject(this.annotationsToSkip);
                @out.WriteObject(this.useSubTypes);
                @out.WriteObject(this.useBIO);
            }
            finally
            {
                // Close the stream even when one of the writes fails.
                @out.Close();
            }
        }
Exemplo n.º 4
0
 /// <summary>
 /// Writes the given segmenter to <paramref name="filename"/> in serialized form.
 /// An IOException is printed and otherwise ignored.
 /// </summary>
 /// <param name="cs">Segmenter to serialize.</param>
 /// <param name="filename">Destination file name.</param>
 internal static void SaveSegmenterDataToSerialized(Edu.Stanford.Nlp.Parser.Lexparser.ChineseLexiconAndWordSegmenter cs, string filename)
 {
     try
     {
         log.Info("Writing segmenter in serialized format to file " + filename + " ");
         ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
         try
         {
             @out.WriteObject(cs);
         }
         finally
         {
             // Close the stream even when serialization fails.
             @out.Close();
         }
         log.Info("done.");
     }
     catch (IOException ioe)
     {
         // Best effort: report the failure and return normally.
         Sharpen.Runtime.PrintStackTrace(ioe);
     }
 }
 /// <summary>Saves the singleton predictor model to the given filename.</summary>
 /// <remarks>
 /// An IOException raised while writing is rethrown wrapped in a RuntimeIOException.
 /// </remarks>
 /// <param name="predictor">Classifier to serialize.</param>
 /// <param name="filename">Destination file name.</param>
 private static void SaveToSerialized(LogisticClassifier <string, string> predictor, string filename)
 {
     try
     {
         log.Info("Writing singleton predictor in serialized format to file " + filename + ' ');
         ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
         try
         {
             @out.WriteObject(predictor);
         }
         finally
         {
             // Close the stream even when serialization fails.
             @out.Close();
         }
         log.Info("done.");
     }
     catch (IOException ioe)
     {
         throw new RuntimeIOException(ioe);
     }
 }
Exemplo n.º 6
0
        /// <summary>
        /// Writes an integer and a string through IOUtils.WriteStreamFromString, reads them back
        /// through IOUtils.ReadStreamFromString, and checks that both values survive the round trip.
        /// </summary>
        public virtual void TestReadWriteStreamFromString()
        {
            string             objPath = dirPath + "/objs.obj";
            ObjectOutputStream oos     = IOUtils.WriteStreamFromString(objPath);

            try
            {
                // int.Parse takes a string; the literal is the direct equivalent of the intended boxed 42.
                // NOTE(review): assumes the stream round-trips a boxed int unchanged - confirm.
                oos.WriteObject(42);
                oos.WriteObject("forty two");
            }
            finally
            {
                oos.Close();
            }
            ObjectInputStream ois = IOUtils.ReadStreamFromString(objPath);

            try
            {
                object i = ois.ReadObject();
                object s = ois.ReadObject();

                NUnit.Framework.Assert.IsTrue((42).Equals(i));
                NUnit.Framework.Assert.IsTrue("forty two".Equals(s));
            }
            finally
            {
                // Close the input stream even when an assertion fails.
                ois.Close();
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Constructs a SignedObject from any Serializable object.
        /// The given object is signed with the given signing key, using the
        /// designated signature engine.
        /// </summary>
        /// <param name="object"> the object to be signed. </param>
        /// <param name="signingKey"> the private key for signing. </param>
        /// <param name="signingEngine"> the signature signing engine.
        /// </param>
        /// <exception cref="IOException"> if an error occurs during serialization </exception>
        /// <exception cref="InvalidKeyException"> if the key is invalid. </exception>
        /// <exception cref="SignatureException"> if signing fails. </exception>
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public SignedObject(Serializable object, PrivateKey signingKey, Signature signingEngine) throws IOException, InvalidKeyException, SignatureException
        public SignedObject(Serializable @object, PrivateKey signingKey, Signature signingEngine)
        {
            // Serialize the object into an in-memory byte buffer through an object stream.
            ByteArrayOutputStream byteBuffer   = new ByteArrayOutputStream();
            ObjectOutput          objectWriter = new ObjectOutputStream(byteBuffer);

            objectWriter.WriteObject(@object);
            objectWriter.Flush();
            objectWriter.Close();

            // Keep the serialized bytes as this instance's content, then release the buffer.
            this.Content = byteBuffer.ToByteArray();
            byteBuffer.Close();

            // Finally sign using the supplied key and signature engine.
            this.Sign(signingKey, signingEngine);
        }
Exemplo n.º 8
0
 // todo [2017]: This should be redone sometime to not have such a hardcoded upper limit.
 // = null;
 /// <summary>
 /// Serializes the word dictionary (<c>words_</c>) to <paramref name="serializePath"/>.
 /// Any failure is logged and rethrown wrapped in a RuntimeIOException.
 /// </summary>
 /// <param name="serializePath">Destination file name.</param>
 private void SerializeDictionary(string serializePath)
 {
     logger.Info("Serializing dictionaries to " + serializePath + " ... ");
     try
     {
         ObjectOutputStream oos = IOUtils.WriteStreamFromString(serializePath);
         try
         {
             //oos.writeObject(MAX_LEXICON_LENGTH);
             oos.WriteObject(words_);
             //oos.writeObject(cdtos_);
         }
         finally
         {
             // Close the stream even when serialization fails.
             oos.Close();
         }
         logger.Info("done.");
     }
     catch (Exception e)
     {
         logger.Error("Failed", e);
         throw new RuntimeIOException(e);
     }
 }
Exemplo n.º 9
0
        /// <summary>
        /// Serializes this object to <paramref name="modelpath"/>, creating the parent directory
        /// first when the path contains one that does not exist yet.
        /// </summary>
        /// <param name="modelpath">Destination file path.</param>
        /// <exception cref="System.IO.IOException"/>
        public virtual void Save(string modelpath)
        {
            // make sure modelpath directory exists
            int lastSlash = modelpath.LastIndexOf(File.separator);

            if (lastSlash > 0)
            {
                File dir = new File(Sharpen.Runtime.Substring(modelpath, 0, lastSlash));
                if (!dir.Exists())
                {
                    dir.Mkdirs();
                }
            }
            ObjectOutputStream @out = new ObjectOutputStream(new FileOutputStream(modelpath));

            try
            {
                @out.WriteObject(this);
            }
            finally
            {
                // Close the stream even when serialization fails.
                @out.Close();
            }
        }
        /// <summary>
        /// Serializes the classifier to <paramref name="fileName"/>. I/O failures are printed and
        /// otherwise ignored.
        /// </summary>
        /// <param name="fileName">Destination file name.</param>
        /// <param name="clf">Classifier to serialize.</param>
        public static void OutputModel(string fileName, IClassifier <string, string> clf)
        {
            FileOutputStream fo = null;

            try
            {
                fo = new FileOutputStream(fileName);
                ObjectOutputStream so = new ObjectOutputStream(fo);
                so.WriteObject(clf);
                so.Flush();
                so.Close();
                // Closed normally via so.Close(); nothing is left for the finally block to release.
                fo = null;
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            finally
            {
                // Reached with a non-null fo only when writing failed after the file was opened.
                if (fo != null)
                {
                    try
                    {
                        fo.Close();
                    }
                    catch (IOException e)
                    {
                        Sharpen.Runtime.PrintStackTrace(e);
                    }
                }
            }
        }
        /// <summary>
        /// Writes the classifier state (weights, feature index, label index, in that order) to
        /// <paramref name="path"/>, creating the parent directory first when needed, and reports
        /// progress on standard output.
        /// </summary>
        /// <param name="path">Destination file path.</param>
        /// <exception cref="System.IO.IOException"/>
        private void Save(string path)
        {
            System.Console.Out.Write("Saving classifier to " + path + "... ");
            // make sure the directory specified by path exists
            int lastSlash = path.LastIndexOf(File.separator);

            if (lastSlash > 0)
            {
                File dir = new File(Sharpen.Runtime.Substring(path, 0, lastSlash));
                if (!dir.Exists())
                {
                    dir.Mkdirs();
                }
            }
            ObjectOutputStream @out = new ObjectOutputStream(new FileOutputStream(path));

            try
            {
                // The write order defines the file layout; keep it unchanged.
                @out.WriteObject(weights);
                @out.WriteObject(featureIndex);
                @out.WriteObject(labelIndex);
            }
            finally
            {
                // Close the stream even when one of the writes fails.
                @out.Close();
            }
            System.Console.Out.WriteLine("done.");
        }
Exemplo n.º 12
0
        /// <summary>
        /// Serializes <paramref name="obj"/>, Base64-encodes the bytes, and stores the resulting
        /// string under <paramref name="key"/> through <c>editor</c>, committing immediately.
        /// I/O failures are printed and otherwise ignored.
        /// </summary>
        /// <param name="key">Key to store the encoded object under.</param>
        /// <param name="obj">Object to serialize.</param>
        public void PutObject(string key, Java.Lang.Object obj)
        {
            System.IO.MemoryStream baos      = new System.IO.MemoryStream();
            ObjectOutputStream     outStream = null;

            try
            {
                outStream = new ObjectOutputStream(baos);
                outStream.WriteObject(obj);
                // Push any buffered bytes into the memory stream before it is read.
                outStream.Flush();
                string objectVal = ASCIIEncoding.ASCII.GetString(Base64.Encode(baos.ToArray(), Base64Flags.Default));
                editor.PutString(key, objectVal);
                editor.Commit();
            }
            catch (IOException e)
            {
                e.PrintStackTrace();
            }
            finally
            {
                try
                {
                    // Close the wrapper before the stream it writes to, so a final flush
                    // cannot hit an already-closed target.
                    if (outStream != null)
                    {
                        outStream.Close();
                    }
                    baos.Close();
                }
                catch (IOException e)
                {
                    e.PrintStackTrace();
                }
            }
        }
Exemplo n.º 13
0
 /// <summary>
 /// Encodes the given trees as a gzip-compressed object stream: first the number of trees that
 /// remain after transformation and filtering, then the string form of each remaining tree.
 /// </summary>
 /// <param name="input">Trees to encode.</param>
 /// <returns>The compressed bytes.</returns>
 public virtual byte[] ConvertToBytes(IList <Tree> input)
 {
     try
     {
         ByteArrayOutputStream rawBytes   = new ByteArrayOutputStream();
         GZIPOutputStream      compressed = new GZIPOutputStream(rawBytes);
         ObjectOutputStream    writer     = new ObjectOutputStream(compressed);

         // Apply treeBasicCategories to every tree, then keep only those accepted by treeFilter.
         IList <Tree> kept = CollectionUtils.FilterAsList(CollectionUtils.TransformAsList(input, treeBasicCategories), treeFilter);

         // Count first, followed by one string per tree.
         writer.WriteObject(kept.Count);
         foreach (Tree keptTree in kept)
         {
             writer.WriteObject(keptTree.ToString());
         }

         // Close from the outermost wrapper inwards before taking the bytes.
         writer.Close();
         compressed.Close();
         rawBytes.Close();
         return(rawBytes.ToByteArray());
     }
     catch (IOException ioFailure)
     {
         throw new RuntimeIOException(ioFailure);
     }
 }
        /// <summary>
        /// Exercises the counter under test (<c>c</c>) step by step: setting and incrementing counts,
        /// min/max/argmin/argmax/mean, sorting keys with a Counters comparator, the in-place
        /// arithmetic helpers in Counters, and finally a serialization round trip when the counter
        /// is ISerializable.
        /// </summary>
        /// <remarks>
        /// Each assertion depends on the mutations made before it, so statement order matters.
        /// The arithmetic section runs only when <c>integral</c> is false.
        /// </remarks>
        public virtual void TestClassicCounterHistoricalMain()
        {
            c.SetCount("p", 0);
            c.SetCount("q", 2);
            // Snapshot of c while it holds p=0, q=2; used near the end of the arithmetic section.
            ClassicCounter <string> small_c = new ClassicCounter <string>(c);
            ICounter <string>       c7      = c.GetFactory().Create();

            c7.AddAll(c);
            NUnit.Framework.Assert.AreEqual(c.TotalCount(), 2.0);
            c.IncrementCount("p");
            NUnit.Framework.Assert.AreEqual(c.TotalCount(), 3.0);
            c.IncrementCount("p", 2.0);
            NUnit.Framework.Assert.AreEqual(Counters.Min(c), 2.0);
            NUnit.Framework.Assert.AreEqual(Counters.Argmin(c), "q");
            // Now p is p=3.0, q=2.0
            c.SetCount("w", -5.0);
            c.SetCount("x", -4.5);
            IList <string> biggestKeys = new List <string>(c.KeySet());

            NUnit.Framework.Assert.AreEqual(biggestKeys.Count, 4);
            // The expected order below is w, x, p, q (counts -5.0, -4.5, 3.0, 2.0).
            // NOTE(review): presumably decreasing absolute value - confirm against Counters.ToComparator.
            biggestKeys.Sort(Counters.ToComparator(c, false, true));
            NUnit.Framework.Assert.AreEqual("w", biggestKeys[0]);
            NUnit.Framework.Assert.AreEqual("x", biggestKeys[1]);
            NUnit.Framework.Assert.AreEqual("p", biggestKeys[2]);
            NUnit.Framework.Assert.AreEqual("q", biggestKeys[3]);
            NUnit.Framework.Assert.AreEqual(Counters.Min(c), -5.0, Tolerance);
            NUnit.Framework.Assert.AreEqual(Counters.Argmin(c), "w");
            NUnit.Framework.Assert.AreEqual(Counters.Max(c), 3.0, Tolerance);
            NUnit.Framework.Assert.AreEqual(Counters.Argmax(c), "p");
            // The expected mean differs between integral and floating-point counters.
            if (integral)
            {
                NUnit.Framework.Assert.AreEqual(Counters.Mean(c), -1.0);
            }
            else
            {
                NUnit.Framework.Assert.AreEqual(Counters.Mean(c), -1.125, Tolerance);
            }
            if (!integral)
            {
                // only do this for floating point counters.  Too much bother to rewrite
                c.SetCount("x", -2.5);
                // c2 is a copy of c; the assertions below pin p=3.0, q=2.0, w=-5.0, x=-2.5.
                ClassicCounter <string> c2 = new ClassicCounter <string>(c);
                NUnit.Framework.Assert.AreEqual(3.0, c2.GetCount("p"));
                NUnit.Framework.Assert.AreEqual(2.0, c2.GetCount("q"));
                NUnit.Framework.Assert.AreEqual(-5.0, c2.GetCount("w"));
                NUnit.Framework.Assert.AreEqual(-2.5, c2.GetCount("x"));
                // c3 starts empty and gets one increment per key of c2.
                ICounter <string> c3 = c.GetFactory().Create();
                foreach (string str in c2.KeySet())
                {
                    c3.IncrementCount(str);
                }
                NUnit.Framework.Assert.AreEqual(1.0, c3.GetCount("p"));
                NUnit.Framework.Assert.AreEqual(1.0, c3.GetCount("q"));
                NUnit.Framework.Assert.AreEqual(1.0, c3.GetCount("w"));
                NUnit.Framework.Assert.AreEqual(1.0, c3.GetCount("x"));
                // Expected effect per the assertions: c2 += 10.0 * c3.
                Counters.AddInPlace(c2, c3, 10.0);
                NUnit.Framework.Assert.AreEqual(13.0, c2.GetCount("p"));
                NUnit.Framework.Assert.AreEqual(12.0, c2.GetCount("q"));
                NUnit.Framework.Assert.AreEqual(5.0, c2.GetCount("w"));
                NUnit.Framework.Assert.AreEqual(7.5, c2.GetCount("x"));
                c3.AddAll(c);
                NUnit.Framework.Assert.AreEqual(4.0, c3.GetCount("p"));
                NUnit.Framework.Assert.AreEqual(3.0, c3.GetCount("q"));
                NUnit.Framework.Assert.AreEqual(-4.0, c3.GetCount("w"));
                NUnit.Framework.Assert.AreEqual(-1.5, c3.GetCount("x"));
                // Subtracting c again is expected to restore the all-ones state of c3.
                Counters.SubtractInPlace(c3, c);
                NUnit.Framework.Assert.AreEqual(1.0, c3.GetCount("p"));
                NUnit.Framework.Assert.AreEqual(1.0, c3.GetCount("q"));
                NUnit.Framework.Assert.AreEqual(1.0, c3.GetCount("w"));
                NUnit.Framework.Assert.AreEqual(1.0, c3.GetCount("x"));
                foreach (string str_1 in c.KeySet())
                {
                    c3.IncrementCount(str_1);
                }
                NUnit.Framework.Assert.AreEqual(2.0, c3.GetCount("p"));
                NUnit.Framework.Assert.AreEqual(2.0, c3.GetCount("q"));
                NUnit.Framework.Assert.AreEqual(2.0, c3.GetCount("w"));
                NUnit.Framework.Assert.AreEqual(2.0, c3.GetCount("x"));
                // Division by a counter first, then by scalars; each step is pinned by the assertions that follow it.
                Counters.DivideInPlace(c2, c3);
                NUnit.Framework.Assert.AreEqual(6.5, c2.GetCount("p"));
                NUnit.Framework.Assert.AreEqual(6.0, c2.GetCount("q"));
                NUnit.Framework.Assert.AreEqual(2.5, c2.GetCount("w"));
                NUnit.Framework.Assert.AreEqual(3.75, c2.GetCount("x"));
                Counters.DivideInPlace(c2, 0.5);
                NUnit.Framework.Assert.AreEqual(13.0, c2.GetCount("p"));
                NUnit.Framework.Assert.AreEqual(12.0, c2.GetCount("q"));
                NUnit.Framework.Assert.AreEqual(5.0, c2.GetCount("w"));
                NUnit.Framework.Assert.AreEqual(7.5, c2.GetCount("x"));
                Counters.MultiplyInPlace(c2, 2.0);
                NUnit.Framework.Assert.AreEqual(26.0, c2.GetCount("p"));
                NUnit.Framework.Assert.AreEqual(24.0, c2.GetCount("q"));
                NUnit.Framework.Assert.AreEqual(10.0, c2.GetCount("w"));
                NUnit.Framework.Assert.AreEqual(15.0, c2.GetCount("x"));
                Counters.DivideInPlace(c2, 2.0);
                NUnit.Framework.Assert.AreEqual(13.0, c2.GetCount("p"));
                NUnit.Framework.Assert.AreEqual(12.0, c2.GetCount("q"));
                NUnit.Framework.Assert.AreEqual(5.0, c2.GetCount("w"));
                NUnit.Framework.Assert.AreEqual(7.5, c2.GetCount("x"));
                // Increment c2 once per key of itself, then once per key of c.
                foreach (string str_2 in c2.KeySet())
                {
                    c2.IncrementCount(str_2);
                }
                NUnit.Framework.Assert.AreEqual(14.0, c2.GetCount("p"));
                NUnit.Framework.Assert.AreEqual(13.0, c2.GetCount("q"));
                NUnit.Framework.Assert.AreEqual(6.0, c2.GetCount("w"));
                NUnit.Framework.Assert.AreEqual(8.5, c2.GetCount("x"));
                foreach (string str_3 in c.KeySet())
                {
                    c2.IncrementCount(str_3);
                }
                NUnit.Framework.Assert.AreEqual(15.0, c2.GetCount("p"));
                NUnit.Framework.Assert.AreEqual(14.0, c2.GetCount("q"));
                NUnit.Framework.Assert.AreEqual(7.0, c2.GetCount("w"));
                NUnit.Framework.Assert.AreEqual(9.5, c2.GetCount("x"));
                // small_c holds p=0, q=2, so only q is expected to change here.
                c2.AddAll(small_c);
                NUnit.Framework.Assert.AreEqual(15.0, c2.GetCount("p"));
                NUnit.Framework.Assert.AreEqual(16.0, c2.GetCount("q"));
                NUnit.Framework.Assert.AreEqual(7.0, c2.GetCount("w"));
                NUnit.Framework.Assert.AreEqual(9.5, c2.GetCount("x"));
                // Threshold queries on keys; the expected sets include keys whose count equals the threshold.
                NUnit.Framework.Assert.AreEqual(new HashSet <string>(Arrays.AsList("p", "q")), Counters.KeysAbove(c2, 14));
                NUnit.Framework.Assert.AreEqual(new HashSet <string>(Arrays.AsList("q")), Counters.KeysAt(c2, 16));
                NUnit.Framework.Assert.AreEqual(new HashSet <string>(Arrays.AsList("x", "w")), Counters.KeysBelow(c2, 9.5));
                Counters.AddInPlace(c2, small_c, -6);
                NUnit.Framework.Assert.AreEqual(15.0, c2.GetCount("p"));
                NUnit.Framework.Assert.AreEqual(4.0, c2.GetCount("q"));
                NUnit.Framework.Assert.AreEqual(7.0, c2.GetCount("w"));
                NUnit.Framework.Assert.AreEqual(9.5, c2.GetCount("x"));
                // Two subtractions of small_c are expected to bring q to zero, after which
                // RetainNonZeros is expected to remove the key q.
                Counters.SubtractInPlace(c2, small_c);
                Counters.SubtractInPlace(c2, small_c);
                Counters.RetainNonZeros(c2);
                NUnit.Framework.Assert.AreEqual(15.0, c2.GetCount("p"));
                NUnit.Framework.Assert.IsFalse(c2.ContainsKey("q"));
                NUnit.Framework.Assert.AreEqual(7.0, c2.GetCount("w"));
                NUnit.Framework.Assert.AreEqual(9.5, c2.GetCount("x"));
            }
            // serialize to Stream
            if (c is ISerializable)
            {
                try
                {
                    ByteArrayOutputStream baos = new ByteArrayOutputStream();
                    ObjectOutputStream    @out = new ObjectOutputStream(new BufferedOutputStream(baos));
                    @out.WriteObject(c);
                    @out.Close();
                    // reconstitute
                    byte[]            bytes = baos.ToByteArray();
                    ObjectInputStream @in   = new ObjectInputStream(new BufferedInputStream(new ByteArrayInputStream(bytes)));
                    // c is replaced by the deserialized copy; the checks below run against that copy.
                    c = IOUtils.ReadObjectFromObjectStream(@in);
                    @in.Close();
                    if (!this.integral)
                    {
                        NUnit.Framework.Assert.AreEqual(-2.5, c.TotalCount());
                        NUnit.Framework.Assert.AreEqual(-5.0, Counters.Min(c));
                        NUnit.Framework.Assert.AreEqual("w", Counters.Argmin(c));
                    }
                    c.Clear();
                    if (!this.integral)
                    {
                        NUnit.Framework.Assert.AreEqual(0.0, c.TotalCount());
                    }
                }
                catch (IOException ioe)
                {
                    Fail("IOException: " + ioe);
                }
                catch (TypeLoadException cce)
                {
                    Fail("ClassNotFoundException: " + cce);
                }
            }
        }
        /// <summary>for testing -- CURRENTLY BROKEN!!!</summary>
        /// <remarks>
        /// Trains a parser from the treebank (serializing it to "chineseCharTagPCFG.ser.gz"), or loads
        /// a model from <c>args[1]</c> when training setup throws an ArgumentException; then parses
        /// the test trees and writes gold and parsed output to "out.chi" (GB18030).
        /// </remarks>
        /// <param name="args">Exactly three values: treebankPath, trainNums, testNums.</param>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            if (args.Length != 3)
            {
                throw new Exception("args: treebankPath trainNums testNums");
            }
            ChineseTreebankParserParams ctpp = new ChineseTreebankParserParams();

            ctpp.charTags = true;
            // TODO: these options are getting clobbered by reading in the
            // parser object (unless it's a text file parser?)
            Options op = new Options(ctpp);

            op.doDep = false;
            op.testOptions.maxLength = 90;
            LexicalizedParser lp;

            try
            {
                IFileFilter trainFilt = new NumberRangesFileFilter(args[1], false);
                lp = LexicalizedParser.TrainFromTreebank(args[0], trainFilt, op);
                try
                {
                    string filename = "chineseCharTagPCFG.ser.gz";
                    log.Info("Writing parser in serialized format to file " + filename + " ");
                    System.Console.Error.Flush();
                    ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
                    try
                    {
                        @out.WriteObject(lp);
                    }
                    finally
                    {
                        // Close the stream even when serialization fails.
                        @out.Close();
                    }
                    log.Info("done.");
                }
                catch (IOException ioe)
                {
                    // Failing to save the parser is not fatal; evaluation continues with the in-memory parser.
                    Sharpen.Runtime.PrintStackTrace(ioe);
                }
            }
            catch (ArgumentException)
            {
                // Fall back to treating args[1] as a serialized model to load.
                lp = LexicalizedParser.LoadModel(args[1], op);
            }
            IFileFilter    testFilt     = new NumberRangesFileFilter(args[2], false);
            MemoryTreebank testTreebank = ctpp.MemoryTreebank();

            testTreebank.LoadPath(new File(args[0]), testFilt);
            PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream("out.chi"), "GB18030"), true);

            try
            {
                WordCatEquivalenceClasser eqclass = new WordCatEquivalenceClasser();
                WordCatEqualityChecker    eqcheck = new WordCatEqualityChecker();
                EquivalenceClassEval      eval    = new EquivalenceClassEval(eqclass, eqcheck);

                //    System.out.println("Preterminals:" + preterminals);
                System.Console.Out.WriteLine("Testing...");
                foreach (Tree gold in testTreebank)
                {
                    Tree tree;
                    try
                    {
                        tree = lp.ParseTree(gold.YieldHasWord());
                        if (tree == null)
                        {
                            System.Console.Out.WriteLine("Failed to parse " + gold.YieldHasWord());
                            continue;
                        }
                    }
                    catch (Exception e)
                    {
                        Sharpen.Runtime.PrintStackTrace(e);
                        continue;
                    }
                    // A foreach iteration variable cannot be reassigned in C#, so the first
                    // child gets its own local instead of overwriting gold.
                    Tree goldChild = gold.FirstChild();
                    pw.Println(SentenceUtils.ListToString(goldChild.PreTerminalYield()));
                    pw.Println(SentenceUtils.ListToString(goldChild.Yield()));
                    goldChild.PennPrint(pw);
                    pw.Println(tree.PreTerminalYield());
                    pw.Println(tree.Yield());
                    tree.PennPrint(pw);
                    //      Collection allBrackets = WordCatConstituent.allBrackets(tree);
                    //      Collection goldBrackets = WordCatConstituent.allBrackets(gold);
                    //      eval.eval(allBrackets, goldBrackets);
                    eval.DisplayLast();
                }
                System.Console.Out.WriteLine();
                System.Console.Out.WriteLine();
                eval.Display();
            }
            finally
            {
                // Flush and release "out.chi" whether or not evaluation completed.
                pw.Close();
            }
        }
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            IDictionary <string, int> flagsToNumArgs = Generics.NewHashMap();

            flagsToNumArgs["-parser"]        = int.Parse(3);
            flagsToNumArgs["-lex"]           = int.Parse(3);
            flagsToNumArgs["-test"]          = int.Parse(2);
            flagsToNumArgs["-out"]           = int.Parse(1);
            flagsToNumArgs["-lengthPenalty"] = int.Parse(1);
            flagsToNumArgs["-penaltyType"]   = int.Parse(1);
            flagsToNumArgs["-maxLength"]     = int.Parse(1);
            flagsToNumArgs["-stats"]         = int.Parse(2);
            IDictionary <string, string[]> argMap = StringUtils.ArgsToMap(args, flagsToNumArgs);
            bool        eval = argMap.Contains("-eval");
            PrintWriter pw   = null;

            if (argMap.Contains("-out"))
            {
                pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream((argMap["-out"])[0]), "GB18030"), true);
            }
            log.Info("ChineseCharacterBasedLexicon called with args:");
            ChineseTreebankParserParams ctpp = new ChineseTreebankParserParams();

            for (int i = 0; i < args.Length; i++)
            {
                ctpp.SetOptionFlag(args, i);
                log.Info(" " + args[i]);
            }
            log.Info();
            Options op = new Options(ctpp);

            if (argMap.Contains("-stats"))
            {
                string[]       statArgs         = (argMap["-stats"]);
                MemoryTreebank rawTrainTreebank = op.tlpParams.MemoryTreebank();
                IFileFilter    trainFilt        = new NumberRangesFileFilter(statArgs[1], false);
                rawTrainTreebank.LoadPath(new File(statArgs[0]), trainFilt);
                log.Info("Done reading trees.");
                MemoryTreebank trainTreebank;
                if (argMap.Contains("-annotate"))
                {
                    trainTreebank = new MemoryTreebank();
                    TreeAnnotator annotator = new TreeAnnotator(ctpp.HeadFinder(), ctpp, op);
                    foreach (Tree tree in rawTrainTreebank)
                    {
                        trainTreebank.Add(annotator.TransformTree(tree));
                    }
                    log.Info("Done annotating trees.");
                }
                else
                {
                    trainTreebank = rawTrainTreebank;
                }
                PrintStats(trainTreebank, pw);
                System.Environment.Exit(0);
            }
            int maxLength = 1000000;

            //    Test.verbose = true;
            if (argMap.Contains("-norm"))
            {
                op.testOptions.lengthNormalization = true;
            }
            if (argMap.Contains("-maxLength"))
            {
                maxLength = System.Convert.ToInt32((argMap["-maxLength"])[0]);
            }
            op.testOptions.maxLength = 120;
            bool combo = argMap.Contains("-combo");

            if (combo)
            {
                ctpp.useCharacterBasedLexicon = true;
                op.testOptions.maxSpanForTags = 10;
                op.doDep  = false;
                op.dcTags = false;
            }
            LexicalizedParser lp  = null;
            ILexicon          lex = null;

            if (argMap.Contains("-parser"))
            {
                string[] parserArgs = (argMap["-parser"]);
                if (parserArgs.Length > 1)
                {
                    IFileFilter trainFilt = new NumberRangesFileFilter(parserArgs[1], false);
                    lp = LexicalizedParser.TrainFromTreebank(parserArgs[0], trainFilt, op);
                    if (parserArgs.Length == 3)
                    {
                        string filename = parserArgs[2];
                        log.Info("Writing parser in serialized format to file " + filename + " ");
                        System.Console.Error.Flush();
                        ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
                        @out.WriteObject(lp);
                        @out.Close();
                        log.Info("done.");
                    }
                }
                else
                {
                    string parserFile = parserArgs[0];
                    lp = LexicalizedParser.LoadModel(parserFile, op);
                }
                lex  = lp.GetLexicon();
                op   = lp.GetOp();
                ctpp = (ChineseTreebankParserParams)op.tlpParams;
            }
            if (argMap.Contains("-rad"))
            {
                ctpp.useUnknownCharacterModel = true;
            }
            if (argMap.Contains("-lengthPenalty"))
            {
                ctpp.lengthPenalty = double.Parse((argMap["-lengthPenalty"])[0]);
            }
            if (argMap.Contains("-penaltyType"))
            {
                ctpp.penaltyType = System.Convert.ToInt32((argMap["-penaltyType"])[0]);
            }
            if (argMap.Contains("-lex"))
            {
                string[] lexArgs = (argMap["-lex"]);
                if (lexArgs.Length > 1)
                {
                    IIndex <string> wordIndex = new HashIndex <string>();
                    IIndex <string> tagIndex  = new HashIndex <string>();
                    lex = ctpp.Lex(op, wordIndex, tagIndex);
                    MemoryTreebank rawTrainTreebank = op.tlpParams.MemoryTreebank();
                    IFileFilter    trainFilt        = new NumberRangesFileFilter(lexArgs[1], false);
                    rawTrainTreebank.LoadPath(new File(lexArgs[0]), trainFilt);
                    log.Info("Done reading trees.");
                    MemoryTreebank trainTreebank;
                    if (argMap.Contains("-annotate"))
                    {
                        trainTreebank = new MemoryTreebank();
                        TreeAnnotator annotator = new TreeAnnotator(ctpp.HeadFinder(), ctpp, op);
                        foreach (Tree tree in rawTrainTreebank)
                        {
                            tree = annotator.TransformTree(tree);
                            trainTreebank.Add(tree);
                        }
                        log.Info("Done annotating trees.");
                    }
                    else
                    {
                        trainTreebank = rawTrainTreebank;
                    }
                    lex.InitializeTraining(trainTreebank.Count);
                    lex.Train(trainTreebank);
                    lex.FinishTraining();
                    log.Info("Done training lexicon.");
                    if (lexArgs.Length == 3)
                    {
                        string filename = lexArgs.Length == 3 ? lexArgs[2] : "parsers/chineseCharLex.ser.gz";
                        log.Info("Writing lexicon in serialized format to file " + filename + " ");
                        System.Console.Error.Flush();
                        ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
                        @out.WriteObject(lex);
                        @out.Close();
                        log.Info("done.");
                    }
                }
                else
                {
                    string lexFile = lexArgs.Length == 1 ? lexArgs[0] : "parsers/chineseCharLex.ser.gz";
                    log.Info("Reading Lexicon from file " + lexFile);
                    ObjectInputStream @in = IOUtils.ReadStreamFromString(lexFile);
                    try
                    {
                        lex = (ILexicon)@in.ReadObject();
                    }
                    catch (TypeLoadException)
                    {
                        throw new Exception("Bad serialized file: " + lexFile);
                    }
                    @in.Close();
                }
            }
            if (argMap.Contains("-test"))
            {
                bool segmentWords = ctpp.segment;
                bool parse        = lp != null;
                System.Diagnostics.Debug.Assert((parse || segmentWords));
                //      WordCatConstituent.collinizeWords = argMap.containsKey("-collinizeWords");
                //      WordCatConstituent.collinizeTags = argMap.containsKey("-collinizeTags");
                IWordSegmenter seg = null;
                if (segmentWords)
                {
                    seg = (IWordSegmenter)lex;
                }
                string[]       testArgs     = (argMap["-test"]);
                MemoryTreebank testTreebank = op.tlpParams.MemoryTreebank();
                IFileFilter    testFilt     = new NumberRangesFileFilter(testArgs[1], false);
                testTreebank.LoadPath(new File(testArgs[0]), testFilt);
                ITreeTransformer          subcategoryStripper = op.tlpParams.SubcategoryStripper();
                ITreeTransformer          collinizer          = ctpp.Collinizer();
                WordCatEquivalenceClasser eqclass             = new WordCatEquivalenceClasser();
                WordCatEqualityChecker    eqcheck             = new WordCatEqualityChecker();
                EquivalenceClassEval      basicEval           = new EquivalenceClassEval(eqclass, eqcheck, "basic");
                EquivalenceClassEval      collinsEval         = new EquivalenceClassEval(eqclass, eqcheck, "collinized");
                IList <string>            evalTypes           = new List <string>(3);
                bool goodPOS = false;
                if (segmentWords)
                {
                    evalTypes.Add(WordCatConstituent.wordType);
                    if (ctpp.segmentMarkov && !parse)
                    {
                        evalTypes.Add(WordCatConstituent.tagType);
                        goodPOS = true;
                    }
                }
                if (parse)
                {
                    evalTypes.Add(WordCatConstituent.tagType);
                    evalTypes.Add(WordCatConstituent.catType);
                    if (combo)
                    {
                        evalTypes.Add(WordCatConstituent.wordType);
                        goodPOS = true;
                    }
                }
                TreeToBracketProcessor proc = new TreeToBracketProcessor(evalTypes);
                log.Info("Testing...");
                foreach (Tree goldTop in testTreebank)
                {
                    Tree             gold         = goldTop.FirstChild();
                    IList <IHasWord> goldSentence = gold.YieldHasWord();
                    if (goldSentence.Count > maxLength)
                    {
                        log.Info("Skipping sentence; too long: " + goldSentence.Count);
                        continue;
                    }
                    else
                    {
                        log.Info("Processing sentence; length: " + goldSentence.Count);
                    }
                    IList <IHasWord> s;
                    if (segmentWords)
                    {
                        StringBuilder goldCharBuf = new StringBuilder();
                        foreach (IHasWord aGoldSentence in goldSentence)
                        {
                            StringLabel word = (StringLabel)aGoldSentence;
                            goldCharBuf.Append(word.Value());
                        }
                        string goldChars = goldCharBuf.ToString();
                        s = seg.Segment(goldChars);
                    }
                    else
                    {
                        s = goldSentence;
                    }
                    Tree tree;
                    if (parse)
                    {
                        tree = lp.ParseTree(s);
                        if (tree == null)
                        {
                            throw new Exception("PARSER RETURNED NULL!!!");
                        }
                    }
                    else
                    {
                        tree = Edu.Stanford.Nlp.Trees.Trees.ToFlatTree(s);
                        tree = subcategoryStripper.TransformTree(tree);
                    }
                    if (pw != null)
                    {
                        if (parse)
                        {
                            tree.PennPrint(pw);
                        }
                        else
                        {
                            IEnumerator sentIter = s.GetEnumerator();
                            for (; ;)
                            {
                                Word word = (Word)sentIter.Current;
                                pw.Print(word.Word());
                                if (sentIter.MoveNext())
                                {
                                    pw.Print(" ");
                                }
                                else
                                {
                                    break;
                                }
                            }
                        }
                        pw.Println();
                    }
                    if (eval)
                    {
                        ICollection ourBrackets;
                        ICollection goldBrackets;
                        ourBrackets  = proc.AllBrackets(tree);
                        goldBrackets = proc.AllBrackets(gold);
                        if (goodPOS)
                        {
                            Sharpen.Collections.AddAll(ourBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(tree, gold));
                            Sharpen.Collections.AddAll(goldBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(gold, tree));
                        }
                        basicEval.Eval(ourBrackets, goldBrackets);
                        System.Console.Out.WriteLine("\nScores:");
                        basicEval.DisplayLast();
                        Tree collinsTree = collinizer.TransformTree(tree);
                        Tree collinsGold = collinizer.TransformTree(gold);
                        ourBrackets  = proc.AllBrackets(collinsTree);
                        goldBrackets = proc.AllBrackets(collinsGold);
                        if (goodPOS)
                        {
                            Sharpen.Collections.AddAll(ourBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(collinsTree, collinsGold));
                            Sharpen.Collections.AddAll(goldBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(collinsGold, collinsTree));
                        }
                        collinsEval.Eval(ourBrackets, goldBrackets);
                        System.Console.Out.WriteLine("\nCollinized scores:");
                        collinsEval.DisplayLast();
                        System.Console.Out.WriteLine();
                    }
                }
                if (eval)
                {
                    basicEval.Display();
                    System.Console.Out.WriteLine();
                    collinsEval.Display();
                }
            }
        }
Exemplo n.º 17
0
 /// <summary>
 /// Closes this object by delegating to <c>oos.Close()</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): <c>oos</c> is declared outside this snippet; judging by its name it is
 /// presumably an object output stream held by the enclosing class. Confirm against the
 /// class definition.
 /// </remarks>
 /// <exception cref="System.IO.IOException"/>
 public virtual void Close() => oos.Close();