Beispiel #1
0
        /// <summary>
        ///  Reads one complete object from the reader. For each property found, the setter
        ///  registered under that property name is looked up and invoked on the instance.
        /// </summary>
        /// <remarks>
        ///  Intended for classes only, because the instance can't be passed by ref.
        ///  Structs can serialize as arrays or implement the decode loop themselves.
        ///  Each setter receives the instance as an argument so that the Dictionary can be
        ///  static per type, which is critical for acceptable performance.
        /// </remarks>
        /// <typeparam name="T">Type being deserialized</typeparam>
        /// <param name="reader">ITreeReader to read from; the current token must be StartObject</param>
        /// <param name="instance">T instance to populate; it is cleared before reading</param>
        /// <param name="setters">Setter to invoke for each known property name</param>
        /// <param name="throwOnUnknown">True to throw for a property name missing from setters, false to skip its value</param>
        /// <exception cref="IOException">An unknown property was found while throwOnUnknown is true</exception>
        public static void ReadObject <T>(this ITreeReader reader, T instance, Dictionary <string, Setter <T> > setters, bool throwOnUnknown = true) where T : ITreeSerializable
        {
            // Reset the instance before anything is read into it
            instance.Clear();

            reader.Expect(TreeToken.StartObject);
            reader.Read();

            while (reader.TokenType == TreeToken.PropertyName)
            {
                string name = reader.ReadAsString();
                reader.Read();

                if (!setters.TryGetValue(name, out Setter <T> apply))
                {
                    if (throwOnUnknown)
                    {
                        throw new IOException($"Found unknown {typeof(T).Name} property, \"{name}\", expected one of \"{String.Join("; ", setters.Keys)}\" at {reader.Position:n0} using {reader.GetType().Name}.");
                    }

                    // Unknown but tolerated: skip over the value
                    reader.Skip();
                    continue;
                }

                // Known property: let the setter consume the value, then advance past it
                apply(reader, instance);
                reader.Read();
            }

            // The EndObject token is verified but not consumed; the caller handles it
            reader.Expect(TreeToken.EndObject);
        }
        /// <summary>Processes a single file containing AnCora XML trees.</summary>
        /// <remarks>
        /// Reads every tree in the file, splitting a tree into several parts for as long as
        /// <c>FindSplitPoint</c> reports a split point, and updates the MWE statistics with
        /// each resulting tree. The tree reader is closed on both success and failure.
        /// </remarks>
        /// <param name="file">File to read</param>
        /// <param name="trf">Factory used to create the tree reader</param>
        /// <param name="encoding">Character encoding used to decode the file</param>
        /// <returns>
        /// The MWE statistics paired with the parsed trees, or <c>null</c> if an
        /// IOException occurred (its stack trace is printed).
        /// </returns>
        private static Pair <TwoDimensionalCounter <string, string>, IList <Tree> > ProcessTreeFile(File file, SpanishXMLTreeReaderFactory trf, string encoding)
        {
            TwoDimensionalCounter <string, string> tagger = new TwoDimensionalCounter <string, string>();

            try
            {
                Reader      @in = new BufferedReader(new InputStreamReader(new FileInputStream(file), encoding));
                ITreeReader tr  = trf.NewTreeReader(file.GetPath(), @in);
                try
                {
                    IList <Tree> trees = new List <Tree>();
                    Tree         t;
                    Tree         splitPoint;
                    while ((t = tr.ReadTree()) != null)
                    {
                        do
                        {
                            // We may need to split the current tree into multiple parts.
                            // (If not, a call to `Split` with a `null` split-point is a
                            // no-op.)
                            splitPoint = FindSplitPoint(t);
                            Pair <Tree, Tree> split = Split(t, splitPoint);
                            Tree toAdd = split.First();
                            t = split.Second();
                            trees.Add(toAdd);
                            UpdateTagger(tagger, toAdd);
                        }while (splitPoint != null);
                    }
                    return(new Pair <TwoDimensionalCounter <string, string>, IList <Tree> >(tagger, trees));
                }
                finally
                {
                    // Close the reader even when reading or splitting throws
                    tr.Close();
                }
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
                return(null);
            }
        }
Beispiel #3
0
        /// <summary>
        ///  Reads values into the items already present in a Dictionary, matching each property
        ///  name in the reader to the Dictionary key of the item which should read it.
        ///  Used with Dictionaries of specific things which may or may not be present in the file, like Table.Columns.
        /// </summary>
        /// <typeparam name="T">Type of values in Dictionary</typeparam>
        /// <param name="reader">ITreeReader to read from; the current token must be StartObject</param>
        /// <param name="dictionary">Dictionary containing the items to read into</param>
        /// <param name="throwOnUnknown">True to throw for property name not in Dictionary, false to quietly skip over it</param>
        /// <exception cref="IOException">An unknown property was found while throwOnUnknown is true</exception>
        public static void ReadDictionaryItems <T>(this ITreeReader reader, Dictionary <string, T> dictionary, bool throwOnUnknown = true) where T : ITreeSerializable
        {
            reader.Expect(TreeToken.StartObject);
            reader.Read();

            while (reader.TokenType == TreeToken.PropertyName)
            {
                string name = reader.ReadAsString();
                reader.Read();

                if (!dictionary.TryGetValue(name, out T target))
                {
                    if (throwOnUnknown)
                    {
                        throw new IOException($"Found unknown {typeof(T).Name} property \"{name}\", expected one of \"{String.Join("; ", dictionary.Keys)}\" at {reader.Position:n0} using {reader.GetType().Name}.");
                    }

                    // Unknown but tolerated: skip over the value
                    reader.Skip();
                    continue;
                }

                // Known item: let it read its own value, then advance past it
                target.Read(reader);
                reader.Read();
            }

            reader.Expect(TreeToken.EndObject);
        }
Beispiel #4
0
        /// <summary>
        ///  Reads an object as a Dictionary with string keys. Each property name becomes a key and
        ///  each value is created with the provided constructor and then asked to read itself.
        /// </summary>
        /// <typeparam name="T">Type of values in Dictionary</typeparam>
        /// <param name="reader">ITreeReader to read from</param>
        /// <param name="ctor">Factory invoked once per entry to create the value instance</param>
        /// <returns>The Dictionary read, or null if the current token is Null</returns>
        public static Dictionary <string, T> ReadStringDictionary <T>(this ITreeReader reader, Func <T> ctor) where T : ITreeSerializable
        {
            // A Null token stands for a null Dictionary
            if (reader.TokenType == TreeToken.Null)
            {
                return null;
            }

            Dictionary <string, T> entries = new Dictionary <string, T>();

            reader.Expect(TreeToken.StartObject);

            // Each pass starts on a PropertyName; the trailing Read moves past the value just read
            for (reader.Read(); reader.TokenType == TreeToken.PropertyName; reader.Read())
            {
                string name = reader.ReadAsString();
                reader.Read();

                T item = ctor();
                item.Read(reader);

                entries[name] = item;
            }

            reader.Expect(TreeToken.EndObject);
            return entries;
        }
        /// <summary>
        /// Reads every tree from every file in <c>fileList</c> and counts each leaf's
        /// (word, tag) pair in <c>unigramTagger</c>, ignoring leaves tagged
        /// <c>SpanishTreeNormalizer.MwTag</c>. Each file's tree reader is closed once the
        /// file has been processed, including when reading fails.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        public virtual void Process()
        {
            SpanishXMLTreeReaderFactory trf = new SpanishXMLTreeReaderFactory();

            foreach (File file in fileList)
            {
                Reader      @in = new BufferedReader(new InputStreamReader(new FileInputStream(file), AncoraEncoding));
                ITreeReader tr  = trf.NewTreeReader(@in);
                try
                {
                    Tree t;
                    // Tree reading will implicitly perform tree normalization for us
                    while ((t = tr.ReadTree()) != null)
                    {
                        // Update tagger with this tree
                        IList <CoreLabel> leaves = t.TaggedLabeledYield();
                        foreach (CoreLabel leafLabel in leaves)
                        {
                            if (leafLabel.Tag().Equals(SpanishTreeNormalizer.MwTag))
                            {
                                continue;
                            }
                            unigramTagger.IncrementCount(leafLabel.Word(), leafLabel.Tag());
                        }
                    }
                }
                finally
                {
                    // Previously the reader was never closed, leaking one open file per input
                    tr.Close();
                }
            }
        }
Beispiel #6
0
 /// <summary>
 ///  Populates an item by reading it from a stream in the requested format.
 /// </summary>
 /// <param name="item">ITreeSerializable to populate</param>
 /// <param name="stream">Stream to read from</param>
 /// <param name="format">TreeFormat used to build the reader</param>
 /// <param name="settings">Optional settings passed through to the reader</param>
 public static void Load(this ITreeSerializable item, Stream stream, TreeFormat format, TreeSerializationSettings settings = null)
 {
     ITreeReader reader = TreeSerializer.Reader(format, stream, settings);

     try
     {
         item.Read(reader);
     }
     finally
     {
         // Dispose the reader whether or not the read succeeded
         if (reader != null)
         {
             reader.Dispose();
         }
     }
 }
Beispiel #7
0
        /// <summary>
        ///  Clears current state, then reads each table found in the reader into the
        ///  matching entry of Tables.
        /// </summary>
        /// <param name="reader">ITreeReader to read from</param>
        public void Read(ITreeReader reader)
        {
            Clear();

            // Unknown tables are an error only when the reader settings are Strict
            bool strict = reader.Settings.Strict;
            reader.ReadDictionaryItems(Tables, throwOnUnknown: strict);
        }
Beispiel #8
0
 /// <summary>
 /// Reads every tree in <paramref name="treeFile"/>, passes it through <c>TraverseAndFix</c>,
 /// and writes the result, one tree per line, to a file with the same name plus ".fixed".
 /// </summary>
 /// <remarks>
 /// The reader and the writer are closed on failure as well as on success. Exceptions are
 /// printed and not rethrown.
 /// </remarks>
 /// <param name="treeFile">Tree file to read</param>
 /// <param name="pretermLabel">Counter passed through to <c>TraverseAndFix</c></param>
 /// <param name="unigramTagger">Counter passed through to <c>TraverseAndFix</c></param>
 private static void ResolveDummyTags(File treeFile, TwoDimensionalCounter <string, string> pretermLabel, TwoDimensionalCounter <string, string> unigramTagger)
 {
     try
     {
         int         nTrees = 0;
         ITreeReader tr     = null;
         PrintWriter pw     = null;
         try
         {
             BufferedReader     br  = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
             ITreeReaderFactory trf = new FrenchTreeReaderFactory();
             tr = trf.NewTreeReader(br);
             pw = new PrintWriter(new TextWriter(new FileOutputStream(new File(treeFile + ".fixed")), false, "UTF-8"));
             for (Tree t; (t = tr.ReadTree()) != null; nTrees++)
             {
                 TraverseAndFix(t, pretermLabel, unigramTagger);
                 pw.Println(t.ToString());
             }
         }
         finally
         {
             // Same close order as before (writer, then reader); the nested finally
             // guarantees the reader is closed even if closing the writer throws.
             try
             {
                 if (pw != null)
                 {
                     pw.Close();
                 }
             }
             finally
             {
                 if (tr != null)
                 {
                     tr.Close();
                 }
             }
         }
         System.Console.Out.WriteLine("Processed " + nTrees + " trees");
     }
     catch (UnsupportedEncodingException e)
     {
         Sharpen.Runtime.PrintStackTrace(e);
     }
     catch (FileNotFoundException e)
     {
         Sharpen.Runtime.PrintStackTrace(e);
     }
     catch (IOException e)
     {
         Sharpen.Runtime.PrintStackTrace(e);
     }
 }
Beispiel #9
0
        /// <summary>
        /// Reads the trees from the source named by the first argument using a
        /// NegraPennTreeReaderFactory and prints each one with <c>PennPrint</c>.
        /// </summary>
        /// <param name="args">File to run on</param>
        public static void Main(string[] args)
        {
            if (args.Length < 1)
            {
                System.Console.Out.Printf("Usage: java %s tree_file%n", typeof(Edu.Stanford.Nlp.Trees.International.Negra.NegraPennTreeReaderFactory).FullName);
                return;
            }
            ITreebankLanguagePack tlp = new NegraPennLanguagePack();
            ITreeReaderFactory    trf = new Edu.Stanford.Nlp.Trees.International.Negra.NegraPennTreeReaderFactory(2, false, false, tlp);

            try
            {
                ITreeReader tr = trf.NewTreeReader(IOUtils.ReaderFromString(args[0], tlp.GetEncoding()));
                try
                {
                    for (Tree t; (t = tr.ReadTree()) != null;)
                    {
                        t.PennPrint();
                    }
                }
                finally
                {
                    // Close the reader even when reading or printing a tree throws
                    tr.Close();
                }
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
Beispiel #10
0
 /// <summary>Load a collection of parse trees from a Reader.</summary>
 /// <remarks>
 /// Every tree read is appended to <c>parseTrees</c>. When a tree's label implements
 /// <c>IHasIndex</c>, the label is stamped with the sentence index and, if given, the
 /// document ID. An IOException is logged and ends the load.
 /// Each tree may optionally be encased in parens to allow for Penn
 /// Treebank style trees.
 /// </remarks>
 /// <param name="r">
 /// The reader to read trees from.  (If you want it buffered,
 /// you should already have buffered it!)
 /// </param>
 /// <param name="id">
 /// An ID for where these files come from (arbitrary, but
 /// something like a filename).  Can be <code>null</code> for none.
 /// </param>
 public void Load(Reader r, string id)
 {
     try
     {
         // could throw an IO exception?
         ITreeReader source   = TreeReaderFactory().NewTreeReader(r);
         int         position = 0;
         Tree        parsed   = source.ReadTree();

         while (parsed != null)
         {
             // Only labels that can carry an index are stamped, so we can trace where the tree came from
             IHasIndex indexed = parsed.Label() as IHasIndex;
             if (indexed != null)
             {
                 if (id != null)
                 {
                     indexed.SetDocID(id);
                 }
                 indexed.SetSentIndex(position);
             }

             parseTrees.Add(parsed);
             position++;
             parsed = source.ReadTree();
         }
     }
     catch (IOException e)
     {
         log.Info("load IO Exception: " + e);
     }
 }
Beispiel #11
0
 /// <summary>
 ///  Replaces the contents with a block array read from the reader: the slice then
 ///  starts at index zero, spans the whole array, and is marked not expandable.
 /// </summary>
 /// <param name="reader">ITreeReader to read from</param>
 public void Read(ITreeReader reader)
 {
     T[] block = reader.ReadBlockArray <T>();

     Array        = block;
     Index        = 0;
     Count        = block.Length;
     IsExpandable = false;
 }
Beispiel #12
0
        /// <summary>
        ///  Reads a Dictionary with int keys. The expected layout is an array holding a block
        ///  array of keys followed by an array of values, one value per key in the same order.
        /// </summary>
        /// <typeparam name="T">Type of values in Dictionary</typeparam>
        /// <param name="reader">ITreeReader to read from</param>
        /// <param name="ctor">Factory invoked once per key to create the value instance</param>
        /// <returns>The Dictionary read, or null if the current token is Null</returns>
        public static Dictionary <int, T> ReadIntDictionary <T>(this ITreeReader reader, Func <T> ctor) where T : ITreeSerializable
        {
            // A Null token stands for a null Dictionary
            if (reader.TokenType == TreeToken.Null)
            {
                return null;
            }

            Dictionary <int, T> entries = new Dictionary <int, T>();

            // Outer array: the first element is the block array of keys
            reader.Expect(TreeToken.StartArray);
            reader.Read();
            int[] keys = reader.ReadBlockArray <int>();

            // The second element is the array of values
            reader.Read();
            reader.Expect(TreeToken.StartArray);
            reader.Read();

            foreach (int key in keys)
            {
                T item = ctor();
                item.Read(reader);
                entries[key] = item;

                // Advance past the value just read
                reader.Read();
            }

            // Close the values array, then verify the outer array ends
            reader.Expect(TreeToken.EndArray);
            reader.Read();
            reader.Expect(TreeToken.EndArray);

            return entries;
        }
Beispiel #13
0
 /// <summary>
 ///  Checks that the reader's current token is the one required, and throws an exception
 ///  naming the reader type, both tokens, and the position when it is not.
 /// </summary>
 /// <param name="reader">ITreeReader to check</param>
 /// <param name="expected">TreeToken the reader must currently be on</param>
 /// <exception cref="IOException">The current token differs from the expected one</exception>
 public static void Expect(this ITreeReader reader, TreeToken expected)
 {
     TreeToken found = reader.TokenType;

     if (found == expected)
     {
         return;
     }

     throw new IOException($"{reader.GetType().Name} expected \"{expected}\" but found \"{found}\" at {reader.Position:n0}");
 }
Beispiel #14
0
        /// <summary>
        ///  Wraps an inner reader, initializes the tracking state, and then calls Open to
        ///  produce the initial Tree.
        /// </summary>
        /// <param name="inner">ITreeReader being wrapped</param>
        public TreeDiagnosticsReader(ITreeReader inner)
        {
            this.Inner            = inner;
            this.Path             = new Stack <TreeDiagnostics>();
            this.LastPropertyName = "<Database>";

            // Open runs last, after the other members have been initialized
            this.Tree = Open();
        }
Beispiel #15
0
        /// <summary>
        /// Splits a tree file into three files by tree position: the first 483 trees go to
        /// "&lt;input&gt;.clean.dev", trees 483 up to (but not including) 5724 go to
        /// "&lt;input&gt;.clean.train", and the remainder goes to "&lt;input&gt;.clean.test".
        /// </summary>
        /// <remarks>
        /// The reader and all three writers are closed on failure as well as on success.
        /// </remarks>
        /// <param name="args">Exactly one argument: the tree file to split</param>
        public static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                System.Console.Error.Printf("Usage: java %s tree_file%n", typeof(SplitMaker).FullName);
                System.Environment.Exit(-1);
            }
            // Index of the first tree of the train split and of the test split
            const int trainStart = 483;
            const int testStart  = 5724;

            ITreebankLanguagePack tlp = new HebrewTreebankLanguagePack();
            string inputFile          = args[0];
            File   treeFile           = new File(inputFile);

            try
            {
                int         numTrees = 0;
                ITreeReader tr       = null;
                PrintWriter pwDev    = null;
                PrintWriter pwTrain  = null;
                PrintWriter pwTest   = null;
                try
                {
                    ITreeReaderFactory trf = new HebrewTreeReaderFactory();
                    BufferedReader     br  = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), tlp.GetEncoding()));
                    tr      = trf.NewTreeReader(br);
                    pwDev   = new PrintWriter(new TextWriter(new FileOutputStream(inputFile + ".clean.dev"), false, tlp.GetEncoding()));
                    pwTrain = new PrintWriter(new TextWriter(new FileOutputStream(inputFile + ".clean.train"), false, tlp.GetEncoding()));
                    pwTest  = new PrintWriter(new TextWriter(new FileOutputStream(inputFile + ".clean.test"), false, tlp.GetEncoding()));
                    for (Tree t; ((t = tr.ReadTree()) != null); numTrees++)
                    {
                        if (numTrees < trainStart)
                        {
                            pwDev.Println(t.ToString());
                        }
                        else if (numTrees < testStart)
                        {
                            pwTrain.Println(t.ToString());
                        }
                        else
                        {
                            pwTest.Println(t.ToString());
                        }
                    }
                }
                finally
                {
                    // Same close order as before (reader first); the nested finally
                    // guarantees the writers are closed even if closing the reader throws.
                    try
                    {
                        if (tr != null)
                        {
                            tr.Close();
                        }
                    }
                    finally
                    {
                        if (pwDev != null)
                        {
                            pwDev.Close();
                        }
                        if (pwTrain != null)
                        {
                            pwTrain.Close();
                        }
                        if (pwTest != null)
                        {
                            pwTest.Close();
                        }
                    }
                }
                System.Console.Error.Printf("Processed %d trees.%n", numTrees);
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
        /// <summary>For debugging.</summary>
        /// <remarks>
        /// Reads every tree from each file given on the command line and prints it prefixed
        /// with "&lt;file name without extension&gt;-&lt;sentence ID&gt;", then reports tree counts
        /// per file and in total. A file name with no '.' is used unchanged as the prefix.
        /// Each file's reader is closed on failure as well as on success.
        /// </remarks>
        /// <param name="args">One or more tree files</param>
        public static void Main(string[] args)
        {
            if (args.Length < 1)
            {
                System.Console.Error.Printf("Usage: java %s tree_file(s)%n%n", typeof(Edu.Stanford.Nlp.Trees.International.French.FrenchXMLTreeReader).FullName);
                System.Environment.Exit(-1);
            }
            IList <File> fileList = new List <File>();

            foreach (string arg in args)
            {
                fileList.Add(new File(arg));
            }
            ITreeReaderFactory trf             = new FrenchXMLTreeReaderFactory(false);
            int totalTrees                     = 0;
            ICollection <string> morphAnalyses = Generics.NewHashSet();

            try
            {
                foreach (File file in fileList)
                {
                    ITreeReader tr       = trf.NewTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")));
                    int         numTrees = 0;
                    try
                    {
                        // LastIndexOf returns -1 when the name has no '.'; fall back to the
                        // whole name instead of asking for a substring with a negative end.
                        string fileName          = file.GetName();
                        int    extensionStart    = fileName.LastIndexOf('.');
                        string canonicalFileName = extensionStart < 0 ? fileName : Sharpen.Runtime.Substring(fileName, 0, extensionStart);

                        for (Tree t; (t = tr.ReadTree()) != null; numTrees++)
                        {
                            string ftbID = ((CoreLabel)t.Label()).Get(typeof(CoreAnnotations.SentenceIDAnnotation));
                            System.Console.Out.Printf("%s-%s\t%s%n", canonicalFileName, ftbID, t.ToString());
                            IList <ILabel> leaves = t.Yield();
                            foreach (ILabel label in leaves)
                            {
                                if (label is CoreLabel)
                                {
                                    morphAnalyses.Add(((CoreLabel)label).OriginalText());
                                }
                            }
                        }
                    }
                    finally
                    {
                        // Close this file's reader even when processing a tree throws
                        tr.Close();
                    }
                    System.Console.Error.Printf("%s: %d trees%n", file.GetName(), numTrees);
                    totalTrees += numTrees;
                }
                //wsg2011: Print out the observed morphological analyses
                //      for(String analysis : morphAnalyses)
                //        log.info(analysis);
                System.Console.Error.Printf("%nRead %d trees%n", totalTrees);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
 /// <summary>
 /// Stores the collaborators supplied by the caller and creates the location cache
 /// from the provided factory.
 /// </summary>
 public Sawyer(AddressQualityChecker addressQualityChecker, IGeocodeManager geocodeManager, ILocationCacheFactory locationCacheFactory, ILogger <Sawyer> logger, TreeParser treeParser, ITreeReader treeReader, ITreeWriter treeWriter)
 {
     this.addressQualityChecker = addressQualityChecker;
     this.geocodeManager = geocodeManager;

     // The cache itself is kept, not the factory
     this.locationCache = locationCacheFactory.Create();

     this.logger = logger;
     this.treeParser = treeParser;
     this.treeReader = treeReader;
     this.treeWriter = treeWriter;
 }
        /// <summary>
        ///  Reads this instance with ReadObject and the type's setters, then derives Count and
        ///  _lastNonEmptyIndex from _valueEndInPage when that array is present.
        /// </summary>
        /// <param name="reader">ITreeReader to read from</param>
        public void Read(ITreeReader reader)
        {
            reader.ReadObject(this, setters);

            // Nothing to derive when no value-end array is present after the read
            if (_valueEndInPage == null)
            {
                return;
            }

            Count = _valueEndInPage.Length;
            _lastNonEmptyIndex = Count - 1;
        }
Beispiel #19
0
        /// <summary>
        ///  Writes a value to a MemoryStream, then verifies three ways of consuming it:
        ///  reading token by token, skipping everything with one Skip call, and (for objects)
        ///  reading it as an Empty instance. Each pass must end exactly at the written length.
        /// </summary>
        /// <typeparam name="T">Type of value being verified</typeparam>
        /// <param name="value">Value to serialize</param>
        /// <param name="format">TreeFormat used for the writer and readers</param>
        public static void VerifySkip <T>(T value, TreeFormat format) where T : ITreeSerializable
        {
            using (MemoryStream stream = new MemoryStream())
            {
                // The stream is reused across the writer and three readers, so it must stay open
                TreeSerializationSettings settings = new TreeSerializationSettings() { LeaveStreamOpen = true };

                using (ITreeWriter writer = Writer(format, stream, settings))
                {
                    value.Write(writer);
                }

                long written = stream.Position;

                // Pass 1: read tokens individually and verify 'None' is returned at the end
                stream.Seek(0, SeekOrigin.Begin);
                using (ITreeReader reader = Reader(format, stream, settings))
                {
                    while (reader.Read())
                    {
                        // Every token must be a valid token type (no reading random bytes)
                        Assert.True((byte)reader.TokenType <= (byte)TreeToken.BlockArray);
                    }

                    Assert.Equal(TreeToken.None, reader.TokenType);
                    Assert.Equal(written, stream.Position);
                }

                // Pass 2: a single Skip must skip everything
                // (each ITreeSerializable must be one value or one root array or object)
                stream.Seek(0, SeekOrigin.Begin);
                using (ITreeReader reader = Reader(format, stream, settings))
                {
                    reader.Skip();

                    Assert.Equal(TreeToken.None, reader.TokenType);
                    Assert.Equal(written, stream.Position);
                }

                // Pass 3: for objects, verify each property can be skipped correctly.
                // Each Skip should read the value, so that the next token is the next PropertyName.
                stream.Seek(0, SeekOrigin.Begin);
                using (ITreeReader reader = Reader(format, stream, settings))
                {
                    if (reader.TokenType == TreeToken.StartObject)
                    {
                        new Empty().Read(reader);

                        Assert.Equal(written, stream.Position);
                    }
                }
            }
        }
Beispiel #20
0
        //Delete sentence-initial punctuation
        //Delete sentence final punctuation that is preceded by punctuation (first time)
        //Delete sentence final punctuation that is preceded by punctuation (second time)
        //Convert remaining sentence-final punctuation to either . if it is not [.!?]
        //Delete medial, sentence-final punctuation
        //Now move the sentence-final mark under SENT
        //For those trees that lack a sentence-final punc, add one.
        //Finally, delete these punctuation marks, which I can't seem to kill otherwise...
        //A bad MWADV tree in the training set
        // Not sure why this got a label of X.  Similar trees suggest it
        // should be A instead
        // This also seems to be mislabeled
        /// <summary>
        /// Reads every tree from the given file, discards trees matching either bad-tree
        /// pattern, and prints the FTBCorrector-transformed version of every other tree.
        /// </summary>
        /// <remarks>
        /// The tree reader is closed on failure as well as on success.
        /// </remarks>
        /// <param name="args">Exactly one argument: the tree file to correct</param>
        public static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                log.Info("Usage: java " + typeof(Edu.Stanford.Nlp.International.French.Pipeline.FTBCorrector).FullName + " filename\n");
                System.Environment.Exit(-1);
            }
            ITreeTransformer tt = new Edu.Stanford.Nlp.International.French.Pipeline.FTBCorrector();
            File             f  = new File(args[0]);

            try
            {
                //These bad trees in the Candito training set should be thrown out:
                //  (ROOT (SENT (" ") (. .)))
                //  (ROOT (SENT (. .)))
                TregexPattern      pBadTree  = TregexPattern.Compile("@SENT <: @PUNC");
                TregexPattern      pBadTree2 = TregexPattern.Compile("@SENT <1 @PUNC <2 @PUNC !<3 __");
                BufferedReader     br        = new BufferedReader(new InputStreamReader(new FileInputStream(f), "UTF-8"));
                ITreeReaderFactory trf       = new FrenchTreeReaderFactory();
                ITreeReader        tr        = trf.NewTreeReader(br);
                int nTrees = 0;
                try
                {
                    for (Tree t; (t = tr.ReadTree()) != null; nTrees++)
                    {
                        TregexMatcher m  = pBadTree.Matcher(t);
                        TregexMatcher m2 = pBadTree2.Matcher(t);
                        if (m.Find() || m2.Find())
                        {
                            log.Info("Discarding tree: " + t.ToString());
                        }
                        else
                        {
                            Tree fixedT = tt.TransformTree(t);
                            System.Console.Out.WriteLine(fixedT.ToString());
                        }
                    }
                }
                finally
                {
                    // Close the reader even when matching or transforming a tree throws
                    tr.Close();
                }
                // NOTE(review): nTrees counts every tree read, including discarded ones,
                // although the message says "Wrote" - confirm which count is intended.
                System.Console.Error.Printf("Wrote %d trees%n", nTrees);
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (TregexParseException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
Beispiel #21
0
        /// <summary>
        /// Populate a key from an XML string whose root element is named after the
        /// object's short class name (type name without namespace).
        /// </summary>
        /// <param name="obj">Key to populate</param>
        /// <param name="xmlString">XML text to parse</param>
        public static void ParseXml(this Key obj, string xmlString)
        {
            ITreeReader document = new XmlTreeReader(xmlString);

            // The root element is looked up by the runtime type's short name
            string rootElementName = obj.GetType().Name;

            // Hand the nodes inside the root element to the object for deserialization
            obj.DeserializeFrom(document.ReadElement(rootElementName));
        }
Beispiel #22
0
        /// <summary>
        /// Collects MWE statistics from every tree in the given file, writes the four counters
        /// to CSV files, resolves DUMMY tags, and prints summary counts.
        /// </summary>
        /// <remarks>
        /// The tree reader is closed on failure as well as on success.
        /// </remarks>
        /// <param name="args">Exactly one argument: the tree file to process</param>
        public static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                System.Console.Error.Printf("Usage: java %s file%n", typeof(MWEPreprocessor).FullName);
                System.Environment.Exit(-1);
            }
            File treeFile = new File(args[0]);
            TwoDimensionalCounter <string, string> labelTerm     = new TwoDimensionalCounter <string, string>();
            TwoDimensionalCounter <string, string> termLabel     = new TwoDimensionalCounter <string, string>();
            TwoDimensionalCounter <string, string> labelPreterm  = new TwoDimensionalCounter <string, string>();
            TwoDimensionalCounter <string, string> pretermLabel  = new TwoDimensionalCounter <string, string>();
            TwoDimensionalCounter <string, string> unigramTagger = new TwoDimensionalCounter <string, string>();

            try
            {
                BufferedReader     br  = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
                ITreeReaderFactory trf = new FrenchTreeReaderFactory();
                ITreeReader        tr  = trf.NewTreeReader(br);
                try
                {
                    for (Tree t; (t = tr.ReadTree()) != null;)
                    {
                        CountMWEStatistics(t, unigramTagger, labelPreterm, pretermLabel, labelTerm, termLabel);
                    }
                }
                finally
                {
                    // Closes the underlying reader, also when counting throws
                    tr.Close();
                }
                System.Console.Out.WriteLine("Generating {MWE Type -> Terminal}");
                PrintCounter(labelTerm, "label_term.csv");
                System.Console.Out.WriteLine("Generating {Terminal -> MWE Type}");
                PrintCounter(termLabel, "term_label.csv");
                System.Console.Out.WriteLine("Generating {MWE Type -> POS sequence}");
                PrintCounter(labelPreterm, "label_pos.csv");
                System.Console.Out.WriteLine("Generating {POS sequence -> MWE Type}");
                PrintCounter(pretermLabel, "pos_label.csv");
                System.Console.Out.WriteLine("Resolving DUMMY tags");
                ResolveDummyTags(treeFile, pretermLabel, unigramTagger);
                System.Console.Out.WriteLine("#Unknown Word Types: " + MWEPreprocessor.ManualUWModel.nUnknownWordTypes);
                System.Console.Out.WriteLine("#Missing POS: " + nMissingPOS);
                System.Console.Out.WriteLine("#Missing Phrasal: " + nMissingPhrasal);
                System.Console.Out.WriteLine("Done!");
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
Beispiel #23
0
        /// <summary>
        ///  Reads the list of chapters and infers the total row count from it.
        /// </summary>
        /// <remarks>
        ///  The count is reset to zero when no chapters are read, so that a count from earlier
        ///  contents does not survive a Read of an empty or null list.
        /// </remarks>
        /// <param name="reader">ITreeReader to read from</param>
        public void Read(ITreeReader reader)
        {
            _chapters = reader.ReadList <ArraySliceChapter <T> >(() => new ArraySliceChapter <T>());

            int chapterCount = _chapters?.Count ?? 0;

            if (chapterCount > 0)
            {
                // Infer count; N - 1 full chapters and the actual row count from the last
                _count = ArraySliceChapter <T> .ChapterRowCount * (chapterCount - 1) + _chapters[chapterCount - 1].Count;
            }
            else
            {
                // No chapters: previously _count kept whatever value it had before the Read
                _count = 0;
            }
        }
Beispiel #24
0
        /// <summary>
        /// Walks a tree file and a Morfette analysis file in parallel, rewrites every leaf as
        /// "word + MorphoMark + lemma + LemmaMark + tag", and prints each rewritten tree.
        /// </summary>
        /// <remarks>
        /// A warning is logged when one input has more entries than the other. The tree reader
        /// is closed on failure as well as on success.
        /// </remarks>
        /// <param name="args">Two arguments: the tree file and the Morfette TnT file</param>
        public static void Main(string[] args)
        {
            if (args.Length != 2)
            {
                System.Console.Error.Printf("Usage: java %s tree_file morfette_tnt_file%n", typeof(MungeTreesWithMorfetteAnalyses).FullName);
                System.Environment.Exit(-1);
            }
            string             treeFile     = args[0];
            string             morfetteFile = args[1];
            ITreeReaderFactory trf          = new FrenchTreeReaderFactory();

            try
            {
                ITreeReader tr = trf.NewTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8")));
                try
                {
                    IEnumerator <IList <CoreLabel> > morfetteItr = new MungeTreesWithMorfetteAnalyses.MorfetteFileIterator(morfetteFile);
                    bool uneven = false;
                    while (true)
                    {
                        // Advance both inputs together so that a single leftover entry on
                        // either side is noticed (the old check consumed and lost one tree).
                        Tree tree        = tr.ReadTree();
                        bool hasAnalysis = morfetteItr.MoveNext();
                        if (tree == null || !hasAnalysis)
                        {
                            uneven = tree != null || hasAnalysis;
                            break;
                        }

                        IList <CoreLabel> analysis = morfetteItr.Current;
                        IList <ILabel>    leaves   = tree.Yield();
                        System.Diagnostics.Debug.Assert(analysis.Count == leaves.Count);
                        int yieldLen = leaves.Count;
                        for (int i = 0; i < yieldLen; ++i)
                        {
                            CoreLabel tokenAnalysis = analysis[i];
                            ILabel    token         = leaves[i];
                            string    lemma         = GetLemma(token.Value(), tokenAnalysis.Lemma());
                            // Plain concatenation: string.Format does not understand the Java
                            // "%s" specifiers and returned the format text itself.
                            string    newLeaf       = token.Value() + MorphoFeatureSpecification.MorphoMark + lemma + MorphoFeatureSpecification.LemmaMark + tokenAnalysis.Tag();
                            ((CoreLabel)token).SetValue(newLeaf);
                        }
                        System.Console.Out.WriteLine(tree.ToString());
                    }
                    if (uneven)
                    {
                        log.Info("WARNING: Uneven input files!");
                    }
                }
                finally
                {
                    // Close the reader even when the analysis file cannot be opened or a tree fails
                    tr.Close();
                }
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
Beispiel #25
0
        /// <summary>
        /// Reads the atomic value of the single element called <paramref name="elementName"/>.
        /// </summary>
        /// <remarks>
        /// The element is looked up through ReadElement. Reporting an error when more than one
        /// element carries the same name is presumably done by ReadElement; confirm there.
        /// </remarks>
        /// <param name="elementName">Name of the element to look up.</param>
        /// <returns>
        /// The result of ReadValue() on the element's reader, or an empty string when
        /// ReadElement returns null.
        /// </returns>
        public string ReadValueElement(string elementName)
        {
            ITreeReader child = ReadElement(elementName);

            // A missing element yields an empty string, never null.
            if (child == null)
            {
                return String.Empty;
            }

            return child.ReadValue();
        }
        /// <summary>
        /// Command-line entry point. Reads every tree from a UTF-8 encoded tree file and, for
        /// each leaf, prints its word, lemma and morphological tag, followed by an empty line
        /// after each tree.
        /// </summary>
        /// <remarks>
        /// When the morphological string of a leaf is null, empty or "XXX", the value of the
        /// matching pre-terminal label is printed instead. I/O failures are not rethrown;
        /// their stack trace is printed.
        /// </remarks>
        /// <param name="args">Exactly one argument: the path of the tree file.</param>
        public static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                System.Console.Error.Printf("Usage: java %s tree_file%n", typeof(TreeToMorfette).FullName);
                System.Environment.Exit(-1);
            }
            string             treeFile = args[0];
            ITreeReaderFactory trf      = new FrenchTreeReaderFactory();

            try
            {
                ITreeReader tr = trf.NewTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8")));
                try
                {
                    for (Tree tree1; (tree1 = tr.ReadTree()) != null;)
                    {
                        IList <ILabel> pretermYield = tree1.PreTerminalYield();
                        IList <ILabel> yield        = tree1.Yield();
                        int            yieldLen     = yield.Count;
                        for (int i = 0; i < yieldLen; ++i)
                        {
                            CoreLabel             rawToken   = (CoreLabel)yield[i];
                            string                word       = rawToken.Value();
                            string                morphStr   = rawToken.OriginalText();
                            Pair <string, string> lemmaMorph = MorphoFeatureSpecification.SplitMorphString(word, morphStr);
                            string                lemma      = lemmaMorph.First();
                            string                morph      = lemmaMorph.Second();

                            // Fall back to the pre-terminal label when no usable analysis is attached.
                            if (string.IsNullOrEmpty(morph) || morph.Equals("XXX"))
                            {
                                morph = ((CoreLabel)pretermYield[i]).Value();
                            }
                            System.Console.Out.Printf("%s %s %s%n", word, lemma, morph);
                        }
                        System.Console.Out.WriteLine();
                    }
                }
                finally
                {
                    // Close the reader even when reading or printing fails part-way through.
                    tr.Close();
                }
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
Beispiel #27
0
        /// <summary>
        /// Command-line entry point. Feeds every tree of the given tree file to UpdateTagger to
        /// fill a unigram tagger, then calls ResolveDummyTags on the same file and prints
        /// summary counters.
        /// </summary>
        /// <remarks>
        /// Usage is logged and the method returns when no tree file is supplied or the "help"
        /// option is present. I/O failures are not rethrown; their stack trace is printed.
        /// </remarks>
        /// <param name="args">Command-line arguments, parsed with StringUtils.ArgsToProperties against argOptionDefs.</param>
        public static void Main(string[] args)
        {
            Properties options = StringUtils.ArgsToProperties(args, argOptionDefs);

            if (!options.Contains(string.Empty) || options.Contains("help"))
            {
                log.Info(Usage());
                return;
            }
            bool retainNER = PropertiesUtils.GetBool(options, "ner", false);
            bool normalize = PropertiesUtils.GetBool(options, "normalize", true);
            File treeFile  = new File(options.GetProperty(string.Empty));
            TwoDimensionalCounter <string, string> unigramTagger = new TwoDimensionalCounter <string, string>();

            try
            {
                BufferedReader     br  = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
                ITreeReaderFactory trf = new SpanishTreeReaderFactory();
                ITreeReader        tr  = trf.NewTreeReader(br);
                try
                {
                    for (Tree t; (t = tr.ReadTree()) != null;)
                    {
                        UpdateTagger(unigramTagger, t);
                    }
                }
                finally
                {
                    // Closes the underlying reader, also when reading a tree fails.
                    // This happens before ResolveDummyTags is handed the same file.
                    tr.Close();
                }

                System.Console.Out.WriteLine("Resolving DUMMY tags");
                ResolveDummyTags(treeFile, unigramTagger, retainNER, normalize ? new SpanishTreeNormalizer(true, false, false) : null);
                System.Console.Out.WriteLine("#Unknown Word Types: " + MultiWordPreprocessor.ManualUWModel.nUnknownWordTypes);

                // .NET composite formatting needs {index[:format]} placeholders; Java-style
                // %d / %.2f specifiers would be printed literally and the values dropped.
                System.Console.Out.WriteLine(string.Format("#Missing POS: {0} (fixed: {1}, {2:F2}%)", nMissingPOS, nFixedPOS, (double)nFixedPOS / nMissingPOS * 100));
                System.Console.Out.WriteLine(string.Format("#Missing Phrasal: {0} (fixed: {1}, {2:F2}%)", nMissingPhrasal, nFixedPhrasal, (double)nFixedPhrasal / nMissingPhrasal * 100));
                System.Console.Out.WriteLine("Done!");
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
Beispiel #28
0
        /// <summary>
        /// Populates this instance from <paramref name="reader"/> through ReadObject and the
        /// type's setters, then reconciles IsNull and Values when only one of them was read.
        /// </summary>
        /// <param name="reader">ITreeReader to read this instance from.</param>
        public void Read(ITreeReader reader)
        {
            reader.ReadObject(this, setters);

            var nullCount  = IsNull.Count;
            var valueCount = Values.Count;

            if (nullCount == 0 && valueCount > 0)
            {
                // Values without null flags: every value is non-null.
                // Writing the last index first presumably sizes IsNull to match Values
                // (TODO confirm indexer semantics); SetAll then clears every flag.
                IsNull[valueCount - 1] = false;
                IsNull.SetAll(false);
                return;
            }

            if (nullCount > 0 && valueCount == 0)
            {
                // Null flags without values: every value is null, so only the last slot
                // of Values is assigned an empty slice.
                Values[nullCount - 1] = ArraySlice<byte>.Empty;
            }
        }
Beispiel #29
0
        /// <summary>
        /// Creates a PennTreeReader over <paramref name="in"/> that tokenizes with an
        /// ArabicTreebankTokenizer and builds trees with a LabeledScoredTreeFactory.
        /// </summary>
        /// <remarks>
        /// A plain TreeNormalizer is used when noNormalization is set; otherwise an
        /// ArabicTreeNormalizer configured from this factory's retain/change flags. When
        /// filterX is set, the reader is wrapped in a FilteringTreeReader with an XFilter.
        /// </remarks>
        /// <param name="in">Character source the trees are read from.</param>
        /// <returns>The configured tree reader, possibly wrapped in a filter.</returns>
        public virtual ITreeReader NewTreeReader(Reader @in)
        {
            ITreeReader reader;

            if (!noNormalization)
            {
                reader = new PennTreeReader(@in, new LabeledScoredTreeFactory(), new ArabicTreeNormalizer(retainNPTmp, retainPRD, changeNoLabels, retainNPSbj, retainPPClr), new ArabicTreebankTokenizer(@in));
            }
            else
            {
                reader = new PennTreeReader(@in, new LabeledScoredTreeFactory(), new TreeNormalizer(), new ArabicTreebankTokenizer(@in));
            }

            if (!filterX)
            {
                return reader;
            }

            return new FilteringTreeReader(reader, new ArabicTreeReaderFactory.XFilter());
        }
Beispiel #30
0
        /// <summary>
        /// Populates this instance from <paramref name="reader"/> through ReadObject and the
        /// type's setters, then reconciles IsNull and Values when only one of them was read.
        /// </summary>
        /// <remarks>
        /// Nothing is reconciled when IsNull is null after reading.
        /// </remarks>
        /// <param name="reader">ITreeReader to read this instance from.</param>
        public void Read(ITreeReader reader)
        {
            reader.ReadObject(this, setters);

            if (IsNull == null)
            {
                return;
            }

            var nullCount  = IsNull.Count;
            var valueCount = Values.Count;

            if (nullCount == 0 && valueCount > 0)
            {
                // Values without null flags: every value is non-null.
                // Writing the last index first presumably sizes IsNull to match Values
                // (TODO confirm indexer semantics); SetAll then clears every flag.
                IsNull[valueCount - 1] = false;
                IsNull.SetAll(false);
                return;
            }

            if (nullCount > 0 && valueCount == 0)
            {
                // Null flags without values: every value is null, so only the last slot
                // of Values is assigned default(T).
                Values[nullCount - 1] = default(T);
            }
        }