C# (CSharp) ITreebankLangParserParams.TransformTree Examples

Programming Language: C# (CSharp)

Method/Function: TransformTree

Examples at hotexamples.com: 4

C# (CSharp) ITreebankLangParserParams.TransformTree - 4 examples found. These are the top-rated real-world C# (CSharp) examples of ITreebankLangParserParams.TransformTree, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

TreebankLanguagePack(14)

Pw(10)

DiskTreebank(9)

HeadFinder(9)

SetInputEncoding(4)

TransformTree(4)

Collinizer(3)

SetOutputEncoding(3)

SetOptionFlag(2)

MLEDependencyGrammarSmoothingParams(1)

MemoryTreebank(1)

SisterSplitters(1)

TestMemoryTreebank(1)

TypedDependencyHeadFinder(1)

Example #1

Show file

File: FactoredLexicon.cs Project: zerouid/Stanford.CoreNLP.NET

        /// <summary>
        /// Converts a development treebank into lexicon events for tuning.
        /// </summary>
        /// <remarks>
        /// Every non-leaf node reached while enumerating a tree is passed to
        /// <c>tlpp.TransformTree</c> together with the tree it came from; the
        /// value returned by the transform is not used here.  The trees are then
        /// collected and handed to <c>TreebankToLexiconEvents</c>.
        /// </remarks>
        /// <param name="devTreebank">The treebank whose trees are transformed and converted.</param>
        /// <param name="lexicon">The lexicon forwarded to <c>TreebankToLexiconEvents</c>.</param>
        /// <param name="tlpp">The language parameters that supply <c>TransformTree</c>.</param>
        /// <returns>The events produced by <c>TreebankToLexiconEvents</c>.</returns>
        private static IList <FactoredLexiconEvent> GetTuningSet(Treebank devTreebank, Edu.Stanford.Nlp.Parser.Lexparser.FactoredLexicon lexicon, ITreebankLangParserParams tlpp)
        {
            IList <Tree> transformedTrees = new List <Tree>(3000);

            foreach (Tree devTree in devTreebank)
            {
                foreach (Tree node in devTree)
                {
                    // Leaves are left untouched; only internal nodes are transformed.
                    if (node.IsLeaf())
                    {
                        continue;
                    }
                    tlpp.TransformTree(node, devTree);
                }
                transformedTrees.Add(devTree);
            }

            return(TreebankToLexiconEvents(transformedTrees, lexicon));
        }

Example #2

Show file

        //  private static String stripTag(String tag) {
        //    if (tag.startsWith("DT")) {
        //      String newTag = tag.substring(2, tag.length());
        //      return newTag.length() > 0 ? newTag : tag;
        //    }
        //    return tag;
        //  }
        /// <summary>
        /// Command-line entry point: loads a treebank, counts words, tags, lemmas and
        /// morphological tags over all of its trees, and prints the resulting statistics.
        /// </summary>
        /// <remarks>
        /// Exactly three arguments are expected (see the usage message): the language
        /// name, the treebank path, and a comma-separated list of morphological
        /// features to activate.  Arabic gets the <c>-arabicFactored</c> option and the
        /// Arabic feature specification; every other language is treated as French.
        /// </remarks>
        /// <param name="args">Command-line arguments: language, filename, features.</param>
        public static void Main(string[] args)
        {
            if (args.Length != 3)
            {
                System.Console.Error.Printf("Usage: java %s language filename features%n", typeof(TreebankFactoredLexiconStats).FullName);
                System.Environment.Exit(-1);
            }
            Language language = Language.ValueOf(args[0]);
            ITreebankLangParserParams tlpp = language.@params;

            if (language.Equals(Language.Arabic))
            {
                string[] options = new string[] { "-arabicFactored" };
                tlpp.SetOptionFlag(options, 0);
            }
            else
            {
                string[] options = new string[] { "-frenchFactored" };
                tlpp.SetOptionFlag(options, 0);
            }
            Treebank tb = tlpp.DiskTreebank();

            tb.LoadPath(args[1]);
            MorphoFeatureSpecification morphoSpec = language.Equals(Language.Arabic) ? new ArabicMorphoFeatureSpecification() : new FrenchMorphoFeatureSpecification();

            string[] features = args[2].Trim().Split(",");
            foreach (string feature in features)
            {
                morphoSpec.Activate(MorphoFeatureSpecification.MorphoFeatureType.ValueOf(feature));
            }
            // Counters
            ICounter <string> wordTagCounter  = new ClassicCounter <string>(30000);
            ICounter <string> morphTagCounter = new ClassicCounter <string>(500);
            //    Counter<String> signatureTagCounter = new ClassicCounter<String>();
            ICounter <string> morphCounter           = new ClassicCounter <string>(500);
            ICounter <string> wordCounter            = new ClassicCounter <string>(30000);
            ICounter <string> tagCounter             = new ClassicCounter <string>(300);
            ICounter <string> lemmaCounter           = new ClassicCounter <string>(25000);
            ICounter <string> lemmaTagCounter        = new ClassicCounter <string>(25000);
            ICounter <string> richTagCounter         = new ClassicCounter <string>(1000);
            ICounter <string> reducedTagCounter      = new ClassicCounter <string>(500);
            ICounter <string> reducedTagLemmaCounter = new ClassicCounter <string>(500);
            IDictionary <string, ICollection <string> > wordLemmaMap           = Generics.NewHashMap();
            TwoDimensionalIntCounter <string, string>   lemmaReducedTagCounter = new TwoDimensionalIntCounter <string, string>(30000);
            TwoDimensionalIntCounter <string, string>   reducedTagTagCounter   = new TwoDimensionalIntCounter <string, string>(500);
            TwoDimensionalIntCounter <string, string>   tagReducedTagCounter   = new TwoDimensionalIntCounter <string, string>(300);
            int numTrees = 0;

            foreach (Tree tree in tb)
            {
                // Apply the language-specific transform to every internal node first.
                foreach (Tree subTree in tree)
                {
                    if (!subTree.IsLeaf())
                    {
                        tlpp.TransformTree(subTree, tree);
                    }
                }
                IList <ILabel> pretermList = tree.PreTerminalYield();
                IList <ILabel> yield       = tree.Yield();
                System.Diagnostics.Debug.Assert(yield.Count == pretermList.Count);
                int yieldLen = yield.Count;
                for (int i = 0; i < yieldLen; ++i)
                {
                    string tag   = pretermList[i].Value();
                    string word  = yield[i].Value();
                    string morph = ((CoreLabel)yield[i]).OriginalText();
                    // Note: if there is no lemma, then we use the surface form.
                    Pair <string, string> lemmaTag = MorphoFeatureSpecification.SplitMorphString(word, morph);
                    string lemma   = lemmaTag.First();
                    string richTag = lemmaTag.Second();
                    // WSGDEBUG
                    if (tag.Contains("MW"))
                    {
                        lemma += "-MWE";
                    }
                    lemmaCounter.IncrementCount(lemma);
                    lemmaTagCounter.IncrementCount(lemma + tag);
                    richTagCounter.IncrementCount(richTag);
                    string reducedTag = morphoSpec.StrToFeatures(richTag).ToString();
                    reducedTagCounter.IncrementCount(reducedTag);
                    reducedTagLemmaCounter.IncrementCount(reducedTag + lemma);
                    wordTagCounter.IncrementCount(word + tag);
                    morphTagCounter.IncrementCount(morph + tag);
                    morphCounter.IncrementCount(morph);
                    wordCounter.IncrementCount(word);
                    tagCounter.IncrementCount(tag);
                    // The counters above use the raw (possibly empty) reduced tag;
                    // the two-dimensional counters below use the "NONE" placeholder.
                    reducedTag = reducedTag.Equals(string.Empty) ? "NONE" : reducedTag;
                    // Record the lemma for this word.  The set is created on first
                    // sight and the lemma is added in every case, so the first lemma
                    // observed for a word is not lost.
                    ICollection <string> lemmasForWord;
                    if (!wordLemmaMap.TryGetValue(word, out lemmasForWord))
                    {
                        lemmasForWord      = Generics.NewHashSet(1);
                        wordLemmaMap[word] = lemmasForWord;
                    }
                    lemmasForWord.Add(lemma);
                    lemmaReducedTagCounter.IncrementCount(lemma, reducedTag);
                    reducedTagTagCounter.IncrementCount(lemma + reducedTag, tag);
                    tagReducedTagCounter.IncrementCount(tag, reducedTag);
                }
                ++numTrees;
            }
            // Barf...
            System.Console.Out.WriteLine("Language: " + language.ToString());
            System.Console.Out.Printf("#trees:\t%d%n", numTrees);
            System.Console.Out.Printf("#tokens:\t%d%n", (int)wordCounter.TotalCount());
            System.Console.Out.Printf("#words:\t%d%n", wordCounter.KeySet().Count);
            System.Console.Out.Printf("#tags:\t%d%n", tagCounter.KeySet().Count);
            System.Console.Out.Printf("#wordTagPairs:\t%d%n", wordTagCounter.KeySet().Count);
            System.Console.Out.Printf("#lemmas:\t%d%n", lemmaCounter.KeySet().Count);
            System.Console.Out.Printf("#lemmaTagPairs:\t%d%n", lemmaTagCounter.KeySet().Count);
            System.Console.Out.Printf("#feattags:\t%d%n", reducedTagCounter.KeySet().Count);
            System.Console.Out.Printf("#feattag+lemmas:\t%d%n", reducedTagLemmaCounter.KeySet().Count);
            System.Console.Out.Printf("#richtags:\t%d%n", richTagCounter.KeySet().Count);
            System.Console.Out.Printf("#richtag+lemma:\t%d%n", morphCounter.KeySet().Count);
            System.Console.Out.Printf("#richtag+lemmaTagPairs:\t%d%n", morphTagCounter.KeySet().Count);
            // Extra
            System.Console.Out.WriteLine("==================");
            StringBuilder sbNoLemma    = new StringBuilder();
            StringBuilder sbMultLemmas = new StringBuilder();

            foreach (KeyValuePair <string, ICollection <string> > wordLemmas in wordLemmaMap)
            {
                string word = wordLemmas.Key;
                ICollection <string> lemmas = wordLemmas.Value;
                if (lemmas.Count == 0)
                {
                    sbNoLemma.Append("NO LEMMAS FOR WORD: " + word + "\n");
                    continue;
                }
                if (lemmas.Count > 1)
                {
                    sbMultLemmas.Append("MULTIPLE LEMMAS: " + word + " " + SetToString(lemmas) + "\n");
                    continue;
                }
                // Exactly one lemma remains here.  An enumerator must be advanced
                // with MoveNext() before Current holds an element.
                string lemma;
                using (IEnumerator <string> lemmaItr = lemmas.GetEnumerator())
                {
                    lemmaItr.MoveNext();
                    lemma = lemmaItr.Current;
                }
                ICollection <string> reducedTags = lemmaReducedTagCounter.GetCounter(lemma).KeySet();
                if (reducedTags.Count > 1)
                {
                    System.Console.Out.Printf("%s --> %s%n", word, lemma);
                    foreach (string reducedTag in reducedTags)
                    {
                        int    count   = lemmaReducedTagCounter.GetCount(lemma, reducedTag);
                        string posTags = SetToString(reducedTagTagCounter.GetCounter(lemma + reducedTag).KeySet());
                        System.Console.Out.Printf("\t%s\t%d\t%s%n", reducedTag, count, posTags);
                    }
                    System.Console.Out.WriteLine();
                }
            }
            System.Console.Out.WriteLine("==================");
            System.Console.Out.WriteLine(sbNoLemma.ToString());
            System.Console.Out.WriteLine(sbMultLemmas.ToString());
            System.Console.Out.WriteLine("==================");
            IList <string> tags = new List <string>(tagReducedTagCounter.FirstKeySet());

            tags.Sort();
            foreach (string tag_1 in tags)
            {
                System.Console.Out.WriteLine(tag_1);
                ICollection <string> reducedTags = tagReducedTagCounter.GetCounter(tag_1).KeySet();
                foreach (string reducedTag in reducedTags)
                {
                    int count = tagReducedTagCounter.GetCount(tag_1, reducedTag);
                    //        reducedTag = reducedTag.equals("") ? "NONE" : reducedTag;
                    System.Console.Out.Printf("\t%s\t%d%n", reducedTag, count);
                }
                System.Console.Out.WriteLine();
            }
            System.Console.Out.WriteLine("==================");
        }

Example #3

Show file

File: FactoredLexicon.cs Project: zerouid/Stanford.CoreNLP.NET

        /// <summary>
        /// Command-line entry point: trains a factored lexicon on a training treebank
        /// and reports how often its best-scoring tag matches the gold tag on a
        /// development treebank.
        /// </summary>
        /// <remarks>
        /// Exactly four arguments are expected (see the usage message): language,
        /// comma-separated feature list, training file and development file.  Only
        /// Arabic and French are handled; any other language raises
        /// <see cref="NotSupportedException"/>.
        /// </remarks>
        /// <param name="args">Command-line arguments: language, features, train_file, dev_file.</param>
        public static void Main(string[] args)
        {
            if (args.Length != 4)
            {
                System.Console.Error.Printf("Usage: java %s language features train_file dev_file%n", typeof(Edu.Stanford.Nlp.Parser.Lexparser.FactoredLexicon).FullName);
                System.Environment.Exit(-1);
            }

            // Command line options
            Language language = Language.ValueOf(args[0]);
            ITreebankLangParserParams tlpp = language.@params;

            Treebank trainTreebank = tlpp.DiskTreebank();
            trainTreebank.LoadPath(args[2]);

            Treebank devTreebank = tlpp.DiskTreebank();
            devTreebank.LoadPath(args[3]);

            Options options = GetOptions(language);
            MorphoFeatureSpecification morphoSpec;

            // Pick the feature specification and the matching parser option flag.
            if (language.Equals(Language.Arabic))
            {
                morphoSpec = new ArabicMorphoFeatureSpecification();
                tlpp.SetOptionFlag(new string[] { "-arabicFactored" }, 0);
            }
            else if (language.Equals(Language.French))
            {
                morphoSpec = new FrenchMorphoFeatureSpecification();
                tlpp.SetOptionFlag(new string[] { "-frenchFactored" }, 0);
            }
            else
            {
                throw new NotSupportedException();
            }

            // Activate each requested morphological feature.
            foreach (string featureName in args[1].Trim().Split(","))
            {
                morphoSpec.Activate(MorphoFeatureSpecification.MorphoFeatureType.ValueOf(featureName));
            }
            System.Console.Out.WriteLine("Language: " + language.ToString());
            System.Console.Out.WriteLine("Features: " + args[1]);

            // Create word and tag indices
            // Save trees in a collection since the interface requires that....
            System.Console.Out.Write("Loading training trees...");
            IList <Tree>    trainTrees = new List <Tree>(19000);
            IIndex <string> wordIndex  = new HashIndex <string>();
            IIndex <string> tagIndex   = new HashIndex <string>();

            foreach (Tree trainTree in trainTreebank)
            {
                foreach (Tree node in trainTree)
                {
                    if (node.IsLeaf())
                    {
                        continue;
                    }
                    tlpp.TransformTree(node, trainTree);
                }
                trainTrees.Add(trainTree);
            }
            System.Console.Out.Printf("Done! (%d trees)%n", trainTrees.Count);

            // Setup and train the lexicon.
            System.Console.Out.Write("Collecting sufficient statistics for lexicon...");
            Edu.Stanford.Nlp.Parser.Lexparser.FactoredLexicon lexicon = new Edu.Stanford.Nlp.Parser.Lexparser.FactoredLexicon(options, morphoSpec, wordIndex, tagIndex);
            lexicon.InitializeTraining(trainTrees.Count);
            lexicon.Train(trainTrees, null);
            lexicon.FinishTraining();
            System.Console.Out.WriteLine("Done!");
            trainTrees = null;

            // Load the tuning set
            System.Console.Out.Write("Loading tuning set...");
            IList <FactoredLexiconEvent> tuningSet = GetTuningSet(devTreebank, lexicon, tlpp);
            System.Console.Out.Printf("...Done! (%d events)%n", tuningSet.Count);

            // Print the probabilities that we obtain
            // TODO(spenceg): Implement tagging accuracy with FactLex
            int numCorrect = 0;
            ICounter <string> errorsByGoldTag = new ClassicCounter <string>();

            foreach (FactoredLexiconEvent @event in tuningSet)
            {
                IEnumerator <IntTaggedWord> candidates = lexicon.RuleIteratorByWord(@event.Word(), @event.GetLoc(), @event.FeatureStr());
                ICounter <int> tagScores = new ClassicCounter <int>();
                bool sawCandidate = false;
                int  goldTagId    = -1;

                while (candidates.MoveNext())
                {
                    sawCandidate = true;
                    IntTaggedWord candidate = candidates.Current;
                    if (candidate.Tag() == @event.TagId())
                    {
                        log.Info("GOLD-");
                        goldTagId = candidate.Tag();
                    }
                    float candidateScore = lexicon.Score(candidate, @event.GetLoc(), @event.Word(), @event.FeatureStr());
                    tagScores.IncrementCount(candidate.Tag(), candidateScore);
                }

                if (sawCandidate)
                {
                    // Score the tagging
                    int hypTagId = Counters.Argmax(tagScores);
                    if (hypTagId != goldTagId)
                    {
                        // A gold tag that never appeared among the candidates is counted as "UNSEEN".
                        string goldTag = goldTagId < 0 ? "UNSEEN" : lexicon.tagIndex.Get(goldTagId);
                        errorsByGoldTag.IncrementCount(goldTag);
                    }
                    else
                    {
                        ++numCorrect;
                    }
                }
                else
                {
                    System.Console.Error.Printf("NO TAGGINGS: %s %s%n", @event.Word(), @event.FeatureStr());
                }
                log.Info();
            }

            // Output accuracy
            double accuracy = numCorrect / (double)tuningSet.Count;
            System.Console.Error.Printf("%n%nACCURACY: %.2f%n%n", accuracy * 100.0);
            log.Info("% of errors by type:");

            // Sort the keys on the raw counts, then normalize before printing.
            IList <string> sortedErrorKeys = new List <string>(errorsByGoldTag.KeySet());
            sortedErrorKeys.Sort(Counters.ToComparator(errorsByGoldTag, false, true));
            Counters.Normalize(errorsByGoldTag);
            foreach (string errorKey in sortedErrorKeys)
            {
                System.Console.Error.Printf("%s\t%.2f%n", errorKey, errorsByGoldTag.GetCount(errorKey) * 100.0);
            }
        }

Example #4

Show file

        /// <summary>Do the category splitting of the tree passed in.</summary>
        /// <remarks>
        /// This is initially called on the root node of a tree, and it recursively
        /// calls itself on children.  A depth first left-to-right traversal is
        /// done whereby a tree node's children are first transformed and then
        /// the parent is transformed.  At the time of calling, the original root
        /// always sits above the current node.  This routine can be assumed to,
        /// and does, change the tree passed in: it destructively modifies tree nodes,
        /// and makes new tree structure when it needs to.
        /// <para>
        /// When <paramref name="t"/> has no parent (it is the root, or
        /// <paramref name="root"/> is null), the parent and grandparent category
        /// strings are empty and the parent-dependent unary markings are skipped.
        /// </para>
        /// </remarks>
        /// <param name="t">The tree node to subcategorize.</param>
        /// <param name="root">
        /// The root of the tree.  It must contain
        /// <paramref name="t"/>
        /// or
        /// this code will throw a NullReferenceException.
        /// </param>
        /// <returns>
        /// The annotated tree: null for a null input, the node itself for a leaf,
        /// and otherwise the result of the language-specific
        /// <c>tlpParams.TransformTree</c> (or the relabeled preterminal itself when
        /// tag splitting is disabled).
        /// </returns>
        /// <exception cref="Exception">
        /// Thrown for a phrasal node when the head finder returns no head or a head
        /// without a label, or when the head label lacks a word or tag annotation.
        /// </exception>
        private Tree TransformTreeHelper(Tree t, Tree root)
        {
            if (t == null)
            {
                // handle null
                return(null);
            }
            if (t.IsLeaf())
            {
                //No need to change the label
                return(t);
            }
            string cat = t.Label().Value();
            Tree   parent;
            string parentStr;
            string grandParentStr;

            if (root == null || t.Equals(root))
            {
                parent    = null;
                parentStr = string.Empty;
            }
            else
            {
                parent    = t.Parent(root);
                parentStr = parent.Label().Value();
            }
            if (parent == null || parent.Equals(root))
            {
                grandParentStr = string.Empty;
            }
            else
            {
                grandParentStr = parent.Parent(root).Label().Value();
            }
            string baseParentStr      = tlpParams.TreebankLanguagePack().BasicCategory(parentStr);
            string baseGrandParentStr = tlpParams.TreebankLanguagePack().BasicCategory(grandParentStr);

            //System.out.println(t.label().value() + " " + parentStr + " " + grandParentStr);
            if (t.IsPreTerminal())
            {
                // handle tags
                Tree childResult = TransformTreeHelper(t.Children()[0], null);
                // recurse
                string word = childResult.Value();
                // would be nicer if Word/CWT ??
                if (!trainOptions.noTagSplit)
                {
                    if (trainOptions.tagPA)
                    {
                        string test = cat + "^" + baseParentStr;
                        if (!trainOptions.tagSelectiveSplit || trainOptions.splitters.Contains(test))
                        {
                            cat = test;
                        }
                    }
                    // parent is null when this preterminal is the root (or root is null);
                    // guard it the same way the markUnary == 2 case below does.
                    if (trainOptions.markUnaryTags && parent != null && parent.NumChildren() == 1)
                    {
                        cat = cat + "^U";
                    }
                }
                // otherwise, leave the tags alone!
                // Label label = new CategoryWordTag(cat, word, cat);
                ILabel label = t.Label().LabelFactory().NewLabel(t.Label());
                label.SetValue(cat);
                if (label is IHasCategory)
                {
                    ((IHasCategory)label).SetCategory(cat);
                }
                if (label is IHasWord)
                {
                    ((IHasWord)label).SetWord(word);
                }
                if (label is IHasTag)
                {
                    ((IHasTag)label).SetTag(cat);
                }
                t.SetLabel(label);
                t.SetChild(0, childResult);
                // just in case word is changed
                if (trainOptions.noTagSplit)
                {
                    return(t);
                }
                else
                {
                    // language-specific transforms
                    return(tlpParams.TransformTree(t, root));
                }
            }
            // end isPreTerminal()
            // handle phrasal categories
            Tree[] kids = t.Children();
            for (int childNum = 0; childNum < kids.Length; childNum++)
            {
                Tree child       = kids[childNum];
                Tree childResult = TransformTreeHelper(child, root);
                // recursive call
                t.SetChild(childNum, childResult);
            }
            // The head is determined after the children have been transformed.
            Tree headChild = hf.DetermineHead(t);

            if (headChild == null || headChild.Label() == null)
            {
                throw new Exception("TreeAnnotator: null head found for tree [suggesting incomplete/wrong HeadFinder]:\n" + t);
            }
            ILabel headLabel = headChild.Label();

            if (!(headLabel is IHasWord))
            {
                throw new Exception("TreeAnnotator: Head label lacks a Word annotation!");
            }
            if (!(headLabel is IHasTag))
            {
                throw new Exception("TreeAnnotator: Head label lacks a Tag annotation!");
            }
            string word_1 = ((IHasWord)headLabel).Word();
            string tag    = ((IHasTag)headLabel).Tag();
            // String baseTag = tlpParams.treebankLanguagePack().basicCategory(tag);
            string baseCat = tlpParams.TreebankLanguagePack().BasicCategory(cat);

            /* Sister annotation. Potential problem: if multiple sisters are
             * strong indicators for a single category's expansions.  This
             * happens concretely in the Chinese Treebank when NP (object)
             * has left sisters VV and AS.  Could lead to too much
             * sparseness.  The ideal solution would be to give the
             * splitting list an ordering, and take only the highest (~most
             * informative/reliable) sister annotation.
             */
            if (trainOptions.sisterAnnotate && !trainOptions.smoothing && baseParentStr.Length > 0)
            {
                IList <string> leftSis  = ListBasicCategories(SisterAnnotationStats.LeftSisterLabels(t, parent));
                IList <string> rightSis = ListBasicCategories(SisterAnnotationStats.RightSisterLabels(t, parent));
                IList <string> leftAnn  = new List <string>();
                IList <string> rightAnn = new List <string>();
                foreach (string s in leftSis)
                {
                    //s = baseCat+"=l="+tlpParams.treebankLanguagePack().basicCategory(s);
                    leftAnn.Add(baseCat + "=l=" + tlpParams.TreebankLanguagePack().BasicCategory(s));
                }
                //System.out.println("left-annotated test string " + s);
                foreach (string s_1 in rightSis)
                {
                    //s = baseCat+"=r="+tlpParams.treebankLanguagePack().basicCategory(s);
                    rightAnn.Add(baseCat + "=r=" + tlpParams.TreebankLanguagePack().BasicCategory(s_1));
                }
                // Only the first matching sister splitter is applied.
                foreach (string annCat in trainOptions.sisterSplitters)
                {
                    //System.out.println("annotated test string " + annCat);
                    if (leftAnn.Contains(annCat) || rightAnn.Contains(annCat))
                    {
                        cat = cat + annCat.ReplaceAll("^" + baseCat, string.Empty);
                        break;
                    }
                }
            }
            // Parent annotation.
            if (trainOptions.Pa && !trainOptions.smoothing && baseParentStr.Length > 0)
            {
                string cat2 = baseCat + "^" + baseParentStr;
                if (!trainOptions.selectiveSplit || trainOptions.splitters.Contains(cat2))
                {
                    cat = cat + "^" + baseParentStr;
                }
            }
            // Grandparent annotation; under selective splitting it is only added
            // when the category already carries a "^" annotation.
            if (trainOptions.gPA && !trainOptions.smoothing && grandParentStr.Length > 0)
            {
                if (trainOptions.selectiveSplit)
                {
                    string cat2 = baseCat + "^" + baseParentStr + "~" + baseGrandParentStr;
                    if (cat.Contains("^") && trainOptions.splitters.Contains(cat2))
                    {
                        cat = cat + "~" + baseGrandParentStr;
                    }
                }
                else
                {
                    cat = cat + "~" + baseGrandParentStr;
                }
            }
            // Unary marking: mode 1 marks a node with a single child, mode 2 marks
            // a node that is the single child of its parent.
            if (trainOptions.markUnary > 0)
            {
                if (trainOptions.markUnary == 1 && kids.Length == 1 && kids[0].Depth() >= 2)
                {
                    cat = cat + "-U";
                }
                else
                {
                    if (trainOptions.markUnary == 2 && parent != null && parent.NumChildren() == 1 && t.Depth() >= 2)
                    {
                        cat = cat + "-u";
                    }
                }
            }
            if (trainOptions.rightRec && RightRec(t, baseCat))
            {
                cat = cat + "-R";
            }
            if (trainOptions.leftRec && LeftRec(t, baseCat))
            {
                cat = cat + "-L";
            }
            if (trainOptions.splitPrePreT && t.IsPrePreTerminal())
            {
                cat = cat + "-PPT";
            }
            //    Label label = new CategoryWordTag(cat, word, tag);
            ILabel label_1 = t.Label().LabelFactory().NewLabel(t.Label());

            label_1.SetValue(cat);
            if (label_1 is IHasCategory)
            {
                ((IHasCategory)label_1).SetCategory(cat);
            }
            if (label_1 is IHasWord)
            {
                ((IHasWord)label_1).SetWord(word_1);
            }
            if (label_1 is IHasTag)
            {
                ((IHasTag)label_1).SetTag(tag);
            }
            t.SetLabel(label_1);
            // language-specific transforms
            return(tlpParams.TransformTree(t, root));
        }