/// <summary>
        /// Builds the tuning set of lexicon events from the development treebank.
        /// </summary>
        /// <remarks>
        /// Each non-leaf node of every development tree is passed to
        /// <c>tlpp.TransformTree</c> together with the tree that contains it, and
        /// the trees are then converted by <c>TreebankToLexiconEvents</c>.
        /// </remarks>
        /// <param name="devTreebank">Development trees to enumerate.</param>
        /// <param name="lexicon">Lexicon forwarded to <c>TreebankToLexiconEvents</c>.</param>
        /// <param name="tlpp">Language parameters whose <c>TransformTree</c> is applied to each non-leaf node.</param>
        /// <returns>The events produced by <c>TreebankToLexiconEvents</c> for the collected trees.</returns>
        private static IList <FactoredLexiconEvent> GetTuningSet(Treebank devTreebank, Edu.Stanford.Nlp.Parser.Lexparser.FactoredLexicon lexicon, ITreebankLangParserParams tlpp)
        {
            IList <Tree> transformedTrees = new List <Tree>(3000);

            foreach (Tree root in devTreebank)
            {
                foreach (Tree node in root)
                {
                    if (node.IsLeaf())
                    {
                        continue;
                    }
                    // NOTE(review): the return value is discarded, so the transform
                    // presumably works in place on the node -- confirm.
                    tlpp.TransformTree(node, root);
                }
                transformedTrees.Add(root);
            }
            return TreebankToLexiconEvents(transformedTrees, lexicon);
        }
        /// <summary>
        /// Command-line entry point: trains a factored lexicon on a training
        /// treebank, tags the development treebank with it, and prints the
        /// accuracy followed by the share of errors per gold tag.
        /// </summary>
        /// <remarks>
        /// Prints a usage message and exits with code -1 unless exactly four
        /// arguments are given. Only Arabic and French are handled; any other
        /// language raises <see cref="NotSupportedException"/>.
        /// </remarks>
        /// <param name="args">
        /// Four values, in order: language, comma-separated feature list,
        /// training treebank path, development treebank path.
        /// </param>
        public static void Main(string[] args)
        {
            if (args.Length != 4)
            {
                System.Console.Error.Printf("Usage: java %s language features train_file dev_file%n", typeof(Edu.Stanford.Nlp.Parser.Lexparser.FactoredLexicon).FullName);
                System.Environment.Exit(-1);
            }

            // Command line options
            Language language = Language.ValueOf(args[0]);
            string featureList = args[1];
            ITreebankLangParserParams tlpp = language.@params;

            Treebank trainTreebank = tlpp.DiskTreebank();
            trainTreebank.LoadPath(args[2]);

            Treebank devTreebank = tlpp.DiskTreebank();
            devTreebank.LoadPath(args[3]);

            Options options = GetOptions(language);

            // Choose the morphological specification and the matching
            // language-specific option flag.
            MorphoFeatureSpecification morphoSpec;
            string languageFlag;
            if (language.Equals(Language.Arabic))
            {
                morphoSpec   = new ArabicMorphoFeatureSpecification();
                languageFlag = "-arabicFactored";
            }
            else if (language.Equals(Language.French))
            {
                morphoSpec   = new FrenchMorphoFeatureSpecification();
                languageFlag = "-frenchFactored";
            }
            else
            {
                throw new NotSupportedException();
            }
            tlpp.SetOptionFlag(new string[] { languageFlag }, 0);

            // Switch on every feature named on the command line.
            foreach (string featureName in featureList.Trim().Split(","))
            {
                morphoSpec.Activate(MorphoFeatureSpecification.MorphoFeatureType.ValueOf(featureName));
            }
            System.Console.Out.WriteLine("Language: " + language.ToString());
            System.Console.Out.WriteLine("Features: " + featureList);

            // Create the word and tag indices, and gather the training trees
            // into a list because the lexicon is trained from a collection.
            System.Console.Out.Write("Loading training trees...");
            IIndex <string> wordIndex  = new HashIndex <string>();
            IIndex <string> tagIndex   = new HashIndex <string>();
            IList <Tree>    trainTrees = new List <Tree>(19000);

            foreach (Tree root in trainTreebank)
            {
                foreach (Tree node in root)
                {
                    if (node.IsLeaf())
                    {
                        continue;
                    }
                    tlpp.TransformTree(node, root);
                }
                trainTrees.Add(root);
            }
            System.Console.Out.Printf("Done! (%d trees)%n", trainTrees.Count);

            // Set up and train the lexicon.
            System.Console.Out.Write("Collecting sufficient statistics for lexicon...");
            Edu.Stanford.Nlp.Parser.Lexparser.FactoredLexicon lexicon = new Edu.Stanford.Nlp.Parser.Lexparser.FactoredLexicon(options, morphoSpec, wordIndex, tagIndex);
            lexicon.InitializeTraining(trainTrees.Count);
            lexicon.Train(trainTrees, null);
            lexicon.FinishTraining();
            System.Console.Out.WriteLine("Done!");
            // Drop the reference to the training trees; they are not used again.
            trainTrees = null;

            // Load the tuning set
            System.Console.Out.Write("Loading tuning set...");
            IList <FactoredLexiconEvent> tuningSet = GetTuningSet(devTreebank, lexicon, tlpp);
            System.Console.Out.Printf("...Done! (%d events)%n", tuningSet.Count);

            // Tag every tuning event and tally hits and misses.
            // TODO(spenceg): Implement tagging accuracy with FactLex
            int nCorrect = 0;
            ICounter <string> errors = new ClassicCounter <string>();

            foreach (FactoredLexiconEvent @event in tuningSet)
            {
                IEnumerator <IntTaggedWord> taggings = lexicon.RuleIteratorByWord(@event.Word(), @event.GetLoc(), @event.FeatureStr());
                ICounter <int> tagScores  = new ClassicCounter <int>();
                bool           sawTagging = false;
                int            goldTagId  = -1;

                while (taggings.MoveNext())
                {
                    sawTagging = true;
                    IntTaggedWord candidate = taggings.Current;
                    if (candidate.Tag() == @event.TagId())
                    {
                        // This candidate carries the gold tag of the event.
                        log.Info("GOLD-");
                        goldTagId = candidate.Tag();
                    }
                    float score = lexicon.Score(candidate, @event.GetLoc(), @event.Word(), @event.FeatureStr());
                    tagScores.IncrementCount(candidate.Tag(), score);
                }

                if (sawTagging)
                {
                    // The hypothesis is the highest-scoring candidate tag.
                    int hypTagId = Counters.Argmax(tagScores);
                    if (hypTagId != goldTagId)
                    {
                        // goldTagId stays negative when no candidate matched
                        // the event's tag; such misses are filed as "UNSEEN".
                        string goldTag = "UNSEEN";
                        if (goldTagId >= 0)
                        {
                            goldTag = lexicon.tagIndex.Get(goldTagId);
                        }
                        errors.IncrementCount(goldTag);
                    }
                    else
                    {
                        ++nCorrect;
                    }
                }
                else
                {
                    System.Console.Error.Printf("NO TAGGINGS: %s %s%n", @event.Word(), @event.FeatureStr());
                }
                log.Info();
            }

            // Output accuracy; events without any tagging count as incorrect
            // because the denominator is the full tuning-set size.
            double accuracy = nCorrect / (double)tuningSet.Count;
            System.Console.Error.Printf("%n%nACCURACY: %.2f%n%n", accuracy * 100.0);

            log.Info("% of errors by type:");
            IList <string> errorTags = new List <string>(errors.KeySet());
            // Order the tags while the counter still holds raw counts, then
            // normalize so that the printed values are shares of all errors.
            errorTags.Sort(Counters.ToComparator(errors, false, true));
            Counters.Normalize(errors);
            foreach (string errorTag in errorTags)
            {
                System.Console.Error.Printf("%s\t%.2f%n", errorTag, errors.GetCount(errorTag) * 100.0);
            }
        }
        /// <summary>
        /// Flattens a list of trees into factored lexicon events, one per token,
        /// for fast iteration in the optimizer.
        /// </summary>
        /// <param name="treebank">Trees whose tokens are converted.</param>
        /// <param name="lexicon">
        /// Source of the word, tag and morphology indices and of the
        /// morphological feature specification used to build each event.
        /// </param>
        /// <returns>
        /// Events in tree order and, within a tree, in token order. Tokens whose
        /// tag has a negative index in <c>lexicon.tagIndex</c> are logged and left out.
        /// </returns>
        private static IList <FactoredLexiconEvent> TreebankToLexiconEvents(IList <Tree> treebank, Edu.Stanford.Nlp.Parser.Lexparser.FactoredLexicon lexicon)
        {
            IList <FactoredLexiconEvent> lexiconEvents = new List <FactoredLexiconEvent>(70000);

            foreach (Tree tree in treebank)
            {
                IList <ILabel> words        = tree.Yield();
                IList <ILabel> preterminals = tree.PreTerminalYield();
                // Sanity check: one preterminal label is expected per word.
                System.Diagnostics.Debug.Assert(words.Count == preterminals.Count);

                int tokenCount = words.Count;
                for (int position = 0; position < tokenCount; ++position)
                {
                    string tag    = preterminals[position].Value();
                    int    tagId  = lexicon.tagIndex.IndexOf(tag);
                    string word   = words[position].Value();
                    int    wordId = lexicon.wordIndex.IndexOf(word);

                    // Keep the token only when its tag has a non-negative index.
                    // (A second filter on low-count <word, tag> pairs is disabled.)
                    if (tagId >= 0)
                    {
                        string featureStr = ((CoreLabel)words[position]).OriginalText();
                        Pair <string, string> lemmaAndMorph = MorphoFeatureSpecification.SplitMorphString(word, featureStr);
                        string lemma   = lemmaAndMorph.First();
                        string richTag = lemmaAndMorph.Second();

                        // An empty reduced tag is replaced by the NoMorphAnalysis marker.
                        string reducedTag = lexicon.morphoSpec.StrToFeatures(richTag).ToString();
                        if (reducedTag.Length == 0)
                        {
                            reducedTag = NoMorphAnalysis;
                        }

                        int lemmaId = lexicon.wordIndex.IndexOf(lemma);
                        int morphId = lexicon.morphIndex.IndexOf(reducedTag);
                        lexiconEvents.Add(new FactoredLexiconEvent(wordId, tagId, lemmaId, morphId, position, word, featureStr));
                    }
                    else
                    {
                        log.Info("Discarding training example: " + word + " " + tag);
                    }
                }
            }
            return lexiconEvents;
        }