Пример #1
0
        /// <summary>
        /// Loads the news dataset and hands it, together with a set of candidate classifiers and a
        /// compound feature synthesizer, to <c>WriteupGenerator.ProduceClassifierComparisonWriteup</c>,
        /// using "author" as the classification criterion.
        /// </summary>
        /// <param name="inFile">Forwarded to <c>getNewsDataset</c> as the dataset source.</param>
        /// <param name="outDirectory">Forwarded to the writeup generator; presumably the output location (confirm against the generator).</param>
        /// <param name="count">Forwarded to <c>getNewsDataset</c>; presumably bounds how much is loaded (confirm there).</param>
        /// <param name="iterations">Forwarded unchanged to the writeup generator.</param>
        public static void runNewsClassifierDerivation(string inFile, string outDirectory, int count, int iterations)
        {
            DiscreteSeriesDatabase<string> newsData = getNewsDataset(inFile, count);
            // Disabled: replace the database with the first part of a split.
            //newsData = newsData.SplitDatabase (.1).Item1;

            // Candidate list: the standard test classifiers followed by the advanced ones,
            // with the first advanced entry skipped.
            var standardCandidates = TextClassifierFactory.NewsTestClassifiers();
            var advancedCandidates = TextClassifierFactory.NewsTestAdvancedClassifiers().Skip(1);
            IEnumerable<Tuple<string, IEventSeriesProbabalisticClassifier<string>>> candidates =
                standardCandidates.Concat(advancedCandidates);

            // Component synthesizers, combined below under the "author" criterion.
            IFeatureSynthesizer<string>[] components = new IFeatureSynthesizer<string>[]
            {
                new VarKmerFrequencyFeatureSynthesizer<string>("author", 3, 2, 60, 0.7, false),
                new VarKmerFrequencyFeatureSynthesizer<string>("location", 3, 3, 50, 1, false),
                new VarKmerFrequencyFeatureSynthesizer<string>("gender", 3, 8, 50, 10, false),
                new DateValueFeatureSynthesizer("date"),
                new LatinLanguageFeatureSynthesizer("author")
            };
            IFeatureSynthesizer<string> compoundSynth = new CompoundFeatureSynthesizer<string>("author", components);

            WriteupGenerator.ProduceClassifierComparisonWriteup<string>(
                "Classifier Comparison Analysis on Ekantipur News Articles",
                "Cyrus Cousins with Shirish Pokharel",
                20,
                20,
                outDirectory,
                candidates.ToArray(),
                "News",
                newsData,
                "author",
                iterations,
                new[] { "author", "location", "date", "gender" },
                compoundSynth);
        }
Пример #2
0
        /*
         * public void TestNewClassifiers(){
         *
         * }
         */

        /// <summary>
        /// Loads the regions database and passes it, with the perceptron test classifiers and a
        /// compound "region" feature synthesizer, to
        /// <c>WriteupGenerator.ProduceClassifierComparisonWriteup</c> for the
        /// "Spanish Language Dialect Analysis" writeup.
        /// </summary>
        public static void TestLatex()
        {
            // Hard-coded switches. In test mode both region flags are off, only the first two
            // classifiers are compared, and 1 (rather than 4) is passed as the iteration count.
            bool testMode = true;
            bool shorten = true;
            bool costaRica = !testMode;
            bool cuba = !testMode;

            DiscreteSeriesDatabase<string> regionData = LoadRegionsDatabase(testMode, shorten, costaRica, cuba);

            // Disabled experiments kept for reference:
            //  - train a VarKmerFrequencyFeatureSynthesizer<string>("region", 3, 4, 50, 2.0, true) on the
            //    database and print its feature schema and the synthesized features of a 25-event series;
            //  - in test mode, replace the database with regionData.SplitDatabase (.25).Item1.

            //TODO: Add length distribution for documents and each type.

            // Alternative single synthesizers tried previously:
            //  new RegressorFeatureSynthesizerKmerFrequenciesVarK<string>("region", 8, 2, 100, 3)  (noted as the slow way)
            //  new VarKmerFrequencyFeatureSynthesizer<string>("region", 3, 4, 50, 2.0, true)
            // Alternative classifier source: TextClassifierFactory.RegionsTestClassifiers().
            IEnumerable<Tuple<string, IEventSeriesProbabalisticClassifier<string>>> allClassifiers =
                TextClassifierFactory.RegionsPerceptronTestClassifiers().ToArray();

            // Component synthesizers, combined below under the "region" criterion.
            IFeatureSynthesizer<string>[] components = new IFeatureSynthesizer<string>[]
            {
                new VarKmerFrequencyFeatureSynthesizerToRawFrequencies<string>("region", 2, 2, 16, .1, false),
                new LatinLanguageFeatureSynthesizer("region"),
                new VarKmerFrequencyFeatureSynthesizer<string>("region", 3, 4, 50, 2.0, false),
                new VarKmerFrequencyFeatureSynthesizer<string>("type", 3, 3, 50, 2.0, false)
            };
            IFeatureSynthesizer<string> compoundSynth = new CompoundFeatureSynthesizer<string>("region", components);

            // Test mode compares only the first two classifiers.
            IEnumerable<Tuple<string, IEventSeriesProbabalisticClassifier<string>>> compared =
                testMode ? allClassifiers.Take(2) : allClassifiers;

            int iterations = testMode ? 1 : 4;

            WriteupGenerator.ProduceClassifierComparisonWriteup<string>(
                "Spanish Language Dialect Analysis",
                "Cyrus Cousins",
                11,
                16,
                "../../out/spanish/classification/",
                compared.ToArray(),
                "Spanish Language",
                regionData,
                "region",
                iterations,
                analysisCriteria: new[] { "region", "type" },
                synthesizer: compoundSynth);

            // Disabled idea: when a classifier is a
            // SeriesFeatureSynthesizerToVectorProbabalisticClassifierEventSeriesProbabalisticClassifier<string>,
            // append a "Feature Synthesizer Analysis" section built from
            // its synthesizer's FeatureSynthesizerLatexString(regionData).
        }