Esempio n. 1
0
        /// <summary>
        /// Builds an event-series classifier that turns each series into a feature vector with a
        /// compound feature synthesizer (currently holding a single k-mer frequency synthesizer)
        /// and classifies that vector with a <c>PerceptronCloud</c>.
        /// </summary>
        /// <param name="criterion">
        /// Criterion name handed to both the compound synthesizer and its k-mer component.
        /// </param>
        /// <returns>The assembled event-series classifier.</returns>
        public static IEventSeriesProbabalisticClassifier <string> PerceptronCollectionClassifier(string criterion)
        {
            // NOTE(review): an earlier author's note lists the k-mer synthesizer arguments as
            // (criterion, k, minKmerCount, kmersToTake, smoothingAmt, useUncategorizedForBaseline);
            // confirm against the constructor before tuning these values.
            IFeatureSynthesizer<string>[] components =
            {
                new VarKmerFrequencyFeatureSynthesizer<string>(criterion, 2, 2, 32, 1, false)
            };

            IFeatureSynthesizer<string> featureSource = new CompoundFeatureSynthesizer<string>(criterion, components);
            IProbabalisticClassifier vectorClassifier = new PerceptronCloud(32.0);

            return new SeriesFeatureSynthesizerToVectorProbabalisticClassifierEventSeriesProbabalisticClassifier<string>(featureSource, vectorClassifier);
        }
Esempio n. 2
0
        /// <summary>
        /// Loads the news dataset, builds an "author" classifier from a k-mer frequency
        /// synthesizer and a <c>PerceptronCloud</c>, and hands both to
        /// <c>WriteupGenerator.ProduceClassificationReport</c>.
        /// </summary>
        /// <param name="inFile">Forwarded to <c>getNewsDataset</c> as the input file.</param>
        /// <param name="outDirectory">Forwarded to the report generator as its output location.</param>
        /// <param name="count">Forwarded to <c>getNewsDataset</c>.</param>
        /// <param name="iterations">Forwarded to the report generator as its final argument.</param>
        public static void runNewsClassification(string inFile, string outDirectory, int count, int iterations)
        {
            DiscreteSeriesDatabase<string> articles = getNewsDataset(inFile, count);

            // Feature extraction and vector classification stages, created in this order.
            var authorKmers = new VarKmerFrequencyFeatureSynthesizer<string>("author", 3, 2, 50, 0.6, false);
            var perceptrons = new PerceptronCloud(
                16.0,
                PerceptronTrainingMode.TRAIN_ALL_DATA,
                PerceptronClassificationMode.USE_NEGATIVES | PerceptronClassificationMode.USE_SCORES,
                1.5,
                false);

            IEventSeriesProbabalisticClassifier<string> authorClassifier =
                new SeriesFeatureSynthesizerToVectorProbabalisticClassifierEventSeriesProbabalisticClassifier<string>(authorKmers, perceptrons);

            // The document title embeds the number of entries in the loaded dataset.
            string documentTitle = "Analysis and Classification of " + articles.data.Count + " Ekantipur Articles";

            WriteupGenerator.ProduceClassificationReport<string>(
                documentTitle,
                "Cyrus Cousins with Shirish Pokharel",
                20,
                20,
                outDirectory,
                authorClassifier,
                "characteristic kmer classifier",
                articles,
                "News",
                "author",
                iterations);
        }
Esempio n. 3
0
        /// <summary>
        /// Builds an event-series text classifier. Features come from a compound synthesizer made of
        /// a raw k-mer frequency synthesizer and a Latin-language synthesizer for the target criterion,
        /// followed by one k-mer frequency synthesizer per entry of <paramref name="availableCriteria"/>.
        /// The resulting vectors are classified by an ensemble of a <c>PerceptronCloud</c> and a
        /// z-score-normalized KNN classifier.
        /// </summary>
        /// <param name="criterionByWhichToClassify">Criterion passed to the compound synthesizer and its two fixed components.</param>
        /// <param name="availableCriteria">Criteria that each contribute one additional k-mer frequency synthesizer.</param>
        /// <returns>The assembled event-series classifier.</returns>
        public static IEventSeriesProbabalisticClassifier <string> TextClassifier(string criterionByWhichToClassify, string[] availableCriteria)
        {
            // Components that are always present, tied to the criterion being classified.
            IFeatureSynthesizer<string>[] fixedComponents =
            {
                new VarKmerFrequencyFeatureSynthesizerToRawFrequencies<string>(criterionByWhichToClassify, 2, 2, 8, .1, false),
                new LatinLanguageFeatureSynthesizer(criterionByWhichToClassify)
            };

            // One k-mer frequency synthesizer per available criterion, appended after the fixed ones.
            var perCriterionComponents =
                from criterion in availableCriteria
                select new VarKmerFrequencyFeatureSynthesizer<string>(criterion, 2, 2, 32, 1, false);

            IFeatureSynthesizer<string> featureSource = new CompoundFeatureSynthesizer<string>(
                criterionByWhichToClassify,
                fixedComponents.Concat(perCriterionComponents).ToArray());

            IProbabalisticClassifier[] ensembleMembers =
            {
                new PerceptronCloud(4.0),
                new ZScoreNormalizerClassifierWrapper(
                    new ProbabalisticKnn(3, KnnClassificationMode.WEIGHT_INVERSE_DISTANCE_SQUARED, KnnTrainingMode.TRAIN_ALL_DATA))
            };
            IProbabalisticClassifier vectorClassifier = new EnsembleProbabalisticClassifier(ensembleMembers);

            return new SeriesFeatureSynthesizerToVectorProbabalisticClassifierEventSeriesProbabalisticClassifier<string>(featureSource, vectorClassifier);
        }
Esempio n. 4
0
        /// <summary>
        /// Builds the fixed set of ten labelled classifiers used to compare approaches on the
        /// "region" criterion: two plain k-mer synthesizer classifiers, one perceptron-based,
        /// two KNN-based, four z-score-normalized KNN variants, and one ensemble.
        /// </summary>
        /// <returns>
        /// Ten (label, classifier) pairs, in the order listed in the method body. Each label is
        /// written next to its classifier so the two cannot drift out of step.
        /// </returns>
        public static IEnumerable <Tuple <string, IEventSeriesProbabalisticClassifier <string> > > RegionsTestClassifiers()
        {
            IEventSeriesProbabalisticClassifier <string> synthesizerClassifier =
                new SeriesFeatureSynthesizerToVectorProbabalisticClassifierEventSeriesProbabalisticClassifier <string>(
                    new VarKmerFrequencyFeatureSynthesizer <string>("region", 3, 4, 50, 2.0, false),
                    new NullProbabalisticClassifier()
                    );

            // Same as above except for the fourth synthesizer argument (100 instead of 50).
            IEventSeriesProbabalisticClassifier <string> doublePowerSynthesizerClassifier =
                new SeriesFeatureSynthesizerToVectorProbabalisticClassifierEventSeriesProbabalisticClassifier <string>(
                    new VarKmerFrequencyFeatureSynthesizer <string>("region", 3, 4, 100, 2.0, false),
                    new NullProbabalisticClassifier()
                    );

            IEventSeriesProbabalisticClassifier <string> perceptronBasedClassifier =
                new SeriesFeatureSynthesizerToVectorProbabalisticClassifierEventSeriesProbabalisticClassifier <string>(
                    RegionsTestCompoundSynthesizer(true, true),
                    new PerceptronCloud(16.0, PerceptronTrainingMode.TRAIN_EVEN_WEIGHTS, PerceptronClassificationMode.USE_NEGATIVES)
                    );

            IEventSeriesProbabalisticClassifier <string> evenKnnBasedClassifier =
                new SeriesFeatureSynthesizerToVectorProbabalisticClassifierEventSeriesProbabalisticClassifier <string>(
                    RegionsTestCompoundSynthesizer(false, true),
                    new ProbabalisticKnn(3, KnnClassificationMode.WEIGHT_INVERSE_DISTANCE_SQUARED, KnnTrainingMode.TRAIN_ALL_DATA)
                    );

            IEventSeriesProbabalisticClassifier <string> allKnnBasedClassifier =
                new SeriesFeatureSynthesizerToVectorProbabalisticClassifierEventSeriesProbabalisticClassifier <string>(
                    RegionsTestCompoundSynthesizer(false, true),
                    new ProbabalisticKnn(5, KnnClassificationMode.WEIGHT_INVERSE_DISTANCE_SQUARED, KnnTrainingMode.TRAIN_EVEN_CLASS_SIZES)
                    );

            IEventSeriesProbabalisticClassifier <string> normalizedKnnBasedClassifier =
                new SeriesFeatureSynthesizerToVectorProbabalisticClassifierEventSeriesProbabalisticClassifier <string>(
                    RegionsTestCompoundSynthesizer(false, true),
                    new ZScoreNormalizerClassifierWrapper(new ProbabalisticKnn(5, KnnClassificationMode.WEIGHT_INVERSE_DISTANCE_SQUARED, KnnTrainingMode.TRAIN_EVEN_CLASS_SIZES))
                    );

            IEventSeriesProbabalisticClassifier <string> normalizedKnnBasedClassifier2 =
                new SeriesFeatureSynthesizerToVectorProbabalisticClassifierEventSeriesProbabalisticClassifier <string>(
                    RegionsTestCompoundSynthesizer(false, true),
                    new ZScoreNormalizerClassifierWrapper(new ProbabalisticKnn(5, KnnClassificationMode.WEIGHT_INVERSE_DISTANCE_SQUARED, KnnTrainingMode.TRAIN_ALL_DATA))
                    );

            // The "fewer features" variants drop the Latin-language synthesizer as well.
            IEventSeriesProbabalisticClassifier <string> normalizedKnnBasedClassifier3 =
                new SeriesFeatureSynthesizerToVectorProbabalisticClassifierEventSeriesProbabalisticClassifier <string>(
                    RegionsTestCompoundSynthesizer(false, false),
                    new ZScoreNormalizerClassifierWrapper(new ProbabalisticKnn(5, KnnClassificationMode.WEIGHT_INVERSE_DISTANCE_SQUARED, KnnTrainingMode.TRAIN_EVEN_CLASS_SIZES))
                    );

            IEventSeriesProbabalisticClassifier <string> normalizedKnnBasedClassifier4 =
                new SeriesFeatureSynthesizerToVectorProbabalisticClassifierEventSeriesProbabalisticClassifier <string>(
                    RegionsTestCompoundSynthesizer(false, false),
                    new ZScoreNormalizerClassifierWrapper(new ProbabalisticKnn(5, KnnClassificationMode.WEIGHT_INVERSE_DISTANCE_SQUARED, KnnTrainingMode.TRAIN_ALL_DATA))
                    );

            IEventSeriesProbabalisticClassifier <string> ensembleClassifier =
                new SeriesFeatureSynthesizerToVectorProbabalisticClassifierEventSeriesProbabalisticClassifier <string>(
                    RegionsTestCompoundSynthesizer(true, true),
                    new EnsembleProbabalisticClassifier(
                        new IProbabalisticClassifier[] {
                            new ProbabalisticKnn(3, KnnClassificationMode.WEIGHT_INVERSE_DISTANCE_SQUARED, KnnTrainingMode.TRAIN_EVEN_CLASS_SIZES),
                            new ProbabalisticKnn(5, KnnClassificationMode.WEIGHT_INVERSE_DISTANCE, KnnTrainingMode.TRAIN_ALL_DATA),
                            new PerceptronCloud(4.0),
                            new PerceptronCloud(4.0, classificationMode: PerceptronClassificationMode.USE_NEGATIVES),
                            new PerceptronCloud(4.0, classificationMode: PerceptronClassificationMode.USE_SCORES),
                            new PerceptronCloud(4.0, classificationMode: PerceptronClassificationMode.USE_SCORES | PerceptronClassificationMode.USE_NEGATIVES)
                        }
                        )
                    );

            // Pair each label with its classifier explicitly. Two parallel arrays joined by Zip
            // would silently truncate to the shorter one if an entry were added to only one side.
            return new[] {
                Tuple.Create("region feature synthesizer based classifier", synthesizerClassifier),
                Tuple.Create("double power region feature based classifier", doublePowerSynthesizerClassifier),
                Tuple.Create("perceptron based classifier", perceptronBasedClassifier),
                Tuple.Create("even distribution KNN based classifier", evenKnnBasedClassifier),
                Tuple.Create("all data KNN based classifier", allKnnBasedClassifier),
                Tuple.Create("normalized KNN based classifier even", normalizedKnnBasedClassifier),
                Tuple.Create("normalized KNN based classifier all", normalizedKnnBasedClassifier2),
                Tuple.Create("normalized KNN based classifier even (fewer features)", normalizedKnnBasedClassifier3),
                Tuple.Create("normalized KNN based classifier all (fewer features)", normalizedKnnBasedClassifier4),
                Tuple.Create("ensemble based classifier", ensembleClassifier)
            };
        }

        /// <summary>
        /// Creates the compound "region" feature synthesizer shared by the regions test classifiers.
        /// The "region" and "type" k-mer frequency synthesizers are always included, in that order,
        /// after any optional components.
        /// </summary>
        /// <param name="includeRawFrequencies">When true, a raw k-mer frequency synthesizer for "region" is placed first.</param>
        /// <param name="includeLatinLanguage">When true, a Latin-language synthesizer for "region" is placed before the k-mer synthesizers.</param>
        /// <returns>A compound synthesizer over the selected components for the "region" criterion.</returns>
        private static CompoundFeatureSynthesizer <string> RegionsTestCompoundSynthesizer(bool includeRawFrequencies, bool includeLatinLanguage)
        {
            List <IFeatureSynthesizer <string> > components = new List <IFeatureSynthesizer <string> >();

            if (includeRawFrequencies)
            {
                components.Add(new VarKmerFrequencyFeatureSynthesizerToRawFrequencies <string>("region", 2, 2, 8, .1, false));
            }

            if (includeLatinLanguage)
            {
                components.Add(new LatinLanguageFeatureSynthesizer("region"));
            }

            components.Add(new VarKmerFrequencyFeatureSynthesizer <string>("region", 3, 4, 50, 2.0, false));
            components.Add(new VarKmerFrequencyFeatureSynthesizer <string>("type", 3, 4, 50, 2.0, false));

            return new CompoundFeatureSynthesizer <string>("region", components.ToArray());
        }