/// <summary>
/// Produces the classifier comparison writeup for the news dataset, classifying by "author".
/// </summary>
/// <param name="inFile">Forwarded to getNewsDataset as the dataset source.</param>
/// <param name="outDirectory">Forwarded to the writeup generator as its output location argument.</param>
/// <param name="count">Forwarded to getNewsDataset alongside <paramref name="inFile"/>.</param>
/// <param name="iterations">Forwarded unchanged to the writeup generator.</param>
public static void runNewsClassifierDerivation(string inFile, string outDirectory, int count, int iterations) {
    // Load the database.
    DiscreteSeriesDatabase<string> newsData = getNewsDataset(inFile, count);
    // Disabled subsampling step, kept for reference:
    //newsData = newsData.SplitDatabase (.1).Item1;

    // Basic news classifiers first, then the advanced ones with their first entry skipped.
    var basicClassifiers = TextClassifierFactory.NewsTestClassifiers();
    var advancedClassifiers = TextClassifierFactory.NewsTestAdvancedClassifiers().Skip(1);
    IEnumerable<Tuple<string, IEventSeriesProbabalisticClassifier<string>>> candidates =
        basicClassifiers.Concat(advancedClassifiers);

    // Component synthesizers combined into a single compound synthesizer keyed on "author".
    IFeatureSynthesizer<string>[] components = {
        new VarKmerFrequencyFeatureSynthesizer<string>("author", 3, 2, 60, 0.7, false),
        new VarKmerFrequencyFeatureSynthesizer<string>("location", 3, 3, 50, 1, false),
        new VarKmerFrequencyFeatureSynthesizer<string>("gender", 3, 8, 50, 10, false),
        new DateValueFeatureSynthesizer("date"),
        new LatinLanguageFeatureSynthesizer("author")
    };
    IFeatureSynthesizer<string> compound = new CompoundFeatureSynthesizer<string>("author", components);

    // The candidate sequence is only materialized here, after the synthesizer has been built.
    var candidateArray = candidates.ToArray();
    string[] analysisCriteria = { "author", "location", "date", "gender" };

    WriteupGenerator.ProduceClassifierComparisonWriteup<string>(
        "Classifier Comparison Analysis on Ekantipur News Articles",
        "Cyrus Cousins with Shirish Pokharel",
        20,
        20,
        outDirectory,
        candidateArray,
        "News",
        newsData,
        "author",
        iterations,
        analysisCriteria,
        compound);
}
/// <summary>
/// Builds an event-series classifier that pairs a compound feature synthesizer
/// (a single k-mer frequency synthesizer on <paramref name="criterion"/>) with a
/// <c>PerceptronCloud</c> vector classifier.
/// </summary>
/// <param name="criterion">Criterion name handed to both the compound and the k-mer synthesizer.</param>
/// <returns>The assembled event-series classifier.</returns>
public static IEventSeriesProbabalisticClassifier<string> PerceptronCollectionClassifier(string criterion) {
    // Argument order noted by the original author for the k-mer synthesizers:
    //   string criterion, int k, int minKmerCount, int kmersToTake, double smoothingAmt, bool useUncategorizedForBaseline
    // Alternatives that are currently disabled:
    //   new VarKmerFrequencyFeatureSynthesizerToRawFrequencies<string>(criterion, 2, 2, 8, .1, false)
    //   new LatinLanguageFeatureSynthesizer(criterion)
    IFeatureSynthesizer<string>[] parts = {
        new VarKmerFrequencyFeatureSynthesizer<string>(criterion, 2, 2, 32, 1, false)
    };
    IFeatureSynthesizer<string> featureSource = new CompoundFeatureSynthesizer<string>(criterion, parts);

    // Disabled alternative: new ProbabalisticKNN(5, ProbabalisticKNN.WEIGHT_INVERSE_DISTANCE_SQUARED)
    IProbabalisticClassifier vectorClassifier = new PerceptronCloud(32.0);

    return new SeriesFeatureSynthesizerToVectorProbabalisticClassifierEventSeriesProbabalisticClassifier<string>(
        featureSource,
        vectorClassifier);
}
/// <summary>
/// Builds an event-series classifier whose features come from a compound synthesizer and whose
/// vector classifier is an ensemble of a <c>PerceptronCloud</c> and a z-score-wrapped k-NN.
/// </summary>
/// <param name="criterionByWhichToClassify">
/// Criterion passed to the compound synthesizer and to the two fixed synthesizers.
/// </param>
/// <param name="availableCriteria">
/// One additional k-mer frequency synthesizer is created per entry, in order.
/// </param>
/// <returns>The assembled event-series classifier.</returns>
public static IEventSeriesProbabalisticClassifier<string> TextClassifier(string criterionByWhichToClassify, string[] availableCriteria) {
    // Two synthesizers that are always present, both tied to the classification criterion.
    IFeatureSynthesizer<string>[] fixedParts = {
        new VarKmerFrequencyFeatureSynthesizerToRawFrequencies<string>(criterionByWhichToClassify, 2, 2, 8, .1, false),
        new LatinLanguageFeatureSynthesizer(criterionByWhichToClassify)
    };

    // Lazily projected: the per-criterion synthesizers are constructed when ToArray runs below.
    var perCriterionParts = availableCriteria.Select(
        name => new VarKmerFrequencyFeatureSynthesizer<string>(name, 2, 2, 32, 1, false));

    IFeatureSynthesizer<string> featureSource = new CompoundFeatureSynthesizer<string>(
        criterionByWhichToClassify,
        fixedParts.Concat(perCriterionParts).ToArray());

    // Ensemble members, created in the same order they appear in the ensemble array.
    var perceptrons = new PerceptronCloud(4.0);
    var normalizedKnn = new ZScoreNormalizerClassifierWrapper(
        new ProbabalisticKnn(3, KnnClassificationMode.WEIGHT_INVERSE_DISTANCE_SQUARED, KnnTrainingMode.TRAIN_ALL_DATA));
    IProbabalisticClassifier[] members = { perceptrons, normalizedKnn };
    IProbabalisticClassifier ensemble = new EnsembleProbabalisticClassifier(members);

    return new SeriesFeatureSynthesizerToVectorProbabalisticClassifierEventSeriesProbabalisticClassifier<string>(
        featureSource,
        ensemble);
}
/*
 * public void TestNewClassifiers(){
 *
 * }
 */

/// <summary>
/// Loads the regions database and produces the "Spanish Language Dialect Analysis"
/// classifier comparison writeup under "../../out/spanish/classification/".
/// The local <c>test</c> flag turns off the costarica/cuba flags, limits the run to the
/// first two classifiers, and uses 1 iteration instead of 4.
/// </summary>
public static void TestLatex() {
    bool test = true;
    bool shorten = true;
    bool costarica = true;
    bool cuba = true;
    if (test) {
        // Test runs pass false for both of these flags.
        costarica = false;
        cuba = false;
    }

    DiscreteSeriesDatabase<string> regionData = LoadRegionsDatabase(test, shorten, costarica, cuba);

    // Disabled debugging aid: train a single synthesizer and dump its schema and features.
    //IFeatureSynthesizer<string> testSynth = new VarKmerFrequencyFeatureSynthesizer<string>("region", 3, 4, 50, 2.0, true);
    //testSynth.Train (regionData);
    //Console.WriteLine (testSynth.GetFeatureSchema().FoldToString ());
    //Console.WriteLine (testSynth.SynthesizeFeaturesSumToOne(new DiscreteEventSeries<string>(regionData.data.First ().labels, regionData.data.First ().Take (25).ToArray ())).FoldToString (d => d.ToString ("F3")));
    //Console.ReadLine ();

    // Disabled: shrink the database for test runs.
    //if(test){
    //	regionData = regionData.SplitDatabase (.25).Item1;
    //}

    //TODO: Add length distribution for documents and each type.

    // Disabled alternatives for the synthesizer / classifier set:
    //IFeatureSynthesizer<string> synth = new RegressorFeatureSynthesizerKmerFrequenciesVarK<string>("region", 8, 2, 100, 3); //Slowld way
    //IFeatureSynthesizer<string> synth = new VarKmerFrequencyFeatureSynthesizer<string>("region", 3, 4, 50, 2.0, true);
    //IEventSeriesProbabalisticClassifier<string> textClassifier = TextClassifierFactory.TextClassifier ("region", new[]{"region", "type"});
    //IEnumerable<Tuple<string, IEventSeriesProbabalisticClassifier<string>>> classifiers = TextClassifierFactory.RegionsTestClassifiers().ToArray ();

    // Writeup generator arguments as noted by the original author:
    //   string documentTitle, string author, int width, int height, string outFile,
    //   IEnumerable<Tuple<string, IEventSeriesProbabalisticClassifier<Ty>>> classifiers,
    //   string datasetTitle, DiscreteSeriesDatabase<Ty> dataset, string criterionByWhichToClassify
    IEnumerable<Tuple<string, IEventSeriesProbabalisticClassifier<string>>> allClassifiers =
        TextClassifierFactory.RegionsPerceptronTestClassifiers().ToArray();

    IFeatureSynthesizer<string>[] parts = {
        new VarKmerFrequencyFeatureSynthesizerToRawFrequencies<string>("region", 2, 2, 16, .1, false),
        new LatinLanguageFeatureSynthesizer("region"),
        new VarKmerFrequencyFeatureSynthesizer<string>("region", 3, 4, 50, 2.0, false),
        new VarKmerFrequencyFeatureSynthesizer<string>("type", 3, 3, 50, 2.0, false)
    };
    IFeatureSynthesizer<string> regionSynthesizer = new CompoundFeatureSynthesizer<string>("region", parts);

    // Test runs only compare the first two classifiers.
    IEnumerable<Tuple<string, IEventSeriesProbabalisticClassifier<string>>> chosenClassifiers =
        test ? allClassifiers.Take(2) : allClassifiers;

    WriteupGenerator.ProduceClassifierComparisonWriteup<string>(
        "Spanish Language Dialect Analysis",
        "Cyrus Cousins",
        11,
        16,
        "../../out/spanish/classification/",
        chosenClassifiers.ToArray(),
        "Spanish Language",
        regionData,
        "region",
        test ? 1 : 4,
        analysisCriteria: new[] { "region", "type" },
        synthesizer: regionSynthesizer);

    // Disabled: per-classifier feature synthesizer analysis section.
    //if (classifier is SeriesFeatureSynthesizerToVectorProbabalisticClassifierEventSeriesProbabalisticClassifier<string>) {
    //	IFeatureSynthesizer<string> synthesizer = ((SeriesFeatureSynthesizerToVectorProbabalisticClassifierEventSeriesProbabalisticClassifier<string>)classifier).synthesizer;
    //
    //	//doc.Append ("\\section{Feature Synthesizer Analysis}\n\n");
    //	//doc.Append (synthesizer.FeatureSynthesizerLatexString(allData));
    //}
}