/// <summary>
/// Builds one model per labelled training document ("sports", "economy"),
/// feeds them to a <c>KnnMonoCategorizedClassifier</c> and checks that the
/// first result returned for an economy sentence and for a sports sentence
/// carries the matching label.
/// </summary>
public void Test()
{
    // Label -> full text of the training document for that label.
    var trainingDocuments = new Dictionary<string, string>
    {
        { "sports", File.ReadAllText("..\\..\\TestData\\Sports.txt") },
        { "economy", File.ReadAllText("..\\..\\TestData\\Economy.txt") },
    };

    var featureExtractor = new BagOfWordsFeatureExtractor();

    // The classifier is handed a model -> label map, so the distribution is the key.
    var trainedModels = new Dictionary<IDistribution<string>, string>();
    foreach (var trainingItem in trainingDocuments)
    {
        var distribution = CreateModel(featureExtractor, trainingItem.Value);
        trainedModels.Add(distribution, trainingItem.Key);
    }

    var classifier = new KnnMonoCategorizedClassifier<IDistribution<string>, string>(
        new VectorDistanceCalculator<string>(),
        trainedModels);

    // Economy sentence: the first returned label must be "economy".
    var economyResult = classifier.Classify(CreateModel(featureExtractor, "Fitch Ratings on Wednesday said Britain's latest budget proposals show commitment to its existing deficit reduction strategy and do not impact its AAA credit rating.")).ToArray();
    Assert.GreaterOrEqual(economyResult.Length, 1);
    // Expected value goes first so a failure message reports expected/actual correctly.
    Assert.AreEqual("economy", economyResult[0].Item1);

    // Sports sentence: the first returned label must be "sports".
    var sportsResult = classifier.Classify(CreateModel(featureExtractor, "Ryan Flannigan strikes a four off the last ball to help Scotland claim a four-wicket win over Canada in the fifth-place play-off at the qualifying tournament for the ICC World Twenty20 in Dubai.")).ToArray();
    Assert.GreaterOrEqual(sportsResult.Length, 1);
    Assert.AreEqual("sports", sportsResult[0].Item1);
}
/// <summary>
/// Trains a <c>KnnMonoCategorizedClassifier</c> on two labelled documents
/// ("sports" and "economy") and verifies that the first classification result
/// for each of two sample sentences has the expected label.
/// </summary>
public void Test()
{
    // Training corpus: category label mapped to the raw document text.
    var trainingDocuments = new Dictionary<string, string>
    {
        { "sports", File.ReadAllText("..\\..\\TestData\\Sports.txt") },
        { "economy", File.ReadAllText("..\\..\\TestData\\Economy.txt") },
    };

    var featureExtractor = new BagOfWordsFeatureExtractor();

    // Keyed by model (not by label) because that is the shape the classifier constructor takes.
    var trainedModels = new Dictionary<IDistribution<string>, string>();
    foreach (var trainingItem in trainingDocuments)
    {
        trainedModels.Add(CreateModel(featureExtractor, trainingItem.Value), trainingItem.Key);
    }

    var classifier = new KnnMonoCategorizedClassifier<IDistribution<string>, string>(
        new VectorDistanceCalculator<string>(),
        trainedModels);

    // Finance-related input; the variable is named after the label it is expected to receive.
    var economyResult = classifier.Classify(CreateModel(featureExtractor, "Fitch Ratings on Wednesday said Britain's latest budget proposals show commitment to its existing deficit reduction strategy and do not impact its AAA credit rating.")).ToArray();
    Assert.GreaterOrEqual(economyResult.Length, 1);
    // Assert.AreEqual takes (expected, actual); keeping that order makes failure output accurate.
    Assert.AreEqual("economy", economyResult[0].Item1);

    // Cricket-related input; expected to be labelled "sports".
    var sportsResult = classifier.Classify(CreateModel(featureExtractor, "Ryan Flannigan strikes a four off the last ball to help Scotland claim a four-wicket win over Canada in the fifth-place play-off at the qualifying tournament for the ICC World Twenty20 in Dubai.")).ToArray();
    Assert.GreaterOrEqual(sportsResult.Length, 1);
    Assert.AreEqual("sports", sportsResult[0].Item1);
}
/// <summary>
/// Classifies a guessed language model against the reference models held in
/// <c>_etalonLanguageModel2languageName</c>.
/// </summary>
/// <param name="guessedLanguageModel">
/// Distribution to classify; it is converted with <c>GetRankedLanguageModel</c>
/// before being compared.
/// </param>
/// <returns>
/// The sequence produced by <c>KnnMonoCategorizedClassifier.Classify</c>: pairs of
/// a <c>LanguageInfo</c> and a <c>double</c> (presumably a distance or score —
/// confirm against the classifier).
/// </returns>
public IEnumerable<Tuple<LanguageInfo, double>> Classify(IDistribution<T> guessedLanguageModel)
{
    // Convert the input first, so any failure here surfaces before the classifier is built.
    IDictionary<T, int> rankedInput = GetRankedLanguageModel(guessedLanguageModel);

    // Ranking-based distance, configured with the rank to use for n-grams absent from a model.
    var distanceCalculator = new RankingDistanceCalculator<T>(_defaultNgramRankOnAbsence);
    var knn = new KnnMonoCategorizedClassifier<IDictionary<T, int>, LanguageInfo>(
        distanceCalculator,
        _etalonLanguageModel2languageName);

    return knn.Classify(rankedInput);
}