/// <summary>
/// Builds one model per labelled training document, feeds them to a
/// <c>KnnMonoCategorizedClassifier</c>, and checks that an economy sentence and a
/// sports sentence each come back with the matching label as the first result.
/// </summary>
public void Test()
        {
            // Path.Combine yields "..\..\TestData\X.txt" on Windows (same as before) and a
            // valid path on platforms that use a different directory separator.
            var trainingDocuments =
                new Dictionary<string, string>
            {
                { "sports", File.ReadAllText(Path.Combine("..", "..", "TestData", "Sports.txt")) },
                { "economy", File.ReadAllText(Path.Combine("..", "..", "TestData", "Economy.txt")) },
            };
            var featureExtractor = new BagOfWordsFeatureExtractor();

            // The classifier is keyed by model, with the category label as the value.
            var trainedModels = new Dictionary<IDistribution<string>, string>();
            foreach (var trainingItem in trainingDocuments)
            {
                var distribution = CreateModel(featureExtractor, trainingItem.Value);
                trainedModels.Add(distribution, trainingItem.Key);
            }

            var classifier =
                new KnnMonoCategorizedClassifier<IDistribution<string>, string>(new VectorDistanceCalculator<string>(), trainedModels);

            // Locals are named after the category the input text is expected to fall into.
            var economyResult = classifier.Classify(CreateModel(featureExtractor,
                                                                "Fitch Ratings on Wednesday said Britain's latest budget proposals show commitment to its existing deficit reduction strategy and do not impact its AAA credit rating.")).ToArray();

            Assert.GreaterOrEqual(economyResult.Length, 1);
            // Expected value goes first so a failure message reports expected/actual correctly.
            Assert.AreEqual("economy", economyResult[0].Item1);

            var sportsResult = classifier.Classify(CreateModel(featureExtractor,
                                                               "Ryan Flannigan strikes a four off the last ball to help Scotland claim a four-wicket win over Canada in the fifth-place play-off at the qualifying tournament for the ICC World Twenty20 in Dubai.")).ToArray();

            Assert.GreaterOrEqual(sportsResult.Length, 1);
            Assert.AreEqual("sports", sportsResult[0].Item1);
        }
 /// <summary>
 /// Builds one model per labelled training document, feeds them to a
 /// <c>KnnMonoCategorizedClassifier</c>, and checks that an economy sentence and a
 /// sports sentence each come back with the matching label as the first result.
 /// </summary>
 public void Test()
 {
     // Path.Combine yields "..\..\TestData\X.txt" on Windows (same as before) and a
     // valid path on platforms that use a different directory separator.
     var trainingDocuments =
         new Dictionary<string, string>
             {
                 { "sports", File.ReadAllText(Path.Combine("..", "..", "TestData", "Sports.txt")) },
                 { "economy", File.ReadAllText(Path.Combine("..", "..", "TestData", "Economy.txt")) },
             };
     var featureExtractor = new BagOfWordsFeatureExtractor();

     // The classifier is keyed by model, with the category label as the value.
     var trainedModels = new Dictionary<IDistribution<string>, string>();
     foreach (var trainingItem in trainingDocuments)
     {
         var distribution = CreateModel(featureExtractor, trainingItem.Value);
         trainedModels.Add(distribution, trainingItem.Key);
     }

     var classifier =
         new KnnMonoCategorizedClassifier<IDistribution<string>, string>(new VectorDistanceCalculator<string>(), trainedModels);

     // Locals are named after the category the input text is expected to fall into.
     var economyResult = classifier.Classify(CreateModel(featureExtractor,
         "Fitch Ratings on Wednesday said Britain's latest budget proposals show commitment to its existing deficit reduction strategy and do not impact its AAA credit rating.")).ToArray();
     Assert.GreaterOrEqual(economyResult.Length, 1);
     // Expected value goes first so a failure message reports expected/actual correctly.
     Assert.AreEqual("economy", economyResult[0].Item1);

     var sportsResult = classifier.Classify(CreateModel(featureExtractor,
         "Ryan Flannigan strikes a four off the last ball to help Scotland claim a four-wicket win over Canada in the fifth-place play-off at the qualifying tournament for the ICC World Twenty20 in Dubai.")).ToArray();
     Assert.GreaterOrEqual(sportsResult.Length, 1);
     Assert.AreEqual("sports", sportsResult[0].Item1);
 }
Beispiel #3
0
        /// <summary>
        /// Converts the supplied model to its ranked form with
        /// <c>GetRankedLanguageModel</c> and classifies it against
        /// <c>_etalonLanguageModel2languageName</c>.
        /// </summary>
        /// <param name="guessedLanguageModel">Distribution to be classified.</param>
        /// <returns>
        /// The sequence produced by <c>KnnMonoCategorizedClassifier.Classify</c>: each tuple
        /// pairs a <c>LanguageInfo</c> with a double (presumably a distance score - confirm
        /// against the classifier).
        /// </returns>
        public IEnumerable<Tuple<LanguageInfo, double>> Classify(IDistribution<T> guessedLanguageModel)
        {
            // Rank the input first, before any classifier objects are constructed.
            IDictionary<T, int> rankedInput = GetRankedLanguageModel(guessedLanguageModel);

            var distanceCalculator = new RankingDistanceCalculator<T>(_defaultNgramRankOnAbsence);
            var knnClassifier = new KnnMonoCategorizedClassifier<IDictionary<T, int>, LanguageInfo>(
                distanceCalculator,
                _etalonLanguageModel2languageName);

            return knnClassifier.Classify(rankedInput);
        }