Exemple #1
0
        /// <summary>
        /// Scores <paramref name="snippet"/> against every entry in <c>_matchTrees</c> and
        /// returns the language of the tree with the highest normalised score.
        /// </summary>
        /// <param name="snippet">Text to classify; it is tokenised with <c>GetAllTokens</c>.</param>
        /// <param name="scores">
        /// Receives one entry per match tree, keyed by the tree's language. Each value is the
        /// summed token score divided by the token count and by the tree's
        /// <c>TotalPossibleScore</c>, or 0 when the snippet produced no tokens.
        /// </param>
        /// <returns>
        /// The language with the highest score strictly greater than zero, or <c>null</c>
        /// when no tree scores above zero.
        /// </returns>
        public static string Classify(string snippet, out Dictionary <string, double> scores)
        {
            if (_instance == null)
            {
                _instance = new CodeClassifier();
            }

            scores = new Dictionary <string, double>();

            List <Token> tokens            = GetAllTokens(snippet);
            int          tokenCount        = tokens.Count;
            double       maxScore          = 0;
            string       bestMatchLanguage = null;

            foreach (MatchTree matchTree in _matchTrees)
            {
                double score = 0;

                // Without tokens the normalisation below would be 0.0 / 0 and poison
                // every entry of 'scores' with NaN; report a plain 0 instead.
                if (tokenCount > 0)
                {
                    for (int index = 0; index < tokenCount; index++)
                    {
                        score += ScoreTokens(matchTree.MatchTreeRoot, tokens, index);
                    }
                    score = score / tokenCount / matchTree.TotalPossibleScore;
                }

                scores.Add(matchTree.Language, score);

                // Strict comparison: the first tree to reach the top score wins ties,
                // and a score of zero never selects a language.
                if (score > maxScore)
                {
                    maxScore          = score;
                    bestMatchLanguage = matchTree.Language;
                }
            }
            return(bestMatchLanguage);
        }
Exemple #2
0
        /// <summary>
        /// Scores <paramref name="snippet"/> against every entry in <c>_matchTrees</c> and
        /// returns the language of the tree with the highest normalised score.
        /// </summary>
        /// <param name="snippet">Text to classify; it is tokenised with <c>GetAllTokens</c>.</param>
        /// <param name="scores">
        /// Receives one entry per match tree, keyed by the tree's language. Each value is the
        /// summed token score divided by the token count and by the tree's
        /// <c>TotalPossibleScore</c>, or 0 when the snippet produced no tokens.
        /// </param>
        /// <returns>
        /// The language with the highest score strictly greater than zero, or <c>null</c>
        /// when no tree scores above zero.
        /// </returns>
        public static string Classify(string snippet, out Dictionary<string, double> scores)
        {
            if (_instance == null)
            {
                _instance = new CodeClassifier();
            }

            scores = new Dictionary<string, double>();

            List<Token> tokens = GetAllTokens(snippet);
            int tokenCount = tokens.Count;
            double maxScore = 0;
            string bestMatchLanguage = null;

            foreach (MatchTree matchTree in _matchTrees)
            {
                double score = 0;

                // An empty token list would make the normalisation 0.0 / 0 == NaN for
                // every language; leave the score at 0 in that case.
                if (tokenCount > 0)
                {
                    for (int index = 0; index < tokenCount; index++)
                    {
                        score += ScoreTokens(matchTree.MatchTreeRoot, tokens, index);
                    }
                    score = score / tokenCount / matchTree.TotalPossibleScore;
                }

                scores.Add(matchTree.Language, score);

                // Strictly greater: ties keep the earlier tree, zero never wins.
                if (score > maxScore)
                {
                    maxScore = score;
                    bestMatchLanguage = matchTree.Language;
                }
            }
            return bestMatchLanguage;
        }
        /// <summary>
        /// Classifies <paramref name="snippet"/> with both the token-probability and the
        /// match-tree classifier, writes each intermediate result to the console, and
        /// blends the two score sets weighted by their respective certainties.
        /// </summary>
        /// <param name="snippet">Text to classify.</param>
        /// <param name="certainty">Receives <c>CalculateCertainty</c> of the blended scores.</param>
        /// <param name="scores">
        /// Receives, for every language reported by the token-probability classifier,
        /// tokenScore * tokenCertainty + treeScore * treeCertainty.
        /// </param>
        /// <returns>The result of <c>CalculateWinningLanguage</c> on the blended scores.</returns>
        public static string Classify(string snippet, out double certainty, out Dictionary <string, double> scores)
        {
            if (_instance == null)
            {
                _instance = new CodeClassifier();
            }

            // Pass 1: token frequencies.
            Dictionary <string, double> tokenScores;
            string tokenWinner = ClassifyByTokenProbability(snippet, out tokenScores);
            Console.WriteLine("\n\nToken frequencies: ");
            OutputLanguageScores(tokenScores, tokenWinner);
            double tokenCertainty = CalculateCertainty(tokenScores);

            // Pass 2: match trees.
            Dictionary <string, double> treeScores;
            string treeWinner = ClassifyByMatchTrees(snippet, out treeScores);
            Console.WriteLine("\n\nMatch trees: ");
            OutputLanguageScores(treeScores, treeWinner);
            double treeCertainty = CalculateCertainty(treeScores);

            // Blend: each classifier's score is weighted by that classifier's certainty.
            // (Alternative, not used: return the result of whichever classifier reports
            // the higher certainty.)
            Dictionary <string, double> blended = new Dictionary <string, double>();
            foreach (KeyValuePair <string, double> tokenEntry in tokenScores)
            {
                // The indexer throws if the match-tree result lacks this language.
                blended[tokenEntry.Key] =
                    tokenEntry.Value * tokenCertainty + treeScores[tokenEntry.Key] * treeCertainty;
            }

            scores    = blended;
            certainty = CalculateCertainty(blended);
            return(CalculateWinningLanguage(blended));
        }
        /// <summary>
        /// Runs the token-probability classifier and the match-tree classifier on
        /// <paramref name="snippet"/>, prints both intermediate results to the console,
        /// and combines their scores using each classifier's certainty as its weight.
        /// </summary>
        /// <param name="snippet">Text to classify.</param>
        /// <param name="certainty">Receives <c>CalculateCertainty</c> of the combined scores.</param>
        /// <param name="scores">
        /// Receives one combined score per language reported by the token-probability
        /// classifier.
        /// </param>
        /// <returns>The result of <c>CalculateWinningLanguage</c> on the combined scores.</returns>
        public static string Classify(string snippet, out double certainty, out Dictionary<string, double> scores)
        {
            if (_instance == null)
            {
                _instance = new CodeClassifier();
            }

            // Token-probability pass.
            Dictionary<string, double> probabilityScores;
            string probabilityBest = ClassifyByTokenProbability(snippet, out probabilityScores);
            Console.WriteLine("\n\nToken frequencies: ");
            OutputLanguageScores(probabilityScores, probabilityBest);
            double probabilityWeight = CalculateCertainty(probabilityScores);

            // Match-tree pass.
            Dictionary<string, double> matchTreeScores;
            string matchTreeBest = ClassifyByMatchTrees(snippet, out matchTreeScores);
            Console.WriteLine("\n\nMatch trees: ");
            OutputLanguageScores(matchTreeScores, matchTreeBest);
            double matchTreeWeight = CalculateCertainty(matchTreeScores);

            // Combine the two results; a language missing from the match-tree scores
            // makes the indexer throw. (Alternative, not used: pick the result of the
            // classifier with the higher certainty.)
            Dictionary<string, double> combined = new Dictionary<string, double>();
            foreach (KeyValuePair<string, double> pair in probabilityScores)
            {
                string language = pair.Key;
                combined[language] = pair.Value*probabilityWeight + matchTreeScores[language]*matchTreeWeight;
            }

            scores = combined;
            certainty = CalculateCertainty(combined);
            return CalculateWinningLanguage(combined);
        }