/// <summary>
/// Scores <paramref name="snippet"/> against every match tree in <c>_matchTrees</c> and
/// returns the language of the tree with the highest normalized score.
/// </summary>
/// <param name="snippet">The text to classify; it is tokenized with <c>GetAllTokens</c>.</param>
/// <param name="scores">
/// Receives one entry per match tree, keyed by the tree's language. Each value is the summed
/// token score divided by the token count and by the tree's <c>TotalPossibleScore</c>.
/// When the snippet yields no tokens every value is 0.
/// </param>
/// <returns>
/// The language with the highest score strictly greater than zero, or <c>null</c> when no
/// tree scores above zero.
/// </returns>
public static string Classify(string snippet, out Dictionary<string, double> scores)
{
    // Create the shared classifier instance on first use.
    // NOTE(review): presumably its constructor populates _matchTrees — confirm.
    if (_instance == null)
    {
        _instance = new CodeClassifier();
    }

    scores = new Dictionary<string, double>();
    List<Token> tokens = GetAllTokens(snippet);

    double maxScore = 0;
    string bestMatchLanguage = null;

    foreach (MatchTree matchTree in _matchTrees)
    {
        // Accumulate the tree's score starting from every token position.
        double score = 0;
        for (int index = 0; index < tokens.Count; index++)
        {
            score += ScoreTokens(matchTree.MatchTreeRoot, tokens, index);
        }

        // Normalize only when there is something to normalize by: with zero tokens the
        // original expression computed 0.0 / 0, which put NaN into the score table.
        if (tokens.Count > 0)
        {
            score = score / tokens.Count / matchTree.TotalPossibleScore;
        }

        scores.Add(matchTree.Language, score);

        // Strict comparison: on a tie the earlier tree keeps the lead.
        if (score > maxScore)
        {
            maxScore = score;
            bestMatchLanguage = matchTree.Language;
        }
    }

    return bestMatchLanguage;
}
/// <summary>
/// Classifies <paramref name="snippet"/> by running it through each match tree and picking
/// the language whose normalized score is the largest.
/// </summary>
/// <param name="snippet">Source text to classify; tokenized via <c>GetAllTokens</c>.</param>
/// <param name="scores">
/// Filled with the normalized score of every match tree, keyed by language. A snippet that
/// produces no tokens gets a score of 0 for every language.
/// </param>
/// <returns>
/// The best-scoring language, or <c>null</c> if no match tree achieves a score above zero.
/// </returns>
public static string Classify(string snippet, out Dictionary<string, double> scores)
{
    // The classifier instance is created the first time it is needed.
    if (_instance == null)
    {
        _instance = new CodeClassifier();
    }

    scores = new Dictionary<string, double>();
    List<Token> tokens = GetAllTokens(snippet);

    double maxScore = 0;
    string bestMatchLanguage = null;

    foreach (MatchTree matchTree in _matchTrees)
    {
        // Sum the score obtained when matching from each token offset.
        double rawScore = 0;
        for (int index = 0; index < tokens.Count; index++)
        {
            rawScore += ScoreTokens(matchTree.MatchTreeRoot, tokens, index);
        }

        // Dividing by a token count of zero would turn the score into NaN, so an empty
        // token list is reported as a plain 0 instead.
        double score = tokens.Count > 0
            ? rawScore / tokens.Count / matchTree.TotalPossibleScore
            : 0;

        scores.Add(matchTree.Language, score);

        // Only a strictly higher score replaces the current best match.
        if (score > maxScore)
        {
            maxScore = score;
            bestMatchLanguage = matchTree.Language;
        }
    }

    return bestMatchLanguage;
}
/// <summary>
/// Classifies <paramref name="snippet"/> with both the token-probability classifier and the
/// match-tree classifier, then blends the two score tables, weighting each by its own certainty.
/// The intermediate results of both classifiers are written to the console.
/// </summary>
/// <param name="snippet">The text to classify.</param>
/// <param name="certainty">Receives <c>CalculateCertainty</c> of the blended scores.</param>
/// <param name="scores">
/// Receives the blended score for every language key produced by the token-probability classifier.
/// </param>
/// <returns>The language selected by <c>CalculateWinningLanguage</c> from the blended scores.</returns>
public static string Classify(string snippet, out double certainty, out Dictionary<string, double> scores)
{
    // Create the shared classifier instance on first use.
    if (_instance == null)
    {
        _instance = new CodeClassifier();
    }

    // Pass 1: token probabilities.
    Dictionary<string, double> tokenProbabilityScores;
    string tokenProbabilityWinner = ClassifyByTokenProbability(snippet, out tokenProbabilityScores);
    Console.WriteLine("\n\nToken frequencies: ");
    OutputLanguageScores(tokenProbabilityScores, tokenProbabilityWinner);
    double tokenProbabilityCertainty = CalculateCertainty(tokenProbabilityScores);

    // Pass 2: match trees.
    Dictionary<string, double> matchTreeScores;
    string matchTreeWinner = ClassifyByMatchTrees(snippet, out matchTreeScores);
    Console.WriteLine("\n\nMatch trees: ");
    OutputLanguageScores(matchTreeScores, matchTreeWinner);
    double matchTreeCertainty = CalculateCertainty(matchTreeScores);

    // Blend: each classifier's score counts in proportion to how certain that classifier was.
    // The match-tree table is indexed by the token-probability keys, so it must contain them all.
    scores = new Dictionary<string, double>();
    foreach (KeyValuePair<string, double> entry in tokenProbabilityScores)
    {
        scores[entry.Key] = entry.Value * tokenProbabilityCertainty
                            + matchTreeScores[entry.Key] * matchTreeCertainty;
    }

    certainty = CalculateCertainty(scores);
    return CalculateWinningLanguage(scores);
}
/// <summary>
/// Runs the token-probability and match-tree classifiers over <paramref name="snippet"/>,
/// prints each classifier's scores to the console, and merges both results into a single
/// certainty-weighted score per language.
/// </summary>
/// <param name="snippet">The text to classify.</param>
/// <param name="certainty">Set to the value <c>CalculateCertainty</c> returns for the merged scores.</param>
/// <param name="scores">
/// Set to a new dictionary holding, for each language reported by the token-probability
/// classifier, the sum of both classifiers' scores multiplied by their respective certainties.
/// </param>
/// <returns>The result of <c>CalculateWinningLanguage</c> on the merged scores.</returns>
public static string Classify(string snippet, out double certainty, out Dictionary<string, double> scores)
{
    // The classifier instance is created the first time it is needed.
    if (_instance == null)
    {
        _instance = new CodeClassifier();
    }

    // Token-probability classification, reported to the console before its certainty is computed.
    Dictionary<string, double> byTokens;
    string winnerByTokens = ClassifyByTokenProbability(snippet, out byTokens);
    Console.WriteLine("\n\nToken frequencies: ");
    OutputLanguageScores(byTokens, winnerByTokens);
    double weightTokens = CalculateCertainty(byTokens);

    // Match-tree classification, reported the same way.
    Dictionary<string, double> byTrees;
    string winnerByTrees = ClassifyByMatchTrees(snippet, out byTrees);
    Console.WriteLine("\n\nMatch trees: ");
    OutputLanguageScores(byTrees, winnerByTrees);
    double weightTrees = CalculateCertainty(byTrees);

    // Merge the two tables; the languages are taken from the token-probability result.
    scores = new Dictionary<string, double>();
    foreach (string language in byTokens.Keys)
    {
        double tokenPart = byTokens[language] * weightTokens;
        double treePart = byTrees[language] * weightTrees;
        scores[language] = tokenPart + treePart;
    }

    certainty = CalculateCertainty(scores);
    return CalculateWinningLanguage(scores);
}