/// <summary>
/// Runs RAKE keyword extraction over an example article and reports which of the
/// article's expected keywords RAKE reproduced exactly.
/// </summary>
/// <param name="article">Example article whose text is analysed and whose expected keywords are looked up.</param>
/// <param name="resource">Which expected-keyword source(s) to compare against; defaults to all of them.</param>
/// <param name="minCharLength">Forwarded unchanged to the <c>Rake</c> constructor.</param>
/// <param name="maxWordsLength">Forwarded unchanged to the <c>Rake</c> constructor.</param>
/// <param name="minWordFreq">Forwarded unchanged to the <c>Rake</c> constructor.</param>
/// <returns>
/// The expected keywords (lower-cased and trimmed) that are equal to some RAKE result after the
/// same normalisation, in expected-keyword order and without duplicates. Empty when nothing matches.
/// </returns>
protected IList<string> CompareRake(ExampleArticles article, KeywordResources resource = KeywordResources.ALL, int minCharLength = 1, int maxWordsLength = 5, double minWordFreq = 1)
{
    string stopListPath = MapPath(".") + "/Files/SmartStoplist.txt";
    TextCheating txtCheats = new TextCheating();

    // Use the caller-supplied article and resource. The previous implementation ignored both
    // parameters and always analysed the same hard-coded article against all keyword sources.
    IList<string> keywords = txtCheats.ExpectedKeywords(article, resource);
    Rake rake = new Rake(stopListPath, minCharLength, maxWordsLength, minWordFreq);
    var resultsDict = rake.Run(txtCheats.GetArticleText(article));

    List<string> fullMatch = new List<string>();

    // ExpectedKeywords returns null for an unrecognised resource; treat that as "no expectations".
    if (keywords == null)
    {
        return fullMatch;
    }

    // Normalise every RAKE phrase once so each keyword needs a single hash lookup
    // instead of a scan over all results.
    HashSet<string> normalizedResults = new HashSet<string>();
    foreach (string phrase in resultsDict.Keys)
    {
        normalizedResults.Add(phrase.ToLower().Trim());
    }

    // Tracks keywords already reported so duplicates in the expected list appear only once.
    HashSet<string> reported = new HashSet<string>();
    foreach (string keyword in keywords)
    {
        string normalizedKeyword = keyword.ToLower().Trim();
        if (normalizedResults.Contains(normalizedKeyword) && reported.Add(normalizedKeyword))
        {
            fullMatch.Add(normalizedKeyword);
        }
    }

    // NOTE: the earlier version also built partial-match, missed-keyword and score collections
    // that were never read; the score lookup indexed the RAKE results with the normalised
    // keyword and could fail when the original key differed in case or whitespace.
    return fullMatch;
}
/// <summary>
/// Loads the expected ("online") keywords recorded for one of the example articles.
/// </summary>
/// <remarks>
/// The article's keyword file is read line by line. Whenever a line contains one of the
/// section markers ("ieee", "inspec - controlled", "inspec - non", "author keywords",
/// matched case-insensitively), the line that follows it is taken as that section's
/// comma-separated keyword list. If a marker occurs more than once, the last occurrence wins.
/// NOTE(review): the "ieee" marker is a plain substring test, so a keyword line that itself
/// contains "ieee" would also be treated as a header — confirm against the file format.
/// </remarks>
/// <param name="article">
/// Article whose keyword file is read. For a value without a known file the path is left
/// pointing at the keywords directory and the file read fails.
/// </param>
/// <param name="resource">Which keyword section to return, or <c>ALL</c> for every section concatenated.</param>
/// <returns>
/// Lower-cased keywords with surrounding whitespace removed and empty entries dropped.
/// <c>ALL</c> yields IEEE, INSPEC controlled, INSPEC non-controlled and author keywords, in that order.
/// Returns <c>null</c> when <paramref name="resource"/> is not a recognised value.
/// </returns>
public IList<string> ExpectedKeywords(ExampleArticles article, KeywordResources resource)
{
    string path = HttpContext.Current.Server.MapPath(".") + "/Files/Amit Article Text/Online Keywords/";
    switch (article)
    {
        case ExampleArticles.KnowledgeAndSocialNetworks:
            path += "Knowledge and Social Networks.txt";
            break;
        case ExampleArticles.NotAllIsGoldThatGlitters:
            path += "Not All Is Gold That Glitters .txt";
            break;
        case ExampleArticles.HackersTopologyMatterGeography:
            path += "Hackers topology matter geography.txt";
            break;
        default:
            break;
    }

    List<string> ieee = new List<string>();
    List<string> inspec_controlled = new List<string>();
    List<string> inspec_non_controlled = new List<string>();
    List<string> author = new List<string>();

    string[] lines = File.ReadAllLines(path);

    // Stop one line early: every header needs a following line that holds its keywords.
    for (int i = 0; i < lines.Length - 1; i++)
    {
        string header = lines[i].ToLower().Trim();
        if (header.Contains("ieee"))
        {
            ieee = SplitKeywordLine(lines[i + 1]);
        }
        else if (header.Contains("inspec - controlled"))
        {
            inspec_controlled = SplitKeywordLine(lines[i + 1]);
        }
        else if (header.Contains("inspec - non"))
        {
            inspec_non_controlled = SplitKeywordLine(lines[i + 1]);
        }
        else if (header.Contains("author keywords"))
        {
            author = SplitKeywordLine(lines[i + 1]);
        }
    }

    switch (resource)
    {
        case KeywordResources.IEEE:
            return ieee;
        case KeywordResources.INSPEC_Controlled:
            return inspec_controlled;
        case KeywordResources.INSPEC_Non_Controlled:
            return inspec_non_controlled;
        case KeywordResources.Author:
            return author;
        case KeywordResources.ALL:
            List<string> all = new List<string>();
            all.AddRange(ieee);
            all.AddRange(inspec_controlled);
            all.AddRange(inspec_non_controlled);
            all.AddRange(author);
            return all;
        default:
            return null;
    }
}

/// <summary>
/// Splits one comma-separated keyword line into lower-cased, trimmed, non-empty keywords.
/// </summary>
private static List<string> SplitKeywordLine(string line)
{
    // Trim each entry individually: "a, b" must yield "b", not " b", and a blank line or a
    // trailing comma must not produce an empty keyword.
    return line.ToLower()
               .Split(',')
               .Select(entry => entry.Trim())
               .Where(entry => entry.Length > 0)
               .ToList();
}