/// <summary>
/// Runs RAKE over the text of the given article and collects the extracted
/// phrases that consist of exactly four or exactly three space-separated words.
/// </summary>
/// <param name="article">Which article to extract from.</param>
/// <returns>
/// All four-word phrases followed by all three-word phrases, each group in the
/// order the RAKE result enumerates its keys.
/// </returns>
protected List<string> GetLongPhrases(ExampleArticles article)
{
    // Rake constructor arguments, in the order the other methods of this file pass them.
    const int minCharLength = 1;
    const int maxWordsCount = 4;
    const int minWordFreq = 2;

    string stopListPath = MapPath(".") + "/Files/SmartStoplist.txt";

    // Load the article that was actually requested; previously the parameter was
    // ignored and a single hard-coded article was always analysed.
    string articleText = new TextCheating().GetArticleText(article);

    Rake rake = new Rake(stopListPath, minCharLength, maxWordsCount, minWordFreq);
    var resultsDict = rake.Run(articleText);

    List<string> fourWordPhrases = new List<string>();
    List<string> threeWordPhrases = new List<string>();

    foreach (string phrase in resultsDict.Keys)
    {
        // Word count is defined as the number of pieces produced by splitting on a single space.
        int numOfWords = phrase.Split(' ').Length;
        if (numOfWords == 4)
        {
            fourWordPhrases.Add(phrase);
        }
        else if (numOfWords == 3)
        {
            threeWordPhrases.Add(phrase);
        }
    }

    // Four-word phrases are listed ahead of three-word phrases.
    List<string> candidates = new List<string>(fourWordPhrases.Count + threeWordPhrases.Count);
    candidates.AddRange(fourWordPhrases);
    candidates.AddRange(threeWordPhrases);
    return candidates;
}
/// <summary>
/// Reads the complete text of one of the example articles from the
/// "Files/Amit Article Text" folder under the current request's mapped path.
/// </summary>
/// <param name="article">The example article whose text file should be read.</param>
/// <returns>The entire contents of the selected article file.</returns>
public string GetArticleText(ExampleArticles article)
{
    string folder = HttpContext.Current.Server.MapPath(".") + "/Files/Amit Article Text/";

    // For any value not listed below no file name is appended, so the folder
    // path itself is what gets handed to File.ReadAllText.
    string fileName = "";
    if (article == ExampleArticles.KnowledgeAndSocialNetworks)
    {
        fileName = "knowledge_and_Social_Networks_in_Yahoo_Answers_HICSS_12092011.txt";
    }
    else if (article == ExampleArticles.NotAllIsGoldThatGlitters)
    {
        fileName = "Not_all_is_Gold_that_Glitters_Response_t.txt";
    }
    else if (article == ExampleArticles.HackersTopologyMatterGeography)
    {
        fileName = "Hackers_Topology_Matter_Geography.txt";
    }

    string fullPath = folder + fileName;
    return File.ReadAllText(fullPath);
}
/// <summary>
/// Runs RAKE over the text of the given article and returns only the extracted
/// phrases that consist of a single word.
/// </summary>
/// <param name="article">Which article to extract from.</param>
/// <returns>
/// The single-word phrases, in the order the RAKE result enumerates its keys.
/// </returns>
protected List<string> GetOneWordKeywords(ExampleArticles article)
{
    // Original author's note: changing this value has caused problems before.
    int minCharLength = 1;
    // Original author's note: only one-word results are needed here; the effect
    // of this limit on scoring is an open question.
    int maxWordsCount = 2;
    // Original author's note: with a value of 3, most of the two-word phrases
    // found by Scholar were missed.
    int minWordFreq = 7;

    string stopListPath = MapPath(".") + "/Files/SmartStoplist.txt";

    // Load the article that was actually requested; previously the parameter was
    // ignored and a single hard-coded article was always analysed.
    string articleText = new TextCheating().GetArticleText(article);

    Rake rake = new Rake(stopListPath, minCharLength, maxWordsCount, minWordFreq);
    var resultsDict = rake.Run(articleText);

    List<string> oneWordList = new List<string>();
    foreach (string phrase in resultsDict.Keys)
    {
        // A phrase without any space splits into exactly one piece.
        if (phrase.Split(' ').Length == 1)
        {
            oneWordList.Add(phrase);
        }
    }

    return oneWordList;
}
/// <summary>
/// Runs RAKE over an article and reports which of the article's expected
/// keywords RAKE reproduced exactly (ignoring case and surrounding whitespace).
/// </summary>
/// <param name="article">Article whose text is analysed and whose expected keywords are loaded.</param>
/// <param name="resource">Which expected-keyword source to compare against.</param>
/// <param name="minCharLength">Second argument handed to the Rake constructor.</param>
/// <param name="maxWordsLength">Third argument handed to the Rake constructor.</param>
/// <param name="minWordFreq">Fourth argument handed to the Rake constructor.</param>
/// <returns>
/// The lower-cased, trimmed expected keywords that equal a lower-cased, trimmed
/// RAKE phrase, without duplicates, in the order the expected keywords are listed.
/// Empty when no expected keywords are available.
/// </returns>
protected IList<string> CompareRake(ExampleArticles article, KeywordResources resource = KeywordResources.ALL, int minCharLength = 1, int maxWordsLength = 5, double minWordFreq = 1)
{
    string stopListPath = MapPath(".") + "/Files/SmartStoplist.txt";
    TextCheating txtCheats = new TextCheating();
    List<string> fullMatch = new List<string>();

    // Use the caller's article and resource; both were previously ignored in
    // favour of hard-coded values.
    IList<string> keywords = txtCheats.ExpectedKeywords(article, resource);
    if (keywords == null)
    {
        // ExpectedKeywords returns null for a resource it does not recognise.
        return fullMatch;
    }

    Rake rake = new Rake(stopListPath, minCharLength, maxWordsLength, minWordFreq);
    var resultsDict = rake.Run(txtCheats.GetArticleText(article));

    // Normalise every RAKE phrase once so each keyword needs a single set lookup
    // instead of a scan over all phrases.
    HashSet<string> normalizedResults = new HashSet<string>();
    foreach (string phrase in resultsDict.Keys)
    {
        normalizedResults.Add(phrase.ToLower().Trim());
    }

    // Tracks keywords already reported so duplicates in the expected list are emitted once.
    HashSet<string> alreadyMatched = new HashSet<string>();
    foreach (string keyword in keywords)
    {
        string normalizedKeyword = keyword.ToLower().Trim();
        if (normalizedResults.Contains(normalizedKeyword) && alreadyMatched.Add(normalizedKeyword))
        {
            fullMatch.Add(normalizedKeyword);
        }
    }

    return fullMatch;
}
/// <summary>
/// Loads the expected keywords of an example article from its file in the
/// "Files/Amit Article Text/Online Keywords" folder and returns the group
/// selected by <paramref name="resource"/>.
/// </summary>
/// <param name="article">The example article whose keyword file should be read.</param>
/// <param name="resource">Which keyword group to return, or ALL for every group combined.</param>
/// <returns>
/// The lower-cased, comma-separated entries of the requested group; for ALL the
/// IEEE, INSPEC controlled, INSPEC non-controlled and author groups concatenated
/// in that order; null when <paramref name="resource"/> is none of the known values.
/// </returns>
public IList<string> ExpectedKeywords(ExampleArticles article, KeywordResources resource)
{
    string keywordFile = HttpContext.Current.Server.MapPath(".") + "/Files/Amit Article Text/Online Keywords/";

    // For any other article value nothing is appended and the folder path is used as-is.
    if (article == ExampleArticles.KnowledgeAndSocialNetworks)
    {
        keywordFile += "Knowledge and Social Networks.txt";
    }
    else if (article == ExampleArticles.NotAllIsGoldThatGlitters)
    {
        keywordFile += "Not All Is Gold That Glitters .txt";
    }
    else if (article == ExampleArticles.HackersTopologyMatterGeography)
    {
        keywordFile += "Hackers topology matter geography.txt";
    }

    List<string> ieeeTerms = new List<string>();
    List<string> controlledTerms = new List<string>();
    List<string> nonControlledTerms = new List<string>();
    List<string> authorTerms = new List<string>();

    string[] lines = File.ReadAllLines(keywordFile);

    // A heading line is followed by the line holding that group's comma-separated
    // keywords, so the last line can never act as a heading.
    for (int i = 0; i + 1 < lines.Length; i++)
    {
        string heading = lines[i].ToLower().Trim();

        // The checks are ordered: a line is attributed to the first heading text it contains.
        if (heading.Contains("ieee"))
        {
            ieeeTerms = new List<string>(lines[i + 1].ToLower().Trim().Split(','));
        }
        else if (heading.Contains("inspec - controlled"))
        {
            controlledTerms = new List<string>(lines[i + 1].ToLower().Trim().Split(','));
        }
        else if (heading.Contains("inspec - non"))
        {
            nonControlledTerms = new List<string>(lines[i + 1].ToLower().Trim().Split(','));
        }
        else if (heading.Contains("author keywords"))
        {
            authorTerms = new List<string>(lines[i + 1].ToLower().Trim().Split(','));
        }
    }

    if (resource == KeywordResources.IEEE)
    {
        return ieeeTerms;
    }
    if (resource == KeywordResources.INSPEC_Controlled)
    {
        return controlledTerms;
    }
    if (resource == KeywordResources.INSPEC_Non_Controlled)
    {
        return nonControlledTerms;
    }
    if (resource == KeywordResources.Author)
    {
        return authorTerms;
    }
    if (resource == KeywordResources.ALL)
    {
        return ieeeTerms
            .Concat(controlledTerms)
            .Concat(nonControlledTerms)
            .Concat(authorTerms)
            .ToList();
    }

    return null;
}