Esempio n. 1
0
    /// <summary>
    /// Runs RAKE over the text of <paramref name="article"/> and keeps only the
    /// phrases made of exactly four or exactly three space-separated words.
    /// </summary>
    /// <param name="article">Which article to extract from</param>
    /// <returns>
    /// All four-word phrases followed by all three-word phrases; within each group the
    /// phrases keep the order in which the RAKE result enumerates its keys.
    /// </returns>
    protected List<string> GetLongPhrases(ExampleArticles article)
    {
        // Same positional Rake arguments the sibling methods pass by name.
        const int minCharLength = 1;
        const int maxWordsCount = 4;
        const int minWordFreq   = 2;

        string stopListPath = MapPath(".") + "/Files/SmartStoplist.txt";

        // Read the article the caller asked for rather than a fixed example article.
        string articleText = new TextCheating().GetArticleText(article);

        Rake rake        = new Rake(stopListPath, minCharLength, maxWordsCount, minWordFreq);
        var  resultsDict = rake.Run(articleText);

        List<string> fourWordPhrases  = new List<string>();
        List<string> threeWordPhrases = new List<string>();

        foreach (string phrase in resultsDict.Keys)
        {
            // Word count is defined purely by single-space separators.
            switch (phrase.Split(' ').Length)
            {
            case 4:
                fourWordPhrases.Add(phrase);
                break;

            case 3:
                threeWordPhrases.Add(phrase);
                break;

            default:
                break;
            }
        }

        // Four-word phrases come first in the result, then the three-word ones.
        List<string> candidates = new List<string>(fourWordPhrases.Count + threeWordPhrases.Count);
        candidates.AddRange(fourWordPhrases);
        candidates.AddRange(threeWordPhrases);
        return candidates;
    }
Esempio n. 2
0
    /// <summary>
    /// Reads the complete text of one of the example articles from the
    /// "Files/Amit Article Text" folder under the current server-mapped directory.
    /// </summary>
    /// <param name="article">Article whose text file should be read.</param>
    /// <returns>The whole content of the selected article file.</returns>
    /// <remarks>
    /// For a value that matches none of the known articles no file name is appended,
    /// so the bare folder path is what gets handed to <c>File.ReadAllText</c>.
    /// </remarks>
    public string GetArticleText(ExampleArticles article)
    {
        string articlesFolder = HttpContext.Current.Server.MapPath(".") + "/Files/Amit Article Text/";
        string fileName;

        if (article == ExampleArticles.KnowledgeAndSocialNetworks)
        {
            fileName = "knowledge_and_Social_Networks_in_Yahoo_Answers_HICSS_12092011.txt";
        }
        else if (article == ExampleArticles.NotAllIsGoldThatGlitters)
        {
            fileName = "Not_all_is_Gold_that_Glitters_Response_t.txt";
        }
        else if (article == ExampleArticles.HackersTopologyMatterGeography)
        {
            fileName = "Hackers_Topology_Matter_Geography.txt";
        }
        else
        {
            // Unknown article: leave the folder path as it is.
            fileName = "";
        }

        string fullPath = articlesFolder + fileName;
        return File.ReadAllText(fullPath);
    }
Esempio n. 3
0
    /// <summary>
    /// Runs RAKE over the text of <paramref name="article"/> and keeps only the
    /// resulting phrases that consist of a single word (no space character).
    /// </summary>
    /// <param name="article">Which article to extract from.</param>
    /// <returns>
    /// The one-word phrases, in the order in which the RAKE result enumerates its keys.
    /// </returns>
    protected List<string> GetOneWordKeywords(ExampleArticles article)
    {
        // Tuning notes carried over from the original experiments:
        //  - minCharLength: changing this value broke the extraction in past attempts.
        //  - maxWordsCount: only one-word phrases are kept below; it is an open question
        //    how allowing two words affects the scoring.
        //  - minWordFreq:   with 3, most of the two-word phrases found by Scholar were missed.
        int minCharLength = 1;
        int maxWordsCount = 2;
        int minWordFreq   = 7;

        string stopListPath = MapPath(".") + "/Files/SmartStoplist.txt";

        // Read the article the caller asked for rather than a fixed example article.
        string articleText = new TextCheating().GetArticleText(article);

        Rake rake        = new Rake(stopListPath, minCharLength, maxWordsCount, minWordFreq);
        var  resultsDict = rake.Run(articleText);

        List<string> oneWordList = new List<string>();
        foreach (string phrase in resultsDict.Keys)
        {
            // A phrase is a single word when splitting on ' ' yields exactly one piece.
            if (phrase.Split(' ').Length == 1)
            {
                oneWordList.Add(phrase);
            }
        }
        return oneWordList;
    }
Esempio n. 4
0
    /// <summary>
    /// Compares the RAKE phrases extracted from <paramref name="article"/> with the
    /// expected keywords of that article and reports the exact matches.
    /// </summary>
    /// <param name="article">Article whose text is analysed and whose expected keywords are loaded.</param>
    /// <param name="resource">Which group of expected keywords to compare against.</param>
    /// <param name="minCharLength">Passed to the <c>Rake</c> constructor as its second argument.</param>
    /// <param name="maxWordsLength">Passed to the <c>Rake</c> constructor as its third argument.</param>
    /// <param name="minWordFreq">Passed to the <c>Rake</c> constructor as its fourth argument.</param>
    /// <returns>
    /// The expected keywords (lower-cased and trimmed) that are equal to some RAKE phrase
    /// after the same normalisation. Each keyword appears once, in the order of the
    /// expected-keyword list. Empty when no expected keywords are available.
    /// </returns>
    protected IList<string> CompareRake(ExampleArticles article, KeywordResources resource = KeywordResources.ALL, int minCharLength = 1, int maxWordsLength = 5, double minWordFreq = 1)
    {
        string       stopListPath = MapPath(".") + "/Files/SmartStoplist.txt";
        TextCheating txtCheats    = new TextCheating();

        // Honour the caller's article and keyword source instead of fixed example values.
        IList<string> keywords    = txtCheats.ExpectedKeywords(article, resource);
        Rake          rake        = new Rake(stopListPath, minCharLength, maxWordsLength, minWordFreq);
        var           resultsDict = rake.Run(txtCheats.GetArticleText(article));

        List<string> fullMatch = new List<string>();
        if (keywords == null)
        {
            // ExpectedKeywords yields null for a resource it does not recognise.
            return fullMatch;
        }

        // Normalise every RAKE phrase once so each keyword needs a single hash lookup.
        HashSet<string> normalizedResults = new HashSet<string>(
            resultsDict.Keys.Select(phrase => phrase.ToLower().Trim()));

        // Tracks keywords already reported so duplicates in the expected list are skipped.
        HashSet<string> alreadyMatched = new HashSet<string>();

        foreach (string keyword in keywords)
        {
            string normalizedKeyword = keyword.ToLower().Trim();
            if (normalizedResults.Contains(normalizedKeyword) && alreadyMatched.Add(normalizedKeyword))
            {
                fullMatch.Add(normalizedKeyword);
            }
        }

        return fullMatch;
    }
Esempio n. 5
0
    /// <summary>
    /// Loads the expected keywords of an example article from its file in
    /// "Files/Amit Article Text/Online Keywords" and returns the group selected
    /// by <paramref name="resource"/>.
    /// </summary>
    /// <remarks>
    /// The file is scanned line by line. When a line (lower-cased and trimmed) contains
    /// "ieee", "inspec - controlled", "inspec - non" or "author keywords" (tested in that
    /// order, first hit wins), the line that follows it is lower-cased, trimmed and split
    /// on ',' to become that group's keyword list. The individual entries are not trimmed.
    /// A later section of the same kind replaces an earlier one.
    /// </remarks>
    /// <param name="article">Article whose keyword file is read.</param>
    /// <param name="resource">Keyword group to return.</param>
    /// <returns>
    /// The requested keyword list; for <c>KeywordResources.ALL</c> a new list holding the
    /// IEEE, INSPEC controlled, INSPEC non-controlled and author keywords in that order;
    /// <c>null</c> for any other value.
    /// </returns>
    public IList<string> ExpectedKeywords(ExampleArticles article, KeywordResources resource)
    {
        string keywordsFolder = HttpContext.Current.Server.MapPath(".") + "/Files/Amit Article Text/Online Keywords/";

        // Pick the keyword file; an unknown article leaves the folder path unchanged.
        string fileName = "";
        if (article == ExampleArticles.KnowledgeAndSocialNetworks)
        {
            fileName = "Knowledge and Social Networks.txt";
        }
        else if (article == ExampleArticles.NotAllIsGoldThatGlitters)
        {
            fileName = "Not All Is Gold That Glitters .txt";
        }
        else if (article == ExampleArticles.HackersTopologyMatterGeography)
        {
            fileName = "Hackers topology matter geography.txt";
        }

        List<string> ieeeKeywords                = new List<string>();
        List<string> inspecControlledKeywords    = new List<string>();
        List<string> inspecNonControlledKeywords = new List<string>();
        List<string> authorKeywords              = new List<string>();

        string[] lines = File.ReadAllLines(keywordsFolder + fileName);

        // Walk over (header, following line) pairs; the last line can never be a header.
        for (int next = 1; next < lines.Length; next++)
        {
            string header = lines[next - 1].ToLower().Trim();

            if (header.Contains("ieee"))
            {
                ieeeKeywords = lines[next].ToLower().Trim().Split(',').ToList();
            }
            else if (header.Contains("inspec - controlled"))
            {
                inspecControlledKeywords = lines[next].ToLower().Trim().Split(',').ToList();
            }
            else if (header.Contains("inspec - non"))
            {
                inspecNonControlledKeywords = lines[next].ToLower().Trim().Split(',').ToList();
            }
            else if (header.Contains("author keywords"))
            {
                authorKeywords = lines[next].ToLower().Trim().Split(',').ToList();
            }
        }

        if (resource == KeywordResources.IEEE)
        {
            return ieeeKeywords;
        }
        if (resource == KeywordResources.INSPEC_Controlled)
        {
            return inspecControlledKeywords;
        }
        if (resource == KeywordResources.INSPEC_Non_Controlled)
        {
            return inspecNonControlledKeywords;
        }
        if (resource == KeywordResources.Author)
        {
            return authorKeywords;
        }
        if (resource == KeywordResources.ALL)
        {
            // Concatenate the groups in a fixed order into a fresh list.
            return ieeeKeywords
                   .Concat(inspecControlledKeywords)
                   .Concat(inspecNonControlledKeywords)
                   .Concat(authorKeywords)
                   .ToList();
        }

        return null;
    }