コード例 #1
0
    /// <summary>
    /// Gets the 4 and 3 words phrases
    /// </summary>
    /// <param name="article">Which article to extract from</param>
    /// <returns>List of phrases with 3 or 4 words</returns>
    protected List <string> GetLongPhrases(ExampleArticles article)
    {
        string stopListPath = MapPath(".") + "/Files/SmartStoplist.txt";

        // Honour the requested article. The text used to be taken from
        // ExampleArticles.HackersTopologyMatterGeography no matter what was passed in.
        string articleText = new TextCheating().GetArticleText(article);

        // Arguments after the stop list: min char length 1, max words per phrase 4, min frequency 2.
        Rake rake        = new Rake(stopListPath, 1, 4, 2);
        var  resultsDict = rake.Run(articleText);

        List <string> fourWordPhrases  = new List <string>();
        List <string> threeWordPhrases = new List <string>();

        foreach (string phrase in resultsDict.Keys)
        {
            // Words are counted by splitting on single spaces.
            switch (phrase.Split(' ').Length)
            {
                case 4:
                    fourWordPhrases.Add(phrase);
                    break;

                case 3:
                    threeWordPhrases.Add(phrase);
                    break;
            }
        }

        // Four-word phrases come first, followed by the three-word phrases.
        List <string> candidates = new List <string>(fourWordPhrases);
        candidates.AddRange(threeWordPhrases);
        return(candidates);
    }
コード例 #2
0
    /// <summary>
    /// Runs RAKE over the article selected in <c>articleDDL</c> using the parameters
    /// chosen in the form, and writes the top results to <c>lbl_res</c>.
    /// </summary>
    /// <remarks>
    /// The numeric inputs are read with <c>int.Parse</c>, so a non-numeric value
    /// makes this method throw.
    /// </remarks>
    protected void RunRakeWithUserParams()
    {
        string stopListPath    = MapPath(".") + "/Files/SmartStoplist.txt";
        int    minCharLength   = int.Parse(minCharLengthDDL.SelectedValue);
        int    maxWordsLength  = int.Parse(maxWordsLengthDDL.SelectedValue);
        int    minWordsFreq    = int.Parse(minKeywordFreqDDL.SelectedValue);
        int    amountOfResults = int.Parse(topTB.Text);
        Rake   rake            = new Rake(stopListPath, minCharLength, maxWordsLength, minWordsFreq);

        string text    = File.ReadAllText(articleDDL.SelectedValue);
        var    results = rake.Run(text);

        // Ordinal, case-insensitive comparison. The previous ToLower() == "optional" check
        // depended on the thread culture (under tr-TR, "OPTIONAL" lower-cases to "optıonal").
        bool ratingIsOptional = string.Equals(
            minKeywordRatingDDL.SelectedValue,
            "optional",
            System.StringComparison.OrdinalIgnoreCase);

        // NOTE(review): minRating is still parsed, so a malformed value fails here, but it is
        // not applied to the results below. Confirm whether a minimum-score filter was intended.
        int minRating = ratingIsOptional ? 0 : int.Parse(minKeywordRatingDDL.SelectedValue);

        Dictionary <string, double> topResults = GetTopResults2(results, amountOfResults);

        lbl_res.Text = ToLabelString(topResults);
    }
コード例 #3
0
    /// <summary>
    /// Runs RAKE on the first sample text and shows how the extracted keywords
    /// compare with the expected ones.
    /// </summary>
    protected void DisplayResults()
    {
        // Build the extractor and fetch the sample text.
        TextCheating samples   = new TextCheating();
        Rake         extractor = new Rake(MapPath(".") + "/Files/SmartStoplist.txt", 1, 20, 1);

        sample1.Text = samples.Sample1();
        var scoredPhrases = extractor.Run(samples.Sample1());

        // Compare what was extracted against what was expected.
        var extractedPhrases = scoredPhrases.Keys.ToArray();
        var expectedKeywords = samples.ExpectedSample1();
        var matched          = KeywordsFound(extractedPhrases, expectedKeywords);
        var bestScored       = TopResults(scoredPhrases);
        var notFound         = KeywordsMissed(extractedPhrases, expectedKeywords);
        var unexpected       = NewKeywords(bestScored, expectedKeywords);

        // Push each list into its label.
        lbl_expected.Text = ReadyForDisplayLabel(expectedKeywords, "Expected:");
        lbl_newWords.Text = ReadyForDisplayLabel(unexpected, "New Keywords:");
        lbl_match.Text    = ReadyForDisplayLabel(matched, "Matches:");
        lbl_misses.Text   = ReadyForDisplayLabel(notFound, "Missed Keywords:");
    }
コード例 #4
0
ファイル: RakeTests.cs プロジェクト: polytronicgr/Rake
        /// <summary>
        /// Smoke test: runs RAKE over a news-article sample, then runs it again over the
        /// keys of the first result joined with '|'. It only checks that both results are
        /// non-null; no keyword or score is verified.
        /// </summary>
        public void Just_Looking_At_Some_Results()
        {
            var text =
                @"Iraq has launched the long-awaited offensive to expel Islamic State from its second largest city Mosul and Australian personnel and aircraft will certainly be involved in support operations.

            But Defence won't say just what or how.

            'Defence will not discuss specific details for operational security reasons,' a defence spokesman said.

            Defence Minister Marise Payne declined to comment on operational details, saying it would take time and she was awaiting updates.

            She also declined to elaborate on predictions of civilian casualties.

            'I don't think my conjecture on rates of casualties or otherwise would be helpful at this point,' she said.

            Australia has a substantial force in the Middle East, extensively involved in the fight against Islamic State.

            The six F/A-18 Hornets of the RAAF Air Task Group will operate as part of the coalition air contingent, hitting IS targets in the city.

            The RAAF KC-30A refueling aircraft will support the air campaign, as will the E-7A Wedgetail airborne warning and control aircraft.

            Closest to Australian boots on the ground could be the 80-stong special operations task group whose members have advised and mentored Iraq's elite Counter-Terrorism Service.

            This unit, referred to as the Golden Division, played a key role in the fight to retake Ramadi.

            Iraqi infantry trained by the 300 Australians and 100 New Zealanders of Task Group Taji will be in the thick of the fighting.

            Another 30 Australian personnel are embedded in coalition headquarters in Baghdad.

            US Lieutenant General Stephen Townsend, commander of the coalition taskforce, said the operation to regain control of Mosul would likely continue for weeks, possibly longer.

            He said Iraq was supported by a wide range of coalition capabilities, including air support, artillery, intelligence, advisors and forward air controllers.

            'But to be clear, the thousands of ground combat forces who will liberate Mosul are all Iraqis,' he said in a statement.

            'This may prove to be a long and tough battle, but the Iraqis have prepared for it and we will stand by them.'";

            // Only minCharLength and maxWordsLength are passed (by name); any other
            // constructor parameters are left at whatever defaults Rake declares.
            var rake = new Rake(minCharLength: 4, maxWordsLength: 12);

            var result = rake.Run(text);

            Assert.IsNotNull(result);

            // Second pass: feed the extracted key phrases back in as one '|'-separated string.
            var result2 = rake.Run(string.Join("|", result.Select(pair => pair.Key)));

            Assert.IsNotNull(result2);
        }
コード例 #5
0
ファイル: RakeTests.cs プロジェクト: polytronicgr/Rake
        /// <summary>
        /// Checks RAKE against the reference output published at
        /// https://www.airpair.com/nlp/keyword-extraction-tutorial : the five best-ranked
        /// phrases must appear in the published order, and "upper bounds" must score 4.0.
        /// </summary>
        public void Rake_Sort_Of_Works()
        {
            const string text = @"Compatibility of systems of linear constraints over the set of natural numbers. 
Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered. 
Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating sets of solutions for all types of systems are given. 
These criteria and the corresponding algorithms for constructing a minimal supporting set of solutions can be used in solving all the considered types of systems and systems of mixed types.";

            var extractor = new Rake();
            var scored    = extractor.Run(text);

            Assert.IsNotNull(scored);

            // Published reference output:
            //
            //   Keyword:  minimal generating sets , score:  8.66666666667
            //   Keyword:  linear diophantine equations , score:  8.5
            //   Keyword:  minimal supporting set , score:  7.66666666667
            //   Keyword:  minimal set , score:  4.66666666667
            //   Keyword:  linear constraints , score:  4.5
            //   Keyword:  upper bounds , score:  4.0
            //   Keyword:  natural numbers , score:  4.0
            //   Keyword:  nonstrict inequations , score:  4.0

            // Only the first five entries are compared by position.
            string[] expectedOrder =
            {
                "minimal generating sets",
                "linear diophantine equations",
                "minimal supporting set",
                "minimal set",
                "linear constraints",
            };

            for (int rank = 0; rank < expectedOrder.Length; rank++)
            {
                Assert.AreEqual(expectedOrder[rank], scored.ElementAt(rank).Key);
            }

            // The next three ("upper bounds", "natural numbers", "nonstrict inequations") are
            // scored the same and come back in a slightly different order, so their positions
            // are not asserted. The score itself is still checked.
            Assert.AreEqual(4.0, scored["upper bounds"]);
        }
コード例 #6
0
    /// <summary>
    /// Runs RAKE over the selected article once for every combination of
    /// maxWordsLength (1-3), minCharLength (1-4) and minWordsFreq (1-5), and adds one
    /// <c>RAKETest</c> per run, built from <c>GetTopResults2(results, 30)</c>, to a
    /// local collector.
    /// </summary>
    protected void BigRakeTestAttempt()
    {
        string   stopListPath = MapPath(".") + "/Files/SmartStoplist.txt";
        string   articleTitle = articleDDL.Items[articleDDL.SelectedIndex].ToString();
        string   text         = File.ReadAllText(articleDDL.SelectedValue);
        RAKETest collector    = new RAKETest();

        // Kept only so the runs can be inspected; nothing below reads it.
        List <Dictionary <string, double> > everyRun = new List <Dictionary <string, double> >();
        int nextId = 1;

        for (int maxWordsLength = 1; maxWordsLength <= 3; maxWordsLength++)
        {
            for (int minCharLength = 1; minCharLength <= 4; minCharLength++)
            {
                for (int minWordsFreq = 1; minWordsFreq <= 5; minWordsFreq++)
                {
                    Rake rake     = new Rake(stopListPath, minCharLength, maxWordsLength, minWordsFreq);
                    var  topOfRun = GetTopResults2(rake.Run(text), 30);
                    everyRun.Add(topOfRun);

                    collector.AddTest(new RAKETest(topOfRun, nextId, minCharLength, maxWordsLength, minWordsFreq, articleTitle));
                    nextId++;
                }
            }
        }
    }
コード例 #7
0
    /// <summary>
    /// Extracts the single-word keywords that RAKE finds in an article.
    /// </summary>
    /// <param name="article">The article whose text is analysed.</param>
    /// <returns>
    /// Every RAKE key phrase that contains no space, in the order in which the
    /// RAKE result enumerates its keys.
    /// </returns>
    protected List <string> GetOneWordKeywords(ExampleArticles article)
    {
        // Tuning notes carried over from the original author:
        int minCharLength = 1; // changing this value gave bad results
        int maxWordsCount = 2; // only single words are kept below; how this affects scoring is an open question
        int minWordFreq   = 7; // with 3, most of the two-word phrases found by scholar were missed

        string stopListPath = MapPath(".") + "/Files/SmartStoplist.txt";

        // Honour the requested article. The text used to be taken from
        // ExampleArticles.HackersTopologyMatterGeography no matter what was passed in.
        string articleText = new TextCheating().GetArticleText(article);

        Rake rake        = new Rake(stopListPath, minCharLength, maxWordsCount, minWordFreq);
        var  resultsDict = rake.Run(articleText);

        List <string> oneWordList = new List <string>();

        foreach (string phrase in resultsDict.Keys)
        {
            // A phrase without a space is exactly one word (same test as Split(' ').Length == 1).
            if (phrase.IndexOf(' ') < 0)
            {
                oneWordList.Add(phrase);
            }
        }
        return(oneWordList);
    }
コード例 #8
0
    /// <summary>
    /// Runs RAKE over the text entered in <c>textTB</c> and lists every phrase that
    /// scores at least 7 in <c>lbl_results</c>, as numbered entries separated by
    /// <c>&lt;br&gt;</c>.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    protected void submitBTN_Click(object sender, EventArgs e)
    {
        string stopListPath = MapPath(".") + "/Files/SmartStoplist.txt";
        Rake   rake         = new Rake(stopListPath, 1, 4, 1);
        var    results      = rake.Run(textTB.Text);

        string res     = "";
        int    counter = 1;

        foreach (var phrase in results.Keys)
        {
            var score = results[phrase];
            if (score < 7)
            {
                continue;
            }

            // Put the separator before every entry except the first. The old check against
            // the last dictionary key left a trailing "<br>" when that key scored below 7.
            if (res.Length > 0)
            {
                res += "<br>";
            }

            // The phrase comes from user input and the label renders raw HTML, so encode it.
            res += (counter++) + ") " + Server.HtmlEncode(phrase) + ", " + score.ToString();
        }

        lbl_results.Text = res;
    }
コード例 #9
0
    /// <summary>
    /// Runs RAKE over an article and reports which of its expected keywords RAKE
    /// reproduced exactly, ignoring case and surrounding whitespace.
    /// </summary>
    /// <param name="article">Article whose text and expected keywords are used.</param>
    /// <param name="resource">Passed to <c>TextCheating.ExpectedKeywords</c> to select the expected keywords.</param>
    /// <param name="minCharLength">Forwarded to the <c>Rake</c> constructor.</param>
    /// <param name="maxWordsLength">Forwarded to the <c>Rake</c> constructor.</param>
    /// <param name="minWordFreq">Forwarded to the <c>Rake</c> constructor.</param>
    /// <returns>
    /// The matched keywords, lower-cased and trimmed, without duplicates, in the order
    /// of the expected-keyword list.
    /// </returns>
    protected IList <string> CompareRake(ExampleArticles article, KeywordResources resource = KeywordResources.ALL, int minCharLength = 1, int maxWordsLength = 5, double minWordFreq = 1)
    {
        string       stopListPath = MapPath(".") + "/Files/SmartStoplist.txt";
        TextCheating txtCheats    = new TextCheating();

        // Use the caller's article and resource. Both used to be ignored in favour of
        // ExampleArticles.HackersTopologyMatterGeography and KeywordResources.ALL.
        IList <string> keywords    = txtCheats.ExpectedKeywords(article, resource);
        Rake           rake        = new Rake(stopListPath, minCharLength, maxWordsLength, minWordFreq);
        var            resultsDict = rake.Run(txtCheats.GetArticleText(article));

        // Normalise the extracted phrases once so each keyword needs a single lookup.
        HashSet <string> normalizedResults = new HashSet <string>();
        foreach (string phrase in resultsDict.Keys)
        {
            normalizedResults.Add(phrase.ToLower().Trim());
        }

        List <string>    fullMatch      = new List <string>();
        HashSet <string> alreadyMatched = new HashSet <string>();

        foreach (string keyword in keywords)
        {
            string normalizedKeyword = keyword.ToLower().Trim();

            // HashSet.Add returns false for a repeat, which keeps the output free of duplicates.
            if (normalizedResults.Contains(normalizedKeyword) && alreadyMatched.Add(normalizedKeyword))
            {
                fullMatch.Add(normalizedKeyword);
            }
        }

        // The former score-collection loop looked up resultsDict with the normalised keyword
        // and threw KeyNotFoundException when the extracted phrase differed in case or
        // padding. Its output was never used, so it has been removed.
        return(fullMatch);
    }
コード例 #10
0
        /// <summary>
        /// Runs RAKE over the string that <c>BuildBigString</c> produces from <c>GetList()</c>.
        /// </summary>
        /// <param name="length">Passed as the third argument to the <c>Rake</c> constructor.</param>
        /// <returns>The dictionary returned by <c>Rake.Run</c>.</returns>
        public Dictionary <string, double> GetRakeKeywords(int length)
        {
            string stopListFile = ToolInfo.root + "\\SMARTstopset.txt";
            Rake   extractor    = new Rake(stopListFile, 3, length, 3);
            var    corpus       = this.BuildBigString(this.GetList());

            return(extractor.Run(corpus));
        }