Exemple #1
0
        /// <summary>
        /// Downloads the page at <paramref name="gelenUrl"/>, runs the keyword-extraction
        /// pipeline over its HTML, and then redirects to the <c>Index</c> action of the
        /// <c>Asama3</c> controller, forwarding the same URL.
        /// </summary>
        /// <param name="gelenUrl">Absolute URL of the page to analyse.</param>
        /// <returns>A redirect to <c>Asama3/Index</c> carrying <c>gelenUrl</c>.</returns>
        public ActionResult Index(string gelenUrl)
        {
            string url = gelenUrl;

            // Throws for a null or non-absolute URL before any download is attempted.
            Uri urlDomain = new Uri(url);

            // NOTE(review): the URL is caller-supplied and fetched server-side; consider
            // restricting allowed schemes/hosts if this action is reachable by untrusted users.
            string downloadString;
            using (WebClient client = new WebClient())
            {
                // Decode the response as UTF-8 directly. The previous approach decoded with the
                // client's default encoding and then re-encoded through Encoding.Default, which
                // loses any character that the ANSI code page cannot represent.
                client.Encoding = Encoding.UTF8;
                downloadString  = client.DownloadString(url); // page content is downloaded as HTML text
            }

            var htmlDoc = new HtmlDocument();

            htmlDoc.LoadHtml(downloadString);             // load the downloaded HTML into the document

            // English stop-word list, intended for filtering out words that carry no value.
            // NOTE(review): not used further in this method; kept because the call's side
            // effects are not visible from here.
            var stopWords = StopWords.GetStopWords("en");

            HtmlIsleyici htmlIsleyici1 = new HtmlIsleyici();

            htmlIsleyici1.htmlIsle(htmlDoc);
            List <string> kelimeler   = htmlIsleyici1.kelimeler;
            int           cumleSayisi = htmlIsleyici1.cumleSayisi; // sentence count used by the TF-IDF weighting

            KelimeDuzeltici kelimeDuzeltici1 = new KelimeDuzeltici();

            kelimeler = kelimeDuzeltici1.kelimeDuzelt(kelimeler, urlDomain);

            KelimeFrekansYapici kelimeFrekansYapici1 = new KelimeFrekansYapici();

            List <WordAndFreq> kelimeFrekans = kelimeFrekansYapici1.KelimeFrekansYap(kelimeler);

            // NOTE(review): this calculator instance is never used below; kept because the
            // constructor's side effects are not visible from here.
            TfIdfCalculator agirlikHesap = new TfIdfCalculator();

            AgirlikliKelimeListesi agirlikliKelimeListesi1 = new AgirlikliKelimeListesi();

            List <WordAndWeight> weihtedKelimeler = agirlikliKelimeListesi1.AgirlikliListeYap(kelimeFrekans, kelimeler.Count, cumleSayisi);

            AnahtarKelimeBelirleyici anahtarKelimeBelirleyici1 = new AnahtarKelimeBelirleyici();

            List <WordAndFreq> anahtarKelimeler = anahtarKelimeBelirleyici1.AnahtarKelimeBelirle(weihtedKelimeler, kelimeFrekans);

            // The view model is only consumed by the disabled View(...) return below.
            Asama2ViewModel asama2ViewModel = new Asama2ViewModel();

            asama2ViewModel.KeywordListesi = anahtarKelimeler;

            //return View(asama2ViewModel);
            return(RedirectToAction("Index", "Asama3", new { gelenUrl = url }));
        }
Exemple #2
0
        /// <summary>
        /// Console entry point: wires up the article-comparison pipeline, prints the similarity
        /// score for several pairs of article URLs grouped under a heading, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        private static void Main(string[] args)
        {
            // Build the object graph from the leaves up.
            var siteCleaners = new ISiteHtmlCleaner[] { new DailyHtmlCleaner(), new MirrorHtmlCleaner() };
            var provider     = new ArticleProvider(new HtmlCleaner(siteCleaners), new HtmlLoader());
            var cosine       = new CosineSimilarityCalculator();

            IDocumentFrequencyProvider frequencies = LoadFrequencies();
            var processor = new ArticleProcessor(new TfIdfCalculator(frequencies), new Tokenizer());
            var comparer  = new TextProcessing.ArticleComparer(provider, cosine, processor);

            // Each group has a heading and the URL pairs compared under it, in print order.
            var groups = new[]
            {
                new
                {
                    Heading = "Similar articles:",
                    Pairs   = new[]
                    {
                        new[]
                        {
                            @"http://www.dailymail.co.uk/news/article-2489957/Britains-spy-chiefs-grilled-MPs-television-time.html",
                            @"http://www.mirror.co.uk/news/uk-news/mi6-mi5-gchq-bosses-questioned-2685310"
                        },
                        new[]
                        {
                            @"http://www.dailymail.co.uk/news/article-2489640/80-parents-caught-children-copying-p**n-style-dances-offensive-lyrics.html",
                            @"http://www.mirror.co.uk/news/uk-news/miley-cyrus-twerking-kids-copying-2685363"
                        }
                    }
                },
                new
                {
                    Heading = "Same article:",
                    Pairs   = new[]
                    {
                        new[]
                        {
                            @"http://www.dailymail.co.uk/news/article-2490296/You-STILL-likely-lose-job-recession-25s-shop-workers-risk.html",
                            @"http://www.dailymail.co.uk/news/article-2490296/You-STILL-likely-lose-job-recession-25s-shop-workers-risk.html"
                        }
                    }
                },
                new
                {
                    Heading = "Different articles:",
                    Pairs   = new[]
                    {
                        new[]
                        {
                            @"http://www.dailymail.co.uk/femail/article-2489984/Needy-people-likely-cheat.html",
                            @"http://www.dailymail.co.uk/news/article-2490531/Worlds-oldest-paperboy-deliver-round-71-years-route.html"
                        },
                        new[]
                        {
                            @"http://www.dailymail.co.uk/news/article-2490412/Wikileaks-journalist-spent-4-months-Edward-Snowden-leaves-Russia.html",
                            @"http://www.dailymail.co.uk/news/article-2489994/Twitter-share-prices-soar-firms-day-trading.html"
                        }
                    }
                }
            };

            foreach (var group in groups)
            {
                Console.WriteLine(group.Heading);

                foreach (string[] pair in group.Pairs)
                {
                    double similarity = comparer.Compare(pair[0], pair[1]);
                    Console.WriteLine(similarity);
                }
            }

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
Exemple #3
0
        /// <summary>
        /// Checks that <c>TfIdfCalculator.Calculate</c> returns one entry per distinct token and
        /// the expected weight for a token that occurs once and a token that occurs twice.
        /// </summary>
        public void CalculateTest()
        {
            // Arrange: a stubbed frequency provider reporting a corpus size of 10 and a
            // document count of 1 for any token.
            IDocumentFrequencyProvider frequencies = MockRepository.GenerateStub<IDocumentFrequencyProvider>();
            frequencies.Stub(p => p.CorpusSize).Return(10);
            frequencies.Stub(p => p.GeDocumentsWithTokenCount(Arg<IToken>.Is.Anything)).Return(1);

            // Two stub tokens with distinct texts.
            IToken singleToken = MockRepository.GenerateStub<IToken>();
            singleToken.Stub(t => t.Text).Return("мама");

            IToken repeatedToken = MockRepository.GenerateStub<IToken>();
            repeatedToken.Stub(t => t.Text).Return("мыла");

            var sut = new TfIdfCalculator(frequencies);

            // Act: the second token appears twice in the input.
            Dictionary<IToken, TfIdf> weights = sut.Calculate(new[] { singleToken, repeatedToken, repeatedToken });

            // Assert: one entry per distinct token, values checked via IsAbout
            // (presumably a tolerance comparison — confirm against its definition).
            Assert.AreEqual(2, weights.Count);
            Assert.IsTrue(weights[singleToken].Value.IsAbout(1.279, 0.001));
            Assert.IsTrue(weights[repeatedToken].Value.IsAbout(1.705, 0.001));
        }
Exemple #4
0
        /// <summary>
        /// Invokes <c>TfIdfCalculator.Calculate</c> with a null token sequence.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the method name suggests an exception is expected; the declaration of
        /// that expectation (e.g. an attribute) is not visible here — confirm.
        /// </remarks>
        public void CalculateTestNullTokensExc()
        {
            IDocumentFrequencyProvider frequencies = MockRepository.GenerateStub<IDocumentFrequencyProvider>();
            var sut = new TfIdfCalculator(frequencies);

            sut.Calculate(null);
        }