/// <summary>
/// Downloads the page at <paramref name="gelenUrl"/>, runs the keyword-extraction
/// pipeline over its HTML and then redirects to the <c>Index</c> action of the
/// <c>Asama3</c> controller, forwarding the same URL.
/// </summary>
/// <param name="gelenUrl">Absolute URL of the page to analyse.</param>
/// <returns>A redirect to <c>Asama3/Index</c> with <c>gelenUrl</c> as route value.</returns>
/// <exception cref="ArgumentNullException"><paramref name="gelenUrl"/> is null (thrown by the <see cref="Uri"/> constructor).</exception>
/// <exception cref="UriFormatException"><paramref name="gelenUrl"/> is not a valid absolute URI.</exception>
public ActionResult Index(string gelenUrl)
{
    string url = gelenUrl;
    Uri urlDomain = new Uri(url);

    // Download the page content as HTML. WebClient is IDisposable, so it is
    // released as soon as the download finishes.
    string downloadString;
    using (WebClient client = new WebClient())
    {
        // Decode as UTF-8 when the response does not announce its own charset.
        // This replaces a Default -> UTF-8 re-encoding round trip that garbled
        // non-ASCII characters whenever the text had already been decoded correctly.
        client.Encoding = Encoding.UTF8;
        downloadString = client.DownloadString(url);
    }

    // Load the downloaded HTML into an HtmlDocument for processing.
    var htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(downloadString);

    // Extract the words and the sentence count from the document; the sentence
    // count is needed by the TF-IDF weighting below.
    HtmlIsleyici htmlIsleyici1 = new HtmlIsleyici();
    htmlIsleyici1.htmlIsle(htmlDoc);
    List<string> kelimeler = htmlIsleyici1.kelimeler;
    int cumleSayisi = htmlIsleyici1.cumleSayisi;

    // Clean up / normalise the extracted words (the page URI is passed along).
    KelimeDuzeltici kelimeDuzeltici1 = new KelimeDuzeltici();
    kelimeler = kelimeDuzeltici1.kelimeDuzelt(kelimeler, urlDomain);

    // Word -> frequency list.
    KelimeFrekansYapici kelimeFrekansYapici1 = new KelimeFrekansYapici();
    List<WordAndFreq> kelimeFrekans = kelimeFrekansYapici1.KelimeFrekansYap(kelimeler);

    // Word -> weight list, computed from frequencies, total word count and sentence count.
    AgirlikliKelimeListesi agirlikliKelimeListesi1 = new AgirlikliKelimeListesi();
    List<WordAndWeight> weihtedKelimeler =
        agirlikliKelimeListesi1.AgirlikliListeYap(kelimeFrekans, kelimeler.Count, cumleSayisi);

    // Pick the keywords out of the weighted words.
    AnahtarKelimeBelirleyici anahtarKelimeBelirleyici1 = new AnahtarKelimeBelirleyici();
    List<WordAndFreq> anahtarKelimeler =
        anahtarKelimeBelirleyici1.AnahtarKelimeBelirle(weihtedKelimeler, kelimeFrekans);

    // NOTE(review): the view model is populated but never rendered; rendering
    // it through View(...) was disabled in favour of the redirect below.
    Asama2ViewModel asama2ViewModel = new Asama2ViewModel();
    asama2ViewModel.KeywordListesi = anahtarKelimeler;

    return RedirectToAction("Index", "Asama3", new { gelenUrl = url });
}
/// <summary>
/// Console entry point: wires up the article-comparison pipeline and prints the
/// similarity score for several hard-coded pairs of article URLs, then waits
/// for a key press.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
private static void Main(string[] args)
{
    // Build the pipeline in dependency order.
    var siteCleaners = new ISiteHtmlCleaner[] { new DailyHtmlCleaner(), new MirrorHtmlCleaner() };
    var cleaner = new HtmlCleaner(siteCleaners);
    var loader = new HtmlLoader();
    var provider = new ArticleProvider(cleaner, loader);
    var similarityCalculator = new CosineSimilarityCalculator();
    IDocumentFrequencyProvider frequencies = LoadFrequencies();
    var processor = new ArticleProcessor(new TfIdfCalculator(frequencies), new Tokenizer());
    var comparer = new TextProcessing.ArticleComparer(provider, similarityCalculator, processor);

    // Compares two articles and writes the resulting score on its own line.
    Action<string, string> printSimilarity = (firstUrl, secondUrl) =>
    {
        double score = comparer.Compare(firstUrl, secondUrl);
        Console.WriteLine(score);
    };

    Console.WriteLine("Similar articles:");
    printSimilarity(
        @"http://www.dailymail.co.uk/news/article-2489957/Britains-spy-chiefs-grilled-MPs-television-time.html",
        @"http://www.mirror.co.uk/news/uk-news/mi6-mi5-gchq-bosses-questioned-2685310");
    printSimilarity(
        @"http://www.dailymail.co.uk/news/article-2489640/80-parents-caught-children-copying-p**n-style-dances-offensive-lyrics.html",
        @"http://www.mirror.co.uk/news/uk-news/miley-cyrus-twerking-kids-copying-2685363");

    Console.WriteLine("Same article:");
    printSimilarity(
        @"http://www.dailymail.co.uk/news/article-2490296/You-STILL-likely-lose-job-recession-25s-shop-workers-risk.html",
        @"http://www.dailymail.co.uk/news/article-2490296/You-STILL-likely-lose-job-recession-25s-shop-workers-risk.html");

    Console.WriteLine("Different articles:");
    printSimilarity(
        @"http://www.dailymail.co.uk/femail/article-2489984/Needy-people-likely-cheat.html",
        @"http://www.dailymail.co.uk/news/article-2490531/Worlds-oldest-paperboy-deliver-round-71-years-route.html");
    printSimilarity(
        @"http://www.dailymail.co.uk/news/article-2490412/Wikileaks-journalist-spent-4-months-Edward-Snowden-leaves-Russia.html",
        @"http://www.dailymail.co.uk/news/article-2489994/Twitter-share-prices-soar-firms-day-trading.html");

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
public void CalculateTest()
{
    // Arrange: a stubbed frequency provider reporting a corpus of 10 documents
    // in which every token is found in exactly 1 document.
    var frequencyProvider = MockRepository.GenerateStub<IDocumentFrequencyProvider>();
    frequencyProvider.Stub(p => p.CorpusSize).Return(10);
    frequencyProvider.Stub(p => p.GeDocumentsWithTokenCount(Arg<IToken>.Is.Anything)).Return(1);

    var calculator = new TfIdfCalculator(frequencyProvider);

    // One token that occurs once in the input and one that occurs twice.
    var onceToken = MockRepository.GenerateStub<IToken>();
    onceToken.Stub(t => t.Text).Return("мама");

    var twiceToken = MockRepository.GenerateStub<IToken>();
    twiceToken.Stub(t => t.Text).Return("мыла");

    IToken[] tokens = { onceToken, twiceToken, twiceToken };

    // Act
    Dictionary<IToken, TfIdf> weights = calculator.Calculate(tokens);

    // Assert: one entry per distinct token, each with the expected weight.
    Assert.AreEqual(2, weights.Count);

    bool onceTokenMatches = weights[onceToken].Value.IsAbout(1.279, 0.001);
    Assert.IsTrue(onceTokenMatches);

    bool twiceTokenMatches = weights[twiceToken].Value.IsAbout(1.705, 0.001);
    Assert.IsTrue(twiceTokenMatches);
}
public void CalculateTestNullTokensExc()
{
    // Passes null as the token collection to a calculator backed by a stubbed provider.
    // NOTE(review): presumably an expected-exception attribute outside this view
    // asserts that the call throws — confirm.
    var frequencyProvider = MockRepository.GenerateStub<IDocumentFrequencyProvider>();
    var calculator = new TfIdfCalculator(frequencyProvider);

    calculator.Calculate(null);
}