예제 #1
0
        // action without url expanding
        /// <summary>
        /// Scrapes the single URL passed in <paramref name="state"/>, fills in the statistics
        /// enabled on <c>argParser</c>, prints the result to the console and returns it.
        /// </summary>
        /// <param name="state">The URL to scrape; it is cast directly to <see cref="string"/>.</param>
        /// <returns>The <c>ScrapeResult</c> built for the URL.</returns>
        private static ScrapeResult Action(object state)
        {
            // The timer covers the download, the processing and the printing of the result.
            var timer = Stopwatch.StartNew();

            var url = (string)state;
            var result = new ScrapeResult(url);

            // Download the page, then reduce the HTML to plain text.
            var html = HtmlToText.GetHtml(new Uri(url));
            string pageText = HtmlToText.GetText(html);

            // Each statistic is computed only when requested on the command line.
            if (argParser.CharsNumberNeeded)
            {
                result.NumOfChars = DataProcessingHelper.GetCharsCount(pageText);
            }

            if (argParser.WordsNumberNeeded)
            {
                result.NumOfWordsFound = DataProcessingHelper.GetWordsCount(pageText, argParser.Words);
            }

            if (argParser.SentencesNeeded)
            {
                var matchingSentences = DataProcessingHelper.GetSentences(pageText, argParser.Words);
                result.Sentences = new List<string>(matchingSentences);
            }

            Console.WriteLine(result.ToString());

            timer.Stop();

            // Timing is reported only in verbose mode; the blank separator line is always written.
            if (argParser.Verbose)
            {
                Console.WriteLine("Elapsed {0} ms", timer.ElapsedMilliseconds);
            }

            Console.WriteLine();

            return result;
        }
예제 #2
0
        // action with url expanding
        /// <summary>
        /// Expands the URL passed in <paramref name="state"/> via <c>HtmlToText.ExpandUrls</c>,
        /// scrapes every expanded URL, prints each per-URL result, and returns one result for
        /// the original URL that combines them.
        /// </summary>
        /// <param name="state">The starting URL; it is cast directly to <see cref="string"/>.</param>
        /// <returns>
        /// A <c>ScrapeResult</c> for the original URL whose character and word counts are the sums
        /// over all expanded URLs and whose sentence list is the concatenation of theirs.
        /// </returns>
        private static ScrapeResult ActionUrlExpansion(object state)
        {
            // The timer covers expansion, every download and the final aggregation.
            var timer = Stopwatch.StartNew();

            var rootUrl = (string)state;
            var urlsToScrape = HtmlToText.ExpandUrls(new[] { rootUrl }, argParser.Words);

            // Phase 1: scrape and print each expanded URL, keeping the per-URL results.
            var perUrlResults = new List<ScrapeResult>();
            foreach (var currentUrl in urlsToScrape)
            {
                var current = new ScrapeResult(currentUrl);

                // Download the page, then reduce the HTML to plain text.
                var html = HtmlToText.GetHtml(new Uri(currentUrl));
                string pageText = HtmlToText.GetText(html);

                // Each statistic is computed only when requested on the command line.
                if (argParser.CharsNumberNeeded)
                {
                    current.NumOfChars = DataProcessingHelper.GetCharsCount(pageText);
                }

                if (argParser.WordsNumberNeeded)
                {
                    current.NumOfWordsFound = DataProcessingHelper.GetWordsCount(pageText, argParser.Words);
                }

                if (argParser.SentencesNeeded)
                {
                    var matchingSentences = DataProcessingHelper.GetSentences(pageText, argParser.Words);
                    current.Sentences = new List<string>(matchingSentences);
                }

                perUrlResults.Add(current);

                Console.WriteLine(current.ToString());
            }

            // Phase 2: fold the per-URL results into a single result for the original URL.
            // NOTE(review): relies on a freshly constructed ScrapeResult having a non-null
            // Sentences list -- presumably set by the constructor; confirm.
            var combined = new ScrapeResult(rootUrl);
            foreach (var partial in perUrlResults)
            {
                combined.NumOfChars += partial.NumOfChars;
                combined.NumOfWordsFound += partial.NumOfWordsFound;
                combined.Sentences.AddRange(partial.Sentences);
            }

            timer.Stop();

            // Timing is reported only in verbose mode; the blank separator line is always written.
            if (argParser.Verbose)
            {
                Console.WriteLine("Elapsed {0} ms", timer.ElapsedMilliseconds);
            }

            Console.WriteLine();

            return combined;
        }