Esempio n. 1
0
        /// <summary>
        /// Task body used when URL expansion is wanted: scrapes every URL returned by
        /// <c>HtmlToText.ExpandUrls</c> for the given URL and sums the per-URL results.
        /// </summary>
        /// <param name="state">The starting URL; it is cast to <see cref="string"/>.</param>
        /// <returns>
        /// A <c>ScrapeResult</c> created for the starting URL whose character count, word count
        /// and sentences are accumulated from the results of all URLs returned by the expansion.
        /// </returns>
        /// <remarks>
        /// Each per-URL result is printed as soon as it is computed. A statistic is only
        /// calculated when the matching <c>argParser</c> flag is set. The elapsed time is
        /// printed only when <c>argParser.Verbose</c> is set.
        /// </remarks>
        private static ScrapeResult ActionUrlExpansion(object state)
        {
            Stopwatch timer = Stopwatch.StartNew();

            string rootUrl = (string)state;

            var pageUrls = HtmlToText.ExpandUrls(new string[] { rootUrl }, argParser.Words);

            var pageResults = new List<ScrapeResult>();

            foreach (var pageUrl in pageUrls)
            {
                var pageResult = new ScrapeResult(pageUrl);

                // Download the page, then reduce the HTML to plain text.
                var pageUri = new Uri(pageUrl);
                var html = HtmlToText.GetHtml(pageUri);
                string text = HtmlToText.GetText(html);

                if (argParser.CharsNumberNeeded)
                {
                    pageResult.NumOfChars = DataProcessingHelper.GetCharsCount(text);
                }

                if (argParser.WordsNumberNeeded)
                {
                    pageResult.NumOfWordsFound = DataProcessingHelper.GetWordsCount(text, argParser.Words);
                }

                if (argParser.SentencesNeeded)
                {
                    var matchingSentences = DataProcessingHelper.GetSentences(text, argParser.Words);
                    pageResult.Sentences = new List<string>(matchingSentences);
                }

                pageResults.Add(pageResult);

                string report = pageResult.ToString();
                Console.WriteLine(report);
            }

            // Fold the per-page results into a single result reported under the starting URL.
            var combined = new ScrapeResult(rootUrl);
            for (int i = 0; i < pageResults.Count; i++)
            {
                ScrapeResult part = pageResults[i];
                combined.NumOfChars += part.NumOfChars;
                combined.NumOfWordsFound += part.NumOfWordsFound;
                combined.Sentences.AddRange(part.Sentences);
            }

            timer.Stop();

            if (argParser.Verbose)
            {
                Console.WriteLine("Elapsed {0} ms", timer.ElapsedMilliseconds);
            }

            // Blank line separates this URL's output from the next one.
            Console.WriteLine();

            return combined;
        }
Esempio n. 2
0
        /// <summary>
        /// Task body used when no URL expansion is wanted: scrapes exactly the given URL.
        /// </summary>
        /// <param name="state">The URL to scrape; it is cast to <see cref="string"/>.</param>
        /// <returns>
        /// A <c>ScrapeResult</c> for the URL. Each statistic (character count, word count,
        /// sentences) is only calculated when the matching <c>argParser</c> flag is set.
        /// </returns>
        /// <remarks>
        /// The result is printed to the console; the elapsed time is printed only when
        /// <c>argParser.Verbose</c> is set.
        /// </remarks>
        private static ScrapeResult Action(object state)
        {
            Stopwatch timer = Stopwatch.StartNew();

            string pageUrl = (string)state;

            var result = new ScrapeResult(pageUrl);

            // Download the page, then reduce the HTML to plain text.
            var pageUri = new Uri(pageUrl);
            var html = HtmlToText.GetHtml(pageUri);
            string text = HtmlToText.GetText(html);

            if (argParser.CharsNumberNeeded)
            {
                result.NumOfChars = DataProcessingHelper.GetCharsCount(text);
            }

            if (argParser.WordsNumberNeeded)
            {
                result.NumOfWordsFound = DataProcessingHelper.GetWordsCount(text, argParser.Words);
            }

            if (argParser.SentencesNeeded)
            {
                var matchingSentences = DataProcessingHelper.GetSentences(text, argParser.Words);
                result.Sentences = new List<string>(matchingSentences);
            }

            string report = result.ToString();
            Console.WriteLine(report);

            timer.Stop();

            if (argParser.Verbose)
            {
                Console.WriteLine("Elapsed {0} ms", timer.ElapsedMilliseconds);
            }

            // Blank line separates this URL's output from the next one.
            Console.WriteLine();

            return result;
        }
Esempio n. 3
0
        /// <summary>
        /// Program entry point: parses the command-line arguments, starts one scraping task
        /// per URL, waits for all of them, prints the summed totals and, in verbose mode,
        /// the collected errors and the total elapsed time.
        /// </summary>
        /// <param name="args">Command-line arguments handed to <c>ArgumentsParser.Parse</c>.</param>
        static void Main(string[] args)
        {
            Stopwatch sw = Stopwatch.StartNew();

            AppDomain.CurrentDomain.UnhandledException += CurrentDomainOnUnhandledException;

            argParser = new ArgumentsParser();
            argParser.Parse(args);

            var tasks = new Task<ScrapeResult>[argParser.Urls.Length];

            for (int i = 0; i < argParser.Urls.Length; i++)
            {
                var url = argParser.Urls[i];

                tasks[i] = Task.Factory.StartNew((Func<object, ScrapeResult>)ActionUrlExpansion, url);
            }

            var errors = new StringBuilder();

            try
            {
                // WaitAll returns or throws only after every task has finished.
                Task.WaitAll(tasks);
            }
            catch (AggregateException aggregateException)
            {
                foreach (Exception exception in aggregateException.InnerExceptions)
                {
                    errors.AppendFormat(CultureInfo.InvariantCulture,
                        "\n-------------------------------------------------\n{0}{1}",
                        exception, Environment.NewLine);
                }
            }

            var totals = new ScrapeResult("TOTALS\r\n======");
            foreach (var task in tasks)
            {
                // Reading Result on a faulted or canceled task rethrows its exception, which
                // would abort the program before totals and errors are printed. Failures are
                // already recorded in `errors`, so only successful tasks contribute to totals.
                if (task.Status != TaskStatus.RanToCompletion)
                {
                    continue;
                }

                ScrapeResult result = task.Result;
                totals.NumOfChars += result.NumOfChars;
                totals.NumOfWordsFound += result.NumOfWordsFound;
                totals.Sentences.AddRange(result.Sentences);
            }
            Console.WriteLine();
            Console.WriteLine(totals);

            if (argParser.Verbose && errors.Length > 0)
            {
                Console.WriteLine("Errors occured during procession: ");
                Console.WriteLine(errors.ToString());
            }

            sw.Stop();
            if (argParser.Verbose)
            {
                Console.WriteLine("Total elapsed {0} ms", sw.ElapsedMilliseconds);
            }

            Console.WriteLine("Press any key for exit");
            Console.ReadKey(true);
        }