示例#1
0
        /// <summary>
        /// Entry point: hands a single hard-coded page address to
        /// <c>EmailExtractor.ExtractEmails</c>.
        /// </summary>
        private static void Main()
        {
            // Paste the address of the page to scan between the quotes below.
            // If an edit breaks the program, undo it in your editor (Ctrl+Z) until
            // the code is back in a working state.
            string targetUrl = "http://lgsdlgapk6.ss7.sharpschool.com/our_school/staff_directory";

            EmailExtractor.ExtractEmails(targetUrl);
        }
示例#2
0
        /// <summary>
        /// Loads the input items from <c>Request.InputFilePath</c> and processes each one in turn,
        /// recording the outcome on <c>Response</c>.
        /// </summary>
        /// <returns>
        /// The <c>Response</c> member. When the input file cannot be read, it is returned right away
        /// with <c>GeneralException</c> set and no item is processed. Otherwise
        /// <c>SuccessfulExtractions</c> holds the number of items that completed without throwing,
        /// and each failed item contributes one entry to <c>ExtractionExceptions</c>.
        /// </returns>
        public ExtractionResponse RunExtraction()
        {
            var reader = new InputFileReader(_csvDelimiter);
            List<InputFileItem> sites;

            try
            {
                sites = reader.GetInputItemCollection(Request.InputFilePath);
            }
            catch (Exception readError)
            {
                // Nothing to iterate over without the input list, so report and stop here.
                Response.GeneralException = readError.GetInnerMostException();
                return Response;
            }

            var completed = 0;
            var aggregator = new MarkupAggregator(HttpAgent, VerboseLogger, _hrefRegex, _invalidSiteLinkPatterns);
            var extractor = new EmailExtractor(_emailRegex);
            var writer = new OutputFileWriter(_csvDelimiter, Request.OutputDirectory);

            foreach (var site in sites)
            {
                VerboseLogger.Log($"Extracting from {site.SiteUrl} url...");

                try
                {
                    ProcessInputFileItem(site, aggregator, extractor, writer);
                    VerboseLogger.Log($"Extraction completed for {site.SiteUrl} url...");
                    completed++;
                }
                catch (Exception siteError)
                {
                    // A failure on one item must not abort the run: wrap it together with
                    // the offending URL and carry on with the next item.
                    var wrapped = new ExtractionException("", siteError) { InputUrl = site.SiteUrl };

                    VerboseLogger.Log("Extraction failed. Moving to next url...");

                    // The failure list is created on first use.
                    var failures = Response.ExtractionExceptions ?? new List<ExtractionException>();
                    Response.ExtractionExceptions = failures;
                    Response.ExtractionExceptions.Add(wrapped);
                }
            }

            Response.SuccessfulExtractions = completed;
            return Response;
        }
示例#3
0
        /// <summary>
        /// Processes one input item: aggregates markup starting from its site URL, extracts
        /// emails from that markup, and passes the result to the output writer. Progress is
        /// reported through <c>VerboseLogger</c> after each of the first two steps.
        /// </summary>
        /// <param name="inputItem">Input item whose <c>SiteUrl</c> is processed.</param>
        /// <param name="markupAggregator">Aggregator used to collect markup from the root URL.</param>
        /// <param name="emailExtractor">Extractor applied to the aggregated markup.</param>
        /// <param name="fileWriter">Writer that receives the extraction result.</param>
        private void ProcessInputFileItem(InputFileItem inputItem, MarkupAggregator markupAggregator,
                                          EmailExtractor emailExtractor, OutputFileWriter fileWriter)
        {
            var rootUrl = inputItem.SiteUrl;

            var pages = markupAggregator.AggregateMarkupFromRootUrl(rootUrl);
            VerboseLogger.Log($"...collected #{pages.Count} unique pages");

            var result = emailExtractor.ExtractEmailsFromMarkup(inputItem.SiteUrl, pages);
            var emailCount = result.Emails.Count;
            var hasMailto = result.HasMailto;
            VerboseLogger.Log($"...identified #{emailCount} unique emails " +
                              $"and has mailto tag = {hasMailto}");

            fileWriter.WriteExtractResult(result);
        }
示例#4
0
        /// <summary>
        /// Entry point: configures the application domain's default regex match timeout, then
        /// calls <c>EmailExtractor.LoopOverResults</c> repeatedly. The loop has no exit condition.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // REGEX_DEFAULT_MATCH_TIMEOUT is the AppDomain data slot that supplies the default
            // match timeout for Regex instances created without an explicit one. Ten seconds
            // keeps a pathological pattern/input pair from running indefinitely.
            AppDomain.CurrentDomain.SetData("REGEX_DEFAULT_MATCH_TIMEOUT", TimeSpan.FromSeconds(10));

            // Sample usage only.
            var emailExtractor = new EmailExtractor();

            for (;;)
            {
                emailExtractor.LoopOverResults();

                // NOTE(review): the message implies each pass covers 100 entries - confirm
                // against LoopOverResults.
                Trace.TraceInformation("100 entries checked checkpoint");
            }
        }