/// <summary>
/// Entry point: hands a single, hard-coded page address to
/// <c>EmailExtractor.ExtractEmails</c>.
/// </summary>
private static void Main()
{
    // To target a different page, replace the address between the quotes below.
    // If the program stops working after an edit, undo the change (Ctrl + Z)
    // to get back to the last working version of this file.
    const string staffDirectoryUrl = "http://lgsdlgapk6.ss7.sharpschool.com/our_school/staff_directory";

    EmailExtractor.ExtractEmails(staffDirectoryUrl);
}
/// <summary>
/// Loads the input items from <c>Request.InputFilePath</c> and runs
/// <c>ProcessInputFileItem</c> on each of them, recording the outcome on <c>Response</c>.
/// </summary>
/// <returns>
/// The <c>Response</c> object. When the input file cannot be read, its
/// <c>GeneralException</c> is set to the innermost exception and no item is processed.
/// Otherwise <c>SuccessfulExtractions</c> holds the number of items that completed
/// without throwing, and <c>ExtractionExceptions</c> receives one entry per failed item.
/// </returns>
public ExtractionResponse RunExtraction()
{
    var reader = new InputFileReader(_csvDelimiter);

    List<InputFileItem> items;
    try
    {
        items = reader.GetInputItemCollection(Request.InputFilePath);
    }
    catch (Exception readError)
    {
        // Without the input list there is nothing to iterate over, so report and stop.
        Response.GeneralException = readError.GetInnerMostException();
        return Response;
    }

    // The collaborators are created once and shared by every input item.
    var aggregator = new MarkupAggregator(
        HttpAgent,
        VerboseLogger,
        _hrefRegex,
        _invalidSiteLinkPatterns);
    var extractor = new EmailExtractor(_emailRegex);
    var writer = new OutputFileWriter(_csvDelimiter, Request.OutputDirectory);

    var succeeded = 0;
    foreach (var item in items)
    {
        VerboseLogger.Log($"Extracting from {item.SiteUrl} url...");

        try
        {
            ProcessInputFileItem(item, aggregator, extractor, writer);
            VerboseLogger.Log($"Extraction completed for {item.SiteUrl} url...");
            succeeded++;
        }
        catch (Exception itemError)
        {
            // A failing item must not abort the run: remember which url failed and carry on.
            var failure = new ExtractionException("", itemError)
            {
                InputUrl = item.SiteUrl
            };

            VerboseLogger.Log("Extraction failed. Moving to next url...");

            // The failure list is only created once the first failure occurs.
            Response.ExtractionExceptions =
                Response.ExtractionExceptions ?? new List<ExtractionException>();
            Response.ExtractionExceptions.Add(failure);
        }
    }

    Response.SuccessfulExtractions = succeeded;
    return Response;
}
/// <summary>
/// Runs the pipeline for one input item: aggregates markup starting at the item's
/// site url, extracts emails from that markup, and writes the result out.
/// Progress is reported through <c>VerboseLogger</c> after the first two steps.
/// </summary>
/// <param name="inputItem">The input item whose <c>SiteUrl</c> is processed.</param>
/// <param name="markupAggregator">Collects the markup reachable from the root url.</param>
/// <param name="emailExtractor">Extracts emails from the aggregated markup.</param>
/// <param name="fileWriter">Receives the extraction result.</param>
private void ProcessInputFileItem(
    InputFileItem inputItem,
    MarkupAggregator markupAggregator,
    EmailExtractor emailExtractor,
    OutputFileWriter fileWriter)
{
    var pages = markupAggregator.AggregateMarkupFromRootUrl(inputItem.SiteUrl);
    VerboseLogger.Log($"...collected #{pages.Count} unique pages");

    var result = emailExtractor.ExtractEmailsFromMarkup(inputItem.SiteUrl, pages);
    VerboseLogger.Log(
        $"...identified #{result.Emails.Count} unique emails and has mailto tag = {result.HasMailto}");

    fileWriter.WriteExtractResult(result);
}
/// <summary>
/// Sample usage only: configures the application domain and then calls
/// <c>EmailExtractor.LoopOverResults</c> forever, writing a trace message after each call.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // "REGEX_DEFAULT_MATCH_TIMEOUT" is the application-domain data slot that .NET's Regex
    // class consults for its default match timeout, so this caps regular-expression
    // matching at 10 seconds. It does not limit how long a web request may take.
    AppDomain.CurrentDomain.SetData("REGEX_DEFAULT_MATCH_TIMEOUT", TimeSpan.FromSeconds(10));

    var extractor = new EmailExtractor();

    // Deliberately endless: the loop has no exit condition.
    for (;;)
    {
        extractor.LoopOverResults();

        // NOTE(review): the message implies each LoopOverResults call handles 100 entries;
        // confirm against its implementation.
        Trace.TraceInformation("100 entries checked checkpoint");
    }
}