/// <summary>
/// Searches the parsed resume rows for a state of origin and, when one is found,
/// stores it in <c>candidate.StateOfOrigin</c>.
/// </summary>
/// <remarks>
/// Rows are examined in order. A row is only inspected when it matches the compiled
/// state-of-origin regex or contains the word "origin" or "nationality". A row that
/// only matches the regex is additionally rejected when any of its tokens is a stop word.
/// The first inspected row that yields a known state wins and ends the search.
/// If no row yields a state, every space-separated word of every row is compared
/// against the state list as a last resort.
/// </remarks>
/// <param name="candidate">Candidate whose <c>StateOfOrigin</c> is assigned when a state is identified.</param>
/// <param name="dataRows">Text rows to search.</param>
/// <param name="regexCompiler">Used to compile <c>RegexOptionHelper.StatOfOriginRegex</c>.</param>
public void Validate(Candidate candidate, List<string> dataRows, IRegexCompiler regexCompiler)
{
    var states = ResumeFilterHelper.GetStates();
    var stateOfOriginRegex = regexCompiler.Compile(RegexOptionHelper.StatOfOriginRegex);
    var wordsToIgnore = ResumeFilterHelper.GetStopWords();

    // One comparer instance is enough for the whole scan.
    // NOTE(review): assumes StateComparer is stateless - confirm against its definition.
    var stateComparer = new StateComparer();

    foreach (var dataRow in dataRows)
    {
        var matchesStateRegex = stateOfOriginRegex.IsMatch(dataRow);

        // Invariant lower-casing keeps the ASCII-only token filter below independent of the
        // current culture (e.g. a Turkish culture lowers "I" to a dotless i, which the filter strips).
        var normalizedRow = dataRow.Trim().ToLowerInvariant();

        // "state" && "origin" is already implied by the plain "origin" test, so only two
        // keywords need checking.
        var mentionsOrigin = normalizedRow.Contains("origin") || normalizedRow.Contains("nationality");

        if (!matchesStateRegex && !mentionsOrigin)
        {
            continue;
        }

        var tokens = ExtractStateTokens(normalizedRow);

        // A keyword row is trusted as-is; a regex-only row must not contain any stop word.
        var rowIsUsable = mentionsOrigin
            || tokens.All(token => !wordsToIgnore.Contains(token, stateComparer));
        if (!rowIsUsable)
        {
            continue;
        }

        var resolvedState = ResolveStateFromTokens(states, tokens, stateComparer);
        if (resolvedState != null)
        {
            candidate.StateOfOrigin = resolvedState;
            return;
        }
    }

    // Fallback: match whole words from any row against the state names.
    // Rows are split on single spaces only, so a state name that itself contains a space
    // can never match here.
    var wordsInRows = new HashSet<string>(
        dataRows.SelectMany(row => row.ToLowerInvariant()
            .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)));

    var candidateState = states.FirstOrDefault(state => wordsInRows.Contains(state.ToLowerInvariant()));
    if (candidateState != null)
    {
        candidate.StateOfOrigin = candidateState;
    }
}

/// <summary>
/// Strips every character other than letters, '-', ',', ':' and space from an already
/// trimmed, lower-cased row and splits the remainder on space, '-' and ','.
/// </summary>
private static string[] ExtractStateTokens(string normalizedRow)
{
    return Regex.Replace(normalizedRow, @"[^a-z-,: ]", string.Empty, RegexOptions.IgnoreCase)
        .Split(new[] { ' ', '-', ',' }, StringSplitOptions.RemoveEmptyEntries);
}

/// <summary>
/// Returns the first state whose lower-cased name contains the first token accepted by
/// <paramref name="comparer"/>, or <c>null</c> when no token is accepted or no state name
/// contains the accepted token.
/// </summary>
private static string ResolveStateFromTokens(
    IEnumerable<string> states,
    IEnumerable<string> tokens,
    IEqualityComparer<string> comparer)
{
    var matchedToken = tokens.FirstOrDefault(token => states.Contains(token, comparer));
    if (string.IsNullOrEmpty(matchedToken))
    {
        return null;
    }

    // FirstOrDefault rather than First: comparer equality and substring containment are
    // different tests, so a token can pass the first and still fail the second. That case
    // is reported as "no state" instead of throwing InvalidOperationException.
    return states.FirstOrDefault(state => state.ToLowerInvariant().Contains(matchedToken));
}
/// <summary>
/// Processes one batch of unprocessed inbound emails.
/// </summary>
/// <remarks>
/// The batch size is read from the "UnprocessedEmailBatchCount" app setting. For every
/// attachment returned by <c>TryGetValidCvsOrDefault</c>, the rows are extracted and a
/// candidate is built from them. A candidate that passes <c>IsValidCandidate</c> has its
/// sender details validated, is linked to the email and attachment, and is saved; a
/// notification is sent when it still has incomplete details. Each email is flagged as
/// processed once all of its attachments have been handled.
/// </remarks>
public void Execute()
{
    var configuredBatchSize = ConfigurationManager.AppSettings["UnprocessedEmailBatchCount"];
    int batchSize = Convert.ToInt32(configuredBatchSize);

    var pendingEmailSource = _dataRepositoryFactory.Create<IInboundEmailRepository>();
    var pendingEmails = pendingEmailSource.GetUnprocessedInboundEmails(batchSize);

    foreach (var pendingEmail in pendingEmails)
    {
        var cvAttachments = pendingEmail.InboundAttachments.TryGetValidCvsOrDefault(pendingEmail.SenderName);

        foreach (var cvAttachment in cvAttachments)
        {
            var extractor = _documentExtractorFactory.GetExtractor(cvAttachment.FileType);

            var cvPath = cvAttachment.FilePath;
            var stopWords = ResumeFilterHelper.GetStopWords();
            var skipWords = ResumeFilterHelper.GetSkipWords();
            var rows = extractor.GetRows(cvPath, stopWords, skipWords);

            // Nothing could be extracted from this attachment.
            if (rows.Count <= 0)
            {
                continue;
            }

            var builtCandidate = _candidateBuilder.BuildFrom(rows);
            if (!builtCandidate.IsValidCandidate())
            {
                continue;
            }

            ValidateSenderDetails(builtCandidate, rows, pendingEmail.SenderName, pendingEmail.Sender);

            // Link the candidate back to the email and attachment it came from before saving.
            builtCandidate.InboundEmailId = pendingEmail.InboundEmailId;
            builtCandidate.InboundAttachmentId = cvAttachment.InboundAttachmentId;

            var candidateStore = _dataRepositoryFactory.Create<ICandidateRepository>();
            candidateStore.Save(builtCandidate);

            var missingDetails = builtCandidate.GetIncompleteCandidateDetails();
            if (missingDetails.Count > 0)
            {
                SendIncompleteDetailsNotification(builtCandidate, missingDetails);
            }
        }

        // The email is flagged as processed whether or not any attachment produced a candidate.
        pendingEmail.Processed = 1;

        var emailStore = _dataRepositoryFactory.Create<IInboundEmailRepository>();
        emailStore.DetachAndUpdate(pendingEmail.InboundEmailId, pendingEmail);
    }
}