/// <summary>
/// Scans the résumé rows for the candidate's name and hands the first usable
/// hit to <c>MakeCandidateName</c>.
/// </summary>
/// <remarks>
/// Two patterns are tried per row, in this order:
/// 1. <c>FirstLineContainingNameRegex</c> - the matched text is accepted only if,
///    after stripping everything but letters, hyphens and spaces, it yields at
///    least one token and none of the tokens is listed by
///    <c>ResumeFilterHelper.GetPhrasesNotNames()</c>. A row that matches this
///    pattern but is rejected is skipped entirely (the second pattern is NOT
///    tried on it).
/// 2. <c>CandidateNameRegex</c> - capture group 1 is used as the name.
/// Scanning stops at the first accepted name.
/// </remarks>
/// <param name="candidate">Candidate passed through to <c>MakeCandidateName</c>.</param>
/// <param name="dataRows">Text rows to scan, in order.</param>
/// <param name="regexCompiler">Compiles the two name patterns.</param>
public void Validate(Candidate candidate, List<string> dataRows, IRegexCompiler regexCompiler)
{
    var headerLinePattern = regexCompiler.Compile(RegexOptionHelper.FirstLineContainingNameRegex);
    var labelledNamePattern = regexCompiler.Compile(RegexOptionHelper.CandidateNameRegex);
    var nonNamePhrases = ResumeFilterHelper.GetPhrasesNotNames();
    var tokenSeparators = new[] { ' ', '\'', '-' };

    foreach (var line in dataRows)
    {
        if (!headerLinePattern.IsMatch(line))
        {
            // Not a "first line" candidate: fall back to the labelled-name pattern.
            if (!labelledNamePattern.IsMatch(line))
            {
                continue;
            }

            var labelledName = labelledNamePattern.Match(line).Groups[1].Value;
            MakeCandidateName(candidate, labelledName.Trim());
            return;
        }

        var headerText = headerLinePattern.Match(line).Value;
        var lettersOnly = Regex.Replace(headerText, @"[^a-z- ]", string.Empty, RegexOptions.IgnoreCase);
        var tokens = lettersOnly.Split(tokenSeparators, StringSplitOptions.RemoveEmptyEntries);

        // Accept only when there is something left and no token is a known non-name phrase.
        var looksLikeName = tokens.Length > 0
            && tokens.All(token => !nonNamePhrases.Contains(token, new CaseInsensitiveStringComparer()));

        if (looksLikeName)
        {
            MakeCandidateName(candidate, headerText.Trim(), true);
            return;
        }
    }
}
/// <summary>
/// Finds the first row that matches <c>PhoneNumberRegex</c> and stores a
/// normalised phone number on the candidate.
/// </summary>
/// <remarks>
/// Normalisation is attempted in two steps:
/// 1. The whole row is rewritten so that its last run of ten digits (and
///    everything before it) becomes "0" followed by those ten digits. If the
///    result is exactly 11 characters long it is used as-is.
/// 2. Otherwise the regex match is reduced to digits, a leading "2340" becomes
///    "0", a leading "234" is removed, and a 10-digit result gets a "0" prefix.
/// The method stops after the first row that produces a number.
/// </remarks>
/// <param name="candidate">Receives <c>PhoneNumber</c>.</param>
/// <param name="dataRows">Text rows to scan, in order.</param>
/// <param name="regexCompiler">Compiles the phone-number pattern.</param>
public void Validate(Candidate candidate, List<string> dataRows, IRegexCompiler regexCompiler)
{
    // An earlier revision carried this inline pattern (kept for reference only;
    // whether RegexOptionHelper.PhoneNumberRegex is the same expression is not visible here):
    //   @"\+?234[ ]*[7-9]\d{9}|0[7-9]\d{9}"
    var phonePattern = regexCompiler.Compile(RegexOptionHelper.PhoneNumberRegex);

    foreach (var line in dataRows)
    {
        if (!phonePattern.IsMatch(line))
        {
            continue;
        }

        // Step 1: collapse the row onto "0" + its trailing ten digits.
        var localForm = Regex.Replace(line, @"[\S ]*(\d{10})", "0$1");
        if (!string.IsNullOrEmpty(localForm) && localForm.Length == 11)
        {
            candidate.PhoneNumber = localForm;
            return;
        }

        // Step 2: work from the matched text only.
        var matchedText = phonePattern.Match(line).Value;
        var digits = Regex.Replace(matchedText, @"[^0-9]", string.Empty);
        if (string.IsNullOrEmpty(digits))
        {
            continue;
        }

        digits = Regex.Replace(digits, @"^(2340)", "0");
        digits = Regex.Replace(digits, @"^(234)", string.Empty);

        candidate.PhoneNumber = digits.Length == 10 ? "0" + digits : digits;
        return;
    }
}
/// <summary>
/// Determines the candidate's state of origin from the résumé rows and stores
/// it in <c>candidate.StateOfOrigin</c>.
/// </summary>
/// <remarks>
/// Rows are scanned in order. For each row:
/// 1. If it matches <c>StatOfOriginRegex</c> and none of its tokens is a stop
///    word (per <c>ResumeFilterHelper.GetStopWords()</c>), the first token that
///    is a known state is resolved to its canonical name.
/// 2. Otherwise, if the lower-cased row contains "origin" or "nationality",
///    the same lookup is done without the stop-word filter.
/// If no row yields a state, the first known state whose lower-cased name
/// appears as a space-separated word anywhere in the rows is used.
/// The candidate is left untouched when nothing is found.
/// </remarks>
/// <param name="candidate">Receives <c>StateOfOrigin</c>.</param>
/// <param name="dataRows">Text rows to scan, in order.</param>
/// <param name="regexCompiler">Compiles the state-of-origin pattern.</param>
public void Validate(Candidate candidate, List<string> dataRows, IRegexCompiler regexCompiler)
{
    var states = ResumeFilterHelper.GetStates();
    var stateOfOriginRegex = regexCompiler.Compile(RegexOptionHelper.StatOfOriginRegex);
    var wordsToIgnore = ResumeFilterHelper.GetStopWords();

    // One comparer for the whole scan instead of one per token.
    // NOTE(review): assumes StateComparer holds no per-call state - confirm.
    var comparer = new StateComparer();

    foreach (var dataRow in dataRows)
    {
        // Invariant casing so the result does not depend on the thread culture.
        var normalizedRow = dataRow.Trim().ToLowerInvariant();

        if (stateOfOriginRegex.IsMatch(dataRow))
        {
            var tokens = TokenizeStateRow(normalizedRow);
            if (!tokens.Any(token => wordsToIgnore.Contains(token, comparer)))
            {
                var stateFromPattern = FindStateForTokens(states, tokens, comparer);
                if (stateFromPattern != null)
                {
                    candidate.StateOfOrigin = stateFromPattern;
                    return;
                }
            }
        }

        // The original condition was
        //   origin || (state && origin) || nationality
        // whose middle term is implied by the first, so it is omitted here.
        if (normalizedRow.Contains("origin") || normalizedRow.Contains("nationality"))
        {
            var stateFromKeyword = FindStateForTokens(states, TokenizeStateRow(normalizedRow), comparer);
            if (stateFromKeyword != null)
            {
                candidate.StateOfOrigin = stateFromKeyword;
                return;
            }
        }
    }

    // Last resort: any known state that appears as a whole word somewhere in the rows.
    var wordsInRows = new HashSet<string>(
        dataRows.SelectMany(row => row.ToLowerInvariant().Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)));

    var candidateState = states.FirstOrDefault(state => wordsInRows.Contains(state.ToLowerInvariant()));
    if (candidateState != null)
    {
        candidate.StateOfOrigin = candidateState;
    }
}

/// <summary>
/// Strips every character except letters, '-', ',', ':' and space from an
/// already lower-cased row and splits the remainder on space, '-' and ','.
/// </summary>
private static string[] TokenizeStateRow(string normalizedRow)
{
    return Regex.Replace(normalizedRow, @"[^a-z-,: ]", string.Empty, RegexOptions.IgnoreCase)
        .Split(new[] { ' ', '-', ',' }, StringSplitOptions.RemoveEmptyEntries);
}

/// <summary>
/// Returns the canonical state for the first token recognised as a state, or
/// <c>null</c> when no token is recognised or no state name contains it.
/// </summary>
private static string FindStateForTokens(IEnumerable<string> states, string[] tokens, IEqualityComparer<string> comparer)
{
    var matchedToken = tokens.FirstOrDefault(token => states.Contains(token, comparer));
    if (string.IsNullOrEmpty(matchedToken))
    {
        return null;
    }

    // The token was selected through the comparer, but the canonical name is
    // looked up by substring. The two tests can disagree, so do not assume a
    // hit (First would throw InvalidOperationException here).
    return states.FirstOrDefault(state => state.ToLowerInvariant().Contains(matchedToken));
}
/// <summary>
/// Stores the first e-mail address found in the rows on the candidate.
/// </summary>
/// <remarks>
/// Rows are tested in order against <c>EmailRegex</c>; the trimmed text of the
/// first match is assigned to <c>candidate.Email</c> and scanning stops. The
/// candidate is left untouched when no row matches.
/// </remarks>
/// <param name="candidate">Receives <c>Email</c>.</param>
/// <param name="dataRows">Text rows to scan, in order.</param>
/// <param name="regexCompiler">Compiles the e-mail pattern.</param>
public void Validate(Candidate candidate, List<string> dataRows, IRegexCompiler regexCompiler)
{
    var addressPattern = regexCompiler.Compile(RegexOptionHelper.EmailRegex);

    foreach (var line in dataRows)
    {
        if (!addressPattern.IsMatch(line))
        {
            continue;
        }

        var address = addressPattern.Match(line).Value;
        candidate.Email = address.Trim();
        return;
    }
}
/// <summary>
/// Derives the candidate's age from the first row that carries a date of
/// birth or an explicit age, and stores it in <c>candidate.Age</c>.
/// </summary>
/// <remarks>
/// Each trimmed row is tested against three patterns, in this order:
/// 1. <c>DobRegex</c> - the digits of capture group 4 (or group 3 when group 4
///    is empty) are passed to <c>CalculateCandidateAge</c>.
/// 2. <c>AgeRegex</c> - the digits of capture group 1 are read as the age.
/// 3. <c>LastResortDobRegex</c> - the digits of the whole match are passed to
///    <c>CalculateCandidateAge</c>.
/// Scanning stops at the first row that yields a value. A row whose age
/// pattern matches but whose digits cannot be read as an integer no longer
/// throws; the last-resort pattern is tried on that row instead.
/// </remarks>
/// <param name="candidate">Receives <c>Age</c>.</param>
/// <param name="dataRows">Text rows to scan, in order.</param>
/// <param name="regexCompiler">Compiles the three patterns.</param>
public void Validate(Candidate candidate, List<string> dataRows, IRegexCompiler regexCompiler)
{
    var dobRegex = regexCompiler.Compile(RegexOptionHelper.DobRegex);
    var ageRegex = regexCompiler.Compile(RegexOptionHelper.AgeRegex);
    var lastResortDobRegex = regexCompiler.Compile(RegexOptionHelper.LastResortDobRegex);

    foreach (var dataRow in dataRows)
    {
        // Test and extract against the SAME text. Previously IsMatch ran on the
        // trimmed row while Match ran on the raw row, which can disagree for
        // anchored patterns and leave the capture groups empty.
        var row = dataRow.Trim();

        // Date-of-birth pattern.
        if (dobRegex.IsMatch(row))
        {
            var dobGroups = dobRegex.Match(row).Groups;
            var year = !string.IsNullOrEmpty(dobGroups[4].Value) ? dobGroups[4].Value : dobGroups[3].Value;
            candidate.Age = CalculateCandidateAge(Regex.Replace(year, @"[^0-9]", string.Empty));
            return;
        }

        // Explicit age pattern.
        if (ageRegex.IsMatch(row))
        {
            var ageDigits = Regex.Replace(ageRegex.Match(row).Groups[1].Value, @"[^0-9]", string.Empty);

            int age;
            if (TryReadAge(ageDigits, out age))
            {
                candidate.Age = age;
                return;
            }
            // Unreadable digits: fall through and let the last-resort pattern try this row.
        }

        // Last-resort date-of-birth pattern.
        if (lastResortDobRegex.IsMatch(row))
        {
            var lastResortMatch = lastResortDobRegex.Match(row);
            candidate.Age = CalculateCandidateAge(Regex.Replace(lastResortMatch.Value, @"[^0-9]", string.Empty));
            return;
        }
    }
}

/// <summary>
/// Converts the digit string captured by the age pattern into an integer
/// without throwing on empty or oversized input.
/// </summary>
/// <returns><c>true</c> and the value when the digits are usable; otherwise <c>false</c>.</returns>
private static bool TryReadAge(string ageDigits, out int age)
{
    if (!int.TryParse(ageDigits, out age))
    {
        // Empty string or a digit run too long for Int32.
        return false;
    }

    if (ageDigits.Length == 2)
    {
        return true;
    }

    // NOTE(review): for values that are not two digits long the original code
    // keeps the number when it exceeds the current two-digit year and otherwise
    // prefixes "20" to the digits. That arithmetic is preserved unchanged; it
    // looks like year expansion rather than an age - confirm the intent.
    var yearInTwoDigits = DateTime.Now.Year % 100;
    if (age > yearInTwoDigits)
    {
        return true;
    }

    return int.TryParse("20" + ageDigits, out age);
}