/// <summary>
/// Runs every extraction stage in order (pre-processing, education, basic info,
/// work experience, job target, self-evaluation, language skills) and returns the
/// populated <c>resumedata</c>.
/// </summary>
/// <returns>
/// The <c>resumedata</c> instance. If any stage throws, the exception is logged and
/// whatever was filled in up to that point is returned.
/// </returns>
public ResumeData Parse()
{
    try
    {
        // pre-process
        preProcess();

        // building education info
        extractEducationExperience();

        // building basic info
        extractBasicInfo();

        // add highest edu-info into basic info, but only when the basic-info
        // stage did not already find a school
        if (string.IsNullOrWhiteSpace(resumedata.LatestSchool) && resumedata.EducationExperience.Count > 0)
        {
            EducationExperienceData edu_item = EduAnalyzer.getHighestEduExperience(resumedata.EducationExperience);
            if (edu_item != null)
            {
                resumedata.LatestSchool = edu_item.School;
                resumedata.LatestDegree = edu_item.Degree;
                resumedata.LatestMajor = edu_item.Major;
            }
        }

        // build work info
        extractWorkExperience();

        // derive the number of working years from the earliest work entry
        WorkExperienceData workData = WorkAnalyzer.getearliestWorkExperience(resumedata.WorkExperience);
        if (workData != null && !string.IsNullOrWhiteSpace(workData.StartTime))
        {
            // the leading '-'-separated token of StartTime is read as the year
            string yearText = workData.StartTime.Trim().Split('-')[0];
            int workStartYear;

            // TryParse instead of Convert.ToInt32: an unparsable start time must not
            // throw into the outer catch and skip the remaining extraction stages
            if (int.TryParse(yearText, out workStartYear))
            {
                resumedata.WorkYears = DateTime.Now.Year - workStartYear;
            }
        }

        // build expected job info
        extractJobTarget();

        // build self-evaluation info
        extractSelfEvaluation();

        // building language-skill info
        extractLanguageSkill();
    }
    catch (Exception ex)
    {
        LoggerWrapper.Logger.Error("简历解析错误", ex);
    }

    return resumedata;
}
/// <summary>
/// Checks whether the list already holds an entry whose start and end time both
/// equal those of <paramref name="currentWorkExp"/>.
/// </summary>
/// <param name="workExperienceDataList">Entries collected so far.</param>
/// <param name="currentWorkExp">Candidate entry to look for.</param>
/// <returns><c>true</c> if an entry with the same start and end time exists; otherwise <c>false</c>.</returns>
private bool containsWorkExperience(List<WorkExperienceData> workExperienceDataList, WorkExperienceData currentWorkExp)
{
    return workExperienceDataList.Exists(
        existing => existing.StartTime == currentWorkExp.StartTime
                    && existing.EndTime == currentWorkExp.EndTime);
}
/// <summary>
/// Scans resume lines in <c>[start, end)</c> for a company name using
/// <c>extractCompany_exact</c> and stops at the first line that yields one.
/// </summary>
/// <param name="start">Index of the first line to scan (inclusive).</param>
/// <param name="end">Index at which scanning stops (exclusive).</param>
/// <returns>
/// A list holding a single entry with only <c>CompanyName</c> set, or an empty list
/// when no line produced a company name.
/// </returns>
public List<WorkExperienceData> searchWorkExperience(int start, int end)
{
    List<WorkExperienceData> found = new List<WorkExperienceData>();

    // extract the company name
    int cursor = start;
    while (cursor < end)
    {
        string candidateName = extractCompany_exact(resumeContentList[cursor]);
        cursor++;

        if (string.IsNullOrEmpty(candidateName))
        {
            continue;
        }

        // for this global fuzzy search a single company is enough
        found.Add(new WorkExperienceData { CompanyName = candidateName });
        break;
    }

    return found;
}
// Matches a work period: a start date followed by either an "until now" marker or a
// separator plus an end date. Built once here instead of once per scanned line.
// Capture group 1 is the start-date part; capture group 16 is the part that follows it.
// NOTE(review): the four-digit year alternatives only accept 1960-1999 and 2000-2019,
// so periods that start or end in 2020 or later are not matched by them — confirm
// whether the pattern should be widened.
private static readonly Regex workPeriodPattern = new Regex(
    "((((19[6789][0-9]|20[01][0-9])\\s*(年|/|[.]|-|—|–))(\\s*(1[02]|[0]?[123456789])\\s*(月|/|[.]|-|—|–)?)(\\s*(3[01]|[12][0-9]|[0]?[1-9])(\\s*日)?)?)|(19[6789][0-9]|20[01][0-9])|([0-9]{2}\\s*年)(\\s*(1[02]|[0]?[123456789])\\s*月)?)" +
    "\\s*((至\\s*今|现\\s*在|\\s*今)|((\\s|-|—|~|–|~|至|到)+)\\s*" +
    "((((19[6789][0-9]|20[01][0-9])\\s*(年|/|[.]|-|—|–))(\\s*(1[02]|[0]?[123456789])\\s*(月|/|[.]|-|—|–)?)(\\s*(3[01]|[12][0-9]|[0]?[1-9])(\\s*日)?)?)|(19[6789][0-9]|20[01][0-9])|([0-9]{2}\\s*年)(\\s*(1[02]|[0]?[123456789])\\s*月)?|至\\s*今|现\\s*在|\\s*今))");

/// <summary>
/// Extracts work-experience entries from resume lines in <c>[start, end)</c>.
/// Each line containing a work period opens a new entry; the lines up to the next
/// period are joined with "###" and used as that entry's content, from which the
/// position title and company name are extracted and which is stored as the job
/// description. When no period is found at all, falls back to
/// <c>searchWorkExperience</c>.
/// </summary>
/// <param name="start">Index of the first line to scan (inclusive).</param>
/// <param name="end">Index at which scanning stops (exclusive).</param>
/// <returns>The extracted entries; may be empty.</returns>
public List<WorkExperienceData> extractWorkExperience(int start, int end)
{
    List<WorkExperienceData> workExperienceDataList = new List<WorkExperienceData>();
    List<string> workContentList = new List<string>();

    // match line by line; the number of work entries is counted via the period lines
    int workCount = 0;
    string subWorkContent = "";

    for (int i = start; i < end; i++)
    {
        string line = resumeContentList[i];
        Match matcher = workPeriodPattern.Match(line);

        if (!matcher.Success)
        {
            // not a period line: it belongs to the content gathered so far
            subWorkContent = subWorkContent + "###" + line;
            continue;
        }

        // sanity-check the matched text: a digit run of length 3 or longer than 4
        // is not accepted as part of a date
        string[] items = Regex.Split(matcher.Groups[0].Value.Trim(), "\\D");
        bool valid_date_format = true;
        foreach (string item in items)
        {
            int digitCount = item.Trim().Length;
            if (digitCount == 3 || digitCount > 4)
            {
                valid_date_format = false;
                break;
            }
        }

        if (!valid_date_format)
        {
            // the line is skipped entirely (it is not added to any entry's content)
            continue;
        }

        WorkExperienceData workExperienceData = new WorkExperienceData();
        workExperienceData.StartTime = DateTools.dateFormat(matcher.Groups[1].Value);
        workExperienceData.EndTime = DateTools.dateFormat(matcher.Groups[16].Value);

        // Check whether this period was already recorded, assuming one job per period.
        // Example (JR126243590R90000000000.pdf): the same "2011.04 - present" period is
        // printed once on the company line and again on the position line of one job.
        if (containsWorkExperience(workExperienceDataList, workExperienceData))
        {
            subWorkContent = subWorkContent + "###" + line;
            continue;
        }

        workExperienceDataList.Add(workExperienceData);

        // a new entry starts: flush the previous entry's content. Text gathered
        // before the very first period line is discarded.
        if (workCount > 0)
        {
            workContentList.Add(subWorkContent);
        }

        subWorkContent = line;
        workCount++;
    }

    // add the content of the last work entry
    workContentList.Add(subWorkContent);

    // no period found: fall back to a company-name search
    if (workCount == 0)
    {
        return searchWorkExperience(start, end);
    }

    // at least one period found: fill each entry from its gathered content
    for (int j = 0; j < workExperienceDataList.Count; j++)
    {
        string content = workContentList[j];
        string positionTitle = extractPosition(content);
        string companyName = extractCompany_fuzzy(content);

        workExperienceDataList[j].CompanyName = companyName;
        workExperienceDataList[j].PositionTitle = positionTitle;
        workExperienceDataList[j].JobDesc = content;
    }

    return workExperienceDataList;
}