Example #1
0
        /// <summary>
        /// Runs every extraction step over the resume and returns the populated <c>resumedata</c>.
        /// </summary>
        /// <remarks>
        /// Steps run in a fixed order: pre-processing, education, basic info, work experience,
        /// job target, self-evaluation and language skills. Any exception is logged and
        /// swallowed, so the caller always gets <c>resumedata</c> back in whatever state it
        /// reached before the failure.
        /// </remarks>
        /// <returns>The <c>resumedata</c> instance filled in by the extraction steps.</returns>
        public ResumeData Parse()
        {
            try
            {
                preProcess();
                extractEducationExperience();
                extractBasicInfo();

                // When basic info yielded no school, fall back to the highest education entry.
                if (string.IsNullOrWhiteSpace(resumedata.LatestSchool)
                    && resumedata.EducationExperience != null
                    && resumedata.EducationExperience.Count > 0)
                {
                    EducationExperienceData edu_item =
                        EduAnalyzer.getHighestEduExperience(resumedata.EducationExperience);
                    if (edu_item != null)
                    {
                        resumedata.LatestSchool = edu_item.School;
                        resumedata.LatestDegree = edu_item.Degree;
                        resumedata.LatestMajor  = edu_item.Major;
                    }
                }

                extractWorkExperience();

                // Work years = current year minus the start year of the earliest job.
                WorkExperienceData workData = WorkAnalyzer.getearliestWorkExperience(resumedata.WorkExperience);
                if (workData != null && !string.IsNullOrWhiteSpace(workData.StartTime))
                {
                    // The year is taken as the text before the first '-'. A start time that does
                    // not begin with a number is skipped instead of throwing, so that a single
                    // malformed date cannot abort the remaining extraction steps below.
                    string startYearText = workData.StartTime.Trim().Split(new char[] { '-' })[0];
                    int    workStartYear;
                    if (int.TryParse(startYearText, out workStartYear))
                    {
                        resumedata.WorkYears = DateTime.Now.Year - workStartYear;
                    }
                }

                extractJobTarget();
                extractSelfEvaluation();
                extractLanguageSkill();
            }
            catch (Exception ex)
            {
                // Best effort: log the failure and return whatever has been extracted so far.
                LoggerWrapper.Logger.Error("简历解析错误", ex);
            }
            return(resumedata);
        }
Example #2
0
 /// <summary>
 /// Tells whether the list already holds an entry with the same time span as
 /// <paramref name="currentWorkExp"/>.
 /// </summary>
 /// <param name="workExperienceDataList">Entries collected so far.</param>
 /// <param name="currentWorkExp">Candidate entry whose time span is looked up.</param>
 /// <returns>
 /// <c>true</c> when some entry has both an equal <c>StartTime</c> and an equal
 /// <c>EndTime</c>; otherwise <c>false</c>.
 /// </returns>
 private bool containsWorkExperience(List <WorkExperienceData> workExperienceDataList,
                                     WorkExperienceData currentWorkExp)
 {
     // Only the two time fields are compared; every other field is ignored.
     return(workExperienceDataList.Exists(
                existing => existing.StartTime == currentWorkExp.StartTime &&
                            existing.EndTime == currentWorkExp.EndTime));
 }
Example #3
0
        /// <summary>
        /// Scans the resume lines in <c>[start, end)</c> and returns the first company name that
        /// <c>extractCompany_exact</c> yields, wrapped in a new work-experience entry.
        /// </summary>
        /// <param name="start">Index of the first line of <c>resumeContentList</c> to scan (inclusive).</param>
        /// <param name="end">Index at which scanning stops (exclusive).</param>
        /// <returns>
        /// A list holding one entry with only <c>CompanyName</c> set, or an empty list when no
        /// line produced a non-empty company name.
        /// </returns>
        public List <WorkExperienceData> searchWorkExperience(int start, int end)
        {
            List <WorkExperienceData> found = new List <WorkExperienceData>();

            for (int lineIndex = start; lineIndex < end; lineIndex++)
            {
                string candidate = extractCompany_exact(resumeContentList[lineIndex]);
                if (string.IsNullOrEmpty(candidate))
                {
                    continue;
                }

                // One company is enough for this fallback search, so stop at the first hit.
                found.Add(new WorkExperienceData { CompanyName = candidate });
                break;
            }

            return(found);
        }
Example #4
0
        /// <summary>
        /// Extracts work-experience entries from the resume lines in <c>[start, end)</c>.
        /// </summary>
        /// <remarks>
        /// A line containing a "start date - end date" range opens a new entry. The lines that
        /// follow it, up to the next new range, are joined with "###" and become that entry's
        /// text, from which position and company are extracted. When no date range is found at
        /// all, the method falls back to <c>searchWorkExperience</c>.
        /// </remarks>
        /// <param name="start">Index of the first line of <c>resumeContentList</c> to scan (inclusive).</param>
        /// <param name="end">Index at which scanning stops (exclusive).</param>
        /// <returns>The extracted entries; may be empty.</returns>
        public List <WorkExperienceData> extractWorkExperience(int start, int end)
        {
            List <WorkExperienceData> workExperienceDataList = new List <WorkExperienceData>();
            // workContentList[k] is the "###"-joined text that belongs to workExperienceDataList[k].
            List <string>             workContentList        = new List <string>();

            // Built once per call rather than once per line.
            // Group 1 captures the start date; group 16 captures what follows it (a "to present"
            // marker, or the separator plus the end date).
            // Four-digit years accept 1960-2099: the previous 20[01][0-9] alternation stopped at
            // 2019, so date ranges from 2020 onward were never recognised.
            Regex dateRangePattern = new Regex(
                "((((19[6789][0-9]|20[0-9][0-9])\\s*(年|/|[.]|-|—|–))(\\s*(1[02]|[0]?[123456789])\\s*(月|/|[.]|-|—|–)?)(\\s*(3[01]|[12][0-9]|[0]?[1-9])(\\s*日)?)?)|(19[6789][0-9]|20[0-9][0-9])|([0-9]{2}\\s*年)(\\s*(1[02]|[0]?[123456789])\\s*月)?)"
                + "\\s*((至\\s*今|现\\s*在|\\s*今)|((\\s|-|—|~|–|~|至|到)+)\\s*"
                + "((((19[6789][0-9]|20[0-9][0-9])\\s*(年|/|[.]|-|—|–))(\\s*(1[02]|[0]?[123456789])\\s*(月|/|[.]|-|—|–)?)(\\s*(3[01]|[12][0-9]|[0]?[1-9])(\\s*日)?)?)|(19[6789][0-9]|20[0-9][0-9])|([0-9]{2}\\s*年)(\\s*(1[02]|[0]?[123456789])\\s*月)?|至\\s*今|现\\s*在|\\s*今))");

            // Number of distinct date ranges seen so far, i.e. number of entries opened.
            int    workCount      = 0;
            string subWorkContent = "";

            for (int i = start; i < end; i++)
            {
                string line = resumeContentList[i];

                Match matcher = dateRangePattern.Match(line);
                if (!matcher.Success)
                {
                    // Not a date line: it belongs to the entry currently being collected.
                    subWorkContent = subWorkContent + "###" + line;
                    continue;
                }

                // Sanity-check the matched text: a digit run of exactly 3 or of more than
                // 4 characters is not treated as part of a date.
                string[] items             = Regex.Split(matcher.Groups[0].Value.Trim(), "\\D");
                bool     valid_date_format = true;
                foreach (string item in items)
                {
                    int digitCount = item.Trim().Length;
                    if (digitCount == 3 || digitCount > 4)
                    {
                        valid_date_format = false;
                        break;
                    }
                }
                if (!valid_date_format)
                {
                    // NOTE(review): the rejected line is not appended to subWorkContent, so it
                    // is missing from the entry text. Kept as in the original; confirm intent.
                    continue;
                }

                WorkExperienceData workExperienceData = new WorkExperienceData();
                workExperienceData.StartTime = DateTools.dateFormat(matcher.Groups[1].Value);
                workExperienceData.EndTime   = DateTools.dateFormat(matcher.Groups[16].Value);

                // A time span that was already seen is assumed to belong to the same job, so the
                // line is folded into the current entry instead of opening a new one.
                // Sample (JR126243590R90000000000.pdf): the range "2011.04-至今" appears once on
                // the company line and again on the position line of the same job.
                if (containsWorkExperience(workExperienceDataList, workExperienceData))
                {
                    subWorkContent = subWorkContent + "###" + line;
                    continue;
                }

                workExperienceDataList.Add(workExperienceData);
                if (workCount > 0)
                {
                    // Close the previous entry's text before starting the new one.
                    workContentList.Add(subWorkContent);
                }
                // Text gathered before the first date line is discarded here.
                subWorkContent = line;
                workCount++;
            }

            // Close the text of the last entry.
            workContentList.Add(subWorkContent);

            // No date range anywhere: fall back to the company-name search.
            if (workCount == 0)
            {
                return(searchWorkExperience(start, end));
            }

            for (int j = 0; j < workExperienceDataList.Count; j++)
            {
                string content = workContentList[j];

                string positionTitle = extractPosition(content);
                string companyName   = extractCompany_fuzzy(content);

                workExperienceDataList[j].CompanyName   = companyName;
                workExperienceDataList[j].PositionTitle = positionTitle;
                workExperienceDataList[j].JobDesc       = content;
            }

            return(workExperienceDataList);
        }