Пример #1
0
        /// <summary>
        /// Runs every extraction stage over the resume and returns the shared result object.
        /// </summary>
        /// <remarks>
        /// Any exception thrown by a stage is logged and swallowed, so the returned object may be
        /// only partially populated. Stages that follow the failing one are not executed.
        /// </remarks>
        /// <returns>The <c>resumedata</c> instance that the stages fill in.</returns>
        public ResumeData Parse()
        {
            try
            {
                // pre-process
                preProcess();
                // build education info
                extractEducationExperience();
                // build basic info
                extractBasicInfo();

                // Backfill school/degree/major from the highest education entry when the
                // basic-info stage did not produce a school.
                if (string.IsNullOrWhiteSpace(resumedata.LatestSchool) && resumedata.EducationExperience.Count > 0)
                {
                    EducationExperienceData highestEdu = EduAnalyzer.getHighestEduExperience(resumedata.EducationExperience);

                    // Guard against a missing result so the remaining stages still run.
                    if (highestEdu != null)
                    {
                        resumedata.LatestSchool = highestEdu.School;
                        resumedata.LatestDegree = highestEdu.Degree;
                        resumedata.LatestMajor  = highestEdu.Major;
                    }
                }

                // build work info
                extractWorkExperience();

                // Derive the number of working years from the earliest work entry. The start time
                // is free-form text, so parse it defensively: a blank or non-numeric year must not
                // throw here, because that would skip every stage below.
                WorkExperienceData earliestWork = WorkAnalyzer.getearliestWorkExperience(resumedata.WorkExperience);
                if (earliestWork != null && !string.IsNullOrWhiteSpace(earliestWork.StartTime))
                {
                    // The year is the part before the first '-'.
                    string yearText = earliestWork.StartTime.Trim().Split(new char[] { '-' })[0];
                    int    workStartYear;

                    if (int.TryParse(yearText, out workStartYear))
                    {
                        resumedata.WorkYears = DateTime.Now.Year - workStartYear;
                    }
                    // Otherwise WorkYears is left at whatever value it already had.
                }

                // build expected-job info
                extractJobTarget();
                // build self-evaluation info
                extractSelfEvaluation();
                // build language-skill info
                extractLanguageSkill();
            }
            catch (Exception ex)
            {
                LoggerWrapper.Logger.Error("简历解析错误", ex);
            }

            return(resumedata);
        }
Пример #2
0
        /// <summary>
        /// Time-overlap check for education entries: decides whether <paramref name="eduItem"/>
        /// may follow the last entry of <paramref name="eduList"/>.
        /// </summary>
        /// <remarks>
        /// Only the leading year of each time string is compared. A later entry must start (and
        /// end) no earlier than the year in which the previous entry ended.
        /// A null or unparseable start time is treated as year 0. A null or unparseable end time
        /// falls back to the entry's start year, an empty end time is year 0, and "至今"
        /// (meaning "until now") is the current year.
        /// </remarks>
        /// <param name="eduItem">The candidate entry.</param>
        /// <param name="eduList">Entries accepted so far; must contain at least one element.</param>
        /// <returns>
        /// <c>true</c> when the candidate does not overlap the last accepted entry (i.e. it passes
        /// the check); otherwise <c>false</c>.
        /// </returns>
        private bool duplicationCheck(EducationExperienceData eduItem, List <EducationExperienceData> eduList)
        {
            int currentStartData = parseLeadingYear(eduItem.StartTime, 0);
            int currentEndData   = resolveEndYear(eduItem.EndTime, currentStartData);

            // The caller keeps the list in ascending order, so the last element is the latest one.
            EducationExperienceData maxEduData = eduList[eduList.Count - 1];

            int MaxStartData = parseLeadingYear(maxEduData.StartTime, 0);
            // The end time of the *previous* entry is what must be inspected here; the original
            // code null-checked the candidate's end time instead.
            int MaxEndData = resolveEndYear(maxEduData.EndTime, MaxStartData);

            // A later entry must start and end at or after the end year of the previous one.
            return(currentStartData >= MaxEndData && currentEndData >= MaxEndData);
        }

        // Resolves the end year of an entry: "至今" maps to the current year, a missing or
        // unparseable value falls back to the entry's own start year.
        private static int resolveEndYear(string endTime, int startYear)
        {
            if (endTime == null)
            {
                return(startYear);
            }

            if (endTime.Trim() == "至今")
            {
                return(DateTime.Now.Year);
            }

            return(parseLeadingYear(endTime, startYear));
        }

        // Parses the text before the first date separator as an integer year. Blank text yields 0;
        // null or non-numeric text yields the supplied fallback instead of throwing.
        private static int parseLeadingYear(string time, int fallback)
        {
            if (time == null)
            {
                return(fallback);
            }

            string yearText = time.Trim() == "" ? "0" : Regex.Split(time, "年|/|[.]|-|—")[0].Trim();
            int    year;

            return(int.TryParse(yearText, out year) ? year : fallback);
        }
Пример #3
0
        /// <summary>
        /// Extracts education entries from lines <paramref name="start"/> (inclusive) to
        /// <paramref name="end"/> (exclusive) of <c>resumeContentList</c>.
        /// </summary>
        /// <remarks>
        /// Pass 1 walks the lines, starts a new entry at every line containing a verified school
        /// name, and collects the text belonging to each entry (lines joined with " ### ").
        /// Pass 2 extracts degree, major and the start/end time from each entry's text; when an
        /// entry has no time range, the last line of the previous entry's text is tried instead
        /// (covers a time range printed on the line above the school).
        /// Finally the entries are sorted, entries without a time are dropped when there is more
        /// than one entry, and entries that fail <c>duplicationCheck</c> are dropped. If that
        /// leaves nothing, the first sorted entry is returned on its own.
        /// </remarks>
        /// <param name="start">Index of the first line to inspect.</param>
        /// <param name="end">Index one past the last line to inspect.</param>
        /// <returns>The validated education entries; empty when no school was found.</returns>
        public List <EducationExperienceData> extractEducationExperience(int start, int end)
        {
            List <EducationExperienceData> eduExperienceDataList = new List <EducationExperienceData>();
            // Text block of each entry; index-aligned with eduExperienceDataList once a school is found.
            List <string> eduContentList = new List <string>();

            // School name that is not introduced by one of the listed generic phrases.
            // Built once instead of once per line.
            string pattern_school = "(?!于|在|.*在学校|.*是学校|.*的学校|.*毕业学校|.*我学校|.*全国中学|.*所在大学|.*所在高中|.*所在中学|.*所在初中|.*所在学校|.*学历学校|.*就读学校|.*就读大学|.*就读初中|.*就读高中|.*就读中学)([\u4e00-\u9fa5]{2,18}?)(学院|大学|学校|研究生院|中学)\\s*";
            var    schoolRegex    = new Regex(pattern_school);

            // ---- Pass 1: count the entries and gather the text of each one ----
            int    count   = 0;
            string content = "";

            for (int i = start; i < end; i++)
            {
                string line        = resumeContentList[i];
                var    schoolMatch = schoolRegex.Match(line);

                // Match a school name and verify that it is a plausible one.
                if (schoolMatch.Success && verifySchoolName(schoolMatch.Groups[1].Value + schoolMatch.Groups[2].Value))
                {
                    if (count > 0)
                    {
                        // A new school closes the previous entry's text block.
                        eduContentList.Add(content);
                        content = line;
                    }
                    else
                    {
                        // Lines seen before the first school stay with the first entry.
                        content = content + " ### " + line;
                    }
                    count++;

                    EducationExperienceData eduExperienceData = new EducationExperienceData();
                    eduExperienceData.School = schoolMatch.Groups[1].Value + schoolMatch.Groups[2].Value;
                    eduExperienceDataList.Add(eduExperienceData);
                }
                else
                {
                    content = content + " ### " + line;
                }

                // Flush the block that is still open when the last line has been consumed.
                if (i == end - 1)
                {
                    eduContentList.Add(content);
                }
            }

            // ---- Pass 2: analyse each entry's text ----
            if (count > 0)
            {
                // "<start date> <separator> <end date | until-now marker>".
                // Group 1 is the start date; group 16 is everything after it (separator + end date).
                // Compared with the previous revision the year alternative accepts 2000-2099
                // (was 2000-2019) and the month alternative accepts 10-12 (was 10 and 12 only).
                var pattern_time = new Regex("((((19[6789][0-9]|20[0-9][0-9])\\s*(年|/|[.]|-|—|–))(\\s*(1[0-2]|[0]?[123456789])\\s*(月|/|[.]|-|—|–)?)(\\s*(3[01]|[12][0-9]|[0]?[1-9])(\\s*日)?)?)|(19[6789][0-9]|20[0-9][0-9])|([0-9]{2}\\s*年)(\\s*(1[0-2]|[0]?[123456789])\\s*月)?)"
                                             + "\\s*((至\\s*今|现\\s*在|\\s*今)|((\\s|-|—|~|–|~|至|到)+)\\s*"
                                             + "((((19[6789][0-9]|20[0-9][0-9])\\s*(年|/|[.]|-|—|–))(\\s*(1[0-2]|[0]?[123456789])\\s*(月|/|[.]|-|—|–)?)(\\s*(3[01]|[12][0-9]|[0]?[1-9])(\\s*日)?)?)|(19[6789][0-9]|20[0-9][0-9])|([0-9]{2}\\s*年)(\\s*(1[0-2]|[0]?[123456789])\\s*月)?|至\\s*今|现\\s*在|\\s*今))");

                // School pattern used to make sure the recalled line is not itself a school line.
                string pattern_school_str = "(?!于|在|.*在学校|.*是学校|.*的学校|.*我学校|全国中学|所在大学|所在高中|所在中学|所在初中|所在学校|.*学历学校|就读学校|就读大学|就读初中|就读高中|就读中学)([\u4e00-\u9fa5]{2,18}?)(学院|大学|学校|研究生院|中学)\\s*";
                var    recallSchoolRegex  = new Regex(pattern_school_str);

                for (int j = 0; j < eduContentList.Count; j++)
                {
                    string eduContent = eduContentList[j];
                    EducationExperienceData eduData = eduExperienceDataList[j];

                    // Assign degree and major first so that they survive a rejected date below.
                    eduData.Degree = extractDegree(eduContent);
                    eduData.Major  = extractMajor(eduContent);

                    var timeMatch = pattern_time.Match(eduContent);
                    if (timeMatch.Success)
                    {
                        // Reject the time range when any numeric part has an implausible length.
                        if (!hasPlausibleDateParts(timeMatch.Groups[0].Value))
                        {
                            continue;
                        }

                        // Normalise both dates to digit groups joined by '-'. An "until now"
                        // end marker contains no digits and therefore yields "".
                        eduData.StartTime   = joinDigitRuns(timeMatch.Groups[1].Value);
                        eduData.EndTime     = joinDigitRuns(timeMatch.Groups[16].Value);
                        eduData.Completable = true;
                    }
                    else if (j > 0)
                    {
                        // Recall: the time range may sit on the line above the school, which makes
                        // it the last line of the previous entry's text, e.g.
                        //   1995.09 - 1997.03 ### School A ### 1998.09 - 2001.07 ### School B
                        string[] front_edu_content_array = eduContentList[j - 1].Split(new string[] { "###" }, StringSplitOptions.None);

                        string recall_edu_timeStr = front_edu_content_array.Length > 1
                                ? front_edu_content_array[front_edu_content_array.Length - 1] : eduContentList[j];

                        // Only use that line when it does not itself name a school.
                        if (!recallSchoolRegex.Match(recall_edu_timeStr).Success)
                        {
                            var recallMatch = pattern_time.Match(recall_edu_timeStr);
                            if (recallMatch.Success)
                            {
                                eduData.StartTime   = DateTools.dateFormat(recallMatch.Groups[1].Value.Trim());
                                eduData.EndTime     = DateTools.dateFormat(recallMatch.Groups[16].Value.Trim());
                                eduData.Completable = true;
                            }
                        }
                    }
                }
            }

            List <EducationExperienceData> eduExperienceDataList_final = new List <EducationExperienceData>();

            // Re-sort using the element type's default comparison
            // (intended to be ascending by start time — confirm against EducationExperienceData).
            eduExperienceDataList.Sort();

            // ---- Validity filtering ----
            foreach (EducationExperienceData item in eduExperienceDataList)
            {
                // Step 1: with more than one entry, drop entries that have no time information.
                if (eduExperienceDataList.Count > 1 && (!item.Completable))
                {
                    continue;
                }

                if (eduExperienceDataList_final.Count == 0)
                {
                    eduExperienceDataList_final.Add(item);
                    continue;
                }

                // Step 2: keep the entry only when it does not overlap the last accepted one.
                if (duplicationCheck(item, eduExperienceDataList_final))
                {
                    eduExperienceDataList_final.Add(item);
                }
            }

            // Recall: if filtering removed everything (e.g. no entry has a time), keep the first one.
            if (eduExperienceDataList_final.Count < 1 && eduExperienceDataList.Count > 0)
            {
                eduExperienceDataList_final.Add(eduExperienceDataList[0]);
            }

            return(eduExperienceDataList_final);
        }

        // Returns false when any run of digits in the text is 3 characters long or longer than 4,
        // which cannot be a year, month or day.
        private static bool hasPlausibleDateParts(string dateText)
        {
            foreach (string part in Regex.Split(dateText.Trim(), "\\D"))
            {
                int length = part.Trim().Length;
                if (length == 3 || length > 4)
                {
                    return(false);
                }
            }

            return(true);
        }

        // Joins the runs of digits found in the text with '-' (e.g. "2010年9月" becomes "2010-9").
        // Returns "" when the text contains no digits.
        private static string joinDigitRuns(string dateText)
        {
            string joined = "";

            foreach (string part in Regex.Split(dateText.Trim(), "\\D"))
            {
                if (part.Trim() == "")
                {
                    continue;
                }

                joined = joined == "" ? part : joined + "-" + part;
            }

            return(joined);
        }