/// <summary>
/// Reports whether a resume with the given number is already known to either database.
/// </summary>
/// <param name="resumeNumber">Resume number to look for.</param>
/// <param name="refreshTime">
/// Value written to <c>RefreshTime</c> when a matching <c>ZhaopinResume</c> row is found.
/// NOTE(review): the value is stored as-is, so omitting the argument writes <c>null</c> over
/// whatever was stored before — confirm that this is intended.
/// </param>
/// <returns>
/// <c>true</c> when the number is found in <c>ZhaopinWatchedResume</c>, in <c>ZhaopinResume</c>,
/// or as the prefix of a "ResumeNumber"/"ZHAOPIN" entry in <c>CoreResumeReferenceMapping</c>.
/// </returns>
public bool QueryResumeIsExists(string resumeNumber, DateTime? refreshTime = null)
{
    using (var xssContext = new MangningXssDBEntities())
    {
        // 1) Already recorded as a watched resume: nothing to update.
        if (xssContext.ZhaopinWatchedResume.AsNoTracking().Any(w => w.ResumeNumber == resumeNumber))
        {
            return true;
        }

        // 2) Known resume row: stamp the refresh time and report it as existing.
        var storedResume = xssContext.ZhaopinResume.FirstOrDefault(r => r.RandomNumber == resumeNumber);
        if (storedResume != null)
        {
            storedResume.RefreshTime = refreshTime;
            xssContext.SaveChanges();
            return true;
        }
    }

    // 3) Fall back to the reference mapping table, matching the number as a prefix.
    using (var bdcContext = new BadoucaiAliyunDBEntities())
    {
        var mappings = bdcContext.CoreResumeReferenceMapping.AsNoTracking();
        return mappings.Any(m => m.Key == "ResumeNumber"
                                 && m.Source == "ZHAOPIN"
                                 && m.Value.StartsWith(resumeNumber));
    }
}
/// <summary>
/// Searches <c>Core_Resume_Summary</c> for resumes that match the partial contact details in
/// <paramref name="model"/> and returns up to five candidates.
/// </summary>
/// <param name="model">
/// Dynamic request object. This method reads <c>CellphonePart</c> (compared with characters 8-11
/// of the stored cellphone) and the optional filters <c>Birthday</c>, <c>Degree</c>,
/// <c>Gender</c> and <c>CurrentResidence</c>.
/// </param>
/// <returns>
/// A result wrapping at most five matching summaries. A <c>Birthday</c> that cannot be read as a
/// year-month-day date yields an empty result instead of an exception.
/// </returns>
public DataResult<dynamic> TryGetContactInfo(dynamic model)
{
    var data = new List<CoreResumeSummary>();

    // CellphonePart is caller-supplied text, so it is bound as a query parameter (@p0) rather
    // than spliced into the SQL string. Formatting goes through string.Format exactly as the
    // former interpolation did.
    string cellphonePart = string.Format("{0}", (object)model.CellphonePart);

    var sql = new StringBuilder();
    sql.Append("SELECT * FROM \"Core_Resume_Summary\" WHERE substring(\"Cellphone\"::VARCHAR(32),8,4) = @p0 AND \"UpdateTime\" > '1970-01-01 08:00:00' ");

    var birthdayText = (string)model.Birthday;
    if (!string.IsNullOrEmpty(birthdayText))
    {
        // Only year and month are compared: everything after the last '-' is replaced by day 01.
        // The text is parsed and re-formatted so that nothing but digits and '-' can reach the
        // SQL literal; it stays a literal so the database keeps inferring its type from the column.
        var lastDash = birthdayText.LastIndexOf("-", StringComparison.Ordinal);
        DateTime birthMonth;
        if (lastDash < 0 ||
            !DateTime.TryParseExact(
                birthdayText.Remove(lastDash) + "-01",
                "yyyy-M-d",
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.AllowWhiteSpaces,
                out birthMonth))
        {
            // A malformed birthday cannot match any row; report "no candidates".
            return new DataResult<dynamic>(data.Take(5));
        }

        var birthMonthLiteral = birthMonth.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);
        sql.Append($"AND \"Birthday\" = '{birthMonthLiteral}' ");
    }

    if (!string.IsNullOrEmpty((string)model.Degree))
    {
        // The caller only selects the dictionary key; the text placed in the SQL is the
        // dictionary's own value. An unknown key still throws KeyNotFoundException as before.
        sql.Append($"AND \"Degree\" = '{degreeDic[(string)model.Degree]}' ");
    }

    if (!string.IsNullOrEmpty((string)model.Gender))
    {
        var gender = model.Gender == "男" ? "M" : "F";
        sql.Append($"AND \"Gender\" = '{gender}' ");
    }

    sql.Append(";");

    List<CoreResumeSummary> candidates;
    using (var db = new BadoucaiAliyunDBEntities())
    {
        candidates = db.Database.SqlQuery<CoreResumeSummary>(sql.ToString(), cellphonePart).ToList();
    }

    var residenceFilter = (string)model.CurrentResidence;
    using (var aif = new AIFDBEntities())
    {
        foreach (var item in candidates)
        {
            var address = aif.BaseAreaBDC.AsNoTracking().FirstOrDefault(f => f.Id == item.CurrentResidence);
            if (address == null)
            {
                continue;
            }

            // Without a residence filter every candidate with a resolvable area is accepted;
            // otherwise the filter text must contain the area name.
            if (string.IsNullOrEmpty(residenceFilter) || residenceFilter.Contains(address.Name))
            {
                data.Add(item);
                if (data.Count == 5)
                {
                    // Only the first five matches are returned, so further lookups are wasted work.
                    break;
                }
            }
        }
    }

    return new DataResult<dynamic>(data.Take(5));
}
/// <summary>
/// Pages through every row of <c>CoreResumeSummary</c> (ordered by Id, 1000 rows per page) and
/// enqueues each page on <c>resumeQueue</c>. Returns once an empty page is read.
/// </summary>
private static void GetOldResumes()
{
    const int pageSize = 1000;

    using (var db = new BadoucaiAliyunDBEntities())
    {
        var pageIndex = 0;
        while (true)
        {
            // Back-pressure: do not read further pages while more than eight are still queued.
            if (resumeQueue.Count > 8)
            {
                Thread.Sleep(1000);
                continue;
            }

            try
            {
                var resumeList = db.CoreResumeSummary
                    .AsNoTracking()
                    .OrderBy(o => o.Id)
                    .Skip(pageIndex * pageSize)
                    .Take(pageSize)
                    .ToList();

                if (resumeList.Count == 0)
                {
                    break;
                }

                resumeQueue.Enqueue(resumeList);
            }
            catch (Exception ex)
            {
                // The same page is retried, as before, but the failure is now recorded and the
                // retry is delayed so a persistent error cannot become a silent busy loop.
                System.Diagnostics.Trace.TraceError(ex.ToString());
                Thread.Sleep(1000);
                continue;
            }

            pageIndex++;
        }
    }
}
/// <summary>
/// Consumes the pages produced by <c>GetOldResumes</c> with eight workers and writes to
/// <c>F:\ResumeIdList.txt</c> the Id of every resume whose cellphone has no <c>ZhaopinUser</c>
/// row but which has a "ZHAOPIN" entry in <c>CoreResumeReference</c>.
/// </summary>
/// <remarks>
/// Workers stop once the producer has finished and the queue is empty, so the method returns and
/// the output file is written.
/// </remarks>
public void Improt()
{
    var producer = Task.Run(() => GetOldResumes());

    var sb = new StringBuilder();
    var sbGate = new object(); // StringBuilder is not thread-safe; all appends go through this lock.
    var tasks = new List<Task>();

    for (var i = 0; i < 8; i++)
    {
        tasks.Add(Task.Run(() =>
        {
            while (true)
            {
                // Read the producer state BEFORE trying to dequeue: if it had already finished
                // and the queue is empty afterwards, no further page can ever arrive.
                var producerFinished = producer.IsCompleted;

                List<CoreResumeSummary> resumeList;
                if (!resumeQueue.TryDequeue(out resumeList))
                {
                    if (producerFinished)
                    {
                        break;
                    }

                    Thread.Sleep(100);
                    continue;
                }

                try
                {
                    using (var db = new MangningXssDBEntities())
                    using (var bdb = new BadoucaiAliyunDBEntities())
                    {
                        foreach (var resume in resumeList)
                        {
                            var cellphone = resume.Cellphone.ToString();
                            if (db.ZhaopinUser.AsNoTracking().Any(f => f.Cellphone == cellphone))
                            {
                                continue;
                            }

                            if (bdb.CoreResumeReference.AsNoTracking().Any(f => f.ResumeId == resume.Id && f.Source == "ZHAOPIN"))
                            {
                                lock (sbGate)
                                {
                                    sb.AppendLine(resume.Id);
                                }
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    // As before, a failed page is skipped and not counted; the error is now logged.
                    System.Diagnostics.Trace.TraceError(ex.ToString());
                    continue;
                }

                Interlocked.Add(ref count, resumeList.Count);
            }
        }));
    }

    Task.WaitAll(tasks.ToArray());

    if (producer.IsFaulted)
    {
        // The result may be incomplete when the producer died; make that visible.
        System.Diagnostics.Trace.TraceError(producer.Exception.ToString());
    }

    File.WriteAllText(@"F:\ResumeIdList.txt", sb.ToString());
}
/// <summary>
/// Resolves the 51job resume key for <paramref name="resumeId"/> by searching ehire.51job.com,
/// unless a non-numeric key is already stored for the resume owning <paramref name="cellphone"/>.
/// </summary>
/// <param name="cookie">Cookie text, converted to a cookie container for "ehire.51job.com".</param>
/// <param name="resumeId">Resume id placed at the start of the <c>searchValueHid</c> form field.</param>
/// <param name="cellphone">Cellphone used to find the stored resume summary.</param>
/// <param name="updateTime">Update time of the incoming resume.</param>
/// <returns>
/// A successful result carrying the stored or matched key, a successful result with an empty
/// string when there is nothing to do or nothing matched, or an error result when a request or an
/// expected page field fails.
/// </returns>
public DataResult<string> MatchByResumeId(string cookie, string resumeId, long cellphone, DateTime updateTime)
{
    var referenceNeedUpdate = false;

    using (var db = new BadoucaiAliyunDBEntities())
    {
        var resume = db.CoreResumeSummary.FirstOrDefault(f => f.Cellphone == cellphone);
        CoreResumeReference reference = null;

        if (resume != null)
        {
            reference = db.CoreResumeReference.FirstOrDefault(f => f.ResumeId == resume.Id && f.Source == "51JOB");
            if (reference != null)
            {
                long id;
                if (!long.TryParse(reference.Id, out id))
                {
                    if (updateTime <= resume.UpdateTime)
                    {
                        // Already stored, and the incoming update time is not newer than the stored one.
                        return new DataResult<string> { IsSuccess = true, Data = string.Empty };
                    }

                    return new DataResult<string> { IsSuccess = true, Data = reference.Id };
                }

                // The stored reference id is purely numeric; it is replaced by the matched key below.
                referenceNeedUpdate = true;
            }
        }

        var cookieContainer = cookie.Serialize("ehire.51job.com");

        var requestResult = HttpClientFactory.RequestForString("https://ehire.51job.com/Candidate/SearchResumeIndexNew.aspx", HttpMethod.Get, null, cookieContainer);
        if (!requestResult.IsSuccess)
        {
            return new DataResult<string>("请求异常!");
        }

        var matchResult = Regex.Match(requestResult.Data, "__VIEWSTATE.+?value=\"(\\S+)\"");
        if (!matchResult.Success)
        {
            return new DataResult<string>("匹配 __VIEWSTATE 异常!");
        }

        var viewState = matchResult.Result("$1");

        var dictionary = new Dictionary<string, string>
        {
            { "__VIEWSTATE", viewState },
            { "sex_ch", "99|不限" },
            { "sex_en", "99|Unlimited" },
            { "send_cycle", "1" },
            { "send_time", "7" },
            { "send_sum", "10" },
            { "searchValueHid", resumeId + "##0##########99############1#0###0#0#0" }
        };

        // NOTE(review): the URL literal ends with a space; kept byte-for-byte — confirm it is harmless.
        requestResult = HttpClientFactory.RequestForString("https://ehire.51job.com/Candidate/SearchResumeNew.aspx ", HttpMethod.Post, dictionary, cookieContainer);
        if (!requestResult.IsSuccess)
        {
            return new DataResult<string>("请求异常!");
        }

        matchResult = Regex.Match(requestResult.Data, "hidKey =(\\S+)\"");
        if (!matchResult.Success)
        {
            // No hit in the search result: report success with an empty key.
            return new DataResult<string> { IsSuccess = true, Data = string.Empty };
        }

        var matchResumeId = matchResult.Result("$1");

        matchResult = Regex.Match(requestResult.Data, "hidCheckKey.+?value=\"(\\S+)\"");
        if (!matchResult.Success)
        {
            return new DataResult<string>("匹配 hidCheckKey 异常!");
        }

        var hidCheckKey = matchResult.Result("$1");
        if (matchResumeId != hidCheckKey)
        {
            return new DataResult<string> { IsSuccess = true, Data = string.Empty };
        }

        if (referenceNeedUpdate)
        {
            // NOTE(review): this change is never persisted — no SaveChanges() follows before the
            // context is disposed. Left as-is because it is not visible here whether Id is part
            // of the entity key; confirm and persist deliberately if intended.
            reference.Id = matchResumeId;
        }

        return new DataResult<string> { IsSuccess = true, Data = matchResumeId };
    }
}
} // NOTE(review): closes a scope opened before the visible source (presumably the enclosing class).
} // NOTE(review): closes a scope opened before the visible source (presumably the namespace).
/// <summary>
/// Assembles resume JSON files: takes resumes from <c>resumeQueue</c>, rebuilds each resume's
/// JSON from the copy stored in OSS, writes it under <c>uploadFilePath</c> and enqueues the file
/// path on <c>uploadQueue</c>. Runs until the process ends.
/// </summary>
/// <remarks>
/// The object "Zhaopin/Resume/{id}" is tried first, then "WatchResume/{id}". A copy taken from
/// "WatchResume" additionally gets the user's cellphone and e-mail and is stored back under
/// "Zhaopin/Resume/{id}". When neither object exists the resume row is flagged with 0x2 and a
/// <c>CoreResumeZhaopin</c> row is added. A null <c>CreateTime</c> or <c>RefreshTime</c> leaves
/// the corresponding JSON dates untouched in both cases.
/// </remarks>
private static void PortfolioResume()
{
    while (true)
    {
        try
        {
            ZhaopinResume resume;
            if (!resumeQueue.TryDequeue(out resume))
            {
                Thread.Sleep(100);
                continue;
            }

            var stopwatch = Stopwatch.StartNew();

            var filePath = $"{uploadFilePath}{resume.Id}.json";
            if (File.Exists(filePath))
            {
                // Already assembled earlier; just hand it to the uploader again.
                uploadQueue.Enqueue(filePath);
                continue;
            }

            string userId;
            string cellphone;
            string email;

            using (var db = new MangningXssDBEntities())
            {
                var user = db.ZhaopinUser.AsNoTracking().FirstOrDefault(f => f.Id == resume.UserId);
                if (user == null)
                {
                    continue;
                }

                userId = user.Id.ToString();
                cellphone = user.Cellphone;
                email = user.Email;

                var resumeKey = $"Zhaopin/Resume/{resume.Id}";
                var watchKey = $"WatchResume/{resume.Id}";

                string sourceKey = null;
                var fromWatch = false;
                if (mangningOssClient.DoesObjectExist(mangningBucketName, resumeKey))
                {
                    sourceKey = resumeKey;
                }
                else if (mangningOssClient.DoesObjectExist(mangningBucketName, watchKey))
                {
                    sourceKey = watchKey;
                    fromWatch = true;
                }

                if (sourceKey != null)
                {
                    string resumeContent;
                    using (var stream = new MemoryStream())
                    {
                        var bytes = new byte[1024];
                        int len;
                        // NOTE(review): the object returned by GetObject is never disposed here;
                        // check whether the OSS client expects callers to dispose it.
                        var streamContent = mangningOssClient.GetObject(mangningBucketName, sourceKey).Content;
                        while ((len = streamContent.Read(bytes, 0, bytes.Length)) > 0)
                        {
                            stream.Write(bytes, 0, len);
                        }

                        resumeContent = Encoding.UTF8.GetString(GZip.Decompress(stream.ToArray()));
                    }

                    var resumeObj = JsonConvert.DeserializeObject<dynamic>(resumeContent);
                    if (fromWatch)
                    {
                        resumeObj.userDetials.mobilePhone = user.Cellphone;
                        resumeObj.userDetials.email = user.Email;
                    }

                    // detialJSonStr is itself JSON; it is parsed, patched and stored back as an object.
                    var resumeDetail = JsonConvert.DeserializeObject(resumeObj.detialJSonStr.ToString());
                    resumeDetail.DateModified = user.ModifyTime.ToLocalTime();
                    resumeDetail.DateCreated = user.CreateTime?.ToLocalTime() ?? resumeDetail.DateCreated;
                    if (resume.RefreshTime.HasValue)
                    {
                        var refreshedAt = resume.RefreshTime.Value.ToLocalTime();
                        resumeDetail.DateLastReleased = refreshedAt;
                        resumeDetail.DateLastViewed = refreshedAt;
                    }

                    resumeObj.detialJSonStr = resumeDetail;
                    resumeContent = JsonConvert.SerializeObject(resumeObj);

                    if (fromWatch)
                    {
                        using (var jsonStream = new MemoryStream(GZip.Compress(Encoding.UTF8.GetBytes(resumeContent))))
                        {
                            mangningOssClient.PutObject(mangningBucketName, resumeKey, jsonStream);
                        }
                    }

                    File.WriteAllText(filePath, resumeContent);
                    uploadQueue.Enqueue(filePath);
                    continue;
                }

                // No JSON source in OSS: flag the resume row.
                var zhaopinResume = db.ZhaopinResume.FirstOrDefault(f => f.Id == resume.Id);
                if (zhaopinResume != null)
                {
                    zhaopinResume.Flag = 0x2;
                }

                db.SaveChanges();
                stopwatch.Stop();
            }

            using (var bdcDb = new BadoucaiAliyunDBEntities())
            {
                bdcDb.CoreResumeZhaopin.Add(new CoreResumeZhaopin
                {
                    Cellphone = cellphone,
                    Email = email,
                    ResumeKey = userId,
                    Type = "ResumeUserId",
                    IsMatched = false
                });
                bdcDb.SaveChanges();
            }

            Console.WriteLine($"{DateTime.Now} > 简历未找到 Josn 源!ResumeId = {resume.Id}, UserId = {userId}, Elapsed = {stopwatch.ElapsedMilliseconds} ms.");
        }
        catch (Exception ex)
        {
            Trace.TraceError(ex.ToString());
        }
    }
}
/// <summary>
/// Reads every exported resume file in the fixed export folder, extracts the 10-character resume
/// number and the 11-digit mobile number, and records the pair when either value is already
/// present in one of the two databases.
/// </summary>
/// <remarks>
/// Eight workers drain a queue that is filled completely before they start, so an empty queue
/// means all work is done; the method returns once every worker has finished.
/// NOTE(review): the collected pairs are only held in a local dictionary and are never
/// persisted or returned — confirm what the intended output is.
/// </remarks>
public void MatchResume()
{
    const string path = @"D:\待清理数据\2017-11-24 智联招聘简历导出";

    var dictionary = new ConcurrentDictionary<string, string>();
    var index = 0;
    var queue = new ConcurrentQueue<string>();

    // Files only: sub-directories cannot be read with File.ReadAllText.
    foreach (var filePath in Directory.EnumerateFiles(path))
    {
        queue.Enqueue(filePath);
    }

    var workers = new List<Task>();
    for (var i = 0; i < 8; i++)
    {
        workers.Add(Task.Run(() =>
        {
            string filePath;
            while (queue.TryDequeue(out filePath))
            {
                Interlocked.Increment(ref index);

                try
                {
                    var htmlSource = File.ReadAllText(filePath);

                    var numberMatchResult = Regex.Match(htmlSource, "tips-id\">ID:(\\S{10})");
                    if (!numberMatchResult.Success)
                    {
                        LogFactory.Warn("简历编号匹配失败!Path:" + filePath);
                        continue;
                    }

                    var mobileMatchResult = Regex.Match(htmlSource, "main-title-fr\">(Mobile|手机) :.*?(\\d{11})");
                    if (!mobileMatchResult.Success)
                    {
                        LogFactory.Warn("手机号码匹配失败!Path:" + filePath);
                        continue;
                    }

                    var resumeNumber = numberMatchResult.Result("$1").Trim().Substring(0, 10);
                    var mobile = mobileMatchResult.Result("$2");

                    using (var db = new MangningXssDBEntities())
                    {
                        if (db.ZhaopinResume.Any(a => a.RandomNumber.StartsWith(resumeNumber)))
                        {
                            dictionary.TryAdd(resumeNumber, mobile);
                            continue;
                        }

                        if (db.ZhaopinUser.Any(a => a.Cellphone == mobile))
                        {
                            dictionary.TryAdd(resumeNumber, mobile);
                            continue;
                        }
                    }

                    using (var db = new BadoucaiAliyunDBEntities())
                    {
                        if (db.CoreResumeReferenceMapping.Any(a => a.Value.StartsWith(resumeNumber)))
                        {
                            dictionary.TryAdd(resumeNumber, mobile);
                            continue;
                        }

                        if (db.CoreResumeSummary.Any(a => a.Cellphone.ToString() == mobile))
                        {
                            dictionary.TryAdd(resumeNumber, mobile);
                        }
                    }
                }
                catch (Exception ex)
                {
                    // One unreadable file or failed query must not end the whole worker.
                    LogFactory.Warn("Resume file processing failed! Path: " + filePath + ", Error: " + ex);
                }
            }
        }));
    }

    Task.WaitAll(workers.ToArray());
}
/// <summary>
/// Counts resume summaries per city and per province by <c>CurrentResidence</c> and appends one
/// "name&lt;TAB&gt;count" line per entry to <c>D:\统计.txt</c>.
/// </summary>
/// <remarks>
/// A city total is the rows located directly at the city id plus the rows located at each of its
/// child areas. A province total sums the same figure over the province's cities. Wherever an
/// area has exactly one child, that child's children are used instead. All count queries run on
/// one context with a 600-second command timeout.
/// </remarks>
public void StatisticsByArea()
{
    var cityDictionary = new Dictionary<int, string>
    {
        #region Cities
        { 90287536, "北京市" },
        { 90487536, "上海市" },
        { 90617636, "广州市" },
        { 90617836, "深圳市" },
        { 90517636, "合肥市" },
        { 90677536, "重庆市" },
        { 90527636, "福州市" },
        { 90797636, "兰州市" },
        { 90697636, "贵阳市" },
        { 90637636, "海口市" },
        { 90307636, "石家庄市" },
        { 90587636, "郑州市" },
        { 90407636, "哈尔滨市" },
        { 90597636, "武汉市" },
        { 90607636, "长沙市" },
        { 90397636, "长春市" },
        { 90497636, "南京市" },
        { 90537636, "南昌市" },
        { 90387636, "沈阳市" },
        { 90807636, "西宁市" },
        { 90547636, "济南市" },
        { 90317636, "太原市" },
        { 90787636, "西安市" },
        { 90687636, "成都市" },
        { 90297536, "天津市" },
        { 90707636, "昆明市" },
        { 90507636, "杭州市" },
        { 90627636, "南宁市" },
        { 90327636, "呼和浩特市" },
        { 90817636, "银川市" },
        { 90717636, "拉萨市" },
        { 90827636, "乌鲁木齐市" },
        { 90997536, "澳门特别行政区" },
        { 90987536, "香港特别行政区" },
        #endregion
    };

    var provinceDictionary = new Dictionary<int, string>
    {
        #region Provinces
        { 90617536, "广东省" },
        { 90517536, "安徽省" },
        { 90527536, "福建省" },
        { 90797536, "甘肃省" },
        { 90697536, "贵州省" },
        { 90637536, "海南省" },
        { 90307536, "河北省" },
        { 90587536, "河南省" },
        { 90407536, "黑龙江省" },
        { 90597536, "湖北省" },
        { 90607536, "湖南省" },
        { 90397536, "吉林省" },
        { 90497536, "江苏省" },
        { 90537536, "江西省" },
        { 90387536, "辽宁省" },
        { 90807536, "青海省" },
        { 90547536, "山东省" },
        { 90317536, "山西省" },
        { 90787536, "陕西省" },
        { 90687536, "四川省" },
        { 90707536, "云南省" },
        { 90507536, "浙江省" },
        { 90627536, "广西壮族自治区" },
        { 90327536, "内蒙古自治区" },
        { 90817536, "宁夏回族自治区" },
        { 90717536, "西藏自治区" },
        { 90827536, "新疆维吾尔自治区" }
        #endregion
    };

    var sb = new StringBuilder();

    using (var bdb = new BadoucaiAliyunDBEntities())
    using (var adb = new AIFDBEntities())
    {
        // The counts are slow; the extended timeout now covers the province queries as well.
        bdb.Database.CommandTimeout = 600;

        foreach (var area in cityDictionary)
        {
            var count = 0;

            // Rows located directly at the city id.
            count += bdb.CoreResumeSummary.AsNoTracking().Count(c => c.CurrentResidence == area.Key);

            var areaList = adb.BaseAreaBDC.AsNoTracking().Where(w => w.PId == area.Key).ToList();
            if (areaList.Count == 1)
            {
                // A single child: use that child's children instead.
                var pid = areaList[0].Id;
                areaList = adb.BaseAreaBDC.AsNoTracking().Where(w => w.PId == pid).ToList();
            }

            count += areaList.Sum(item => bdb.CoreResumeSummary.AsNoTracking().Count(c => c.CurrentResidence == item.Id));

            sb.AppendLine($"{area.Value}\t{count}");
        }

        foreach (var province in provinceDictionary)
        {
            var count = 0;

            var cityList = adb.BaseAreaBDC.AsNoTracking().Where(w => w.PId == province.Key).ToList();
            if (cityList.Count == 1)
            {
                var pid = cityList[0].Id;
                cityList = adb.BaseAreaBDC.AsNoTracking().Where(w => w.PId == pid).ToList();
            }

            foreach (var area in cityList)
            {
                count += bdb.CoreResumeSummary.AsNoTracking().Count(c => c.CurrentResidence == area.Id);

                var areaList = adb.BaseAreaBDC.AsNoTracking().Where(w => w.PId == area.Id).ToList();
                if (areaList.Count == 1)
                {
                    var pid = areaList[0].Id;
                    areaList = adb.BaseAreaBDC.AsNoTracking().Where(w => w.PId == pid).ToList();
                }

                count += areaList.Sum(item => bdb.CoreResumeSummary.AsNoTracking().Count(c => c.CurrentResidence == item.Id));
            }

            sb.AppendLine($"{province.Value}\t{count}");
        }
    }

    File.AppendAllText(@"D:\统计.txt", sb.ToString());
}
/// <summary>
/// Flags (post-processes) one downloaded resume: stores its content on disk and deletes the
/// source object "Zhaopin/{resumeId}" from the Badoucai OSS bucket.
/// </summary>
/// <param name="jsonContent">
/// Raw content of the resume: either JSON containing "detialJSonStr", or an HTML page
/// (optionally wrapped as a quoted, escaped string).
/// </param>
/// <param name="resumeId">Id of the resume; also used as the default output file name.</param>
/// <remarks>
/// For JSON without a mobile phone, the contact details are looked up from <c>ZhaopinUser</c> or,
/// failing that, via <c>CoreResumeReference</c>/<c>CoreResumeSummary</c>. When a lookup succeeds
/// the patched JSON is what gets written; otherwise the original text is written unchanged.
/// </remarks>
private static void FlagResume(string jsonContent, int resumeId)
{
    using (var db = new MangningXssDBEntities())
    {
        var resume = db.ZhaopinResume.FirstOrDefault(f => f.Id == resumeId);
        if (resume?.Flag == 0xF)
        {
            // Already flagged 0xF: only drop the source object and stamp the include time.
            badoucaiOssClient.DeleteObject(badoucaiBucketName, $"Zhaopin/{resumeId}");
            resume.IncludeTime = DateTime.UtcNow;
            db.SaveChanges();
            return;
        }

        var resumeIdStr = resumeId.ToString();

        if (jsonContent.Contains("detialJSonStr"))
        {
            var jsonObj = JsonConvert.DeserializeObject<dynamic>(jsonContent);
            var contentToWrite = jsonContent;

            if (string.IsNullOrWhiteSpace((string)jsonObj.userDetials.mobilePhone))
            {
                var contactPatched = false;

                var user = resume != null
                    ? db.ZhaopinUser.FirstOrDefault(f => f.Id == resume.UserId && !string.IsNullOrEmpty(f.Cellphone))
                    : null;

                if (user != null)
                {
                    jsonObj.userDetials.mobilePhone = user.Cellphone;
                    jsonObj.userDetials.email = user.Email;
                    contactPatched = true;
                }
                else
                {
                    using (var adb = new BadoucaiAliyunDBEntities())
                    {
                        var reference = adb.CoreResumeReference.AsNoTracking().FirstOrDefault(f => f.Id == resumeIdStr);
                        CoreResumeSummary summary = null;
                        if (reference != null)
                        {
                            summary = adb.CoreResumeSummary.AsNoTracking().FirstOrDefault(f => f.Id == reference.ResumeId);
                        }

                        if (summary != null)
                        {
                            jsonObj.userDetials.mobilePhone = summary.Cellphone;
                            jsonObj.userDetials.email = summary.Email;
                            contactPatched = true;
                        }
                    }
                }

                if (contactPatched)
                {
                    // Without re-serialising, the looked-up contact details would be discarded.
                    contentToWrite = JsonConvert.SerializeObject((object)jsonObj);
                }
            }

            // A large commented-out legacy block (resume/user upsert and local/OSS archiving)
            // used to follow here; it was dead code and has been removed.
            File.WriteAllText($"{jsonFilePath}{resumeId}", contentToWrite);
        }
        else
        {
            var isQuotedHtml = jsonContent.StartsWith("\"<!DOCTYPE HTML>", StringComparison.OrdinalIgnoreCase);
            if (isQuotedHtml || jsonContent.StartsWith("<html", StringComparison.OrdinalIgnoreCase))
            {
                if (isQuotedHtml)
                {
                    // Strip the surrounding quotes and undo the escaping.
                    jsonContent = jsonContent.Substring(1);
                    jsonContent = jsonContent.Substring(0, jsonContent.Length - 1);
                    jsonContent = Regex.Unescape(jsonContent);
                }

                // One match supplies both the update time ($1) and the user name ($2).
                var headerMatch = Regex.Match(jsonContent, "(?s)resumeUpdateTime\">(.+?)<.+?userName.+?alt=\"(.+?)\"");
                var updateTime = headerMatch.ResultOrDefault("$1", "");
                var name = headerMatch.ResultOrDefault("$2", "").Replace("\\", "").Replace("/", "");

                string fileName;
                if (string.IsNullOrEmpty(updateTime) || string.IsNullOrEmpty(name))
                {
                    fileName = $"{resumeId}";
                }
                else
                {
                    fileName = $"{name}_{updateTime.Replace("年", "-").Replace("月", "-").Replace("日", "")}.txt";
                }

                File.WriteAllText($"{domFilePath}{fileName}", jsonContent);
            }
            else
            {
                Trace.WriteLine($"{DateTime.Now} > 简历格式异常!异常简历ID = {resumeId}, Content = {jsonContent}");
            }
        }

        // Both the JSON and the non-JSON path end by removing the source object.
        badoucaiOssClient.DeleteObject(badoucaiBucketName, $"Zhaopin/{resumeId}");
    }
}