/// <summary>
/// Parses one experience entry from a fragment of profile HTML and stores the
/// pieces it can find (position, company, company details, period, location)
/// on <paramref name="profile"/>.
/// </summary>
/// <param name="content">HTML fragment that follows the experience marker.</param>
/// <param name="profile">Profile whose Experience* properties are filled in.</param>
/// <remarks>
/// Every field is optional: when a marker is missing the corresponding property
/// is left untouched instead of throwing, and the remaining fields are still parsed.
/// </remarks>
private void ExtractExperience(string content, ref LinkedInProfile profile)
{
    if (string.IsNullOrEmpty(content))
    {
        return;
    }

    // Position title.
    string title = ExperienceSegment(content, "<span class=\"title\">", "</span>");
    if (title != null)
    {
        title = title.Trim();
        profile.ExperiencePosition = WebUtility.HtmlDecode(title);
        logger.Info(title);
    }

    // Company name.
    string org = ExperienceSegment(content, "<span class=\"org summary\">", "</span>");
    if (org != null)
    {
        org = org.Trim();
        profile.ExperienceCompany = WebUtility.HtmlDecode(org);
        logger.Info(org);
    }

    // Company details: "type; size; [extra;] sector" or just "sector".
    string orgBlock = ExperienceSegment(content, "<p class=\"orgstats organization-details", null);
    string orgDetail = orgBlock == null ? null : ExperienceSegment(orgBlock, "position\">", "</p>");
    if (orgDetail != null)
    {
        orgDetail = orgDetail.Trim();

        // Decode BEFORE splitting: HTML entities such as "&amp;" contain ';' and
        // would otherwise be counted as field separators.
        string[] fields = WebUtility.HtmlDecode(orgDetail).Split(';');
        switch (fields.Length)
        {
            case 1:
                profile.ExperienceCompanyBusinessSector = fields[0].Trim();
                break;
            case 3:
                profile.ExperienceCompanyType = fields[0].Trim();
                profile.ExperienceCompanySize = fields[1].Trim();
                profile.ExperienceCompanyBusinessSector = fields[2].Trim();
                break;
            case 4:
                // The third field is skipped on purpose, as in the original mapping.
                profile.ExperienceCompanyType = fields[0].Trim();
                profile.ExperienceCompanySize = fields[1].Trim();
                profile.ExperienceCompanyBusinessSector = fields[3].Trim();
                break;
        }

        logger.Info(orgDetail);
    }

    // Period: the <div> variant wins over the <p> variant when both are present.
    string periodBlock = ExperienceSegment(content, "<div class=\"period\">", null)
                         ?? ExperienceSegment(content, "<p class=\"period\">", null);
    string period = periodBlock == null ? null : BuildExperiencePeriod(periodBlock);
    if (period != null)
    {
        profile.ExperiencePeriod = period;
        logger.Info(profile.ExperiencePeriod);
    }

    // Location (parsed independently of whether a period was found).
    string location = ExperienceSegment(content, "<span class=\"location\">", "</span>");
    if (location != null)
    {
        profile.ExperienceCompanyLocation = WebUtility.HtmlDecode(location);
        logger.Info(profile.ExperienceCompanyLocation);
    }
}

// Returns the text between the first and second occurrence of startMarker (or the
// end of source), cut at the first endMarker when endMarker is not null.
// Returns null when startMarker does not occur in source.
private static string ExperienceSegment(string source, string startMarker, string endMarker)
{
    string[] pieces = Regex.Split(source, startMarker);
    if (pieces.Length < 2)
    {
        return null;
    }

    return endMarker == null ? pieces[1] : Regex.Split(pieces[1], endMarker)[0];
}

// Builds "start - end duration" from a period block, or returns null when the
// block does not contain two </abbr>-terminated dates.
private static string BuildExperiencePeriod(string periodBlock)
{
    string[] dateParts = Regex.Split(periodBlock, "</abbr>");
    if (dateParts.Length < 2)
    {
        return null;
    }

    // The visible date text is whatever follows the first '>' of each part.
    string[] startTokens = Regex.Split(dateParts[0], ">");
    string[] endTokens = Regex.Split(dateParts[1], ">");
    if (startTokens.Length < 2 || endTokens.Length < 2)
    {
        return null;
    }

    string raw = startTokens[1].Trim() + " - " + endTokens[1].Trim();

    // The duration is the text between the first and second "</span>".
    string[] spanParts = Regex.Split(periodBlock, "</span>");
    if (spanParts.Length > 1)
    {
        raw += " " + spanParts[1].Trim();
    }

    // Plain concatenation: the scraped text must not be used as a format string,
    // because '{' or '}' in it would make string.Format throw.
    return WebUtility.HtmlDecode(raw);
}
/// <summary>
/// Inserts one row for <paramref name="profile"/> into the UserDB table by
/// building an INSERT statement and passing it to mExe.ExecQuery.
/// </summary>
/// <param name="profile">Profile whose text properties, languages and skills are stored.</param>
/// <param name="link">Profile link stored in the Link column.</param>
/// <remarks>
/// Languages and skills are each stored as a single ';'-separated string.
/// Every value, including the first language and the first skill, goes through
/// <see cref="ToSqlText"/> before it is placed inside an N'...' literal.
/// </remarks>
private void WriteToDatabase(LinkedInProfile profile, string link)
{
    // NOTE(review): the statement is still assembled as text because the signature
    // of mExe.ExecQuery is not visible here; if it accepts parameters, prefer a
    // parameterized command over literal concatenation.
    string[] columnValues =
    {
        ToSqlText(profile.Username),
        ToSqlText(profile.Position),
        ToSqlText(profile.Company),
        ToSqlText(profile.Summary),
        ToSqlText(profile.ExperienceCompany),
        ToSqlText(profile.ExperiencePosition),
        ToSqlText(profile.ExperienceCompanyType),
        ToSqlText(profile.ExperienceCompanySize),
        ToSqlText(profile.ExperienceCompanyBusinessSector),
        ToSqlText(profile.ExperienceCompanyLocation),
        ToSqlText(profile.ExperiencePeriod),
        string.Join(";", profile.Language.Select(item => ToSqlText(item))),
        string.Join(";", profile.SkillAndExpertise.Select(item => ToSqlText(item))),
        ToSqlText(link)
    };

    string valueList = string.Join(",", columnValues.Select(value => "N'" + value + "'"));

    mExe.ExecQuery(
        "INSERT INTO UserDB (Username,Position,Company,Description,ExperienceCompany,ExperiencePosition,"
        + "ExperienceCompanyType,ExperienceCompanySize,ExperienceCompanyBusinessSector,ExperienceCompanyLocation,"
        + "ExperiencePeriod,Language,Skill,Link) "
        + "VALUES"
        + " (" + valueList + ")");
}

// Makes a value safe to embed between single quotes in the statement above by
// replacing every single quote with a double quote (lossy, but it keeps the
// literal closed). A null value is treated as an empty string.
private static string ToSqlText(string value)
{
    return (value ?? string.Empty).Replace("'", "\"");
}
// HTML markers used to locate each profile field:
//   <span class="full-name">                                 user full name
//   <p class="headline-title title" style="display:block">   position headline
//   <p class=" description summary">                         description / summary
//   <div class="postitle">                                   start of an experience entry
//   <li class="competency language">                         language
//   <li class="competency show-bean ">                       skill
//   <li class="competency show-bean extra-skill">            additional skill
//   <li class="with-photo">                                  link to another profile

/// <summary>
/// Parses <c>Content</c> into a <c>LinkedInProfile</c>: username, position and
/// company, summary, first experience entry, languages, skills and the links to
/// further profiles.
/// </summary>
/// <returns>
/// The populated profile. Text fields whose marker is not present in the page
/// are set to <see cref="string.Empty"/>.
/// </returns>
public LinkedInProfile Process()
{
    var profile = new LinkedInProfile();
    logger.Info("Start parse HTML");

    // Get username
    logger.Info("Get username");
    string[] usernameParts = Regex.Split(Content, "<span class=\"full-name\">");
    // Guarded like every other field: a page without the marker yields an empty
    // username instead of an IndexOutOfRangeException.
    profile.Username = usernameParts.Length > 1 ? ExtractUsername(usernameParts[1]) : string.Empty;
    logger.Info(profile.Username);

    // Get position ("<position> at <company>")
    logger.Info("Get position");
    string[] positionParts = Regex.Split(Content, "<p class=\"headline-title title\" style=\"display:block\">");
    if (positionParts.Length > 1)
    {
        string headline = ExtractPosition(positionParts[1]);
        string[] headlineParts = Regex.Split(headline, " at ");
        profile.Position = headlineParts[0];
        profile.Company = headlineParts.Length > 1 ? headlineParts[1] : string.Empty;
    }
    else
    {
        profile.Position = string.Empty;
        profile.Company = string.Empty;
    }
    logger.Info(profile.Position);

    // Get summary
    logger.Info("Get summary");
    string[] summaryParts = Regex.Split(Content, "<p class=\" description summary\">");
    profile.Summary = summaryParts.Length > 1 ? ExtractSummary(summaryParts[1]) : string.Empty;

    // Get experience: defaults first, so fields the parser cannot find stay empty.
    logger.Info("Get experience");
    profile.ExperienceCompany = string.Empty;
    profile.ExperienceCompanyLocation = string.Empty;
    profile.ExperienceCompanyType = string.Empty;
    profile.ExperienceCompanySize = string.Empty;
    profile.ExperienceCompanyBusinessSector = string.Empty;
    profile.ExperiencePeriod = string.Empty;
    profile.ExperiencePosition = string.Empty;
    string[] experienceParts = Regex.Split(Content, "<div class=\"postitle\">");
    if (experienceParts.Length > 1)
    {
        // Only the first experience entry is parsed.
        ExtractExperience(experienceParts[1], ref profile);
    }

    // Get language
    logger.Info("Get language");
    string[] languageParts = Regex.Split(Content, "<li class=\"competency language\">");
    for (int i = 1; i < languageParts.Length; i++)
    {
        string lang = ExtractLanguage(languageParts[i]);
        profile.Language.Add(lang);
        logger.Info(lang);
    }

    // Get skill and expertise: regular skills first, then the "extra-skill" items.
    logger.Info("Get skill and expertise");
    string[] skillMarkers =
    {
        "<li class=\"competency show-bean \">",
        "<li class=\"competency show-bean extra-skill\">"
    };
    foreach (string skillMarker in skillMarkers)
    {
        string[] skillParts = Regex.Split(Content, skillMarker);
        for (int i = 1; i < skillParts.Length; i++)
        {
            // Only the markup up to the first closing span is handed to the extractor.
            string skillHtml = Regex.Split(skillParts[i], "</span>")[0];
            string skill = ExtractSkillAndExpertise(skillHtml);
            profile.SkillAndExpertise.Add(skill);
            logger.Info(skill);
        }
    }

    // Get next profile link
    logger.Info("Get next profile");
    string[] nextParts = Regex.Split(Content, "<li class=\"with-photo\">");
    for (int i = 1; i < nextParts.Length; i++)
    {
        string profileLink = ExtractNextProfile(nextParts[i]);
        profile.NextProfile.Add(profileLink);
        logger.Info(profileLink);
    }

    logger.Info("End parse HTML");
    return profile;
}