コード例 #1
0
ファイル: ParserCore.cs プロジェクト: huucp/LinkedInParser
        /// <summary>
        /// Fills the experience-related fields of <paramref name="profile"/> from one HTML
        /// fragment describing a single position. Each field is located by splitting the
        /// fragment on a literal opening tag and taking the text up to the next closing tag.
        /// Fields whose markup is missing are left untouched instead of throwing.
        /// </summary>
        /// <param name="content">HTML fragment to scan.</param>
        /// <param name="profile">Profile whose Experience* properties are assigned; the reference itself is never replaced.</param>
        private void ExtractExperience(string content, ref LinkedInProfile profile)
        {
            // Position title: text between <span class="title"> and the following </span>.
            string[] titleSplitWord = Regex.Split(content, "<span class=\"title\">");
            if (titleSplitWord.Length > 1)
            {
                string title = Regex.Split(titleSplitWord[1], "</span>")[0].Trim();
                profile.ExperiencePosition = WebUtility.HtmlDecode(title);
                logger.Info(title);
            }

            // Company name: text between <span class="org summary"> and the following </span>.
            string[] orgSplitWord = Regex.Split(content, "<span class=\"org summary\">");
            if (orgSplitWord.Length > 1)
            {
                string org = Regex.Split(orgSplitWord[1], "</span>")[0].Trim();
                profile.ExperienceCompany = WebUtility.HtmlDecode(org);
                logger.Info(org);
            }

            // Company details: a ';'-separated list inside the "orgstats organization-details" paragraph.
            string[] orgDetailSplitWord = Regex.Split(content, "<p class=\"orgstats organization-details");
            if (orgDetailSplitWord.Length > 1)
            {
                string[] orgDetailTemp = Regex.Split(orgDetailSplitWord[1], "position\">");
                if (orgDetailTemp.Length > 1)
                {
                    string orgDetail = Regex.Split(orgDetailTemp[1], "</p>")[0].Trim();
                    string[] orgDetailSplit = Regex.Split(orgDetail, ";");
                    switch (orgDetailSplit.Length)
                    {
                        case 1:
                            profile.ExperienceCompanyBusinessSector = WebUtility.HtmlDecode(orgDetailSplit[0]).Trim();
                            break;
                        case 3:
                            profile.ExperienceCompanyType = WebUtility.HtmlDecode(orgDetailSplit[0]).Trim();
                            profile.ExperienceCompanySize = WebUtility.HtmlDecode(orgDetailSplit[1]).Trim();
                            profile.ExperienceCompanyBusinessSector = WebUtility.HtmlDecode(orgDetailSplit[2]).Trim();
                            break;
                        case 4:
                            // NOTE(review): element [2] is skipped when there are four parts;
                            // its meaning is not visible here - confirm this is intended.
                            profile.ExperienceCompanyType = WebUtility.HtmlDecode(orgDetailSplit[0]).Trim();
                            profile.ExperienceCompanySize = WebUtility.HtmlDecode(orgDetailSplit[1]).Trim();
                            profile.ExperienceCompanyBusinessSector = WebUtility.HtmlDecode(orgDetailSplit[3]).Trim();
                            break;
                    }
                    logger.Info(orgDetail);
                }
            }

            // Period: the block may be a <p> or a <div>; when both occur the <div> split wins.
            string[] periodSplitWord = new string[] { };
            if (content.Contains("<p class=\"period\">"))
            {
                periodSplitWord = Regex.Split(content, "<p class=\"period\">");
            }
            if (content.Contains("<div class=\"period\">"))
            {
                periodSplitWord = Regex.Split(content, "<div class=\"period\">");
            }
            if (periodSplitWord.Length < 2)
            {
                // No period markup at all; indexing [1] here used to throw.
                return;
            }

            string[] timeSplitWord = Regex.Split(periodSplitWord[1], "</abbr>");
            if (timeSplitWord.Length == 1)
            {
                // NOTE(review): this pre-existing early exit also skips the location below.
                return;
            }

            string[] firstTimeParts = Regex.Split(timeSplitWord[0], ">");
            string[] secondTimeParts = Regex.Split(timeSplitWord[1], ">");
            string[] durationParts = Regex.Split(periodSplitWord[1], "</span>");
            if (firstTimeParts.Length < 2 || secondTimeParts.Length < 2 || durationParts.Length < 2)
            {
                // Period markup is incomplete; treat it like the unparseable case above.
                return;
            }

            string firstTime = firstTimeParts[1].Trim();
            string secondTime = secondTimeParts[1].Trim();
            string duration = durationParts[1].Trim();
            // Plain concatenation: the text comes from the page and must not be used as a
            // string.Format format string, where a '{' or '}' would raise FormatException.
            profile.ExperiencePeriod = WebUtility.HtmlDecode(firstTime + " - " + secondTime + " " + duration);
            logger.Info(profile.ExperiencePeriod);

            // Location: text between <span class="location"> and the following </span>.
            string[] locationSplitWord = Regex.Split(content, "<span class=\"location\">");
            if (locationSplitWord.Length > 1)
            {
                profile.ExperienceCompanyLocation =
                    WebUtility.HtmlDecode(Regex.Split(locationSplitWord[1], "</span>")[0]);
                logger.Info(profile.ExperienceCompanyLocation);
            }
        }
コード例 #2
0
ファイル: Parser.cs プロジェクト: huucp/LinkedInParser
        /// <summary>
        /// Inserts one row into the UserDB table for <paramref name="profile"/>.
        /// Every value has its single quotes replaced by double quotes before being embedded
        /// in the SQL text; languages and skills are each stored as one ';'-separated string.
        /// </summary>
        /// <param name="profile">Profile whose fields are written.</param>
        /// <param name="link">Link value stored in the Link column.</param>
        private void WriteToDatabase(LinkedInProfile profile, string link)
        {
            // NOTE(review): the statement is assembled by string concatenation from scraped
            // text. Quote replacement is the only protection; switch to a parameterized
            // query if mExe offers one.
            string username = profile.Username.Replace("'", "\"");
            string position = profile.Position.Replace("'", "\"");
            string company = profile.Company.Replace("'", "\"");
            string summary = profile.Summary.Replace("'", "\"");
            string expCompany = profile.ExperienceCompany.Replace("'", "\"");
            string expPosition = profile.ExperiencePosition.Replace("'", "\"");
            string expCompanyType = profile.ExperienceCompanyType.Replace("'", "\"");
            string expCompanySize = profile.ExperienceCompanySize.Replace("'", "\"");
            string expCompanyBusinessSector = profile.ExperienceCompanyBusinessSector.Replace("'", "\"");
            string expCompanyLocation = profile.ExperienceCompanyLocation.Replace("'", "\"");
            string expPeriod = profile.ExperiencePeriod.Replace("'", "\"");

            // Start at index 0 so the first entry is escaped like all the others; it used to
            // be copied verbatim, letting an apostrophe terminate the SQL literal early.
            string language = string.Empty;
            for (int i = 0; i < profile.Language.Count; i++)
            {
                if (i > 0)
                {
                    language += ";";
                }
                language += profile.Language[i].Replace("'", "\"");
            }

            string skill = string.Empty;
            for (int i = 0; i < profile.SkillAndExpertise.Count; i++)
            {
                if (i > 0)
                {
                    skill += ";";
                }
                skill += profile.SkillAndExpertise[i].Replace("'", "\"");
            }

            link = link.Replace("'", "\"");

            mExe.ExecQuery("INSERT INTO UserDB (Username,Position,Company,Description,ExperienceCompany,ExperiencePosition," +
                           "ExperienceCompanyType,ExperienceCompanySize,ExperienceCompanyBusinessSector,ExperienceCompanyLocation," +
                           "ExperiencePeriod,Language,Skill,Link) " + "VALUES" + " (" +
                           "N'" + username + "'," +
                           "N'" + position + "'," +
                           "N'" + company + "'," +
                           "N'" + summary + "'," +
                           "N'" + expCompany + "'," +
                           "N'" + expPosition + "'," +
                           "N'" + expCompanyType + "'," +
                           "N'" + expCompanySize + "'," +
                           "N'" + expCompanyBusinessSector + "'," +
                           "N'" + expCompanyLocation + "'," +
                           "N'" + expPeriod + "'," +
                           "N'" + language + "'," +
                           "N'" + skill + "'," +
                           "N'" + link + "'" +
                           ")");
        }
コード例 #3
0
ファイル: ParserCore.cs プロジェクト: huucp/LinkedInParser
        // <span class="full-name">: tag for user full name
        // <p class="headline-title title" style="display:block">: tag for position
        // <p class=" description summary">: tag for description
        // <div class="postitle">: tag for begin experience
        // <li class="competency language">: tag for language
        // <li class="competency show-bean  ">: tag for skill
        // <li class="with-photo">: tag for link profile
        /// <summary>
        /// Builds a <see cref="LinkedInProfile"/> from <c>Content</c> by splitting it on literal
        /// opening tags and passing the text after each tag to the matching Extract* helper.
        /// Progress and extracted values are written to <c>logger</c>.
        /// </summary>
        /// <returns>The populated profile.</returns>
        public LinkedInProfile Process()
        {
            var profile = new LinkedInProfile();
            logger.Info("Start parse HTML");

            // Username. The tag is not checked for presence, so indexing [1] throws when
            // Content has no full-name span.
            logger.Info("Get username");
            string afterNameTag = Regex.Split(Content, "<span class=\"full-name\">")[1];
            profile.Username = ExtractUsername(afterNameTag);
            logger.Info(profile.Username);

            // Headline, split into position and company on the first " at ".
            logger.Info("Get position");
            string[] headlineParts = Regex.Split(Content, "<p class=\"headline-title title\" style=\"display:block\">");
            if (headlineParts.Length <= 1)
            {
                profile.Position = string.Empty;
                profile.Company = string.Empty;
            }
            else
            {
                string headline = ExtractPosition(headlineParts[1]);
                string[] roleAndEmployer = Regex.Split(headline, " at ");
                profile.Position = roleAndEmployer[0];
                if (roleAndEmployer.Length > 1)
                {
                    profile.Company = roleAndEmployer[1];
                }
                else
                {
                    profile.Company = string.Empty;
                }
            }
            logger.Info(profile.Position);

            // Summary; empty when the tag is absent.
            logger.Info("Get summary");
            string[] summaryParts = Regex.Split(Content, "<p class=\" description summary\">");
            if (summaryParts.Length > 1)
            {
                profile.Summary = ExtractSummary(summaryParts[1]);
            }
            else
            {
                profile.Summary = string.Empty;
            }

            // Experience: only the text after the first "postitle" div is examined.
            // All experience fields default to empty before extraction.
            logger.Info("Get experience");
            string[] experienceParts = Regex.Split(Content, "<div class=\"postitle\">");
            profile.ExperienceCompany = string.Empty;
            profile.ExperienceCompanyLocation = string.Empty;
            profile.ExperienceCompanyType = string.Empty;
            profile.ExperienceCompanySize = string.Empty;
            profile.ExperienceCompanyBusinessSector = string.Empty;
            profile.ExperiencePeriod = string.Empty;
            profile.ExperiencePosition = string.Empty;
            if (experienceParts.Length > 1)
            {
                ExtractExperience(experienceParts[1], ref profile);
            }

            // Languages: one entry per occurrence of the tag (index 0 is the text before the first tag).
            logger.Info("Get language");
            string[] languageParts = Regex.Split(Content, "<li class=\"competency language\">");
            for (int idx = 1; idx < languageParts.Length; idx++)
            {
                string language = ExtractLanguage(languageParts[idx]);
                profile.Language.Add(language);
                logger.Info(language);
            }

            // Skills: regular entries first, then "extra-skill" entries, both handled the same way.
            logger.Info("Get skill and expertise");
            string[] skillMarkers =
            {
                "<li class=\"competency show-bean  \">",
                "<li class=\"competency show-bean  extra-skill\">"
            };
            foreach (string marker in skillMarkers)
            {
                if (!Content.Contains(marker))
                {
                    continue;
                }

                string[] skillParts = Regex.Split(Content, marker);
                for (int idx = 1; idx < skillParts.Length; idx++)
                {
                    // Only the text up to the first closing span is handed to the extractor.
                    string beforeSpanEnd = Regex.Split(skillParts[idx], "</span>")[0];
                    string skillName = ExtractSkillAndExpertise(beforeSpanEnd);
                    profile.SkillAndExpertise.Add(skillName);
                    logger.Info(skillName);
                }
            }

            // Links to further profiles: one per "with-photo" list item.
            logger.Info("Get next profile");
            string[] nextParts = Regex.Split(Content, "<li class=\"with-photo\">");
            for (int idx = 1; idx < nextParts.Length; idx++)
            {
                string nextLink = ExtractNextProfile(nextParts[idx]);
                profile.NextProfile.Add(nextLink);
                logger.Info(nextLink);
            }

            logger.Info("End parse HTML");
            return profile;
        }