Beispiel #1
0
        public UserInfo GetUserInfo(string userIdOrAlias, UserInfoOption option)
        {
            var ctx = "GetUserInfo";

            if (string.IsNullOrWhiteSpace(userIdOrAlias))
            {
                throw new ArgumentException("userIdOrAlias must not null or empty.");
            }

            var userInfo = new UserInfo {
                FBInfo = new FacebookInfo()
            };
            var  userAboutUrl = string.Empty;
            bool isUserId     = !CompiledRegex.Match(Pattern.NonDigit, userIdOrAlias).Success;

            if (isUserId)
            {
                userAboutUrl       = "https://m.facebook.com/profile.php?v=info&id=" + userIdOrAlias;
                userInfo.FBInfo.Id = userIdOrAlias;
            }
            else
            {
                userAboutUrl          = "https://m.facebook.com/" + userIdOrAlias + "/about";
                userInfo.FBInfo.Alias = userIdOrAlias;
            }

            HtmlNode htmlDom = this.createDom(userAboutUrl);

            // Get avatar anchor tag :

            // avatarAnchorElem contain avatar image source, user display name and maybe contain id.
            // if userIdOrAlias is "me user" or "other users with animated avatar" then 1st xpath is wrong
            // so we need to pick another anchor
            HtmlNode avatarAnchor = htmlDom.SelectSingleNode("//div[@id='root']/div/div/div/div/div/a");

            if (avatarAnchor == null ||                                        // FB change structure
                avatarAnchor.InnerText == Localization.EditProfilePicture ||   // Me user
                avatarAnchor.InnerText == Localization.AddProfilePicture)      // Me user
            {
                // pick the second pattern
                avatarAnchor = htmlDom.SelectSingleNode("//div[@id='root']/div/div/div/div/div/div/a");
            }
            else if ((avatarAnchor.SelectSingleNode("div/a/img") != null) ||
                     (avatarAnchor.PreviousSibling != null && avatarAnchor.PreviousSibling.SelectSingleNode("/a/img") != null))
            {
                HtmlNodeCollection anchors = avatarAnchor.SelectNodes("div/a");
                if (anchors != null)
                {
                    foreach (HtmlNode anchor in anchors)
                    {
                        if (anchor.SelectSingleNode("img") != null)
                        {
                            avatarAnchor = anchor;
                            break;
                        }
                    }
                }
                else
                {
                    mLogger.WriteLine(ctx + ":Empty avatar anchor node-001");
                    mLogger.WriteLine("-------");
                }
            }
            // Update 25-0802917
            else
            {
                if (avatarAnchor.SelectSingleNode("img") == null)
                {
                    mLogger.WriteLine(ctx + ":Empty avatar anchor node-002");
                    mLogger.WriteLine("-------");
                }
            }

            // require user id
            if (!isUserId && option.FbInfoOption.IncludeUserId)
            {
                Match idMatch = Match.Empty;

                // trying get id from avatar href
                var avatarHref = avatarAnchor.GetAttributeValue("href", null);

                // If we found avatar href, we might using it to detect user id
                if (avatarHref != null)
                {
                    // There is 3 pattern to detect user id
                    // If both 3 pattern can not detect user id, return this url for detect later.
                    // /photo.php?fbid=704517456378829&id=100004617430839&...
                    // /profile/picture/view/?profile_id=100003877944061&...
                    // /story.php\?story_fbid=\d+&amp;id=(?<id>\d+) for animate avatar
                    if ((idMatch = CompiledRegex.Match(Pattern.UserIdFromAvatar1, avatarHref)).Success ||
                        (idMatch = CompiledRegex.Match(Pattern.UserIdFromAvatar2, avatarHref)).Success ||
                        (idMatch = CompiledRegex.Match(Pattern.UserIdFromAvatar3, avatarHref)).Success)
                    {
                        userInfo.FBInfo.Id = idMatch.Groups["id"].Value;
                    }
                }

                // now avatarHref is null or we cannot detect user id from avatarHref
                // so we need try another way
                if (string.IsNullOrEmpty(userInfo.FBInfo.Id))
                {
                    // Trying to detect user id from hyperlink :
                    // Timeline · Friends · Photos · Likes · Followers · Following · [Activity Log]
                    // NOTE :
                    //      - Activity log only show in "Me users" about page
                    //
                    // Important : /div/div/div/a must select before /div/div/a. Do not swap SelectNodes order.
                    HtmlNodeCollection anchors = htmlDom.SelectNodes("//div[@id='root']/div/div/div/a")
                                                 ?? htmlDom.SelectNodes("//div[@id='root']/div/div/a");

                    if (anchors != null && anchors.Count > 0)
                    {
                        foreach (HtmlNode anchor in anchors)
                        {
                            // Get and check hrefAttr and innerText
                            // If both of them have value then we can detect it using compiled pattern
                            string hrefAttr  = anchor.GetAttributeValue("href", string.Empty);
                            string innerText = anchor.InnerText;
                            if (!string.IsNullOrWhiteSpace(innerText) &&
                                (idMatch = CompiledRegex.Match(innerText, hrefAttr)).Success)
                            {
                                userInfo.FBInfo.Id = idMatch.Groups["id"].Value;
                                break;
                            }
                        }
                    }
                }

                // Try another way if id still empty
                if (string.IsNullOrEmpty(userInfo.FBInfo.Id))
                {
                    // Step 3 :
                    // trying get uid from action button : Add Friend, Message, Follow, More
                    // I only select the 1st xpath,the second xpath will be check in the future.
                    HtmlNodeCollection btnHrefs = htmlDom.SelectNodes("//div[@id='root']/div/div/div/table/tr/td/a");
                    // ??htmlDom.SelectNodes("//div[@id='root']/div/div/div");

                    // If we found some button nodes :
                    //      - Add Friend node if we does not add friend with this user
                    //      - Message if this user allow we can send message to him/her
                    //      - Follow if this user allow we can follow him/her and we not follow him/her before
                    //      - More if we can see more about user - i guess
                    if (btnHrefs != null && btnHrefs.Count > 0)
                    {
                        foreach (var btnHref in btnHrefs)
                        {
                            // if href and innertext not null then we can trying detect user id by compiled regex
                            // NOTE :
                            //      - Check CompiledRegex if you think it not correct anymore
                            //      - Edit Key if you use another Language rather than English to access FB
                            string hrefAttr  = btnHref.GetAttributeValue("href", string.Empty);
                            string innerText = btnHref.InnerText;
                            if (!string.IsNullOrWhiteSpace(innerText) &&
                                (idMatch = CompiledRegex.Match(innerText, hrefAttr)).Success)
                            {
                                userInfo.FBInfo.Id = idMatch.Groups["id"].Value;
                                break;
                            }
                        }
                    }
                }
            }

            #region IncludeUserId
            // id or alias [at least one]
            if (option.FbInfoOption.IncludeUserId && string.IsNullOrWhiteSpace(userInfo.FBInfo.Id))
            {
                // Cause these are so many pattern to detect user id so we won't log each xpath to each file
                // In stead, we need to log specify link and check all xpath later.
                mLogger.WriteLine(ctx + ": Require user id but user id empty");
                mLogger.WriteLine("\tLink: " + userAboutUrl);
                mLogger.WriteLine("-------------");
            }
            #endregion

            #region IncludeAvatarUrl || IncludeUserDisplayName
            // user name and avatar url [optional]
            // check avatarAnchor is null or not -- fault tolerant -- cuz these info doesn't important
            if ((option.FbInfoOption.IncludeAvatarUrl || option.FbInfoOption.IncludeUserDisplayName))
            {
                if (avatarAnchor != null)
                {
                    HtmlNode avatar = avatarAnchor.SelectSingleNode("img");
                    if (avatar != null)
                    {
                        // Get name and avatar
                        if (option.FbInfoOption.IncludeUserDisplayName)
                        {
                            userInfo.FBInfo.DisplayName = WebUtility.HtmlDecode(avatar.GetAttributeValue("alt", string.Empty));
                        }
                        if (option.FbInfoOption.IncludeAvatarUrl)
                        {
                            userInfo.FBInfo.AvatarUrl = WebUtility.HtmlDecode(avatar.GetAttributeValue("src", string.Empty));
                        }
                    }
                    else
                    {
                        this.logXPathNullNode(ctx, avatarAnchor.InnerHtml, "img");
                    }
                }
                else
                {
                    mLogger.WriteLine(ctx + ":IncludeAvatarUrl||IncludeUserDisplayName:Empty avatar anchor node");
                    mLogger.WriteLine("----");
                }
            }
            #endregion

            #region IncludeAddressInfo
            if (option.IncludeAddressInfo)
            {
                // livingNode contains information about City and HomeTown
                HtmlNode livingNode = htmlDom.SelectSingleNode("//div[@id='living']");
                if (livingNode != null)
                {
                    // Notice: See Note 001
                    HtmlNodeCollection trNodes = HtmlHelper.BuildDom(livingNode.InnerHtml).SelectNodes("//tr");
                    userInfo.Address = new AddressInfo();
                    foreach (var trNode in trNodes)
                    {
                        HtmlNodeCollection tds = trNode.SelectNodes("td");
                        // only get which td have 2 td
                        // td[0] is topic
                        // td[1] is value
                        if (tds == null && tds.Count != 2)
                        {
                            continue;
                        }

                        string   value     = string.Empty;
                        HtmlNode valueNode = null;
                        if (((valueNode = tds[1].SelectSingleNode("div/span/span")) != null) ||
                            ((valueNode = tds[1].SelectSingleNode("div/span")) != null) ||
                            ((valueNode = tds[1].SelectSingleNode("div/a")) != null) ||
                            ((valueNode = tds[1].SelectSingleNode("div")) != null))
                        {
                            value = valueNode.InnerText;
                            // store address
                            if (tds[0].InnerText.Contains(Localization.CurrentCity))
                            {
                                userInfo.Address.City = value;
                            }
                            else if (tds[0].InnerText.Contains(Localization.HomeTown))
                            {
                                userInfo.Address.HomeTown = value;
                            }
                        }
                        else
                        {
                            mLogger.WriteLine(ctx + ":IncludeAddressInfo:Could not detect value");
                            mLogger.WriteLine("\tMore details:");
                            this.logXPathNullNode(ctx, tds[1].InnerHtml, "div/span/span || div/span || div/a || div");
                            mLogger.WriteLine("-------------");
                        }
                    }
                }
                else
                {
                    this.logXPathNullNode(ctx, htmlDom.InnerHtml, "//div[@id='living']");
                }
            }
            #endregion

            #region IncludeContactInfo
            if (option.IncludeContactInfo)
            {
                userInfo.Contact = new ContactInfo();

                HtmlNode contactInfo = htmlDom.SelectSingleNode("//div[@id='contact-info']");
                if (contactInfo != null)
                {
                    // extract local DOM
                    HtmlNodeCollection trNodes = HtmlHelper.BuildDom(contactInfo.InnerHtml).SelectNodes("//tr");
                    foreach (var trNode in trNodes)
                    {
                        var tds = HtmlHelper.BuildDom(trNode.InnerHtml).SelectNodes("//td");
                        if (tds == null || tds.Count != 2)
                        {
                            continue; // ignore
                        }
                        // get contact value
                        string   value     = string.Empty;
                        HtmlNode valueNode = null;
                        if ((valueNode = HtmlHelper.BuildDom(tds[1].InnerHtml).SelectSingleNode("//div/span/span")) != null ||
                            (valueNode = HtmlHelper.BuildDom(tds[1].InnerHtml).SelectSingleNode("//div/span")) != null ||
                            (valueNode = HtmlHelper.BuildDom(tds[1].InnerHtml).SelectSingleNode("//div/a")) != null ||
                            (valueNode = HtmlHelper.BuildDom(tds[1].InnerHtml).SelectSingleNode("//div")) != null)
                        {
                            value = WebUtility.HtmlDecode(valueNode.InnerText);
                            // store contact
                            if (tds[0].InnerText.Contains("Mobile"))
                            {
                                userInfo.Contact.Mobile = value;
                            }
                            else if (tds[0].InnerText.Contains("Email"))
                            {
                                userInfo.Contact.Email = value;
                            }
                            else if (tds[0].InnerText.Contains("Websites"))
                            {
                                userInfo.Contact.Website = value;
                            }
                        }
                        else
                        {
                            mLogger.WriteLine(ctx + ":IncludeContactInfo:Could not detect value");
                            mLogger.WriteLine("\tMore details:");
                            this.logXPathNullNode(ctx, tds[1].InnerHtml, "div/span/span || div/span || div/a || div");
                            mLogger.WriteLine("-------------");
                        }
                    }
                }
                else
                {
                    this.logXPathNullNode(ctx, htmlDom.InnerHtml, "//div[@id='contact-info']");
                }
            }
            #endregion

            #region IncludeBasicInfo
            if (option.IncludeBasicInfo)
            {
                userInfo.BasicInfo = new BasicInfo();

                HtmlNode           contactInfo = htmlDom.SelectSingleNode("//div[@id='basic-info']");
                HtmlNodeCollection trNodes     = HtmlHelper.BuildDom(contactInfo.InnerHtml).SelectNodes("//tr");
                foreach (var trNode in trNodes)
                {
                    var tds = HtmlHelper.BuildDom(trNode.InnerHtml).SelectNodes("//td");
                    if (tds == null || tds.Count != 2)
                    {
                        continue; // ignore
                    }
                    // get contact value
                    string   value     = string.Empty;
                    HtmlNode valueNode = null;
                    if ((valueNode = HtmlHelper.BuildDom(tds[1].InnerHtml).SelectSingleNode("//div/span/span")) != null ||
                        (valueNode = HtmlHelper.BuildDom(tds[1].InnerHtml).SelectSingleNode("//div/span")) != null ||
                        (valueNode = HtmlHelper.BuildDom(tds[1].InnerHtml).SelectSingleNode("//div/a")) != null ||
                        (valueNode = HtmlHelper.BuildDom(tds[1].InnerHtml).SelectSingleNode("//div")) != null)
                    {
                        value = valueNode.InnerText;

                        // store contact
                        if (tds[0].InnerText.Contains("Birthday"))
                        {
                            userInfo.BasicInfo.BirthDay = value;
                        }
                        else if (tds[0].InnerText.Contains("Gender"))
                        {
                            userInfo.BasicInfo.Gender = value;
                        }
                        else if (tds[0].InnerText.Contains("Interested In"))
                        {
                            userInfo.BasicInfo.InterestedIn = value;
                        }
                        else if (tds[0].InnerText.Contains("Languages"))
                        {
                            userInfo.BasicInfo.Languages = value;
                        }
                        else if (tds[0].InnerText.Contains("Religious Views"))
                        {
                            userInfo.BasicInfo.ReligiousViews = value;
                        }
                        else if (tds[0].InnerText.Contains("Political Views"))
                        {
                            userInfo.BasicInfo.PolictialViews = value;
                        }
                    }
                    else
                    {
                        mLogger.WriteLine(ctx + ":IncludeBasicInfo:Could not detect value");
                        mLogger.WriteLine("\tMore details:");
                        this.logXPathNullNode(ctx, tds[1].InnerHtml, "div/span/span || div/span || div/a || div");
                        mLogger.WriteLine("-------------");
                    }
                }
            }
            #endregion

            #region IncludeEduInfo
            if (option.IncludeEduInfo)
            {
                // coming not soon
            }
            #endregion

            #region IncludeRelationshipInfo
            if (option.IncludeRelationshipInfo)
            {
                // not current ver
            }
            #endregion

            #region IncludeWorkInfo
            if (option.IncludeWorkInfo)
            {
                // not for current ver
            }
            #endregion

            #region IncludeFbFriends
            // get user friend list if included
            // at this step we do not need to check user id anymore
            // if user id is null then we had return before.
            if (option.FbInfoOption.IncludeFbFriends)
            {
                if (string.IsNullOrWhiteSpace(userInfo.FBInfo.Id))
                {
                    mLogger.WriteLine(".GetUserInfo : Include FB Friends but User Id empty. No friends included.");
                }
                else
                {
                    var friendPageUrl = "https://m.facebook.com/profile.php?v=friends&startindex=0&id=" + userInfo.FBInfo.Id;
                    userInfo.FBInfo.FbFriends = this.getFriends(friendPageUrl);
                }
            }
            #endregion

            // all step have done
            return(userInfo);
        }
Beispiel #2
0
        List <string> getFriends(string friendPage)
        {
            var ctx = "_GetFriends";
            // Declare list string to store user id
            var friends = new List <string>();

            // friendPage will be update each loop
            // if there is no more friend page, this loop will be terminate.
            while (true)
            {
                HtmlNode docNode = this.createDom(friendPage);
                // See Note 001
                HtmlNode           rootNode      = HtmlHelper.BuildDom(docNode.SelectSingleNode("//div[@id='root']").InnerHtml);
                HtmlNodeCollection friendAnchors =
                    rootNode.SelectNodes("//table[@role='presentation']/tr/td[2]/a") ??
                    rootNode.SelectNodes("//table[@role='presentation']/tbody/tr/td[2]/a");
                if (friendAnchors == null)
                {
                    mLogger.WriteLine(ctx + ":friendAnchors:Maybe last page or xpath error");
                    mLogger.WriteLine("\tMore details:");
                    this.logXPathNullNode(ctx, rootNode.InnerHtml, "//table[@role='presentation']/tr/td[2]/a || //table[@role='presentation']/tbody/tr/td[2]/a");
                    return(friends);
                }

                // Loop through all node and trying to get user alias or id
                foreach (HtmlNode friendAnchor in friendAnchors)
                {
                    string id = string.Empty;
                    string userProfileHref = friendAnchor.GetAttributeValue("href", null);

                    if (userProfileHref != null)
                    {
                        if (!userProfileHref.Contains("profile.php"))
                        {
                            // if userProfileHref does't contain "profile.php", userProfileHref contain user alias.
                            // E.g : https://m.facebook.com:443/user.alias.here?fref=fr_tab&amp;refid=17/about
                            int questionMarkIndex = userProfileHref.IndexOf("?");

                            if (questionMarkIndex > -1)
                            {
                                userProfileHref = userProfileHref.Substring(1, questionMarkIndex - 1);
                            }
                            else
                            {
                                userProfileHref = userProfileHref.Substring(1);
                            }

                            friends.Add(userProfileHref);
                        }
                        else
                        {
                            // Extract user id from href profile.php?id=user_id&fre...
                            // If extract not success then we need to log this error
                            Match match = CompiledRegex.Match(Pattern.UserId, userProfileHref);
                            if (match.Success)
                            {
                                friends.Add(match.Groups["id"].Value);
                            }
                            else
                            {
                                mLogger.WriteLine("Match user id by CompiledRege.Match(UserId) is fail. Addition info : url=" + friendPage + " and user profile is " + userProfileHref);
                            }
                        }
                    }
                    else
                    {
                        // If we go to this code block, there are some case happend :
                        // - Our bot has been block by this user or facebook.
                        // - This is deleted user.
                        // - We need provide more pattern to detect user id
                        mLogger.WriteLine(ctx + ":userProfileHref:Could not detect.");
                        mLogger.WriteLine("\tMaybe our bot blocked by this user or FB, or this user " + friendAnchor.InnerText + " has been banned or our xpath does not match anymore");
                        mLogger.WriteLine("\tLink : " + friendPage);
                        mLogger.WriteLine("-------------");
                    }
                }

                // get more friend
                HtmlNode moreFriend = rootNode.SelectSingleNode("//div[@id='m_more_friends']/a");
                if (moreFriend == null)
                {
                    mLogger.WriteLine(ctx + ":moreFriend:No more friends page at : " + friendPage);
                    break;
                }

                var nextUrl = WebUtility.HtmlDecode(moreFriend.GetAttributeValue("href", string.Empty));
                if (nextUrl != null)
                {
                    friendPage = "https://m.facebook.com" + nextUrl;
                }
                else
                {
                    mLogger.WriteLine(".GetFriends");
                    mLogger.WriteLine("\t\tNext Url is empty. Maybe this is last page.");
                    mLogger.WriteLine("\t\tLink : " + friendPage);
                    // exit loop
                    break;
                }
            }

            return(friends);
        }