Example #1
0
        /// <summary>
        /// Scrapes the screen names of the users that appear in the conversation stream of a tweet.
        /// </summary>
        /// <remarks>
        /// Pages are requested from the conversation endpoint until <c>noOfRecords</c> names have been
        /// collected, the response carries no usable cursor, or the next request would repeat the
        /// previous one. Every accepted name is logged through <c>AddToLog_ScrapMember</c> and appended
        /// to the file at <c>Globals.Path_ScrapedMembersList</c>.
        /// NOTE(review): <c>noOfRecords</c> is declared outside this method; it is presumably the
        /// requested maximum number of names - confirm against the caller.
        /// NOTE(review): the account segment of the conversation URL is hard-coded.
        /// </remarks>
        /// <param name="TweetUrl">URL of the tweet; the text following "status/" is used as the tweet id.</param>
        /// <param name="HttpHelper">HTTP helper used to download every page.</param>
        /// <param name="ReturnStatus">
        /// "No Error" when scraping finished normally, otherwise a short description of the failure.
        /// </param>
        /// <returns>
        /// The distinct screen names collected so far (possibly empty); a new empty list when the
        /// response is classified as "401 Unauthorized".
        /// </returns>
        public List <string> GetMembers(string TweetUrl, ref GlobusHttpHelper HttpHelper, out string ReturnStatus)
        {
            const string UrlEndMarker       = "@@@";
            const string PageNotFoundMarker = "{\"errors\":[{\"message\":\"Sorry, that page does not exist\",\"code\":34}]}";
            const string ConversationUrl    = "https://twitter.com/i/katyperry/conversation/";
            const string QueryPrefix        = "?include_available_features=1&include_entities=1&max_position=";

            string        cursor           = "-1";
            string        FollowingUrl     = string.Empty;
            string        lastRequestedUrl = null;
            List <string> lstIds           = new List <string>();
            bool          isFirstPage      = true;

            try
            {
                // The end marker is only needed to delimit the id for getBetween. It is applied to a
                // temporary copy so that the URL written to the result file stays unmodified.
                string numResult = getBetween(TweetUrl + UrlEndMarker, "status/", UrlEndMarker);

                while (true)
                {
                    // The first page is always requested with position 0, later pages with the cursor
                    // taken from the previous response.
                    FollowingUrl = ConversationUrl + numResult + QueryPrefix + (isFirstPage ? "0" : cursor.Trim());
                    isFirstPage  = false;

                    // A cursor that no longer advances would request the same page forever.
                    if (lastRequestedUrl != null && FollowingUrl == lastRequestedUrl)
                    {
                        ReturnStatus = "No Error";
                        return(lstIds);
                    }
                    lastRequestedUrl = FollowingUrl;

                    string Data = HttpHelper.getHtmlfromUrl(new Uri(FollowingUrl), "", "");
                    if (string.IsNullOrEmpty(Data))
                    {
                        // One retry before giving up.
                        Data = HttpHelper.getHtmlfromUrl(new Uri(FollowingUrl), "", "");
                    }

                    if (string.IsNullOrEmpty(Data))
                    {
                        AddToLog_ScrapMember("Either Url is Invalid or PageSource is getting Null or Empty.");

                        ReturnStatus = "Error";
                        return(lstIds);
                    }

                    bool pageNotFound = Data.Contains(PageNotFoundMarker);
                    if (pageNotFound || Data.Contains("Rate limit exceeded"))
                    {
                        // NOTE(review): the "401 Unauthorized" check is only reached when the response
                        // also contains one of the two markers above. The order is kept as it was;
                        // confirm whether it should be tested before them.
                        if (Data.Contains("401 Unauthorized"))
                        {
                            ReturnStatus = "Account is Suspended. ";
                            return(new List <string>());
                        }

                        if (pageNotFound)
                        {
                            ReturnStatus = "Sorry, that page does not exist :";
                            return(lstIds);
                        }

                        if (Data.Contains("Rate limit exceeded. Clients may not make more than 150 requests per hour."))
                        {
                            ReturnStatus = "Rate limit exceeded. Clients may not make more than 150 requests per hour.:-";
                            return(lstIds);
                        }

                        ReturnStatus = "Error";
                        return(lstIds);
                    }

                    string tweetMarker = Data.Contains("js-stream-tweet js-actionable-tweet")
                                         ? "js-stream-tweet js-actionable-tweet"
                                         : "simple-tweet tweet";

                    foreach (string item in Regex.Split(Data, tweetMarker))
                    {
                        // Chunks holding the page header or the paging metadata carry no tweet.
                        if (item.Contains("<!DOCTYPE html>") || item.Contains("min_position"))
                        {
                            continue;
                        }

                        // The screen name is the text between the data-screen-name attribute and the
                        // data-user-id attribute that follows it. Chunks without both are skipped
                        // instead of aborting the whole run with an out-of-range Substring call.
                        int nameStart = item.IndexOf("data-screen-name=", StringComparison.Ordinal);
                        if (nameStart < 0)
                        {
                            continue;
                        }

                        int nameEnd = item.IndexOf("data-user-id", nameStart, StringComparison.Ordinal);
                        if (nameEnd < 0)
                        {
                            continue;
                        }

                        string _SCRName = item.Substring(nameStart, nameEnd - nameStart)
                                          .Replace("data-screen-name=", string.Empty)
                                          .Replace("=", string.Empty)
                                          .Replace("\"", "")
                                          .Replace("\\\\n", string.Empty)
                                          .Replace("\\", "")
                                          .Trim();

                        // Whatever follows the first blank belongs to the next attribute (data-name).
                        if (_SCRName.Contains(" "))
                        {
                            _SCRName = _SCRName.Split(' ')[0];
                        }

                        if (noOfRecords > lstIds.Count)
                        {
                            // The list holds every name once; log and file still record each hit.
                            if (!lstIds.Contains(_SCRName))
                            {
                                lstIds.Add(_SCRName);
                            }

                            AddToLog_ScrapMember("[ " + DateTime.Now + " ] => [" + _SCRName + " ]");
                            if (!File.Exists(Globals.Path_ScrapedMembersList))
                            {
                                GlobusFileHelper.AppendStringToTextfileNewLine(" UserName , Url", Globals.Path_ScrapedMembersList);
                            }
                            GlobusFileHelper.AppendStringToTextfileNewLine(_SCRName + "," + TweetUrl, Globals.Path_ScrapedMembersList);
                        }
                    }

                    if (noOfRecords != lstIds.Count)
                    {
                        // Preferred cursor: the value of the "min_position" key, read up to the
                        // delimiter that follows the key.
                        int cursorStart = Data.IndexOf("min_position\":", StringComparison.Ordinal);
                        int cursorEnd   = cursorStart < 0 ? -1 : Data.IndexOfAny(new[] { ',', '}' }, cursorStart);
                        if (cursorEnd >= 0)
                        {
                            cursor = Data.Substring(cursorStart, cursorEnd - cursorStart)
                                     .Replace(">", string.Empty)
                                     .Replace("\n", string.Empty)
                                     .Replace("\"", string.Empty)
                                     .Replace("min_position", string.Empty)
                                     .Replace(":", string.Empty)
                                     .Replace(",", string.Empty)
                                     .Trim();

                            // A null position marks the end of the conversation.
                            if (cursor.Contains("null") || cursor.Contains("Null"))
                            {
                                ReturnStatus = "No Error";
                                return(lstIds);
                            }

                            if (cursor != "0")
                            {
                                continue;
                            }
                        }

                        // Fallback: a generic "cursor" entry when the response announces more items.
                        // A response without a parsable value ends up in the catch block below.
                        if (Data.Contains("\"has_more_items\":true"))
                        {
                            int    startindex = Data.IndexOf("cursor", StringComparison.Ordinal);
                            string start      = Data.Substring(startindex).Replace("cursor", "");
                            int    lastindex  = start.IndexOf(",", StringComparison.Ordinal);
                            if (lastindex > 40)
                            {
                                // A comma that far away is not the end of the cursor value.
                                lastindex = start.IndexOf("\n", StringComparison.Ordinal);
                            }

                            cursor = start.Substring(0, lastindex)
                                     .Replace("\"", "")
                                     .Replace("\n", string.Empty)
                                     .Replace("=", string.Empty)
                                     .Replace(":", string.Empty)
                                     .Trim();
                            if (cursor != "0")
                            {
                                continue;
                            }
                        }
                    }

                    ReturnStatus = "No Error";
                    return(lstIds);
                }
            }
            catch (Exception ex)
            {
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> getMembers() -- " + "" + " --> " + ex.Message, Globals.Path_TwtErrorLogs);

                ReturnStatus = "Error";

                return(lstIds);
            }
        }
Example #2
0
        /// <summary>
        /// Scrapes the screen names listed in the "retweeted by" popup of a tweet and records them.
        /// </summary>
        /// <remarks>
        /// Every name found is logged through <c>AddToLog_ScrapMember</c> and appended, together with
        /// <paramref name="Url"/>, to the file at <c>Globals.Path_ScrapedMembersList</c>. Failures are
        /// written to <c>Globals.Path_TwtErrorLogs</c>; no exception from the scraping itself is
        /// propagated to the caller.
        /// </remarks>
        /// <param name="Url">URL of the tweet; the text following "status/" is used as the tweet id.</param>
        /// <param name="HttpHelper">HTTP helper used to download the popup.</param>
        public void getTweetUsers(string Url, ref GlobusHttpHelper HttpHelper)
        {
            const string UrlEndMarker = "@@@";
            const string ItemMarker   = "js-stream-item stream-item stream-item";

            try
            {
                // The end marker is only needed to delimit the id for getBetween. It is applied to a
                // temporary copy so that the URL written to the result file stays unmodified.
                string numResult    = getBetween(Url + UrlEndMarker, "status/", UrlEndMarker);
                string FollowingUrl = "https://twitter.com/i/activity/retweeted_popup?id=" + numResult;

                string Data = HttpHelper.getHtmlfromUrl(new Uri(FollowingUrl), "", "");
                if (string.IsNullOrEmpty(Data))
                {
                    // One retry before giving up.
                    Data = HttpHelper.getHtmlfromUrl(new Uri(FollowingUrl), "", "");
                }

                if (string.IsNullOrEmpty(Data))
                {
                    AddToLog_ScrapMember("Either Url is Invalid or PageSource is getting Null or Empty.");
                    return;
                }

                // Without the item marker there is nothing to split; previously this case ran into a
                // null array and the resulting exception was silently discarded.
                if (!Data.Contains(ItemMarker))
                {
                    return;
                }

                foreach (string item in Regex.Split(Data, ItemMarker))
                {
                    // The screen name is the text between the data-screen-name attribute and the
                    // data-user-id attribute that follows it. Chunks without both are skipped
                    // instead of aborting the run with an out-of-range Substring call.
                    int nameStart = item.IndexOf("data-screen-name=", StringComparison.Ordinal);
                    if (nameStart < 0)
                    {
                        continue;
                    }

                    int nameEnd = item.IndexOf("data-user-id", nameStart, StringComparison.Ordinal);
                    if (nameEnd < 0)
                    {
                        continue;
                    }

                    string _SCRName = item.Substring(nameStart, nameEnd - nameStart)
                                      .Replace("data-screen-name=", string.Empty)
                                      .Replace("=", string.Empty)
                                      .Replace("\"", "")
                                      .Replace("\\\\n", string.Empty)
                                      .Replace("\\", "")
                                      .Trim();

                    // Whatever follows the first blank belongs to the next attribute (data-name).
                    if (_SCRName.Contains(" "))
                    {
                        _SCRName = _SCRName.Split(' ')[0];
                    }

                    AddToLog_ScrapMember("[ " + DateTime.Now + " ] => [" + _SCRName + " ]");
                    if (!File.Exists(Globals.Path_ScrapedMembersList))
                    {
                        GlobusFileHelper.AppendStringToTextfileNewLine(" UserName , Url", Globals.Path_ScrapedMembersList);
                    }
                    GlobusFileHelper.AppendStringToTextfileNewLine(_SCRName + "," + Url, Globals.Path_ScrapedMembersList);
                }
            }
            catch (Exception ex)
            {
                // Record the failure instead of discarding it, as the GetMembers methods do.
                GlobusFileHelper.AppendStringToTextfileNewLine("Error --> getTweetUsers() -- " + ex.Message, Globals.Path_TwtErrorLogs);
            }
        }
        /// <summary>
        /// Scrapes "screen name:user id" pairs from the user stream found at the given URL.
        /// </summary>
        /// <remarks>
        /// The first page is requested from <paramref name="keyword"/> itself, later pages from its
        /// "/timeline" endpoint with the cursor taken from the previous response. Paging stops when
        /// <c>noOfRecords</c> entries have been collected, no further cursor is found, or the next
        /// request would repeat the previous one. Every accepted entry is logged through
        /// <c>AddToLog_ScrapMember</c> and appended to the file at <c>Globals.Path_ScrapedMembersList</c>.
        /// NOTE(review): <c>noOfRecords</c> is declared outside this method; it is presumably the
        /// requested maximum number of entries - confirm against the caller.
        /// </remarks>
        /// <param name="keyword">URL of the page to scrape.</param>
        /// <param name="ReturnStatus">
        /// "No Error" when scraping finished normally, otherwise a short description of the failure.
        /// </param>
        /// <returns>
        /// The collected "screen name:user id" entries (possibly empty); a new empty list when the
        /// response is classified as "401 Unauthorized".
        /// </returns>
        public List <string> GetMembers(string keyword, out string ReturnStatus)
        {
            const string PageNotFoundMarker = "{\"errors\":[{\"message\":\"Sorry, that page does not exist\",\"code\":34}]}";

            string        cursor           = "-1";
            string        FollowingUrl     = string.Empty;
            string        lastRequestedUrl = null;
            List <string> lstIds           = new List <string>();
            bool          isFirstPage      = true;

            try
            {
                Globussoft.GlobusHttpHelper HttpHelper = new Globussoft.GlobusHttpHelper();

                while (true)
                {
                    FollowingUrl = isFirstPage
                                   ? keyword
                                   : keyword + "/timeline?cursor=" + cursor + "&cursor_index=&cursor_offset=&include_available_features=1&include_entities=1&is_forward=true";
                    isFirstPage = false;

                    // A cursor that no longer advances would request the same page forever.
                    if (lastRequestedUrl != null && FollowingUrl == lastRequestedUrl)
                    {
                        ReturnStatus = "No Error";
                        return(lstIds);
                    }
                    lastRequestedUrl = FollowingUrl;

                    string Data = HttpHelper.getHtmlfromUrl(new Uri(FollowingUrl), "", "");
                    if (string.IsNullOrEmpty(Data))
                    {
                        // One retry before giving up.
                        Data = HttpHelper.getHtmlfromUrl(new Uri(FollowingUrl), "", "");
                    }

                    if (string.IsNullOrEmpty(Data))
                    {
                        AddToLog_ScrapMember("Either Url is Invalid or PageSource is getting Null or Empty.");

                        ReturnStatus = "Error";
                        return(lstIds);
                    }

                    bool pageNotFound = Data.Contains(PageNotFoundMarker);
                    if (pageNotFound || Data.Contains("Rate limit exceeded"))
                    {
                        // NOTE(review): the "401 Unauthorized" check is only reached when the response
                        // also contains one of the two markers above. The order is kept as it was;
                        // confirm whether it should be tested before them.
                        if (Data.Contains("401 Unauthorized"))
                        {
                            ReturnStatus = "Account is Suspended. ";
                            return(new List <string>());
                        }

                        if (pageNotFound)
                        {
                            ReturnStatus = "Sorry, that page does not exist :";
                            return(lstIds);
                        }

                        if (Data.Contains("Rate limit exceeded. Clients may not make more than 150 requests per hour."))
                        {
                            ReturnStatus = "Rate limit exceeded. Clients may not make more than 150 requests per hour.:-";
                            return(lstIds);
                        }

                        ReturnStatus = "Error";
                        return(lstIds);
                    }

                    string itemMarker = Data.Contains("js-stream-item stream-item stream-item")
                                        ? "js-stream-item stream-item stream-item"
                                        : "js-stream-item";

                    foreach (string item in Regex.Split(Data, itemMarker))
                    {
                        // Chunks holding the page header or the paging metadata carry no user entry.
                        if (item.Contains("<!DOCTYPE html>") || item.Contains("cursor"))
                        {
                            continue;
                        }

                        if (!item.Contains(" data-user-id"))
                        {
                            continue;
                        }

                        // Expected attribute order: data-screen-name, data-user-id, data-feedback-token.
                        // The screen name ends where data-user-id starts, the user id ends where
                        // data-feedback-token starts. Chunks that do not match are skipped instead of
                        // aborting the whole run with an out-of-range Substring call.
                        int nameStart = item.IndexOf("data-screen-name=", StringComparison.Ordinal);
                        if (nameStart < 0)
                        {
                            continue;
                        }

                        int idStart = item.IndexOf("data-user-id", nameStart, StringComparison.Ordinal);
                        if (idStart < 0)
                        {
                            continue;
                        }

                        int idEnd = item.IndexOf("data-feedback-token", idStart, StringComparison.Ordinal);
                        if (idEnd < 0)
                        {
                            continue;
                        }

                        string _SCRNameID = item.Substring(idStart, idEnd - idStart)
                                            .Replace("data-user-id", string.Empty)
                                            .Replace("=", string.Empty)
                                            .Replace("\"", "")
                                            .Replace("\\\\n", string.Empty)
                                            .Replace("\\", "")
                                            .Trim();

                        string _SCRName = item.Substring(nameStart, idStart - nameStart)
                                          .Replace("data-screen-name=", string.Empty)
                                          .Replace("=", string.Empty)
                                          .Replace("\"", "")
                                          .Replace("\\\\n", string.Empty)
                                          .Replace("\\", "")
                                          .Trim();

                        if (noOfRecords > lstIds.Count)
                        {
                            lstIds.Add(_SCRName + ":" + _SCRNameID);
                            AddToLog_ScrapMember("[ " + DateTime.Now + " ] => [" + _SCRNameID + " :: " + _SCRName + " ]");
                            if (!File.Exists(Globals.Path_ScrapedMembersList))
                            {
                                GlobusFileHelper.AppendStringToTextfileNewLine("UserID , UserName , Url", Globals.Path_ScrapedMembersList);
                            }
                            GlobusFileHelper.AppendStringToTextfileNewLine(_SCRNameID + "," + _SCRName + "," + keyword, Globals.Path_ScrapedMembersList);
                        }
                    }

                    if (noOfRecords != lstIds.Count)
                    {
                        // A response without a parsable cursor value ends up in the catch block below.
                        if (Data.Contains("data-cursor"))
                        {
                            // First page: the cursor is the value of the data-cursor attribute.
                            int    startindex = Data.IndexOf("data-cursor", StringComparison.Ordinal);
                            string start      = Data.Substring(startindex).Replace("data-cursor", "");
                            int    lastindex  = start.IndexOf("<div class=\"stream profile-stream\">", StringComparison.Ordinal);
                            if (lastindex == -1)
                            {
                                lastindex = start.IndexOf("\n", StringComparison.Ordinal);
                            }

                            cursor = start.Substring(0, lastindex)
                                     .Replace("\"", "")
                                     .Replace("\n", string.Empty)
                                     .Replace("=", string.Empty)
                                     .Replace(">", string.Empty)
                                     .Trim();
                            if (cursor != "0")
                            {
                                continue;
                            }
                        }

                        if (Data.Contains("cursor"))
                        {
                            // Later pages: the cursor is a plain "cursor" entry of the response.
                            int    startindex = Data.IndexOf("cursor", StringComparison.Ordinal);
                            string start      = Data.Substring(startindex).Replace("cursor", "");
                            int    lastindex  = start.IndexOf(",", StringComparison.Ordinal);
                            if (lastindex > 40)
                            {
                                // A comma that far away is not the end of the cursor value.
                                lastindex = start.IndexOf("\n", StringComparison.Ordinal);
                            }

                            cursor = start.Substring(0, lastindex)
                                     .Replace("\"", "")
                                     .Replace("\n", string.Empty)
                                     .Replace("=", string.Empty)
                                     .Replace(":", string.Empty)
                                     .Trim();
                            if (cursor != "0")
                            {
                                continue;
                            }
                        }
                    }

                    ReturnStatus = "No Error";
                    return(lstIds);
                }
            }
            catch (Exception ex)
            {
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> getMembers() -- " + "" + " --> " + ex.Message, Globals.Path_TwtErrorLogs);

                ReturnStatus = "Error";

                return(lstIds);
            }
        }