/// <summary>
/// Requests one page of reviews for an app by POSTing to <c>Consts.REVIEWS_URL</c>.
/// </summary>
/// <param name="appID">App identifier; second argument of the <c>Consts.REVIEWS_POST_DATA</c> format.</param>
/// <param name="reviewsPage">Page number; first argument of the <c>Consts.REVIEWS_POST_DATA</c> format.</param>
/// <param name="isUsingProxies">When true, the request is given the proxy returned by <c>ProxiesLoader.GetWebProxy()</c>.</param>
/// <returns>Whatever <c>WebRequests.Post</c> returns for the request.</returns>
public static string GetAppReviews(string appID, int reviewsPage, bool isUsingProxies = false)
        {
            // The handler only lives for this single request
            using (var reviewsClient = new WebRequests())
            {
                // Request headers and encoding setup
                reviewsClient.Host              = Consts.HOST;
                reviewsClient.Origin            = Consts.ORIGIN;
                reviewsClient.Encoding          = "utf-8";
                reviewsClient.AllowAutoRedirect = true;
                reviewsClient.Accept            = "*/*";
                reviewsClient.UserAgent         = Consts.USER_AGENT;
                reviewsClient.ContentType       = "application/x-www-form-urlencoded;charset=UTF-8";
                reviewsClient.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;
                reviewsClient.Headers.Add(Consts.ACCEPT_LANGUAGE);

                // A proxy is attached only on request
                if (isUsingProxies)
                {
                    reviewsClient.Proxy = ProxiesLoader.GetWebProxy();
                }

                // Form body: page number first, app id second
                string formBody = String.Format(Consts.REVIEWS_POST_DATA, reviewsPage, appID);

                string reviewsResponse = reviewsClient.Post(Consts.REVIEWS_URL, formBody);
                return reviewsResponse;
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Sends a "logout" request for the given user. The response status is read but
        /// not acted upon: nothing is returned whether or not the server answered 200 OK.
        /// </summary>
        /// <param name="username">Value sent as the "username" form field.</param>
        /// <param name="authToken">Token forwarded to <c>WebRequests.Post</c>.</param>
        /// <remarks>
        /// The method is <c>async void</c>, so callers cannot await it and an exception
        /// thrown after the first await cannot be caught by the caller.
        /// </remarks>
        public static async void Logout(string username, string authToken)
        {
            long   timestamp = Timestamps.GenerateRetardedTimestamp();
            string stamp     = timestamp.ToString(CultureInfo.InvariantCulture);

            // Same fields, same insertion order as the request has always used
            var form = new Dictionary<string, string>();
            form.Add("json", "{}");
            form.Add("username", username);
            form.Add("timestamp", stamp);

            HttpResponseMessage reply = await WebRequests.Post("logout", form, authToken, stamp);

            if (reply.StatusCode == HttpStatusCode.OK)
            {
                // Request accepted; there is nothing to hand back
                return;
            }

            // Every other status is ignored as well
        }
Exemplo n.º 3
0
        /// <summary>
        /// POSTs <paramref name="data"/> to <paramref name="url"/> and retries until a
        /// non-blank body arrives together with a 200 OK status, or the retry budget is spent.
        /// </summary>
        /// <param name="client">Request handler used for every attempt (passed by ref for caller compatibility; it is never reassigned here).</param>
        /// <param name="logger">Receives exceptions raised by an attempt and the diagnostics of each failed attempt.</param>
        /// <param name="url">Target url.</param>
        /// <param name="data">Request body.</param>
        /// <returns>
        /// The body of the last attempt. It is empty when that attempt threw, and may be a
        /// non-OK body when every attempt failed.
        /// </returns>
        public static string Post(ref WebRequests client, Logger logger, string url, string data)
        {
            int    retry        = 10;
            string htmlResponse = String.Empty;

            while (true)
            {
                // Start each attempt clean so a failure cannot leak the body of an earlier attempt
                htmlResponse = String.Empty;

                try
                {
                    htmlResponse = client.Post(url, data, true);
                }
                catch (Exception ex)
                {
                    // Best effort: log and fall through to the retry bookkeeping
                    logger.Error(ex);
                }

                // Sanity check
                if (!String.IsNullOrWhiteSpace(htmlResponse) && client.StatusCode == HttpStatusCode.OK)
                {
                    break;
                }

                retry -= 1;
                logger.Debug(String.Format("Status Code not OK. Retries left: {0}", retry));

                logger.Debug("StatusCode = " + client.StatusCode + " Message = " + client.Error);

                logger.Debug("Html Response = " + htmlResponse);

                // Budget exhausted: stop here instead of sleeping and counting down to -1
                if (retry <= 0)
                {
                    break;
                }

                // Polite sleeping between attempts
                Thread.Sleep(TimeSpan.FromSeconds(_rnd.Next(2, 5)));
            }

            return htmlResponse;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Posts the serialized events and snap info to the "update_snaps" endpoint.
        /// </summary>
        /// <param name="events">Serialized with <c>JsonConvert.SerializeObject</c> into the "events" field.</param>
        /// <param name="snapInfo">Serialized with <c>JsonConvert.SerializeObject</c> into the "json" field.</param>
        /// <param name="username">Value sent as the "username" form field.</param>
        /// <param name="authToken">Token forwarded to <c>WebRequests.Post</c>.</param>
        /// <returns>True when the response status is 200 OK, false for any other status.</returns>
        public static async Task <bool> SendEvents(Dictionary <string, object>[] events,
                                                   Dictionary <string, Dictionary <string, double> > snapInfo, string username,
                                                   string authToken)
        {
            long   timestamp = Timestamps.GenerateRetardedTimestamp();
            string stamp     = timestamp.ToString(CultureInfo.InvariantCulture);

            var form = new Dictionary<string, string>();
            form.Add("events", JsonConvert.SerializeObject(events));
            form.Add("json", JsonConvert.SerializeObject(snapInfo));
            form.Add("username", username);
            form.Add("timestamp", stamp);

            HttpResponseMessage reply = await WebRequests.Post("update_snaps", form, authToken, stamp);

            // Only a plain 200 counts as success
            return reply.StatusCode == HttpStatusCode.OK;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Posts an "updatePrivacy" action to the "settings" endpoint.
        /// </summary>
        /// <param name="username">Value sent as the "username" form field.</param>
        /// <param name="authToken">Token forwarded to <c>WebRequests.Post</c>.</param>
        /// <param name="isPrivate">Sent as "privacySetting": 1 when true, 0 when false.</param>
        /// <returns>True when the response status is 200 OK, false for any other status.</returns>
        public static async Task <bool> UpdateAccountPrivacy(string username, string authToken, bool isPrivate)
        {
            int privacy = isPrivate ? 1 : 0;

            long   timestamp = Timestamps.GenerateRetardedTimestamp();
            string stamp     = timestamp.ToString(CultureInfo.InvariantCulture);

            var form = new Dictionary<string, string>();
            form.Add("username", username);
            form.Add("action", "updatePrivacy");
            form.Add("privacySetting", privacy.ToString());
            form.Add("timestamp", stamp);

            HttpResponseMessage reply = await WebRequests.Post("settings", form, authToken, stamp);

            return reply.StatusCode == HttpStatusCode.OK;
        }
Exemplo n.º 6
0
 /// <summary>
 /// Serializes <paramref name="newScore"/> with snake_case property names and posts it to <c>URL</c>.
 /// </summary>
 /// <param name="newScore">Score to submit.</param>
 /// <param name="onSuccess">Invoked with the response deserialized as <c>LeaderBoardScoreModel</c>.</param>
 /// <param name="onError">Invoked with the error after it has been written via <c>Debug.Log</c>.</param>
 public void AddScore(
     NewLeaderBoardScore newScore,
     Action<LeaderBoardScoreModel> onSuccess,
     Action<string> onError
     )
 {
     // Property names are emitted in snake_case
     var snakeCaseSettings = new JsonSerializerSettings();
     snakeCaseSettings.ContractResolver = new DefaultContractResolver
     {
         NamingStrategy = new SnakeCaseNamingStrategy()
     };

     string payload = JsonConvert.SerializeObject(newScore, snakeCaseSettings);

     WebRequests.Post(
         URL,
         payload,
         body =>
         {
             LeaderBoardScoreModel saved = JsonConvert.DeserializeObject<LeaderBoardScoreModel>(body);
             onSuccess(saved);
         },
         failure =>
         {
             // Log first, then let the caller react
             Debug.Log(failure);
             onError(failure);
         });
 }
Exemplo n.º 7
0
        /// <summary>
        /// Requests one page of reviews for an app by POSTing to <c>Consts.REVIEWS_URL</c>.
        /// </summary>
        /// <param name="appID">App identifier; second argument of the <c>Consts.REVIEWS_POST_DATA</c> format.</param>
        /// <param name="reviewsPage">Page number; first argument of the <c>Consts.REVIEWS_POST_DATA</c> format.</param>
        /// <param name="isUsingProxies">When true, the request is given the proxy returned by <c>ProxiesLoader.GetWebProxy ()</c>.</param>
        /// <returns>Whatever <c>WebRequests.Post</c> returns for the request.</returns>
        public static string GetAppReviews (string appID, int reviewsPage, bool isUsingProxies = false)
        {
            // The handler only lives for this single request
            using (var reviewsClient = new WebRequests ())
            {
                // Request headers and encoding setup
                reviewsClient.Host              = Consts.HOST;
                reviewsClient.Origin            = Consts.ORIGIN;
                reviewsClient.Encoding          = "utf-8";
                reviewsClient.AllowAutoRedirect = true;
                reviewsClient.Accept            = "*/*";
                reviewsClient.UserAgent         = Consts.USER_AGENT;
                reviewsClient.ContentType       = "application/x-www-form-urlencoded;charset=UTF-8";
                reviewsClient.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;
                reviewsClient.Headers.Add (Consts.ACCEPT_LANGUAGE);

                // A proxy is attached only on request
                if (isUsingProxies)
                {
                    reviewsClient.Proxy = ProxiesLoader.GetWebProxy ();
                }

                // Form body: page number first, app id second
                string formBody = String.Format (Consts.REVIEWS_POST_DATA, reviewsPage, appID);

                string reviewsResponse = reviewsClient.Post (Consts.REVIEWS_URL, formBody);
                return reviewsResponse;
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Posts to the "updates" endpoint and parses the response body as an <c>Account</c>.
        /// </summary>
        /// <param name="username">Value sent as the "username" form field.</param>
        /// <param name="authToken">Token forwarded to <c>WebRequests.Post</c>.</param>
        /// <returns>
        /// The parsed account when the status is 200 OK and the account reports
        /// <c>Logged</c>; otherwise null (non-OK status, unparsable/empty body, or not logged).
        /// </returns>
        public static async Task <Account> Update(string username, string authToken)
        {
            long timestamp = Timestamps.GenerateRetardedTimestamp();
            var  postData  = new Dictionary <string, string>
            {
                { "username", username },
                { "timestamp", timestamp.ToString(CultureInfo.InvariantCulture) }
            };
            HttpResponseMessage response =
                await WebRequests.Post("updates", postData, authToken, timestamp.ToString(CultureInfo.InvariantCulture));

            if (response.StatusCode != HttpStatusCode.OK)
            {
                // Anything but 200 is treated as "no update available"
                return(null);
            }

            // Http Request Worked
            string data = await response.Content.ReadAsStringAsync();

            Account parsedData = await JsonConvert.DeserializeObjectAsync <Account>(data);

            // Deserialization can yield null (e.g. a body of "null"); guard before reading Logged
            if (parsedData == null || !parsedData.Logged)
            {
                return(null);
            }

            return(parsedData);
        }
Exemplo n.º 9
0
 /// <summary>
 /// Extracts the "id" query value from <paramref name="appUrl"/> and posts it to the
 /// Play Store "getdoc" endpoint. The reply is not parsed, so the result is always null.
 /// </summary>
 /// <param name="server">Request handler used for the POST.</param>
 /// <param name="appUrl">Url expected to contain an "id=" query value.</param>
 /// <returns>Always null.</returns>
 static string[] getPermissions(WebRequests server, string appUrl)
 {
     Match idMatch = Regex.Match(appUrl, "id=([^&]*)");
     if (!idMatch.Success)
     {
         // No app id in the url: nothing to ask the server for
         return null;
     }

     string appId = idMatch.Groups[1].Value;

     // The reply is requested but currently discarded
     server.Post("https://play.google.com/store/xhr/getdoc?authuser=0", "xhr=1&ids=" + appId);
     return null;
 }
Exemplo n.º 10
0
        /// <summary>
        /// Posts an "updateStoryPrivacy" action to the "settings" endpoint.
        /// </summary>
        /// <param name="username">Value sent as the "username" form field.</param>
        /// <param name="authToken">Token forwarded to <c>WebRequests.Post</c>.</param>
        /// <param name="friendsOnly">
        /// False sends "EVERYONE". True sends "FRIENDS" when no block list is given and
        /// "CUSTOM" when one is.
        /// </param>
        /// <param name="friendsToBlock">
        /// Optional names sent as "storyFriendsToBlock" in the form <c>['a','b']</c>;
        /// only used when <paramref name="friendsOnly"/> is true.
        /// </param>
        /// <returns>True when the response status is 200 OK, false for any other status.</returns>
        public static async Task <bool> UpdateStoryPrivacy(string username, string authToken, bool friendsOnly, List <string> friendsToBlock = null)
        {
            bool hasBlockList = friendsOnly && friendsToBlock != null;

            string privacySetting;
            if (!friendsOnly)
            {
                privacySetting = "EVERYONE";
            }
            else if (friendsToBlock == null)
            {
                privacySetting = "FRIENDS";
            }
            else
            {
                privacySetting = "CUSTOM";
            }

            long timestamp = Timestamps.GenerateRetardedTimestamp();
            var  postData  = new Dictionary <string, string>
            {
                { "username", username },
                { "action", "updateStoryPrivacy" },
                { "privacySetting", privacySetting },
                { "timestamp", timestamp.ToString(CultureInfo.InvariantCulture) }
            };

            if (hasBlockList)
            {
                // Join puts the separator strictly between entries. The previous
                // IndexOf-based check left a trailing comma whenever a name was repeated.
                List <string> quotedNames = friendsToBlock.ConvertAll(name => string.Format("'{0}'", name));
                postData.Add("storyFriendsToBlock", string.Format("[{0}]", string.Join(",", quotedNames)));
            }

            HttpResponseMessage response =
                await WebRequests.Post("settings", postData, authToken, timestamp.ToString(CultureInfo.InvariantCulture));

            return(response.StatusCode == HttpStatusCode.OK);
        }
Exemplo n.º 11
0
        /// <summary>
        /// Downloads a blob from the "blob" endpoint and returns it in a form accepted by
        /// <c>Blob.ValidateMediaBlob</c>.
        /// </summary>
        /// <param name="snapId">Value sent as the "id" form field.</param>
        /// <param name="username">Value sent as the "username" form field.</param>
        /// <param name="authToken">Token forwarded to <c>WebRequests.Post</c>.</param>
        /// <returns>
        /// The raw bytes when they validate as-is, the output of <c>Blob.DecryptBlob</c> when
        /// only that validates, or null when neither validates or the status is not 200 OK.
        /// </returns>
        public static async Task <byte[]> GetBlob(string snapId, string username, string authToken)
        {
            long   timestamp = Timestamps.GenerateRetardedTimestamp();
            string stamp     = timestamp.ToString(CultureInfo.InvariantCulture);

            var form = new Dictionary<string, string>();
            form.Add("id", snapId);
            form.Add("username", username);
            form.Add("timestamp", stamp);

            HttpResponseMessage reply = await WebRequests.Post("blob", form, authToken, stamp);

            if (reply.StatusCode != HttpStatusCode.OK)
            {
                return null;
            }

            byte[] rawBytes = await reply.Content.ReadAsByteArrayAsync();

            // Already usable as received
            if (Blob.ValidateMediaBlob(rawBytes))
            {
                return rawBytes;
            }

            // Otherwise try once more after decrypting
            byte[] decryptedBytes = Blob.DecryptBlob(rawBytes);
            return Blob.ValidateMediaBlob(decryptedBytes) ? decryptedBytes : null;
        }
Exemplo n.º 12
0
        /// <summary>
        /// Posts the credentials to the "login" endpoint using <c>KeyVault.StaticToken</c>
        /// and parses the response body as an <c>Account</c>.
        /// </summary>
        /// <param name="username">Value sent as the "username" form field.</param>
        /// <param name="password">Value sent as the "password" form field.</param>
        /// <returns>
        /// <c>Success</c> with the account when it reports <c>Logged</c>;
        /// <c>InvalidCredentials</c> with the account when it does not;
        /// <c>ServerError</c> with null when the status is not 200 OK or the body could
        /// not be turned into an account.
        /// </returns>
        public static async Task <Tuple <TempEnumHolder.LoginStatus, Account> > Login(string username, string password)
        {
            long timestamp = Timestamps.GenerateRetardedTimestamp();
            var  postData  = new Dictionary <string, string>
            {
                { "password", password },
                { "username", username },
                { "timestamp", timestamp.ToString(CultureInfo.InvariantCulture) }
            };
            HttpResponseMessage response =
                await WebRequests.Post("login", postData, KeyVault.StaticToken, timestamp.ToString(CultureInfo.InvariantCulture));

            if (response.StatusCode != HttpStatusCode.OK)
            {
                return
                    (new Tuple <TempEnumHolder.LoginStatus, Account>(
                         TempEnumHolder.LoginStatus.ServerError, null));
            }

            // Http Request Worked
            string data = await response.Content.ReadAsStringAsync();

            Account parsedData = await JsonConvert.DeserializeObjectAsync <Account>(data);

            // Deserialization can yield null (e.g. a body of "null"); report it as a
            // server-side problem instead of failing on parsedData.Logged
            if (parsedData == null)
            {
                return
                    (new Tuple <TempEnumHolder.LoginStatus, Account>(
                         TempEnumHolder.LoginStatus.ServerError, null));
            }

            // Check if we were logged in
            if (!parsedData.Logged)
            {
                return
                    (new Tuple <TempEnumHolder.LoginStatus, Account>(
                         TempEnumHolder.LoginStatus.InvalidCredentials, parsedData));
            }

            return
                (new Tuple <TempEnumHolder.LoginStatus, Account>(
                     TempEnumHolder.LoginStatus.Success, parsedData));
        }
Exemplo n.º 13
0
        /// <summary>
        /// Posts a friend-related action to the "friend" endpoint and parses the reply as a
        /// <c>FriendAction</c>.
        /// </summary>
        /// <param name="friend">Value sent as the "friend" form field.</param>
        /// <param name="action">Value sent as the "action" form field.</param>
        /// <param name="username">Value sent as the "username" form field.</param>
        /// <param name="authToken">Token forwarded to <c>WebRequests.Post</c>.</param>
        /// <param name="postDataEntries">
        /// Optional extra form fields. They are added with <c>Dictionary.Add</c>, so a key
        /// that repeats one of the built-in fields throws <c>ArgumentException</c>.
        /// </param>
        /// <returns>The parsed reply when the status is 200 OK; otherwise null.</returns>
        public static async Task <FriendAction> Friend(string friend, string action, string username, string authToken,
                                                       Dictionary <string, string> postDataEntries = null)
        {
            long   timestamp = Timestamps.GenerateRetardedTimestamp();
            string stamp     = timestamp.ToString(CultureInfo.InvariantCulture);

            var form = new Dictionary<string, string>();
            form.Add("friend", friend);
            form.Add("action", action);
            form.Add("username", username);
            form.Add("timestamp", stamp);

            // Caller-supplied extras go after the fixed fields
            if (postDataEntries != null)
            {
                foreach (KeyValuePair<string, string> extra in postDataEntries)
                {
                    form.Add(extra.Key, extra.Value);
                }
            }

            HttpResponseMessage reply = await WebRequests.Post("friend", form, authToken, stamp);

            if (reply.StatusCode != HttpStatusCode.OK)
            {
                return null;
            }

            string body = await reply.Content.ReadAsStringAsync();
            FriendAction outcome = await JsonConvert.DeserializeObjectAsync<FriendAction>(body);
            return outcome;
        }
Exemplo n.º 14
0
        /// <summary>
        /// Posts an "updateBirthday" action to the "settings" endpoint.
        /// </summary>
        /// <param name="username">Value sent as the "username" form field.</param>
        /// <param name="authToken">Token forwarded to <c>WebRequests.Post</c>.</param>
        /// <param name="birthMonth">First part of the "birthday" field ("month-day").</param>
        /// <param name="birthDay">Second part of the "birthday" field ("month-day").</param>
        /// <returns>True when the response status is 200 OK, false for any other status.</returns>
        public static async Task <bool> UpdateBirthday(string username, string authToken, int birthMonth, int birthDay)
        {
            long   timestamp = Timestamps.GenerateRetardedTimestamp();
            string stamp     = timestamp.ToString(CultureInfo.InvariantCulture);

            var form = new Dictionary<string, string>();
            form.Add("username", username);
            form.Add("action", "updateBirthday");
            form.Add("birthday", string.Format("{0}-{1}", birthMonth, birthDay));
            form.Add("timestamp", stamp);

            HttpResponseMessage reply = await WebRequests.Post("settings", form, authToken, stamp);

            return reply.StatusCode == HttpStatusCode.OK;
        }
Exemplo n.º 15
0
        /// <summary>
        /// Posts an "updateCanViewMatureContent" action to the "settings" endpoint.
        /// </summary>
        /// <param name="username">Value sent as the "username" form field.</param>
        /// <param name="authToken">Token forwarded to <c>WebRequests.Post</c>.</param>
        /// <param name="canViewMatureContent">Sent as "canViewMatureContent" using <c>bool.ToString()</c>.</param>
        /// <returns>True when the response status is 200 OK, false for any other status.</returns>
        public static async Task <bool> UpdateMaturitySettings(string username, string authToken, bool canViewMatureContent)
        {
            long   timestamp = Timestamps.GenerateRetardedTimestamp();
            string stamp     = timestamp.ToString(CultureInfo.InvariantCulture);

            var form = new Dictionary<string, string>();
            form.Add("username", username);
            form.Add("action", "updateCanViewMatureContent");
            form.Add("canViewMatureContent", canViewMatureContent.ToString());
            form.Add("timestamp", stamp);

            HttpResponseMessage reply = await WebRequests.Post("settings", form, authToken, stamp);

            return reply.StatusCode == HttpStatusCode.OK;
        }
Exemplo n.º 16
0
        /// <summary>
        /// Logs in to getpocket.com: fetches the login page, extracts the hidden
        /// "form_check" value from its HTML and posts it with the credentials to
        /// <c>login_process.php</c>. Progress, status code and response text go to the console.
        /// </summary>
        /// <param name="client">Request handler; its cookies are cleared and its headers/timeout are overwritten.</param>
        /// <param name="username">Sent as the "feed_id" form field.</param>
        /// <param name="password">Sent as the "password" form field.</param>
        private static void PerformLoginOnPocket(WebRequests client, string username, string password)
        {
            // Explanation of the requests below:
            // To log in on the Pocket website we have to issue a POST request with a few parameters in its BODY.
            // Aside from the obvious login and password, there is a variable parameter called "form_check".
            // That parameter is hidden in the HTML of the login page, so we fetch the page first, extract
            // the value, and replay it on the login request to automate the whole flow.

            // Start from a clean session and set up the headers for getpocket.com
            client.ClearCookies();
            client.Host      = "getpocket.com";
            client.UserAgent = "Web Crawling 101 Book - Used for educational purposes only";
            client.Referer   = "https://getpocket.com/login?e=4";
            client.Origin    = "https://getpocket.com";
            client.Timeout   = 18000; // 18 Seconds

            // Fetching the login page (a GET, since we are only retrieving a resource)
            Console.WriteLine(" => Executing GET Request for Home Page");
            string homePageHTML = client.Get("https://getpocket.com/login?e=4");

            // Parsing "Form Check" parameter
            string formCheck = ExtractFormCheckParameter(homePageHTML);

            Console.WriteLine(" => Extracted FormCheck parameter hidden within the HTML '{0}'", formCheck);

            // Formatting the HTTP POST BODY of the LOGIN request.
            // The values are percent-encoded: a raw '&', '=', '+' or '%' inside a password
            // (or the token) would otherwise split or corrupt the urlencoded body.
            // Nulls are sent as empty values, as String.Format did before.
            string postData = String.Format("feed_id={0}&password={1}&route=&form_check={2}&src=&source=email&source_page=%2Flogin&is_ajax=1",
                                            Uri.EscapeDataString(username ?? String.Empty),
                                            Uri.EscapeDataString(password ?? String.Empty),
                                            Uri.EscapeDataString(formCheck ?? String.Empty));

            // HTTP POST url for "Login" on Pocket
            Console.WriteLine(" => Performing Login");
            string pocketLoginUrl = "https://getpocket.com/login_process.php";
            string loginResponse  = client.Post(pocketLoginUrl, postData);

            Console.WriteLine(" => Login Status Code : {0}", client.StatusCode);
            Console.WriteLine(" => Login Response Text : {0}", loginResponse);
        }
Exemplo n.º 17
0
        /// <summary>
        /// Posts the names of the given friends to the "bests" endpoint and parses the reply
        /// as a dictionary of <c>Best</c> entries keyed by string.
        /// </summary>
        /// <param name="friends">Friends whose <c>Name</c> values are sent, JSON-serialized, as "friend_usernames".</param>
        /// <param name="username">Value sent as the "username" form field.</param>
        /// <param name="authToken">Token forwarded to <c>WebRequests.Post</c>.</param>
        /// <returns>The parsed reply when the status is 200 OK; otherwise null.</returns>
        public static async Task <Dictionary <string, Best> > GetBests(List <Friend> friends, string username, string authToken)
        {
            // Project the friend list down to the names, keeping the order
            string[] friendNames = friends.ConvertAll<string>(entry => entry.Name).ToArray();

            long   timestamp = Timestamps.GenerateRetardedTimestamp();
            string stamp     = timestamp.ToString(CultureInfo.InvariantCulture);

            var form = new Dictionary<string, string>();
            form.Add("friend_usernames", JsonConvert.SerializeObject(friendNames));
            form.Add("username", username);
            form.Add("timestamp", stamp);

            HttpResponseMessage reply = await WebRequests.Post("bests", form, authToken, stamp);

            if (reply.StatusCode != HttpStatusCode.OK)
            {
                return null;
            }

            string body = await reply.Content.ReadAsStringAsync();
            Dictionary<string, Best> bests = await JsonConvert.DeserializeObjectAsync<Dictionary<string, Best>>(body);
            return bests;
        }
Exemplo n.º 18
0
        /// <summary>
        /// Runs a Play Store search for <paramref name="searchField"/>, keeps requesting
        /// further result pages for as long as a paging token can be found in the previous
        /// response, and passes every app url that is neither processed nor queued yet to
        /// <c>mongoDB.AddToQueue</c>.
        /// </summary>
        /// <param name="searchField">Search term; printed to the console and formatted into <c>Consts.CRAWL_URL</c>.</param>
        private static void CrawlStore (string searchField)
        {
            // Console Feedback
            Console.WriteLine ("Crawling Search Term : [ " + searchField + " ]");

            // Regular expression used to pull the "pagToken" out of the previous response.
            // Note that it is constructed (and compiled) on every call of this method.
            Regex pagTokenRegex = new Regex (@"GAEi+.+\:S\:.{11}\\42", RegexOptions.Compiled);

            // Holds the body of the most recent request
            string response;

            // MongoDB Helper
            // Configuring MongoDB Wrapper ("server:port" is assembled from the two constants)
            MongoDBWrapper mongoDB   = new MongoDBWrapper ();
            string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Ensuring the database has the proper index
            mongoDB.EnsureIndex ("Url");

            // Response Parser
            PlayStoreParser parser = new PlayStoreParser ();

            // Executing Web Requests (the handler is disposed when the crawl ends)
            using (WebRequests server = new WebRequests ())
            {
                // Creating Request Object
                server.Host = Consts.HOST;

                // Executing Initial Request
                response    = server.Post (String.Format (Consts.CRAWL_URL, searchField), Consts.INITIAL_POST_DATA);

                // Parsing Links out of Html Page (Initial Request)
                foreach (string url in parser.ParseAppUrls (response))
                {
                    // Checks whether the app has already been processed
                    // or is queued to be processed.
                    // The processed check uses the prefixed url, the queued check the bare one.
                    if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
                    {
                        // Console Feedback
                        Console.WriteLine (" . Queued App");

                        // Then, queue it :)
                        mongoDB.AddToQueue (url);
                        Thread.Sleep (250); // Hiccup
                    }
                    else
                    {
                        // Console Feedback
                        Console.WriteLine (" . Duplicated App. Skipped");
                    }
                }

                // Executing Requests for more Play Store Links
                // NOTE(review): in this version initialSkip is never read and currentMultiplier
                // is only incremented; paging is driven by the pagToken instead.
                int initialSkip       = 48;
                int currentMultiplier = 1;
                int errorsCount       = 0;
                do
                {
                    // Finding pagToken in the previous response
                    var rgxMatch = pagTokenRegex.Match (response);

                    // Without a token there is no next page to ask for, so paging stops
                    if (!rgxMatch.Success)
                    {
                        break;
                    }

                    // Reading Match from Regex, and applying needed replacements.
                    // NOTE(review): the last pattern is a verbatim literal, i.e. it looks for two
                    // backslashes followed by "u003d" - confirm that matches the page encoding.
                    string pagToken = rgxMatch.Value.Replace (":S:", "%3AS%3A").Replace("\\42", String.Empty).Replace(@"\\u003d", String.Empty);

                    // Assembling new PostData with paging values
                    string postData = String.Format (Consts.POST_DATA, pagToken);

                    // Executing request for values
                    response = server.Post (String.Format (Consts.CRAWL_URL, searchField), postData);

                    // Checking Server Status
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        LogWriter.Error ("Http Error", "Status Code [ " + server.StatusCode + " ]");
                        errorsCount++;
                        // In a do/while, "continue" jumps to the loop condition below, which is
                        // then evaluated against the failed response
                        continue;
                    }

                    // Parsing Links
                    foreach (string url in parser.ParseAppUrls (response))
                    {
                        // Checks whether the app has already been processed
                        // or is queued to be processed
                        if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
                        {
                            // Console Feedback
                            Console.WriteLine (" . Queued App");

                            // Then, queue it :)
                            mongoDB.AddToQueue (url);
                            Thread.Sleep (250); // Hiccup
                        }
                        else
                        {
                            // Console Feedback
                            Console.WriteLine (" . Duplicated App. Skipped");
                        }
                    }

                    // Incrementing Paging Multiplier
                    currentMultiplier++;

                // Keep going while the last response still had results and the error budget holds
                }  while (parser.AnyResultFound (response) && errorsCount <= Consts.MAX_REQUEST_ERRORS);
            }
        }
Exemplo n.º 19
0
        /// <summary>
        /// Requests the initial result page from <c>Consts.CRAWL_URL</c>, then pages through
        /// further results using a growing skip offset, and passes every app url that is
        /// neither processed nor queued yet to <c>mongoDB.AddToQueue</c>.
        /// </summary>
        /// <param name="searchField">
        /// Search term. NOTE(review): in this version it is only printed to the console; the
        /// requests use <c>Consts.CRAWL_URL</c> as-is - confirm that is intended.
        /// </param>
        private static void CrawlStore(string searchField)
        {
            // Console Feedback
            Console.WriteLine("Crawling Search Term : [ " + searchField + " ]");

            // Holds the body of the most recent request
            string response;

            // MongoDB Helper
            // Configuring MongoDB Wrapper ("server:port" is assembled from the two constants)
            MongoDBWrapper mongoDB           = new MongoDBWrapper();
            string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Response Parser
            PlayStoreParser parser = new PlayStoreParser();

            // Executing Web Requests (the handler is disposed when the crawl ends)
            using (WebRequests server = new WebRequests())
            {
                // Creating Request Object
                server.Host = Consts.HOST;

                // Executing Initial Request
                response = server.Post(Consts.CRAWL_URL, Consts.INITIAL_POST_DATA);

                // Parsing Links out of Html Page (Initial Request)
                foreach (string url in parser.ParseAppUrls(response))
                {
                    // Checks whether the app has already been processed
                    // or is queued to be processed.
                    // The processed check uses the prefixed url, the queued check the bare one.
                    if ((!mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued(url)))
                    {
                        // Console Feedback
                        Console.WriteLine(" . Queued App");

                        // Then, queue it :)
                        mongoDB.AddToQueue(url);
                    }
                    else
                    {
                        // Console Feedback
                        Console.WriteLine(" . Duplicated App. Skipped");
                    }
                }

                // Executing Requests for more Play Store Links.
                // Each page is requested with the value initialSkip * currentMultiplier.
                int initialSkip       = 48;
                int currentMultiplier = 1;
                int errorsCount       = 0;
                do
                {
                    // Assembling new PostData with paging values
                    string postData = String.Format(Consts.POST_DATA, (initialSkip * currentMultiplier));

                    // Executing request for values
                    response = server.Post(Consts.CRAWL_URL, postData);

                    // Checking Server Status
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        LogWriter.Error("Http Error", "Status Code [ " + server.StatusCode + " ]");
                        errorsCount++;
                        // "continue" jumps to the loop condition below without advancing the
                        // multiplier, so a following iteration asks for the same page again
                        continue;
                    }

                    // Parsing Links
                    foreach (string url in parser.ParseAppUrls(response))
                    {
                        // Checks whether the app has already been processed
                        // or is queued to be processed
                        if ((!mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued(url)))
                        {
                            // Console Feedback
                            Console.WriteLine(" . Queued App");

                            // Then, queue it :)
                            mongoDB.AddToQueue(url);
                        }
                        else
                        {
                            // Console Feedback
                            Console.WriteLine(" . Duplicated App. Skipped");
                        }
                    }

                    // Incrementing Paging Multiplier
                    currentMultiplier++;
                // Keep going while the last response still had results and the error budget holds
                }  while (parser.AnyResultFound(response) && errorsCount <= Consts.MAX_REQUEST_ERRORS);
            }
        }
Exemplo n.º 20
0
        /// <summary>
        /// Executes a search using <paramref name="searchField"/> as the search term,
        /// follows the paging token found in each response until no further page is
        /// available, and queues (through <c>MongoDBWrapper.AddToQueue</c>) every app url
        /// that is neither processed nor queued already.
        /// </summary>
        /// <param name="searchField">Search term formatted into <c>Consts.CRAWL_URL</c>.</param>
        /// <param name="shouldUseProxies">When true, requests go through the proxy returned by <c>ProxiesLoader.GetWebProxy</c>.</param>
        private static void CrawlStore (string searchField, bool shouldUseProxies)
        {
            // Console Feedback
            _logger.Warn ("Crawling Search Term : [ " + searchField + " ]");

            // Hashset of urls used to keep track of what's been parsed already
            HashSet<String> foundUrls = new HashSet<String> ();

            // Control variable to avoid "Loop" on pagging
            bool isDonePagging = false;

            // Compiling Regular Expression used to parse the "pagToken" out of the Play Store
            Regex pagTokenRegex = new Regex (@"GAEi+.+\:S\:.{11}\\42", RegexOptions.Compiled);

            // Last successfully fetched HTML page
            string response;

            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB   = new MongoDBWrapper ();
            string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Ensuring the database has the proper index
            mongoDB.EnsureIndex ("Url");

            // Response Parser
            PlayStoreParser parser = new PlayStoreParser ();

            // The search url does not change between pages, so it is built only once
            string crawlUrl = String.Format (Consts.CRAWL_URL, searchField);

            // Executing Web Requests
            using (WebRequests server = new WebRequests ())
            {
                // Creating Request Object
                server.Headers.Add (Consts.ACCEPT_LANGUAGE);
                server.Host      = Consts.HOST;
                server.UserAgent = Consts.GITHUBURL;
                server.Encoding  = "utf-8";

                // Checking for the need to use "HTTP Proxies"
                if (shouldUseProxies)
                {
                    server.Proxy = ProxiesLoader.GetWebProxy ();
                }

                // Executing Initial Request
                response = server.Post (crawlUrl, Consts.INITIAL_POST_DATA);

                // Parsing Links out of Html Page (Initial Request)
                foreach (string url in parser.ParseAppUrls (response))
                {
                    // Remembering the url so a later repetition can be detected
                    foundUrls.Add (url);

                    // Only queues apps that are neither processed nor queued already
                    if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
                    {
                        mongoDB.AddToQueue (url);
                        Thread.Sleep (250); // Hiccup
                    }
                }

                // Executing Requests for more Play Store Links
                int errorsCount = 0;
                do
                {
                    // Finding pagToken on the last successfully fetched page
                    var rgxMatch = pagTokenRegex.Match (response);

                    // No token means there is no next page to ask for
                    if (!rgxMatch.Success)
                    {
                        break;
                    }

                    // Reading Match from Regex, and applying needed replacements
                    // NOTE(review): the verbatim literal @"\\u003d" matches TWO backslashes followed by "u003d" - confirm that is the intended input
                    string pagToken = rgxMatch.Value.Replace (":S:", "%3AS%3A").Replace ("\\42", String.Empty).Replace (@"\\u003d", String.Empty);

                    // Assembling new PostData with paging values
                    string postData = String.Format (Consts.POST_DATA, pagToken);

                    // The page goes to a temporary first: a failed request must not replace the
                    // last good page, otherwise the paging token needed for the retry is lost
                    string pageResponse = server.Post (crawlUrl, postData);

                    // Checking Server Status
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        _logger.Error ("Http Error" + " - Status Code [ " + server.StatusCode + " ]");
                        errorsCount++;

                        // "continue" jumps to the loop condition, which is evaluated against the last
                        // good page, so the same token is requested again until the error cap is hit
                        continue;
                    }

                    response = pageResponse;

                    // Parsing Links
                    foreach (string url in parser.ParseAppUrls (response))
                    {
                        // Add returns false for an url seen before, which means the
                        // paging started to repeat itself and the crawl is complete
                        if (!foundUrls.Add (url))
                        {
                            isDonePagging = true;
                            break;
                        }

                        // Only queues apps that are neither processed nor queued already
                        if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
                        {
                            mongoDB.AddToQueue (url);
                            Thread.Sleep (250); // Hiccup
                        }
                    }

                }  while (!isDonePagging && parser.AnyResultFound (response) && errorsCount <= Consts.MAX_REQUEST_ERRORS);
            }
        }
Exemplo n.º 21
0
        /// <summary>
        /// Fetches a category page, then keeps posting paged requests to the same url,
        /// queueing (through <c>MongoDBWrapper.AddToQueue</c>) every app url that is neither
        /// processed nor queued already. Paging stops when an url shows up a second time
        /// or when the number of failed requests exceeds <c>Consts.MAX_REQUEST_ERRORS</c>.
        /// </summary>
        /// <param name="categoryUrl">Url of the category page; also the target of the paged POST requests.</param>
        /// <param name="categoryName">Category name, used only for logging.</param>
        /// <param name="shouldUseProxies">When true, requests go through the proxy returned by <c>ProxiesLoader.GetWebProxy</c>.</param>
        private static void CrawlCategory (string categoryUrl, string categoryName, bool shouldUseProxies)
        {
            // Console Feedback
            _logger.Warn ("Crawling Category : [ " + categoryName + " ]");

            // Hashset of urls used to keep track of what's been parsed already
            HashSet<String> foundUrls = new HashSet<String> ();

            // Control variable to avoid "Loop" on pagging
            bool isDonePagging = false;

            // HTML Response
            string response;

            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB   = new MongoDBWrapper ();
            string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Ensuring the database has the proper index
            mongoDB.EnsureIndex ("Url");

            // Response Parser
            PlayStoreParser parser = new PlayStoreParser ();

            // Plain concatenation: the url must not be run through String.Format,
            // which would throw on (or rewrite) any '{' / '}' it contains
            string pagedUrl = categoryUrl + "?authuser=0";

            // Executing Web Requests
            using (WebRequests server = new WebRequests ())
            {
                // Creating Request Object
                server.Headers.Add (Consts.ACCEPT_LANGUAGE);
                server.Host      = Consts.HOST;
                server.UserAgent = Consts.GITHUBURL;
                server.Encoding  = "utf-8";

                // Checking for the need to use "HTTP Proxies"
                if (shouldUseProxies)
                {
                    server.Proxy = ProxiesLoader.GetWebProxy ();
                }

                // Executing Initial Request
                response = server.Get (categoryUrl);

                // Parsing Links out of Html Page (Initial Request)
                foreach (string url in parser.ParseAppUrls (response))
                {
                    // Saving found url on local hashset
                    foundUrls.Add (url);

                    // Only queues apps that are neither processed nor queued already
                    if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
                    {
                        mongoDB.AddToQueue (url);
                    }
                }

                // Executing Requests for more Play Store Links
                int baseSkip          = 60;
                int currentMultiplier = 1;
                int errorsCount       = 0;
                do
                {
                    // Assembling new PostData with paging values
                    string postData = String.Format (Consts.CATEGORIES_POST_DATA, (currentMultiplier * baseSkip), baseSkip);

                    // Executing request for values
                    response = server.Post (pagedUrl, postData);

                    // Checking Server Status
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        _logger.Error ("Http Error" + " - Status Code [ " + server.StatusCode + " ]");
                        errorsCount++;

                        // The multiplier is not incremented here, so the same page is requested again
                        continue;
                    }

                    // Parsing Links
                    // NOTE(review): a page that yields no urls never sets isDonePagging, so the loop then
                    // only ends through the error cap - confirm the store always repeats results at the end
                    foreach (string url in parser.ParseAppUrls (response))
                    {
                        // If a certain app is found twice, it means that the "pagging" logic got stuck into a
                        // loop, so all the apps for this category were parsed already
                        if (!foundUrls.Add (url))
                        {
                            isDonePagging = true;
                            break;
                        }

                        // Only queues apps that are neither processed nor queued already
                        if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
                        {
                            mongoDB.AddToQueue (url);
                        }
                    }

                    // Incrementing Paging Multiplier
                    currentMultiplier++;

                }  while (!isDonePagging && errorsCount <= Consts.MAX_REQUEST_ERRORS);
            }
        }
Exemplo n.º 22
0
        /// <summary>
        /// Requests up to <c>Consts.CATEGORY_NUMBER_OF_CYCLES</c> listing pages of a category and
        /// collects the apps the parser extracts from them, skipping entries already collected.
        /// The crawl stops early once the total number of failed requests exceeds
        /// <c>Consts.MAX_REQUEST_ERRORS</c>.
        /// </summary>
        /// <param name="category">Category value formatted into <c>Consts.CRAWL_URL_CATEGORY</c>.</param>
        /// <returns>The collected apps; empty when no page could be parsed.</returns>
        public static List <AppShortDescription> CollectAppsShortInformationFromCategories(string category)
        {
            List <AppShortDescription> parsedApps_list = new List <AppShortDescription>();

            log.Info("Crawling Category : [ " + category + " ]");

            // The url is the same for every page, only the post data changes
            string crawlUrl = String.Format(Consts.CRAWL_URL_CATEGORY, category, "Russia");

            // Failures are counted across the whole crawl: a per-page counter would
            // restart at zero on every cycle and could never reach the error cap
            int errorsCount = 0;

            int numberOfCyclesCompleted = 0;

            while (numberOfCyclesCompleted < Consts.CATEGORY_NUMBER_OF_CYCLES)
            {
                // Paging offset of the current cycle
                string postData = String.Format(Consts.POST_DATA_CATEGORY, Consts.CATEGORY_NUMBER_OF_APPS_PER_CYCLE * numberOfCyclesCompleted);

                // Counted before the request, so a failed page is skipped rather than retried
                numberOfCyclesCompleted++;

                // Executing Web Requests
                using (WebRequests server = new WebRequests())
                {
                    // Creating Request Object
                    server.Host = Consts.HOST;

                    // TODO: route the request through a randomly picked proxy and verify it answers with 200

                    int insertedAppCount = 0;
                    int skippedAppCount  = 0;

                    // Executing Request
                    string response = server.Post(crawlUrl, postData);

                    // Checking Server Status
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        log.Error("Http Error - Status Code: " + server.StatusCode);

                        errorsCount++;

                        if (errorsCount > Consts.MAX_REQUEST_ERRORS)
                        {
                            log.Info("Crawl Stopped: MAX_REQUEST_ERRORS reached");
                            break;
                        }

                        continue;
                    }

                    // Parsing Links out of Html Page
                    foreach (AppShortDescription asd in parser.ParseAppUrls(response))
                    {
                        if (!parsedApps_list.Contains(asd))
                        {
                            parsedApps_list.Add(asd);

                            log.Info("Inserted App: " + asd);

                            ++insertedAppCount;
                        }
                        else
                        {
                            ++skippedAppCount;
                            log.Info("Duplicated App. Skipped: " + asd);
                        }
                    }

                    // Per-page counters, followed by the failures accumulated so far
                    log.Info("Inserted App Count: " + insertedAppCount);
                    log.Info("Skipped App Count: " + skippedAppCount);
                    log.Info("Error Count: " + errorsCount + "\n");
                }
            }

            return(parsedApps_list);
        }
Exemplo n.º 23
0
        /// <summary>
        /// Posts the initial request to <c>Consts.CRAWL_URL</c>, then pages through the results
        /// with an increasing skip value, queueing (through <c>MongoDBWrapper.AddToQueue</c>)
        /// every app url that is neither processed nor queued already.
        /// </summary>
        /// <param name="searchField">
        /// Search term. NOTE(review): in this method it is only written to the console and is
        /// not applied to the request url - confirm that <c>Consts.CRAWL_URL</c> needs no formatting.
        /// </param>
        private static void CrawlStore (string searchField)
        {
            // Console Feedback
            Console.WriteLine ("Crawling Search Term : [ " + searchField + " ]");

            // Last successfully fetched HTML page
            string response;

            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB   = new MongoDBWrapper ();
            string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Response Parser
            PlayStoreParser parser = new PlayStoreParser ();

            // Executing Web Requests
            using (WebRequests server = new WebRequests ())
            {
                // Creating Request Object
                server.Host = Consts.HOST;

                // Executing Initial Request
                response = server.Post (Consts.CRAWL_URL, Consts.INITIAL_POST_DATA);

                // Parsing Links out of Html Page (Initial Request)
                foreach (string url in parser.ParseAppUrls (response))
                {
                    // Only queues apps that are neither processed nor queued already
                    if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
                    {
                        // Console Feedback
                        Console.WriteLine (" . Queued App");

                        mongoDB.AddToQueue (url);
                    }
                    else
                    {
                        // Console Feedback
                        Console.WriteLine (" . Duplicated App. Skipped");
                    }
                }

                // Executing Requests for more Play Store Links
                int initialSkip       = 48;
                int currentMultiplier = 1;
                int errorsCount       = 0;
                do
                {
                    // Assembling new PostData with paging values
                    string postData = String.Format (Consts.POST_DATA, (initialSkip * currentMultiplier));

                    // The page goes to a temporary first: a failed request must not replace the last
                    // good page, because the loop condition below is evaluated after "continue"
                    string pageResponse = server.Post (Consts.CRAWL_URL, postData);

                    // Checking Server Status
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        LogWriter.Error ("Http Error", "Status Code [ " + server.StatusCode + " ]");
                        errorsCount++;

                        // The multiplier is not incremented here, so the same page is requested again
                        continue;
                    }

                    response = pageResponse;

                    // Parsing Links
                    foreach (string url in parser.ParseAppUrls (response))
                    {
                        // Only queues apps that are neither processed nor queued already
                        if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
                        {
                            // Console Feedback
                            Console.WriteLine (" . Queued App");

                            mongoDB.AddToQueue (url);
                        }
                        else
                        {
                            // Console Feedback
                            Console.WriteLine (" . Duplicated App. Skipped");
                        }
                    }

                    // Incrementing Paging Multiplier
                    currentMultiplier++;

                }  while (parser.AnyResultFound (response) && errorsCount <= Consts.MAX_REQUEST_ERRORS);
            }
        }
Exemplo n.º 24
0
        /// <summary>
        /// Runs a search for <paramref name="searchField"/> and gathers the distinct app urls
        /// found while following the page token returned with each response.
        /// </summary>
        /// <param name="searchField">Search term formatted into <c>Consts.CRAWL_URL</c>.</param>
        /// <param name="maxAppUrls">Upper bound of urls to collect; zero or a negative value disables the bound.</param>
        /// <returns>The set of collected app urls.</returns>
        private static ISet <string> CollectAppUrls(string searchField, int maxAppUrls)
        {
            ISet <string> collected = new HashSet <string>();

            log.Info("Crawling Search Term : [ " + searchField + " ]");

            string targetUrl = String.Format(Consts.CRAWL_URL, searchField);

            using (WebRequests http = new WebRequests())
            {
                http.Host = Consts.HOST;

                int  inserted     = 0;
                int  skipped      = 0;
                int  failures     = 0;
                bool limitReached = false;

                // Body of the next request; replaced after every successfully parsed page
                string body = Consts.INITIAL_POST_DATA;

                while (true)
                {
                    string html = http.Post(targetUrl, body);

                    // A failed request is repeated with the same body until the error cap is exceeded
                    if (http.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        log.Error("Http Error - Status Code: " + http.StatusCode);

                        failures++;

                        if (failures > Consts.MAX_REQUEST_ERRORS)
                        {
                            log.Info("Crawl Stopped: MAX_REQUEST_ERRORS reached");
                            break;
                        }

                        continue;
                    }

                    foreach (string url in parser.ParseAppUrls(html))
                    {
                        // Add returns false when the url is already part of the set
                        if (!collected.Add(url))
                        {
                            ++skipped;
                            log.Info("Duplicated App. Skipped: " + url);
                            continue;
                        }

                        log.Info("Inserted App: " + url);

                        ++inserted;

                        if (maxAppUrls > 0 && inserted >= maxAppUrls)
                        {
                            limitReached = true;
                            break;
                        }
                    }

                    // Requested amount collected: stop paging right away
                    if (limitReached)
                    {
                        break;
                    }

                    // An empty token marks the end of the result stream
                    string nextToken = getPageToken(html);
                    if (nextToken.Length == 0)
                    {
                        break;
                    }

                    body = String.Format(Consts.POST_DATA, nextToken);
                }

                log.Info("Inserted App Count: " + inserted);
                log.Info("Skipped App Count: " + skipped);
                log.Info("Error Count: " + failures + "\n");
            }

            return collected;
        }
Exemplo n.º 25
0
        /// <summary>
        /// Executes a search using <paramref name="searchField"/> as the search term,
        /// follows the paging token found in each response until no further page is
        /// available, and queues (through <c>_mongoDB.AddToQueue</c>) every app url
        /// that is neither processed nor queued already.
        /// </summary>
        /// <param name="searchField">Search term formatted into <c>Consts.CRAWL_URL</c>.</param>
        /// <param name="shouldUseProxies">When true, requests go through the proxy returned by <c>ProxiesLoader.GetWebProxy</c>.</param>
        private static void CrawlStore(string searchField, bool shouldUseProxies)
        {
            // Console Feedback
            _logger.Warn("Crawling Search Term : [ " + searchField + " ]");

            // Hashset of urls used to keep track of what's been parsed already
            HashSet <String> foundUrls = new HashSet <String> ();

            // Control variable to avoid "Loop" on pagging
            bool isDonePagging = false;

            // Compiling Regular Expression used to parse the "pagToken" out of the Play Store
            Regex pagTokenRegex = new Regex(@"GAEi+.+\:S\:.{11}\\x22", RegexOptions.Compiled);

            // Last successfully fetched HTML page
            string response;

            // Response Parser
            PlayStoreParser parser = new PlayStoreParser();

            // The search url does not change between pages, so it is built only once
            string crawlUrl = String.Format(Consts.CRAWL_URL, searchField);

            // Executing Web Requests
            using (WebRequests server = new WebRequests())
            {
                // Creating Request Object
                server.Headers.Add(Consts.ACCEPT_LANGUAGE);
                server.Host      = Consts.HOST;
                server.UserAgent = Consts.GITHUBURL;
                server.Encoding  = "utf-8";

                // Checking for the need to use "HTTP Proxies"
                if (shouldUseProxies)
                {
                    server.Proxy = ProxiesLoader.GetWebProxy();
                }

                // Executing Initial Request
                response = server.Post(crawlUrl, Consts.INITIAL_POST_DATA);

                // Parsing Links out of Html Page (Initial Request)
                foreach (string url in parser.ParseAppUrls(response))
                {
                    // Remembering the url so a later repetition can be detected
                    foundUrls.Add(url);

                    // Only queues apps that are neither processed nor queued already
                    if ((!_mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url)) && (!_mongoDB.AppQueued(url)))
                    {
                        _mongoDB.AddToQueue(url);
                        Thread.Sleep(250);  // Hiccup
                    }
                }

                // Executing Requests for more Play Store Links
                int errorsCount = 0;
                do
                {
                    // Finding pagToken on the last successfully fetched page
                    var rgxMatch = pagTokenRegex.Match(response);

                    // No token means there is no next page to ask for
                    if (!rgxMatch.Success)
                    {
                        break;
                    }

                    // Reading Match from Regex, and applying needed replacements
                    // NOTE(review): the verbatim literal @"\\u003d" matches TWO backslashes followed by "u003d" - confirm that is the intended input
                    string pagToken = rgxMatch.Value.Replace(":S:", "%3AS%3A").Replace("\\x22", String.Empty).Replace(@"\\u003d", String.Empty);

                    // Assembling new PostData with paging values
                    string postData = String.Format(Consts.POST_DATA, pagToken);

                    // The page goes to a temporary first: a failed request must not replace the
                    // last good page, otherwise the paging token needed for the retry is lost
                    string pageResponse = server.Post(crawlUrl, postData);

                    // Checking Server Status
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        _logger.Error("Http Error" + " - Status Code [ " + server.StatusCode + " ]");
                        errorsCount++;

                        // "continue" jumps to the loop condition, which is evaluated against the last
                        // good page, so the same token is requested again until the error cap is hit
                        continue;
                    }

                    response = pageResponse;

                    // Parsing Links
                    foreach (string url in parser.ParseAppUrls(response))
                    {
                        // Add returns false for an url seen before, which means the
                        // paging started to repeat itself and the crawl is complete
                        if (!foundUrls.Add(url))
                        {
                            isDonePagging = true;
                            break;
                        }

                        // Only queues apps that are neither processed nor queued already
                        if ((!_mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url)) && (!_mongoDB.AppQueued(url)))
                        {
                            _mongoDB.AddToQueue(url);
                            Thread.Sleep(250);  // Hiccup
                        }
                    }
                }  while (!isDonePagging && parser.AnyResultFound(response) && errorsCount <= Consts.MAX_REQUEST_ERRORS);
            }
        }
Exemplo n.º 26
0
        /// <summary>
        /// Fetches a category page, then keeps posting paged requests to the same url,
        /// queueing (through <c>MongoDBWrapper.AddToQueue</c>) every app url that is neither
        /// processed nor queued already. Paging stops when an url shows up a second time
        /// or when the number of failed requests exceeds <c>Consts.MAX_REQUEST_ERRORS</c>.
        /// </summary>
        /// <param name="categoryUrl">Url of the category page; also the target of the paged POST requests.</param>
        /// <param name="categoryName">Category name, used only for logging.</param>
        /// <param name="shouldUseProxies">When true, requests go through the proxy returned by <c>ProxiesLoader.GetWebProxy</c>.</param>
        private static void CrawlCategory(string categoryUrl, string categoryName, bool shouldUseProxies)
        {
            // Console Feedback
            _logger.Warn("Crawling Category : [ " + categoryName + " ]");

            // Hashset of urls used to keep track of what's been parsed already
            HashSet <String> foundUrls = new HashSet <String> ();

            // Control variable to avoid "Loop" on pagging
            bool isDonePagging = false;

            // HTML Response
            string response;

            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB           = new MongoDBWrapper();
            string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Ensuring the database has the proper index
            mongoDB.EnsureIndex("Url");

            // Response Parser
            PlayStoreParser parser = new PlayStoreParser();

            // Plain concatenation: the url must not be run through String.Format,
            // which would throw on (or rewrite) any '{' / '}' it contains
            string pagedUrl = categoryUrl + "?authuser=0";

            // Executing Web Requests
            using (WebRequests server = new WebRequests())
            {
                // Creating Request Object
                server.Headers.Add(Consts.ACCEPT_LANGUAGE);
                server.Host      = Consts.HOST;
                server.UserAgent = Consts.GITHUBURL;
                server.Encoding  = "utf-8";

                // Checking for the need to use "HTTP Proxies"
                if (shouldUseProxies)
                {
                    server.Proxy = ProxiesLoader.GetWebProxy();
                }

                // Executing Initial Request
                response = server.Get(categoryUrl);

                // Parsing Links out of Html Page (Initial Request)
                foreach (string url in parser.ParseAppUrls(response))
                {
                    // Saving found url on local hashset
                    foundUrls.Add(url);

                    // Only queues apps that are neither processed nor queued already
                    if ((!mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued(url)))
                    {
                        mongoDB.AddToQueue(url);
                    }
                }

                // Executing Requests for more Play Store Links
                int baseSkip          = 60;
                int currentMultiplier = 1;
                int errorsCount       = 0;
                do
                {
                    // Assembling new PostData with paging values
                    string postData = String.Format(Consts.CATEGORIES_POST_DATA, (currentMultiplier * baseSkip), baseSkip);

                    // Executing request for values
                    response = server.Post(pagedUrl, postData);

                    // Checking Server Status
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        _logger.Error("Http Error" + " - Status Code [ " + server.StatusCode + " ]");
                        errorsCount++;

                        // The multiplier is not incremented here, so the same page is requested again
                        continue;
                    }

                    // Parsing Links
                    // NOTE(review): a page that yields no urls never sets isDonePagging, so the loop then
                    // only ends through the error cap - confirm the store always repeats results at the end
                    foreach (string url in parser.ParseAppUrls(response))
                    {
                        // If a certain app is found twice, it means that the "pagging" logic got stuck into a
                        // loop, so all the apps for this category were parsed already
                        if (!foundUrls.Add(url))
                        {
                            isDonePagging = true;
                            break;
                        }

                        // Only queues apps that are neither processed nor queued already
                        if ((!mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued(url)))
                        {
                            mongoDB.AddToQueue(url);
                        }
                    }

                    // Incrementing Paging Multiplier
                    currentMultiplier++;
                }  while (!isDonePagging && errorsCount <= Consts.MAX_REQUEST_ERRORS);
            }
        }
Exemplo n.º 27
0
        /// <summary>
        /// Executes a search using the searchField as the search parameter and
        /// paginates / scrolls the search results, adding the url of every app that is
        /// neither processed nor queued yet to the queue kept by the MongoDB wrapper.
        /// Paging stops when no paging token can be found in the last successful response,
        /// when the parser reports that a page has no results, or when the number of
        /// failed requests exceeds Consts.MAX_REQUEST_ERRORS
        /// </summary>
        /// <param name="searchField">Search term formatted into Consts.CRAWL_URL</param>
        private static void CrawlStore(string searchField)
        {
            // Console Feedback
            Console.WriteLine("Crawling Search Term : [ " + searchField + " ]");

            // Compiling Regular Expression used to parse the "pagToken" out of the Play Store
            Regex pagTokenRegex = new Regex(@"GAEi+.+\:S\:.{11}\\42", RegexOptions.Compiled);

            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB           = new MongoDBWrapper();
            string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Ensuring the database has the proper index
            mongoDB.EnsureIndex("Url");

            // Response Parser
            PlayStoreParser parser = new PlayStoreParser();

            // The crawl url is the same for every request of this search term
            string crawlUrl = String.Format(Consts.CRAWL_URL, searchField);

            // Executing Web Requests
            using (WebRequests server = new WebRequests())
            {
                // Creating Request Object
                server.Host = Consts.HOST;

                // Executing Initial Request and queueing the apps found on it
                string response = server.Post(crawlUrl, Consts.INITIAL_POST_DATA);
                QueueUnseenApps(mongoDB, parser, response);

                // Executing Requests for more Play Store Links
                int  errorsCount    = 0;
                bool hasMoreResults = true;
                do
                {
                    // Finding pagToken from the last successful HTML response
                    Match rgxMatch = pagTokenRegex.Match(response);

                    // If there's no match, there is no next page to ask for
                    if (!rgxMatch.Success)
                    {
                        break;
                    }

                    // Reading Match from Regex, and applying needed replacements
                    string pagToken = rgxMatch.Value.Replace(":S:", "%3AS%3A").Replace("\\42", String.Empty).Replace(@"\\u003d", String.Empty);

                    // Assembling new PostData with paging values
                    string postData = String.Format(Consts.POST_DATA, pagToken);

                    // Executing request for values. The result is kept apart from "response"
                    // so a failed request does not discard the page holding the current token
                    string pageResponse = server.Post(crawlUrl, postData);

                    // Checking Server Status
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        LogWriter.Error("Http Error", "Status Code [ " + server.StatusCode + " ]");
                        errorsCount++;

                        // "continue" jumps to the loop condition: the same token is retried
                        // until the errors budget is exhausted
                        continue;
                    }

                    // Request succeeded, so this page becomes the source of the next token
                    response = pageResponse;

                    // Parsing Links
                    QueueUnseenApps(mongoDB, parser, response);

                    // Checking whether this page still had results
                    hasMoreResults = parser.AnyResultFound(response);
                }  while (hasMoreResults && errorsCount <= Consts.MAX_REQUEST_ERRORS);
            }
        }

        /// <summary>
        /// Parses the app urls out of an html response and queues every one that
        /// has not been processed and is not queued yet, writing feedback to the console
        /// </summary>
        /// <param name="mongoDB">Configured MongoDB wrapper used for the lookups and the queue</param>
        /// <param name="parser">Parser used to read the app urls out of the response</param>
        /// <param name="response">Html response of a search request</param>
        private static void QueueUnseenApps(MongoDBWrapper mongoDB, PlayStoreParser parser, string response)
        {
            foreach (string url in parser.ParseAppUrls(response))
            {
                // Checks whether the app have been already processed
                // or is queued to be processed
                if (mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url) || mongoDB.AppQueued(url))
                {
                    // Console Feedback
                    Console.WriteLine(" . Duplicated App. Skipped");
                    continue;
                }

                // Console Feedback
                Console.WriteLine(" . Queued App");

                // Then, queue it :)
                mongoDB.AddToQueue(url);
                Thread.Sleep(250);  // Hiccup
            }
        }
Exemplo n.º 28
0
        /// <summary>
        /// Searches the store for the given keyword and collects the short description of
        /// every distinct app parsed out of the responses. After the initial request, the
        /// next requests are built from the tokens returned by getPageAndClusterTokens;
        /// crawling stops when no tokens are returned, when the same paging request would
        /// be repeated, or when more than Consts.MAX_REQUEST_ERRORS requests have failed
        /// </summary>
        /// <param name="keyword">Search term formatted into the initial crawl url</param>
        /// <returns>List with the apps found, without duplicates</returns>
        public static List <AppShortDescription> CollectAppsShortInformationFromKeywords(string keyword)
        {
            List <AppShortDescription> parsedApps_list = new List <AppShortDescription> ();

            log.Info("Crawling Search Term : [ " + keyword + " ]");

            string crawlUrl = String.Format(Consts.CRAWL_URL_KEYWORD_INITIAL, keyword, "Russia", "ru");
            string postData = Consts.POST_DATA_KEYWORD_INITAL;

            // Executing Web Requests
            using (WebRequests server = new WebRequests())
            {
                // Creating Request Object
                // NOTE: no proxy is configured for these requests
                server.Host = Consts.HOST;

                int insertedAppCount = 0;
                int skippedAppCount  = 0;
                int errorsCount      = 0;

                while (true)
                {
                    // Executing Request
                    string response = server.Post(crawlUrl, postData);

                    // Checking Server Status
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        log.Error("Http Error - Status Code: " + server.StatusCode);

                        errorsCount++;

                        if (errorsCount > Consts.MAX_REQUEST_ERRORS)
                        {
                            log.Info("Crawl Stopped: MAX_REQUEST_ERRORS reached");
                            break;
                        }

                        // crawlUrl / postData are untouched, so the same request is retried
                        continue;
                    }

                    // Parsing Links out of Html Page
                    foreach (AppShortDescription asd in parser.ParseAppUrls(response))
                    {
                        if (!parsedApps_list.Contains(asd))
                        {
                            parsedApps_list.Add(asd);

                            log.Info("Inserted App: " + asd);

                            ++insertedAppCount;
                        }
                        else
                        {
                            ++skippedAppCount;
                            log.Info("Duplicated App. Skipped: " + asd);
                        }
                    }

                    // Get pagTok value that will be used to fetch next stream data.
                    // If not found, that means we have reached the end of stream.
                    ClusterAndToken cat_cl = getPageAndClusterTokens(response);
                    if (cat_cl == null)
                    {
                        break;
                    }

                    string nextPostData = String.Format(Consts.POST_DATA_KEYWORD_CLUSTER, cat_cl.clp, cat_cl.pagTok);

                    // If the next request is identical to the one just issued, the paging got
                    // stuck and looping again would only repeat the same request forever
                    if (String.Equals(crawlUrl, Consts.CRAWL_URL_KEYWORD_CLUSTER, StringComparison.Ordinal) &&
                        String.Equals(postData, nextPostData, StringComparison.Ordinal))
                    {
                        log.Info("Crawl Stopped: paging tokens did not change");
                        break;
                    }

                    crawlUrl = Consts.CRAWL_URL_KEYWORD_CLUSTER;
                    postData = nextPostData;

                    Console.WriteLine("Inserted apps: " + insertedAppCount + ".");
                }

                log.Info("Inserted App Count: " + insertedAppCount);
                log.Info("Skipped App Count: " + skippedAppCount);
                log.Info("Error Count: " + errorsCount + "\n");
            }

            return(parsedApps_list);
        }