コード例 #1
0
        /// <summary>
        /// Reads a FHIR Patient from the request body, runs the alternate search for it
        /// and writes the outcome to the response as plain text.
        /// </summary>
        /// <remarks>
        /// When the search result contains a "noid://" identifier it is returned as the response.
        /// Otherwise the alternate-search record is stored and "no match found" is returned.
        /// Any exception is logged and reported in the response body.
        /// </remarks>
        /// <param name="context">HTTP context whose request body carries the FHIR Patient message.</param>
        public void ProcessRequest(HttpContext context)
        {
            context.Response.ContentType = "text/plain";
            try
            {
                Stream       httpStream       = context.Request.InputStream;
                StreamReader httpStreamReader = new StreamReader(httpStream);
                Resource     newResource      = FHIRUtilities.StreamToFHIR(httpStreamReader);
                _patient = (Patient)newResource;

                // Look the patient up through the alternate search.
                //   found     -> return the local NoID
                //   not found -> remember the search record and return "no match found"
                MongoDBWrapper  dbwrapper = new MongoDBWrapper(NoIDMongoDBAddress, SparkMongoDBAddress);
                AlternateSearch altSearch = GetAlternateFromPatient(_patient);
                string          localNoID = dbwrapper.AlternateSearch(altSearch);

                // Ordinal, case-insensitive test: the previous ToLower().Contains() depended on the
                // current culture (e.g. Turkish casing maps "I" to a dotless "ı") and could miss a match.
                bool matchFound = localNoID.IndexOf("noid://", StringComparison.OrdinalIgnoreCase) >= 0;
                if (matchFound)
                {
                    _responseText = localNoID;
                }
                else
                {
                    dbwrapper.AddAlternateSearch(altSearch);
                    _responseText = "no match found";
                }
            }
            catch (Exception ex)
            {
                _responseText = "Error in AltMatchByDemographics::ProcessRequest: " + ex.Message;
                LogUtilities.LogEvent(_responseText);
            }
            context.Response.Write(_responseText);
            context.Response.End();
        }
コード例 #2
0
ファイル: Program.cs プロジェクト: AmirAbrams/noid-1
        /// <summary>
        /// Interactive console driver for manual tests. Reads one-letter commands until "q":
        /// C sends test patients, P fetches the check-in list, M exercises the MongoDB wrapper,
        /// F sends a fingerprint Media message. An empty line repeats the previous command.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string commandLine = "";

            Console.WriteLine("Enter C for checkin patient, P for pending patient queue, M for Mongo tests, F for fingerprint identity and Q to quit");
            while (commandLine != "q")
            {
                if (commandLine == "c")
                {
                    // call PatentCheckinUri
                    Console.WriteLine("Sending test patient FHIR message.");
                    Patient testPt = TestPatient();
                    SendJSON(testPt);
                    Console.WriteLine("Sending FHIR message from file.");
                    Patient readPt = ReadPatient(@"C:\JSONTest\sample-new-patient.json");
                    SendJSON(readPt);
                }
                else if (commandLine == "p") //send profiles
                {
                    // call PendingPatientsUri; the result is not inspected, only the call is exercised
                    GetCheckinList();
                    Console.WriteLine("Patient profiles received.");
                }
                else if (commandLine == "m") // MongoDB tests
                {
                    MongoDBWrapper dbwrapper = new MongoDBWrapper(NoIDMongoDBAddress, SparkMongoDBAddress);
                    SessionQueue   seq       = new SessionQueue();
                    seq._id                 = Guid.NewGuid().ToString();
                    seq.ClinicArea          = "Test Clinic";
                    seq.LocalReference      = "123456";
                    seq.SparkReference      = "spark5";
                    seq.ApprovalStatus      = "pending";
                    seq.PatientStatus       = "new";
                    seq.RemoteHubReference  = "rem440403";
                    seq.SessionComputerName = "Prototype Computer 1";
                    seq.SubmitDate          = DateTime.UtcNow.AddMinutes(-15);
                    seq.PatientBeginDate    = DateTime.UtcNow.AddMinutes(-19);
                    Console.WriteLine(seq.Serialize());
                    dbwrapper.AddPendingPatient(seq);
                    // Round-trip read; the returned list is not inspected here.
                    dbwrapper.GetPendingPatients();
                    dbwrapper.UpdateSessionQueueRecord(seq._id, "approved", "TestUser", "TestComputer");
                }
                else if (commandLine == "f") // test fingerprint identity web service
                {
                    Media readMedia = ReadMedia(@"C:\JSONTest\sample-media-fhir-message.json");
                    SendJSON(readMedia);
                }

                string input = Console.ReadLine();
                if (input == null)
                {
                    // Console.ReadLine returns null at end of input (closed or redirected stdin).
                    // Treat it as "quit" instead of failing with a NullReferenceException.
                    break;
                }
                if (input.Length > 0)
                {
                    // Only the first character matters, compared case-insensitively.
                    commandLine = input.ToLowerInvariant().Substring(0, 1);
                }
                // An empty line leaves commandLine untouched, which repeats the previous command.
            }
        }
コード例 #3
0
 public void TestFixtureSetup()
 {
     // Create the wrapper, publish it through the fixture field, then bring up a clean
     // test server: stop/clean any previous one, start, connect and load the dataset.
     var wrapper = new MongoDBWrapper();
     _mongoTest = wrapper;

     wrapper.KillServerAndCleanup();
     wrapper.StartServer();
     wrapper.ConnectToTestDB();
     wrapper.LoadDataset();
 }
コード例 #4
0
        // Instance constructor: obtains the class logger, creates the MongoDB wrapper and
        // configures it with the credentials, "server:port" address, timeout, database and
        // collection taken from Consts.
        public MongoDBRepository()
        {
            _logger  = LogManager.GetCurrentClassLogger();
            _mongoDB = new MongoDBWrapper();

            // Address in "server:port" form
            string serverAndPort = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            _mongoDB.ConfigureDatabase(
                Consts.MONGO_USER,
                Consts.MONGO_PASS,
                Consts.MONGO_AUTH_DB,
                serverAndPort,
                Consts.MONGO_TIMEOUT,
                Consts.MONGO_DATABASE,
                Consts.MONGO_COLLECTION);
        }
コード例 #5
0
        /// <summary>
        /// Updates the status of a session-queue record from the query-string parameters
        /// "sessionid", "action", "computername" and "username", then reports the outcome
        /// as plain text.
        /// </summary>
        /// <param name="context">HTTP context of the current request.</param>
        public void ProcessRequest(HttpContext context)
        {
            context.Response.ContentType = "text/plain";
            try
            {
                // Copy the recognised parameters into the handler fields (names are matched case-sensitively).
                foreach (String name in context.Request.QueryString.AllKeys)
                {
                    if (name == "sessionid")
                    {
                        _sessionID = context.Request.QueryString[name];
                    }
                    else if (name == "action")
                    {
                        _action = context.Request.QueryString[name];
                    }
                    else if (name == "computername")
                    {
                        _computerName = context.Request.QueryString[name];
                    }
                    else if (name == "username")
                    {
                        _userName = context.Request.QueryString[name];
                    }
                }

                MongoDBWrapper dbwrapper = new MongoDBWrapper(NoIDMongoDBAddress, SparkMongoDBAddress);
                bool           updated   = dbwrapper.UpdateSessionQueueRecord(_sessionID, _action, _userName, _computerName);

                string message;
                if (updated)
                {
                    message = "Successfully updated the pending status.";
                }
                else if (dbwrapper.Exceptions.Count > 0)
                {
                    // The wrapper recorded a failure: report the first one.
                    message = "UpdatePendingStatus::ProcessRequest Error: " + dbwrapper.Exceptions[0].Message;
                }
                else
                {
                    // No recorded exception: the session id was simply not found.
                    message = "UpdatePendingStatus::ProcessRequest Error: Could not find sessionID " + _sessionID + ".";
                }
                context.Response.Write(message);
            }
            catch (Exception ex)
            {
                context.Response.Write("UpdatePendingStatus::ProcessRequest Error: " + ex.Message);
            }
            context.Response.End();
        }
コード例 #6
0
        /// <summary>
        /// Builds the list of pending patient profiles: reads the pending session-queue
        /// records from MongoDB, fetches each referenced Patient from the Spark FHIR endpoint
        /// and copies the queue metadata onto the resulting profile.
        /// </summary>
        /// <returns>One profile per pending session-queue record; empty when there are none.</returns>
        /// <remarks>
        /// Exceptions from the database or the FHIR client propagate to the caller unchanged.
        /// The previous catch block used <c>throw ex;</c>, which discarded the original stack trace.
        /// </remarks>
        private IList <PatientProfile> GetPendingPatients()
        {
            List <PatientProfile> listPending = new List <PatientProfile>();

            MongoDBWrapper      dbwrapper          = new MongoDBWrapper(NoIDMongoDBAddress, SparkMongoDBAddress);
            List <SessionQueue> pendingSessionList = dbwrapper.GetPendingPatients();
            FhirClient          client             = new FhirClient(sparkEndpointAddress);

            // Loop-invariant prefix of every patient address.
            string patientBaseAddress = sparkEndpointAddress.ToString() + "/Patient/";

            foreach (var pending in pendingSessionList)
            {
                string         sparkAddress   = patientBaseAddress + pending.SparkReference;
                Patient        pendingPatient = (Patient)client.Get(sparkAddress);
                PatientProfile patientProfile = new PatientProfile(pendingPatient, true);
                patientProfile.SessionID       = pending._id;
                patientProfile.LocalNoID       = pending.LocalReference;
                patientProfile.NoIDStatus      = pending.ApprovalStatus;
                patientProfile.NoIDType        = pending.PatientStatus;
                patientProfile.CheckinDateTime = FHIRUtilities.DateTimeToFHIRString(pending.SubmitDate);

                listPending.Add(patientProfile);
            }

            // NOTE: an earlier, disabled implementation queried Spark for patients updated in the
            // last two days ("/Patient?_lastUpdated=gt...") and kept those whose first Meta extension
            // contained "pending". It was removed here as dead code; see version control if needed.
            return(listPending);
        }
コード例 #7
0
        /// <summary>
        /// Purges the MongoDB databases and the fingerprint match database when the request
        /// supplies a "destroykey" query parameter equal to the configured <c>DestroyKey</c>.
        /// The outcome is written to the response as plain text.
        /// </summary>
        /// <remarks>
        /// A missing or mismatched key produces an empty response, as before.
        /// An unset or empty <c>DestroyKey</c> never authorises a purge.
        /// </remarks>
        /// <param name="context">HTTP context of the current request.</param>
        public void ProcessRequest(HttpContext context)
        {
            context.Response.ContentType = "text/plain";
            string purgeResult = "";
            string destroyKey  = "";

            try
            {
                // Fall back to a match score of 30 when the configured value is not a valid number.
                if (uint.TryParse(MinimumAcceptedMatchScore, out _minimumAcceptedMatchScore) == false)
                {
                    _minimumAcceptedMatchScore = 30;
                }

                foreach (String key in context.Request.QueryString.AllKeys)
                {
                    if (key == "destroykey")
                    {
                        destroyKey = context.Request.QueryString[key];
                        break;
                    }
                }

                // Guard against an empty configured key: previously an unset DestroyKey ("")
                // matched a request that carried no key at all and purged everything.
                if (!string.IsNullOrEmpty(DestroyKey) && destroyKey == DestroyKey)
                {
                    MongoDBWrapper dbwrapper = new MongoDBWrapper(NoIDMongoDBAddress, SparkMongoDBAddress);
                    if (dbwrapper.DeleteMongoDBs() == true)
                    {
                        FingerPrintMatchDatabase dbMinutia = new FingerPrintMatchDatabase(DatabaseLocation, BackupLocation, _minimumAcceptedMatchScore);
                        if (dbMinutia.DeleteMatchDatabase())
                        {
                            purgeResult = "Successful.";
                        }
                        else
                        {
                            purgeResult = "Error in PurgeAllDatabases::ProcessRequest: Unable to delete all databases.";
                        }
                    }
                    else
                    {
                        // Previously this failure produced an empty response, which the caller
                        // could not tell apart from a rejected key.
                        purgeResult = "Error in PurgeAllDatabases::ProcessRequest: Unable to delete all databases.";
                    }
                }
                else
                {
                    //TODO: log this event as an invalid attempt due to mismatched keys.
                }
            }
            catch (Exception ex)
            {
                purgeResult = "Error in PurgeAllDatabases::ProcessRequest: " + ex.Message;
            }
            context.Response.Write(purgeResult);
        }
コード例 #8
0
        /// <summary>
        /// Uploads every MongoDB app record not yet flagged as "Uploaded" to Keen.IO and
        /// marks each record as updated once its event has been sent.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Keen.IO keys and bucket name come from the application configuration file
            var appSettings = ConfigurationManager.AppSettings;
            _keenIOProjectID = appSettings["keenIOProjectID"];
            _keenIOMasterKey = appSettings["keenIOMasterKey"];
            _keenIOWriteKey  = appSettings["keenIOWriteKey"];
            _keenIOReadKey   = appSettings["keenIOReadKey"];
            _bucketName      = appSettings["keenIOBucketName"];

            // MongoDB wrapper, addressed as "server:port"
            string         serverAndPort = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            MongoDBWrapper mongoDB       = new MongoDBWrapper();
            mongoDB.ConfigureDatabase(
                Consts.MONGO_USER,
                Consts.MONGO_PASS,
                Consts.MONGO_AUTH_DB,
                serverAndPort,
                Consts.MONGO_TIMEOUT,
                Consts.MONGO_DATABASE,
                Consts.MONGO_COLLECTION);

            // Keen.IO client
            var keenClient = new KeenClient(
                new ProjectSettingsProvider(_keenIOProjectID, _keenIOMasterKey, _keenIOWriteKey, _keenIOReadKey));

            // Replace the query below to change which records are read and uploaded.
            var notYetUploaded = mongoDB.FindMatch <AppModel> (Query.NE("Uploaded", true));
            foreach (var app in notYetUploaded)
            {
                try
                {
                    keenClient.AddEvent("PlayStore2014", app);

                    // Progress feedback after every 100 uploaded apps
                    if (++_appsCounter % 100 == 0)
                    {
                        Console.WriteLine("Uploaded : " + _appsCounter);
                    }

                    mongoDB.SetUpdated(app.Url);
                }
                catch (Exception ex)
                {
                    // A failed upload is reported and the loop moves on to the next record.
                    Console.WriteLine("\n\t" + ex.Message);
                }
            }
        }
コード例 #9
0
        /// <summary>
        /// Reads the app records that are not yet marked "Uploaded" from MongoDB, sends each
        /// one to Keen.IO as a "PlayStore2014" event and then marks the record as updated.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main (string[] args)
        {
            // Keen.IO credentials and bucket name from the config file
            _keenIOProjectID = ConfigurationManager.AppSettings["keenIOProjectID"];
            _keenIOMasterKey = ConfigurationManager.AppSettings["keenIOMasterKey"];
            _keenIOWriteKey  = ConfigurationManager.AppSettings["keenIOWriteKey"];
            _keenIOReadKey   = ConfigurationManager.AppSettings["keenIOReadKey"];
            _bucketName      = ConfigurationManager.AppSettings["keenIOBucketName"];

            // MongoDB connection
            var mongoDB = new MongoDBWrapper ();
            mongoDB.ConfigureDatabase (
                Consts.MONGO_USER,
                Consts.MONGO_PASS,
                Consts.MONGO_AUTH_DB,
                String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT),
                Consts.MONGO_TIMEOUT,
                Consts.MONGO_DATABASE,
                Consts.MONGO_COLLECTION);

            // Keen.IO client
            var projectSettings = new ProjectSettingsProvider (_keenIOProjectID, _keenIOMasterKey, _keenIOWriteKey, _keenIOReadKey);
            var keenClient      = new KeenClient (projectSettings);

            // Everything below is the "reading" logic: adapt the query to upload other records.
            var pendingQuery = Query.NE ("Uploaded", true);
            foreach (var record in mongoDB.FindMatch<AppModel> (pendingQuery))
            {
                try
                {
                    keenClient.AddEvent ("PlayStore2014", record);
                    _appsCounter += 1;

                    // Console feedback on every 100th processed app
                    bool reportProgress = (_appsCounter % 100) == 0;
                    if (reportProgress)
                    {
                        Console.WriteLine ("Uploaded : " + _appsCounter);
                    }

                    mongoDB.SetUpdated (record.Url);
                }
                catch (Exception ex)
                {
                    // Report the failure and continue with the next record.
                    Console.WriteLine ("\n\t" + ex.Message);
                }
            }
        }
コード例 #10
0
        /// <summary>
        /// Copies posts from MongoDB into SQL Server until the wrapper reports no further post.
        /// Each post and its comments are written inside one transaction that is committed
        /// after the comments have been processed.
        /// </summary>
        /// <param name="connection">SQL Server connection used for the inserts.</param>
        /// <param name="wrapper">MongoDB wrapper that supplies the posts.</param>
        private static async Task ReadPosts(SqlConnection connection, MongoDBWrapper wrapper)
        {
            // Inserts one post and returns the generated id as text.
            const string insertPostSql = "DECLARE @newRecord table(newId uniqueidentifier); "
                                         + "INSERT INTO Post "
                                         + "(Text, WorkoutDate) "
                                         + "OUTPUT INSERTED.Id INTO @newRecord "
                                         + "VALUES "
                                         + "(@text, @workoutDate) "
                                         + "SELECT CONVERT(nvarchar(50), newId) FROM @newRecord";

            // A null post from the wrapper ends the loop.
            for (var post = await wrapper.GetNextPost(); post != null; post = await wrapper.GetNextPost())
            {
                Console.WriteLine($"ReadPosts: Read {post.Id}");

                using var transaction = connection.BeginTransaction();

                // Write the post to SQL Server
                var insertParameters = new { text = post.Text, workoutDate = post.WorkoutDate };
                var insertedIds      = await connection.QueryAsync <string>(insertPostSql, insertParameters, transaction);

                // Copy all comments of this post within the same transaction
                await ReadComments(transaction, connection, insertedIds.Single(), post.Id.ToString(), wrapper);

                transaction.Commit();
            }

            Console.WriteLine("ReadPosts: End");
        }
コード例 #11
0
        // Parses the request body as a BSON document, passes it to MongoDBWrapper.RunJenkinsJob
        // and returns the result with HTTP 200. Any exception is logged and returned as
        // HTTP 500 with the exception message.
        public async Task <HttpResponseMessage> RunCTAlgorithm(HttpRequestMessage request)
        {
            try
            {
                // Log the caller's address and the requested URL
                var httpRequest = HttpContext.Current.Request;
                MLPExecutionLogger.Info(
                    "CTPhantom",
                    "Calling Jenkins Job, IP: " + httpRequest.UserHostAddress + ", Client: " + httpRequest.Url.AbsoluteUri);

                MongoDBWrapper mongoBase = new MongoDBWrapper();

                // The request body is expected to be a JSON document
                string       body    = await request.Content.ReadAsStringAsync();
                BsonDocument payload = BsonDocument.Parse(body);

                var jobResult = await mongoBase.RunJenkinsJob(payload);
                return Request.CreateResponse(HttpStatusCode.OK, jobResult);
            }
            catch (Exception ex)
            {
                MLPExecutionLogger.Error("CTPhantom", ex.Message);
                return Request.CreateResponse(HttpStatusCode.InternalServerError, ex.Message);
            }
        }
コード例 #12
0
        /// <summary>
        /// Enrolls a new patient: reads a FHIR Patient from the request body, saves it to the
        /// Spark FHIR server, stores its fingerprint templates (or evaluates the alternate
        /// question-and-answer pathway when no fingerprints are supplied) and queues the
        /// patient as pending. The outcome is written to the response.
        /// </summary>
        /// <remarks>
        /// The response is "Successful." only when no error was recorded. Previously the
        /// success text overwrote the fingerprint and "no biometrics" errors, and a Spark
        /// failure returned before the response was written.
        /// As before, a fingerprint that cannot be added does not stop the patient from being queued.
        /// </remarks>
        /// <param name="context">HTTP context whose request body carries the FHIR Patient message.</param>
        public void ProcessRequest(HttpContext context)
        {
            try
            {
                // Last error recorded during enrollment; null means everything succeeded.
                string errorText       = null;
                bool   biometricsSaved = false;
                string missingReason   = "";
                string question1       = "";
                string question2       = "";
                string answer1         = "";
                string answer2         = "";

                // Fall back to a match score of 30 when the configured value is not a valid number.
                if (uint.TryParse(MinimumAcceptedMatchScore, out _minimumAcceptedMatchScore) == false)
                {
                    _minimumAcceptedMatchScore = 30;
                }

                Stream       httpStream       = context.Request.InputStream;
                StreamReader httpStreamReader = new StreamReader(httpStream);
                Resource     newResource      = FHIRUtilities.StreamToFHIR(httpStreamReader);

                _patient = (Patient)newResource;
                //TODO: make sure this FHIR message has a new pending status.

                //TODO: make this an atomic transaction.
                //          delete the FHIR message from Spark if there is an error in the minutia.

                Patient ptSaved = (Patient)SendPatientToSparkServer();

                if (ptSaved == null)
                {
                    // Do not return here: the error must still reach the client through the
                    // response write at the end of this method.
                    errorText = "Error sending Patient FHIR message to the Spark FHIR endpoint. " + ExceptionString;
                    LogUtilities.LogEvent(errorText);
                }
                else
                {
                    SourceAFIS.Templates.NoID noID = new SourceAFIS.Templates.NoID();
                    noID.SessionID = ptSaved.Id.ToString();
                    //TODO: Add Argon2d hash here:
                    noID.LocalNoID = "noid://" + DomainName + "/" + StringUtilities.SHA256(DomainName + noID.SessionID + NodeSalt);
                    SessionQueue seq = Utilities.PatientToSessionQueue(_patient, ptSaved.Id.ToString(), noID.LocalNoID, "new", "pending");
                    seq.SubmitDate = DateTime.UtcNow;

                    //TODO: send to selected match hub and get the remote hub ID.
                    // Hub ID in the same format: noid://domain/LocalID

                    if (_patient.Photo.Count > 0)
                    {
                        dbMinutia = new FingerPrintMatchDatabase(DatabaseDirectory, BackupDatabaseDirectory, _minimumAcceptedMatchScore);
                        try
                        {
                            foreach (var minutia in _patient.Photo)
                            {
                                Media media;
                                using (StreamReader minutiaReader = new StreamReader(new MemoryStream(minutia.Data)))
                                {
                                    media = (Media)FHIRUtilities.StreamToFHIR(minutiaReader);
                                }

                                // Save minutias for matching.
                                Template fingerprintTemplate = ConvertFHIR.FHIRToTemplate(media);
                                fingerprintTemplate.NoID = noID;
                                try
                                {
                                    dbMinutia.LateralityCode = (FHIRUtilities.LateralitySnoMedCode)fingerprintTemplate.NoID.LateralitySnoMedCode;
                                    dbMinutia.CaptureSite    = (FHIRUtilities.CaptureSiteSnoMedCode)fingerprintTemplate.NoID.CaptureSiteSnoMedCode;
                                }
                                catch
                                {
                                    // Best effort, as in the original: a failure while setting the
                                    // laterality / capture-site codes must not block enrollment.
                                }
                                if (dbMinutia.AddTemplate(fingerprintTemplate) == false)
                                {
                                    errorText = "Error adding a fingerprint to the match database.";
                                }
                            }
                        }
                        finally
                        {
                            // Release the match database even when a template fails to parse.
                            dbMinutia.Dispose();
                        }
                        biometricsSaved = true;
                    }
                    else
                    {
                        // check alternate pathway Q&A
                        foreach (var id in _patient.Identifier)
                        {
                            bool isBiometricIdentifier =
                                id.System != null &&
                                id.System.IndexOf("biometric", StringComparison.OrdinalIgnoreCase) >= 0;

                            // Skip identifiers without the Q&A extension instead of failing on Extension[0].
                            if (isBiometricIdentifier && id.Extension != null && id.Extension.Count > 0)
                            {
                                Extension extExceptionQA = id.Extension[0];
                                foreach (var ext in extExceptionQA.Extension)
                                {
                                    if (ext.Url == null || ext.Value == null)
                                    {
                                        continue;
                                    }

                                    if (ext.Url.IndexOf("reason", StringComparison.OrdinalIgnoreCase) >= 0)
                                    {
                                        missingReason = ext.Value.ToString();
                                    }
                                    else if (ext.Url.IndexOf("question 1", StringComparison.OrdinalIgnoreCase) >= 0)
                                    {
                                        question1 = ext.Value.ToString();
                                    }
                                    else if (ext.Url.IndexOf("answer 1", StringComparison.OrdinalIgnoreCase) >= 0)
                                    {
                                        answer1 = ext.Value.ToString();
                                    }
                                    else if (ext.Url.IndexOf("question 2", StringComparison.OrdinalIgnoreCase) >= 0)
                                    {
                                        question2 = ext.Value.ToString();
                                    }
                                    else if (ext.Url.IndexOf("answer 2", StringComparison.OrdinalIgnoreCase) >= 0)
                                    {
                                        answer2 = ext.Value.ToString();
                                    }
                                }

                                // The alternate pathway counts only when the reason and both Q&A pairs are present.
                                if (
                                    missingReason.Length > 0 &&
                                    question1.Length > 0 && answer1.Length > 0 &&
                                    question2.Length > 0 && answer2.Length > 0
                                    )
                                {
                                    if (missingReason != "I am permanently physically unable to provide fingerprints")
                                    {
                                        if (missingReason == "I am temporarily physically unable to provide fingerprints")
                                        {
                                            seq.PatientStatus = "hold**";
                                        }
                                        else if (missingReason == "I attempted the fingerprint scan process, but I could not get a successful scan on either hand")
                                        {
                                            seq.PatientStatus = "hold";
                                        }
                                    }
                                    else
                                    {
                                        seq.PatientStatus = "new***";
                                    }
                                    biometricsSaved = true;
                                }
                            }
                        }
                        // log patient in alternatesearch container
                    }

                    if (biometricsSaved)
                    {
                        MongoDBWrapper dbwrapper = new MongoDBWrapper(NoIDMongoDBAddress, SparkMongoDBAddress);
                        dbwrapper.AddPendingPatient(seq);
                    }
                    else
                    {
                        errorText = "Critical Error! No biometrics or alternates provided. Can not complete enrollment.";
                        LogUtilities.LogEvent(errorText);
                    }
                    //TODO: end atomic transaction.
                }

                // Report success only when nothing went wrong above.
                _responseText = errorText ?? "Successful.";
            }
            catch (Exception ex)
            {
                _responseText = "Error in AddNewPatient::ProcessRequest: " + ex.Message;
                LogUtilities.LogEvent(_responseText);
            }
            context.Response.Write(_responseText);
            context.Response.End();
        }
コード例 #13
0
        /// <summary>
        /// Crawls one Play Store category: requests the category page, then keeps posting
        /// paged requests, queueing every app url that is neither processed nor already queued.
        /// </summary>
        /// <remarks>
        /// Paging stops when a url is seen a second time, when a page yields no urls, or when
        /// the number of failed requests exceeds <c>Consts.MAX_REQUEST_ERRORS</c>.
        /// </remarks>
        /// <param name="categoryUrl">Url of the category page to crawl.</param>
        /// <param name="categoryName">Category name, used only for logging.</param>
        /// <param name="shouldUseProxies">Not used by this method.</param>
        private static void CrawlCategory(string categoryUrl, string categoryName, bool shouldUseProxies)
        {
            // Console Feedback
            _logger.Warn("Crawling Category : [ " + categoryName + " ]");

            // Hashset of urls used to keep track of what's been parsed already
            HashSet <String> foundUrls = new HashSet <String> ();

            // Control variable to avoid looping forever on paging
            bool isDonePagging = false;

            // HTML Response
            string response;

            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB           = new MongoDBWrapper();
            string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Ensuring the database has the proper index
            mongoDB.EnsureIndex("Url");

            // Response Parser
            PlayStoreParser parser = new PlayStoreParser();

            // Executing Web Requests
            using (WebRequests server = new WebRequests())
            {
                // Creating Request Object
                server.Headers.Add(Consts.ACCEPT_LANGUAGE);
                server.Host      = Consts.HOST;
                server.UserAgent = Consts.GITHUBURL;
                server.Encoding  = "utf-8";

                // Executing Initial Request
                response = server.Get(categoryUrl);

                // Parsing Links out of Html Page (Initial Request)
                foreach (string url in parser.ParseAppUrls(response))
                {
                    // Saving found url on local hashset
                    foundUrls.Add(url);

                    // Queue the app unless it was already processed or is already queued
                    if ((!mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued(url)))
                    {
                        mongoDB.AddToQueue(url);
                    }
                }

                // Executing Requests for more Play Store Links
                int baseSkip          = 60;
                int currentMultiplier = 1;
                int errorsCount       = 0;

                // Plain concatenation: the url was previously passed through String.Format as a
                // format string, which throws FormatException if it contains '{' or '}'.
                string pagingUrl = categoryUrl + "?authuser=0";
                do
                {
                    // Assembling new PostData with paging values
                    string postData = String.Format(Consts.CATEGORIES_POST_DATA, (currentMultiplier * baseSkip), baseSkip);

                    // Executing request for values
                    response = server.Post(pagingUrl, postData);

                    // Checking Server Status; a failed page is retried until the error budget is used up
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        _logger.Error("Http Error" + " - Status Code [ " + server.StatusCode + " ]");
                        errorsCount++;
                        continue;
                    }

                    // Parsing Links
                    bool parsedAnyUrl = false;
                    foreach (string url in parser.ParseAppUrls(response))
                    {
                        parsedAnyUrl = true;

                        // If a certain app is found twice, the paging logic has wrapped around,
                        // so all the apps for this category were parsed already
                        if (foundUrls.Contains(url))
                        {
                            isDonePagging = true;
                            break;
                        }

                        // Saving found url on local hashset
                        foundUrls.Add(url);

                        // Queue the app unless it was already processed or is already queued
                        if ((!mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued(url)))
                        {
                            mongoDB.AddToQueue(url);
                        }
                    }

                    // A successful page without any url means there is nothing left to page through;
                    // without this check the loop would never terminate in that case.
                    if (!parsedAnyUrl)
                    {
                        isDonePagging = true;
                    }

                    // Incrementing Paging Multiplier
                    currentMultiplier++;
                }  while (!isDonePagging && errorsCount <= Consts.MAX_REQUEST_ERRORS);
            }
        }
コード例 #14
0
        /// <summary>
        /// Crawler entry point. Initializes the log, optionally loads HTTP proxies from a file,
        /// configures the MongoDB wrapper and its indexes, and then walks through every
        /// bootstrap category and search term.
        /// </summary>
        /// <param name="args">When exactly one argument is received, it is used as the path of the text file holding the proxies</param>
        static void Main(string[] args)
        {
            // Log comes first, so every following step can report through it
            LogSetup.InitializeLog("PlayStoreCrawler.log", "info");
            _logger = LogManager.GetCurrentClassLogger();

            // Proxies are used only when a single argument was received
            bool isUsingProxies = (args != null && args.Length == 1);

            if (isUsingProxies)
            {
                _logger.Info("Loading Proxies from File");

                // Path of the proxies .txt file
                String proxiesPath = args[0];

                // Without the file there is nothing to load, so the process is terminated
                if (File.Exists(proxiesPath) == false)
                {
                    _logger.Fatal("Couldnt find proxies on path : " + proxiesPath);
                    System.Environment.Exit(-100);
                }

                // One entry per line of the file
                string[] proxyLines = File.ReadAllLines(proxiesPath, Encoding.GetEncoding("UTF-8"));

                try
                {
                    ProxiesLoader.Load(proxyLines.ToList());
                }
                catch (Exception loadError)
                {
                    // A failure while loading the proxies is fatal for the process
                    _logger.Fatal(loadError);
                    System.Environment.Exit(-101);
                }
            }

            // MongoDB wrapper setup
            _logger.Info("Setting up MongoDB Collections and Indexes");
            string fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            _mongoDB = new MongoDBWrapper();
            _mongoDB.ConfigureDatabase(
                Consts.MONGO_USER,
                Consts.MONGO_PASS,
                Consts.MONGO_AUTH_DB,
                fullServerAddress,
                Consts.MONGO_TIMEOUT,
                Consts.MONGO_DATABASE,
                Consts.MONGO_COLLECTION);

            // Indexes: "Url" on the default collection, "IsBusy" and "Url" on the queue collection
            _mongoDB.EnsureIndex("Url");
            _mongoDB.EnsureIndex("IsBusy", Consts.QUEUED_APPS_COLLECTION);
            _mongoDB.EnsureIndex("Url", Consts.QUEUED_APPS_COLLECTION);

            _logger.Info("Started Bootstrapping Steps");

            // Step 1 : Play Store categories (key and value of each pair are handed to CrawlCategory)
            foreach (var categoryEntry in BootstrapTerms.categoriesAndNames)
            {
                CrawlCategory(categoryEntry.Key, categoryEntry.Value, isUsingProxies);
            }

            // Step 2 : Character search terms
            foreach (var characterTerm in BootstrapTerms.charactersSearchTerms)
            {
                CrawlStore(characterTerm, isUsingProxies);
            }

            // Keep adding characters / search terms in order to increase the crawler's reach
            // Step 3 : App categories used as search terms
            foreach (var categoryTerm in BootstrapTerms.categoriesSearchTerms)
            {
                CrawlStore(categoryTerm, isUsingProxies);
            }

            // Step 4 : Miscellaneous search terms
            foreach (var extraTerm in BootstrapTerms.miscSearchTerms)
            {
                CrawlStore(extraTerm, isUsingProxies);
            }

            // Step 5 : Country names used as search terms
            foreach (var country in BootstrapTerms.countryNames)
            {
                CrawlStore(country, isUsingProxies);
            }
        }
コード例 #15
0
        /// <summary>
        /// Entry point of the reviews worker. For every app returned by
        /// <c>MongoDBWrapper.FindAndModifyReviews</c> it requests the first page of reviews,
        /// appends the reviews that are not on the record yet (compared by permalink) and saves
        /// the record with its "ReviewsStatus" set to "Visited" or "Error".
        /// </summary>
        /// <param name="args">When exactly one argument is received, it is used as the path of the text file holding the proxies</param>
        static void Main (string[] args)
        {
            // Configuring Log Object
            Logger logger = LogManager.GetCurrentClassLogger ();

            // Control Variable (Bool - Should the process use proxies? )
            bool isUsingProxies = false;

            logger.Info ("Checking proxies configuration");

            // Checking for the need to use proxies
            if (args != null && args.Length == 1)
            {
                // Setting flag to true
                isUsingProxies = true;

                // Loading proxies from .txt received as argument
                String fPath = args[0];

                // Sanity Check
                if (!File.Exists (fPath))
                {
                    logger.Fatal ("Couldnt find proxies on path : " + fPath);
                    System.Environment.Exit (-100);
                }

                // Reading Proxies from File
                logger.Info ("Loading Proxies");
                string[] fLines = File.ReadAllLines (fPath, Encoding.GetEncoding ("UTF-8"));

                try
                {
                    // Actual Load of Proxies
                    ProxiesLoader.Load (fLines.ToList ());
                }
                catch (Exception ex)
                {
                    logger.Fatal (ex);
                    System.Environment.Exit (-101);
                }
            }

            // MongoDB instance Creation
            logger.Info ("Configuring MonboDB Client");

            // Creating instance of Mongo Handler for the main collection
            MongoDBWrapper mongoClient = new MongoDBWrapper ();
            string fullServerAddress   = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoClient.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            logger.Info ("Iterating over Apps");

            // Creating Play Store Parser
            PlayStoreParser parser = new PlayStoreParser ();

            // App Model
            AppModel appRecord;

            // Finding all the "Apps" that didn't have the reviews visited yet
            while ((appRecord = mongoClient.FindAndModifyReviews ()) != null)
            {
                // Control Variable - scoped to a single app, so a failure on one app
                // does not flag every following app as "Error"
                bool noError = true;

                // Extracting app ID from URL
                string appId = appRecord.Url.Replace (Consts.PLAY_STORE_PREFIX, String.Empty);

                // Console Feedback
                logger.Info ("Processing App [ " + appRecord.Name + " ] ");

                try
                {
                    // Console Feedback
                    Console.Write ("Reviews from : " + appRecord.Name);

                    // Issuing Request for Reviews
                    string response = ReviewsWrapper.GetAppReviews (appId, 1, isUsingProxies);

                    // Checking for Blocking Situation
                    if (String.IsNullOrEmpty (response))
                    {
                        logger.Info ("Blocked by Play Store. Sleeping process for 10 seconds before retrying.");

                        // Thread Wait for 10 seconds
                        Thread.Sleep (TimeSpan.FromSeconds (10));

                        // Retrying the same request once
                        response = ReviewsWrapper.GetAppReviews (appId, 1, isUsingProxies);

                        // Still no response : flags the app as "Error" instead of dereferencing
                        // a null response or marking the app as "Visited" without any data.
                        // The "finally" block below still runs on "continue" and saves the status
                        if (String.IsNullOrEmpty (response))
                        {
                            logger.Info ("Still blocked by Play Store after retrying. Flagging app as Error");
                            noError = false;
                            continue;
                        }
                    }

                    // Checking for "No Reviews" app
                    if (response.Length < 50)
                    {
                        logger.Info ("No Reviews for this app. Skipping");
                        Console.Write (" - No Reviews Found\n");
                        continue;
                    }

                    // Normalizing Response to Proper HTML
                    response = ReviewsWrapper.NormalizeResponse (response);

                    // List of Reviews
                    List<AppReview> reviews = new List<AppReview> ();

                    // Iterating over Parsed Reviews
                    foreach (var review in parser.ParseReviews (response))
                    {
                        // Adding App Data to the review
                        review.appID     = appId;
                        review.appName   = appRecord.Name;
                        review.appURL    = appRecord.Url;

                        // Capture Timestamp to the model
                        review.timestamp = DateTime.Now;

                        // Adding reviews to the current list
                        reviews.Add (review);
                    }

                    // Any Review Found ?
                    if (reviews.Count > 0)
                    {
                        Console.Write (" - " + reviews.Count + " Reviews Found\n");

                        // Checking if there was any previous list of reviews
                        if (appRecord.Reviews == null)
                        {
                            appRecord.Reviews = reviews;
                        }
                        else // Previous List found - Appending only the new ones
                        {
                            foreach (var review in reviews)
                            {
                                // A review is "new" when no stored review has the same permalink
                                if (!appRecord.Reviews.Any (t => t.permalink.Equals (review.permalink)))
                                {
                                    appRecord.Reviews.Add (review);
                                }
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    logger.Error (ex);
                    Console.ForegroundColor = ConsoleColor.Red;
                    Console.WriteLine ("Error : " + ex.Message);
                    Console.ForegroundColor = ConsoleColor.White;
                    noError = false;
                }
                finally
                {
                    // Toggling back the "ReviewsStatus" attribute from the model
                    appRecord.ReviewsStatus = noError ? "Visited" : "Error";
                    mongoClient.SaveRecord<AppModel> (appRecord);
                }
            }
        }
コード例 #16
0
ファイル: Crawler_RF.cs プロジェクト: uwtcat/PlayStoreCrawler
        /// <summary>
        /// Executes a Search using the searchField as the search parameter,
        /// paginates / scrolls the search results and adds the url of every app that is
        /// neither processed nor queued yet to the queue kept through <c>MongoDBWrapper.AddToQueue</c>
        /// </summary>
        /// <param name="searchField">Search term formatted into <c>Consts.CRAWL_URL</c></param>
        private static void CrawlStore (string searchField)
        {
            // Console Feedback
            Console.WriteLine ("Crawling Search Term : [ " + searchField + " ]");

            // Compiling Regular Expression used to parse the "pagToken" out of the Play Store
            Regex pagTokenRegex = new Regex (@"GAEi+.+\:S\:.{11}\\42", RegexOptions.Compiled);

            // HTML Response (always the last SUCCESSFUL response, since the next "pagToken" is read from it)
            string response;

            // MongoDB Helper
            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB   = new MongoDBWrapper ();
            string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Ensuring the database has the proper index
            mongoDB.EnsureIndex ("Url");

            // Response Parser
            PlayStoreParser parser = new PlayStoreParser ();

            // Queues a single app url, unless it has been already processed or is queued to be processed.
            // Shared by the initial request and by every paging request
            Action<string> queueIfNew = appUrl =>
            {
                if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + appUrl)) && (!mongoDB.AppQueued (appUrl)))
                {
                    // Console Feedback
                    Console.WriteLine (" . Queued App");

                    // Than, queue it :)
                    mongoDB.AddToQueue (appUrl);
                    Thread.Sleep (250); // Hiccup
                }
                else
                {
                    // Console Feedback
                    Console.WriteLine (" . Duplicated App. Skipped");
                }
            };

            // Executing Web Requests
            using (WebRequests server = new WebRequests ())
            {
                // Creating Request Object
                server.Host = Consts.HOST;

                // Executing Initial Request
                response    = server.Post (String.Format (Consts.CRAWL_URL, searchField), Consts.INITIAL_POST_DATA);

                // Parsing Links out of Html Page (Initial Request)
                foreach (string url in parser.ParseAppUrls (response))
                {
                    queueIfNew (url);
                }

                // Executing Requests for more Play Store Links
                int errorsCount = 0;
                do
                {
                    // Finding pagToken from HTML
                    var rgxMatch = pagTokenRegex.Match (response);

                    // If there's no match, skips it
                    if (!rgxMatch.Success)
                    {
                        break;
                    }

                    // Reading Match from Regex, and applying needed replacements
                    string pagToken = rgxMatch.Value.Replace (":S:", "%3AS%3A").Replace("\\42", String.Empty).Replace(@"\\u003d", String.Empty);

                    // Assembling new PostData with paging values
                    string postData = String.Format (Consts.POST_DATA, pagToken);

                    // Executing request for values
                    string pageResponse = server.Post (String.Format (Consts.CRAWL_URL, searchField), postData);

                    // Checking Server Status
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        LogWriter.Error ("Http Error", "Status Code [ " + server.StatusCode + " ]");
                        errorsCount++;

                        // "response" is left untouched on purpose : the same pagToken is read again
                        // on the next iteration, so the failed page is retried until the errors limit is hit
                        continue;
                    }

                    // Request worked, so this page becomes the source of the next pagToken
                    response = pageResponse;

                    // Parsing Links
                    foreach (string url in parser.ParseAppUrls (response))
                    {
                        queueIfNew (url);
                    }

                }  while (parser.AnyResultFound (response) && errorsCount <= Consts.MAX_REQUEST_ERRORS);
            }
        }
コード例 #17
0
        /// <summary>
        /// Sends a batch of apps to Keen.IO (collection "PlayStore2014") and, once the upload worked,
        /// calls <c>SetUpdated</c> on the MongoDB wrapper for the url of each uploaded app.
        /// Any exception is reported on the console and swallowed.
        /// </summary>
        /// <param name="keenClient">Keen client used for the upload</param>
        /// <param name="eventsToSend">Batch of apps to be uploaded</param>
        /// <param name="mongoDB">MongoDB wrapper used to flag the uploaded apps</param>
        /// <returns>Number of uploaded apps, or 0 if any exception was raised</returns>
        private static int SendEventsToKeep(Keen.Core.KeenClient keenClient, List <AppModel> eventsToSend, MongoDBWrapper mongoDB)
        {
            try
            {
                // Adding Event to Keen.IO
                keenClient.AddEvents("PlayStore2014", eventsToSend);

                // Incrementing Counter (keeping the previous value to detect the "hundreds" boundary)
                var previousCounter = _appsCounter;
                _appsCounter       += eventsToSend.Count;

                // Console feedback Every 100 Processed Apps.
                // The counter grows by a whole batch at once, so it may jump over an exact multiple of 100;
                // comparing the "hundreds" before and after the increment catches those cases as well.
                // NOTE(review): assumes _appsCounter is an integral type (integer division) - confirm
                if (_appsCounter / 100 > previousCounter / 100)
                {
                    Console.WriteLine("Uploaded : " + _appsCounter);
                }

                foreach (var e in eventsToSend)
                {
                    mongoDB.SetUpdated(e.Url);
                }

                return(eventsToSend.Count);
            }
            catch (Exception ex)
            {
                // Best effort : the failure is only reported on the console and 0 is returned below
                Console.WriteLine("\n\t" + ex.Message);
            }

            return(0);
        }
コード例 #18
0
        /// <summary>
        /// Entry point of the worker piece of the process
        /// Notice that you can run as many workers as you want to in order to make the crawling faster.
        /// Each iteration takes one queued app through <c>MongoDBWrapper.FindAndModify</c>, downloads and
        /// parses its page, inserts the parsed app into the database and queues its related apps.
        /// </summary>
        /// <param name="args">When exactly one argument is received, it is used as the path of the text file holding the proxies</param>
        static void Main(string[] args)
        {
            // Configuring Log Object
            LogSetup.InitializeLog ("PlayStoreWorker.log", "info");
            Logger logger = LogManager.GetCurrentClassLogger ();
            logger.Info ("Worker Started");

            // Control Variable (Bool - Should the process use proxies? )
            bool isUsingProxies = false;

            // Checking for the need to use proxies
            if (args != null && args.Length == 1)
            {
                // Setting flag to true
                isUsingProxies = true;

                // Loading proxies from .txt received as argument
                String fPath = args[0];

                // Sanity Check
                if (!File.Exists (fPath))
                {
                    logger.Fatal ("Couldnt find proxies on path : " + fPath);
                    System.Environment.Exit (-100);
                }

                // Reading Proxies from File
                string[] fLines = File.ReadAllLines (fPath, Encoding.GetEncoding ("UTF-8"));

                try
                {
                    // Actual Load of Proxies
                    ProxiesLoader.Load (fLines.ToList ());
                }
                catch (Exception ex)
                {
                    logger.Fatal (ex);
                    System.Environment.Exit (-101);
                }
            }

            // Parser
            PlayStoreParser parser = new PlayStoreParser();

            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB   = new MongoDBWrapper();
            string fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Creating Instance of Web Requests Server
            WebRequests server = new WebRequests ();

            // Queued App Model
            QueuedApp app;

            // Retry Counter (Used for exponential wait increasing logic)
            int retryCounter = 0;

            // Iterating Over MongoDB Records while there are documents to be processed
            while ((app = mongoDB.FindAndModify ()) != null)
            {
                try
                {
                    // Building APP URL
                    string appUrl = app.Url;

                    // Sanity check of app page url
                    if (app.Url.IndexOf ("http", StringComparison.OrdinalIgnoreCase) < 0)
                    {
                        appUrl = Consts.APP_URL_PREFIX + app.Url;
                    }

                    // Checking if this app is on the database already
                    if (mongoDB.AppProcessed (appUrl))
                    {
                        // Console Feedback, Comment this line to disable if you want to
                        logger.Info ("Duplicated App, skipped.");

                        // Delete it from the queue and continues the loop
                        mongoDB.RemoveFromQueue (app.Url);
                        continue;
                    }

                    // Configuring server and Issuing Request
                    server.Headers.Add (Consts.ACCEPT_LANGUAGE);
                    server.Host              = Consts.HOST;
                    server.UserAgent         = Consts.GITHUBURL;
                    server.Encoding          = "utf-8";
                    server.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;

                    // Checking for the need to use "HTTP Proxies"
                    if (isUsingProxies)
                    {
                        server.Proxy = ProxiesLoader.GetWebProxy ();
                    }

                    // Issuing HTTP Request
                    string response          = server.Get (appUrl);

                    // Sanity Check
                    if (String.IsNullOrEmpty (response) || server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        logger.Info ("Error opening app page : " + appUrl);

                        // Status of the FAILED request. It must be read before the server object is renewed,
                        // otherwise the check below would look at the status of a brand new (unused) instance
                        var failedStatusCode = server.StatusCode;

                        // Renewing WebRequest Object to get rid of Cookies
                        server = new WebRequests ();

                        // Fallback time variable
                        TimeSpan waitTime;

                        // Checking which "Waiting Logic" to use - If there are proxies being used, there's no need to wait too much
                        // If there are no proxies in use, on the other hand, the process must wait more time
                        if (isUsingProxies)
                        {
                            // Waits two seconds everytime
                            waitTime = TimeSpan.FromSeconds (2);
                        }
                        else
                        {
                            // Increments retry counter
                            retryCounter++;

                            // Checking for maximum retry count
                            if (retryCounter >= 8)
                            {
                                waitTime = TimeSpan.FromMinutes (20);
                            }
                            else
                            {
                                // Calculating next wait time ( 2 ^ retryCounter seconds)
                                waitTime = TimeSpan.FromSeconds (Math.Pow (2, retryCounter));
                            }
                        }

                        // Hiccup to avoid google blocking connections in case of heavy traffic from the same IP
                        logger.Info ("======================================================");
                        logger.Info ("\n\tFallback : " + waitTime.TotalSeconds + " Seconds");
                        Thread.Sleep (waitTime);

                        // If The Status code is "ZERO" (it means 404) - App must be removed from "Queue"
                        if (failedStatusCode == 0)
                        {
                            // Console Feedback
                            logger.Info ("\tApp Not Found (404) - " + app.Url);

                            mongoDB.RemoveFromQueue (app.Url);
                        }
                        logger.Info ("======================================================");
                    }
                    else
                    {
                        // Reseting retry counter
                        retryCounter = 0;

                        // Parsing Useful App Data
                        AppModel parsedApp = parser.ParseAppPage (response, appUrl);

                        // Urls of the extra apps exactly as returned by the parser (WITHOUT the url prefix)
                        List<String> extraAppUrls = new List<String> ();

                        // Avoiding Exceptions caused by "No Related Apps" situations - Must be treated differently
                        try
                        {
                            // Parsing "Related Apps" and "More From Developer" Apps (URLS Only)
                            foreach (string extraAppUrl in parser.ParseExtraApps (response))
                            {
                                extraAppUrls.Add (extraAppUrl);
                            }

                            // Adding "Related Apps" (full urls) to Apps Model
                            parsedApp.RelatedUrls = extraAppUrls.Select (t => Consts.APP_URL_PREFIX + t).Distinct ().ToArray ();
                        }
                        catch
                        {
                            logger.Info ("\tNo Related Apps Found. Skipping");
                        }

                        // Inserting App into Mongo DB Database
                        bool processingWorked = mongoDB.Insert<AppModel>(parsedApp);

                        // If the processing failed, do not remove the app from the database, instead, keep it and flag it as not busy
                        // so that other workers can try to process it later
                        if (!processingWorked)
                        {
                            mongoDB.ToggleBusyApp(app, false);
                        }
                        else // On the other hand, if processing worked, removes it from the database
                        {
                            // Console Feedback, Comment this line to disable if you want to
                            Console.ForegroundColor = ConsoleColor.Red;
                            logger.Info ("Inserted App : " + parsedApp.Name);
                            Console.ForegroundColor = ConsoleColor.White;

                            mongoDB.RemoveFromQueue(app.Url);
                        }

                        // Counters for console feedback only
                        int extraAppsCounter = 0, newExtraApps = 0;

                        // Queueing "Related Apps" and "More From Developer" Apps (URLS Only)
                        foreach (string extraAppUrl in extraAppUrls)
                        {
                            // Incrementing counter of extra apps
                            extraAppsCounter++;

                            // Assembling Full app Url to check with database.
                            // The prefix is applied exactly once here, because "extraAppUrls" holds the unprefixed urls
                            string fullExtraAppUrl = Consts.APP_URL_PREFIX + extraAppUrl;

                            // Checking if the app was either processed or queued to be processed already
                            if ((!mongoDB.AppProcessed (fullExtraAppUrl)) && (!mongoDB.IsAppOnQueue(extraAppUrl)))
                            {
                                // Incrementing counter of inserted apps
                                newExtraApps++;

                                // Adds it to the queue of apps to be processed
                                mongoDB.AddToQueue (extraAppUrl);
                            }
                        }

                        // Console Feedback
                        logger.Info ("Queued " + newExtraApps + " / " + extraAppsCounter + " related apps");
                    }
                }
                catch (Exception ex)
                {
                    logger.Error (ex);
                }
                finally
                {
                    try
                    {
                        // Toggles Busy status back to false
                        mongoDB.ToggleBusyApp (app, false);
                    }
                    catch (Exception ex)
                    {
                        // Toggle Busy App may raise an exception in case of lack of internet connection, so, i must use this
                        // "inner catch" to avoid it from happenning
                        logger.Error (ex);
                    }
                }
            }
        }
コード例 #19
0
ファイル: Crawler.cs プロジェクト: W1N3/GooglePlayAppsCrawler
        /// <summary>
        /// Crawls a single category page : requests the category url, pages through its results and
        /// queues every app url that is neither processed nor queued yet.
        /// Paging stops when an already seen url shows up again, when a page brings no app url at all,
        /// or when the number of http errors exceeds <c>Consts.MAX_REQUEST_ERRORS</c>.
        /// </summary>
        /// <param name="categoryUrl">Url of the category page</param>
        /// <param name="categoryName">Name of the category, used only for log feedback</param>
        /// <param name="shouldUseProxies">Not read anywhere in this method. NOTE(review): presumably meant to toggle the use of http proxies - confirm</param>
        private static void CrawlCategory (string categoryUrl, string categoryName, bool shouldUseProxies)
        {
            // Console Feedback
            _logger.Warn ("Crawling Category : [ " + categoryName + " ]");

            // Hashset of urls used to keep track of what's been parsed already
            HashSet<String> foundUrls = new HashSet<String> ();

            // Control variable to avoid "Loop" on pagging
            bool isDonePagging = false;

            // HTML Response
            string response;

            // MongoDB Helper
            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB   = new MongoDBWrapper ();
            string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Ensuring the database has the proper index
            mongoDB.EnsureIndex ("Url");

            // Response Parser
            PlayStoreParser parser = new PlayStoreParser ();

            // Executing Web Requests
            using (WebRequests server = new WebRequests ())
            {
                // Creating Request Object
                server.Headers.Add (Consts.ACCEPT_LANGUAGE);
                server.Host      = Consts.HOST;
                server.UserAgent = Consts.GITHUBURL;
                server.Encoding  = "utf-8";

                // Executing Initial Request
                response = server.Get (categoryUrl);

                // Parsing Links out of Html Page (Initial Request)
                foreach (string url in parser.ParseAppUrls (response))
                {
                    // Saving found url on local hashset
                    foundUrls.Add (url);

                    // Checks whether the app have been already processed
                    // or is queued to be processed
                    if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
                    {
                        // Than, queue it :)
                        mongoDB.AddToQueue (url);
                    }
                }

                // Executing Requests for more Play Store Links
                int baseSkip          = 60;
                int currentMultiplier = 1;
                int errorsCount       = 0;
                do
                {
                    // Assembling new PostData with paging values
                    string postData = String.Format (Consts.CATEGORIES_POST_DATA, (currentMultiplier * baseSkip), baseSkip);

                    // Executing request for values
                    // (plain concatenation : the url is not a format string, so it must not go through String.Format)
                    response = server.Post (categoryUrl + "?authuser=0", postData);

                    // Checking Server Status
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        _logger.Error ("Http Error" + " - Status Code [ " + server.StatusCode + " ]");
                        errorsCount++;

                        // The multiplier is not incremented, so the same page is requested again
                        continue;
                    }

                    // Tells whether this page brought at least one app url
                    bool anyUrlOnPage = false;

                    // Parsing Links
                    foreach (string url in parser.ParseAppUrls (response))
                    {
                        anyUrlOnPage = true;

                        // If a certain app is found twice, it means that the "pagging" logic got stuck into a
                        // Loop, so the all the apps for this category were parsed already.
                        // HashSet.Add returns false for an url that is already on the set
                        if (!foundUrls.Add (url))
                        {
                            isDonePagging = true;
                            break;
                        }

                        // Checks whether the app have been already processed
                        // or is queued to be processed
                        if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
                        {
                            // Than, queue it :)
                            mongoDB.AddToQueue (url);
                        }
                    }

                    // A successful page without a single app url means there is nothing left to page through.
                    // Without this check, the loop would keep requesting further pages forever
                    if (!anyUrlOnPage)
                    {
                        isDonePagging = true;
                    }

                    // Incrementing Paging Multiplier
                    currentMultiplier++;

                }  while (!isDonePagging && errorsCount <= Consts.MAX_REQUEST_ERRORS);
            }
        }
コード例 #20
0
        ///  *** READ THIS BEFORE YOU START. ***
        ///  *** I MEAN IT, PLEASE, READ IT  ***
        /// 
        ///  This exporting helper will download ALL THE APPS found on the database, and
        ///  dump them to a CSV file (with headers).
        ///  
        ///  Note that, since the database is hosted on AWS, I will PAY (for the outbound internet traffic) if you execute a full database export,
        ///  so, if you are going to execute a full export, please get in touch with me before running this project, or send me a donation
        ///  via paypal on [email protected]
        ///  
        ///  Also, be nice with the database.
        ///  
        ///  *** END OF WARNING ***
        ///
        ///  Entry point. args[0] is the output CSV path. The file is opened in append mode
        ///  using ISO-8859-1, a header line is written, and then one line per app returned
        ///  by the MongoDB query is written using AppModel.ToString ().

        static void Main (string[] args)
        {
            // Logs Counter
            int processedApps = 0;

            // Configuring Log Object
            Logger logger = LogManager.GetCurrentClassLogger ();
            logger.Info ("Worker Started");

            logger.Info ("Checking Arguments");

            // Periodic Log Timer (first tick after 10s, then every 10s).
            // The using block keeps the timer referenced for the whole export and disposes it
            // afterwards, so it cannot be collected early and does not outlive the export.
            using (Timer loggingThread = new Timer (state =>
            {
                logger.Info ("Processed Apps: " + processedApps);

            }, null, 10000, 10000))
            {
                // Validating Arguments
                if (!ValidateArgs (args))
                {
                    // The detail is passed through a placeholder: a message without one drops the argument
                    logger.Fatal ("Invalid Args - {0}", "Args must have 1 element");
                    return;
                }

                logger.Info ("Checking Write Permissions on output Path");
                // Validating Write Permissions on output path
                if (!ValidateFilePermissions (args[0]))
                {
                    logger.Fatal ("Insuficient Permissions - Cannot write on path : {0}", args[0]);
                    return;
                }

                // Configuring MongoDB Wrapper
                MongoDBWrapper mongoDB = new MongoDBWrapper ();
                string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
                mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

                // Opening Output Stream (append mode)
                using (StreamWriter sWriter = new StreamWriter (args[0], true, Encoding.GetEncoding ("ISO-8859-1")))
                {
                    // Auto Flush Content
                    sWriter.AutoFlush = true;

                    // Writing Headers
                    // Every fragment but the last ends with a comma, so no two column names get fused together
                    String headersLine = "Url,ReferenceDate,Name,Developer,IsTopDeveloper,DeveloperURL,PublicationDate,"
                                       + "Category,IsFree,Price,Reviewers,Score.Total,Score.Count,Score.FiveStars,"
                                       + "Score.FourStars,Score.ThreeStars,Score.TwoStars,Score.OneStars,LastUpdateDate,"
                                       + "AppSize,Instalations,CurrentVersion,MinimumOSVersion,ContentRating,HaveInAppPurchases,DeveloperEmail,DeveloperWebsite,DeveloperPrivacyPolicy";

                    sWriter.WriteLine (headersLine);

                    // Example of MongoDB Query Construction
                    // Queries for records which have the attribute "IsTopDeveloper" equal to "false"
                    //var mongoQuery = Query.EQ ("IsTopDeveloper", false);
                    var mongoQuery = Query.EQ ("Category", "/store/apps/category/SPORTS");

                    // More Examples of Queries
                    // var mongoQuery = Query.EQ ("Category", "/store/apps/category/GAME_CASINO");
                    // var mongoQuery = Query.GT ("Price", 10);

                    // Reading the matching apps from the database
                    // USAGE: CHANGE FindMatches to FindAll if you want to export all the records from the database
                    foreach (AppModel app in mongoDB.FindMatch<AppModel> (mongoQuery))
                    {
                        try
                        {
                            // Writing line to File
                            sWriter.WriteLine (app.ToString ());
                            processedApps++;
                        }
                        catch (Exception ex)
                        {
                            // A failure on one record is logged and the export moves on to the next one
                            logger.Error (ex);
                        }
                    }
                }
            }

            // Logging end of the Process
            logger.Info ("Finished Exporting Database");
        }
コード例 #21
0
 /// <summary>
 /// Reads a FHIR Media resource (fingerprint capture) from the request body, converts it to a
 /// match template and searches the local fingerprint database for it.
 /// The plain-text response is the matched NoID, "pending" when the matched patient's current
 /// status is pending, "No local database match." when nothing matched, or the exception
 /// message when processing failed.
 /// </summary>
 /// <param name="context">Current HTTP context; the request body is read and the response is written and ended.</param>
 public void ProcessRequest(HttpContext context)
 {
     try
     {
         // Fall back to a score of 30 when the configured value is not a valid unsigned integer
         if (!uint.TryParse(MinimumAcceptedMatchScore, out _minimumAcceptedMatchScore))
         {
             _minimumAcceptedMatchScore = 30;
         }
         Resource newResource = FHIRUtilities.StreamToFHIR(new StreamReader(context.Request.InputStream));
         _biometics = (Media)newResource;
         // TODO send to biometric match engine. If found, add patient reference to FHIR message.
         // convert FHIR fingerprint message (_biometics) to AFIS template class
         Template probe = ConvertFHIR.FHIRToTemplate(_biometics);
         dbMinutia = new FingerPrintMatchDatabase(_databaseDirectory, _backupDatabaseDirectory, _minimumAcceptedMatchScore);
         try
         {
             try
             {
                 dbMinutia.LateralityCode = (FHIRUtilities.LateralitySnoMedCode)probe.NoID.LateralitySnoMedCode;
                 dbMinutia.CaptureSite    = (FHIRUtilities.CaptureSiteSnoMedCode)probe.NoID.CaptureSiteSnoMedCode;
             }
             catch (Exception codeEx)
             {
                 // Best effort: the search still runs when the codes cannot be applied,
                 // but the failure is recorded instead of being silently dropped.
                 LogUtilities.LogEvent("Could not apply laterality/capture site to the search: " + codeEx.Message);
             }

             MinutiaResult minutiaResult = dbMinutia.SearchPatients(probe);
             if (minutiaResult != null && !string.IsNullOrEmpty(minutiaResult.NoID))
             {
                 // Fingerprint found in database
                 // check if patient is already pending.
                 MongoDBWrapper dbwrapper     = new MongoDBWrapper(NoIDMongoDBAddress, SparkMongoDBAddress);
                 string         currentStatus = dbwrapper.GetCurrentStatus(minutiaResult.NoID);

                 // Ordinal, case-insensitive comparison: the result must not depend on the server culture
                 if (!currentStatus.Equals("pending", StringComparison.OrdinalIgnoreCase))
                 {
                     _responseText = minutiaResult.NoID;  //TODO: for now, it returns the localNoID.  should return a FHIR response.
                 }
                 else
                 {
                     _responseText = "pending";
                 }
             }
             else
             {
                 _responseText = "No local database match.";
             }
             LogUtilities.LogEvent(_responseText);
         }
         finally
         {
             // Release the fingerprint database even when the search or the status lookup throws
             dbMinutia.Dispose();
             LogUtilities.LogEvent("After dbMinutia.Dispose();");
         }
     }
     catch (Exception ex)
     {
         _exception    = ex;
         _responseText = ex.Message;
         // Keep a server-side trace of the failure as well as returning it to the caller
         LogUtilities.LogEvent(_responseText);
     }
     context.Response.Write(_responseText);
     context.Response.End();
 }
コード例 #22
0
        /// <summary>
        /// Entry point. Expects exactly one argument: the path of a text file with one app url per line.
        /// The method is laid out in three phases:
        ///   1. download and parse each app page, inserting or updating it on MongoDB;
        ///   2. page through the reviews of each app and store the ones not yet on the database;
        ///   3. log into Google through Selenium/Firefox and store the data of each reviewer page.
        /// It ends by printing a per-app summary and waiting for a key press.
        /// </summary>
        /// <param name="args">Command line arguments; args[0] is the input file path.</param>
        static void Main(string[] args)
        {
            // Checking for Input Parameters
            if (args == null || args.Length != 1)
            {
                Console.WriteLine("Incorrect number of arguments received. Expected One");
                System.Environment.Exit(-100);
            }

            // Human Readable Variable
            string inputFile = args[0];

            // Checking if the Input file received exists
            if (!File.Exists(inputFile))
            {
                Console.WriteLine(String.Format("Received input file does not exist : {0}", inputFile));
                System.Environment.Exit(-101);
            }

            // App Status
            _appStatus = new Dictionary <String, AppStatusModel> ();

            // Creating Instance of Database Manager
            MongoDBWrapper mongoDB           = new MongoDBWrapper();
            string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Creating Instance of Parser
            PlayStoreParser dataParser = new PlayStoreParser();

            // NOTE(review): this jump skips phases 1 and 2 entirely (only the reviewers phase runs).
            // It looks like a debugging leftover - confirm and remove it to re-enable the full pipeline.
            goto PeopleData;

            using (WebRequests httpClient = new WebRequests())
            {
                // Minor Configuration of the Http Client - Ensures that the requests response will be in english
                // By doing so, we have no problems parsing the dates to their proper formats
                httpClient.Headers.Add(Consts.ACCEPT_LANGUAGE);
                httpClient.Host              = Consts.HOST;
                httpClient.Encoding          = "utf-8";
                httpClient.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;

                // Iterating over File Lines (App Urls) - To Extract Data, Not The Reviews Yet.
                foreach (string appUrl in File.ReadAllLines(inputFile))
                {
                    // Logging Progress
                    Console.WriteLine("\n => Processing App : " + appUrl);

                    // Executing Http Get Request for the Apps's Data - retried while the response is empty
                    String appDataResponse = String.Empty;
                    int    currentRetry    = 0;

                    do
                    {
                        // Http Get
                        appDataResponse = httpClient.Get(appUrl);

                        // Retry only while the response is empty AND retries are left
                        // (with "||" a good response was re-requested and an empty one looped forever)
                    } while (String.IsNullOrWhiteSpace(appDataResponse) && ++currentRetry <= _maxRetries);

                    // Sanity Check
                    if (String.IsNullOrWhiteSpace(appDataResponse))
                    {
                        Console.WriteLine("\t\t.Error - Failed to find page of app : " + appUrl + ". Skipping it");
                        continue;
                    }

                    Console.WriteLine("\t\t.Page Found. Firing Parser");

                    // Parsing App Data
                    AppModel appData = dataParser.ParseAppPage(appDataResponse, appUrl);

                    // Checking If this app is on the database already
                    string recordStatus;
                    if (mongoDB.AppProcessed(appUrl))
                    {
                        Console.WriteLine("\t\t.Previous Version of App Found. Updating It");
                        mongoDB.UpdateRecord(appData, "Url", appData.Url);
                        recordStatus = "Updated";
                    }
                    else
                    {
                        Console.WriteLine("\t\t.No Previous Version of the App Found. Adding to Database");
                        mongoDB.Insert <AppModel> (appData);
                        recordStatus = "Inserted";
                    }

                    // Updating App Status - indexer assignment, so an url repeated on the input
                    // file overwrites its entry instead of throwing on the duplicated key
                    _appStatus[appData.Url] = new AppStatusModel()
                    {
                        appId   = appData.Url.Replace(Consts.PLAY_STORE_PREFIX, String.Empty),
                        appUrl  = appData.Url,
                        appName = appData.Name,
                        status  = recordStatus
                    };
                }
            }

Reviews:
            // Next Phase: Parse Reviews of those Apps
            Console.WriteLine("\n => Parsing Complete. Obtaining Reviews");

            // Iterating again over app urls to parse the reviews from this app
            foreach (string appUrl in File.ReadAllLines(inputFile))
            {
                // Apps that failed on the previous phase have no status entry: skip them instead of throwing
                AppStatusModel appStatus;
                if (!_appStatus.TryGetValue(appUrl, out appStatus))
                {
                    Console.WriteLine("No parsed data for app : " + appUrl + ". Skipping its reviews");
                    continue;
                }

                // Reaching App Id
                string appID = appStatus.appId;

                // Reviews-Break-Parsing Flag
                bool shouldContinueParsing = true;

                // Single generator for the random pauses between pages of this app
                Random pauseGenerator = new Random();

                // Parsing Review Pages from the apps
                for (int currentPage = 1; /* no stop condition */; currentPage++)
                {
                    // Getting Reviews Data Bundle
                    string reviewsData = ReviewsWrapper.GetAppReviews(appID, currentPage);

                    // Checking for Blocking Situation
                    if (String.IsNullOrEmpty(reviewsData))
                    {
                        Console.WriteLine("Blocked by Play Store. Sleeping process for 10 minutes before retrying.");

                        // Thread Wait for 10 Minutes
                        Thread.Sleep(10 * 60 * 1000);

                        // Retrying the same page: undo the increment the loop is about to apply
                        currentPage--;
                        continue;
                    }

                    // Checking for "No Reviews" app
                    if (reviewsData.Length < 50)
                    {
                        Console.WriteLine("No Reviews left for this app. Skipping");
                        break;
                    }

                    // Normalizing Response to Proper HTML
                    reviewsData = ReviewsWrapper.NormalizeResponse(reviewsData);

                    // Iterating over Parsed Reviews
                    foreach (var review in dataParser.ParseReviews(reviewsData))
                    {
                        // Adding App Data to the review
                        review.appID   = appStatus.appId;
                        review.appName = appStatus.appName;
                        review.appURL  = appStatus.appUrl;

                        // Incrementing Reviews Count for this app
                        appStatus.reviews++;

                        // Adding Review Object to Database
                        review.timestamp = DateTime.Now;

                        // Building Query to check for duplicated review
                        var duplicatedReviewQuery = Query.EQ("permalink", review.permalink);

                        // Checking for duplicated review before inserting it
                        if (!mongoDB.FindMatch <AppReview> (duplicatedReviewQuery, 1, 0, Consts.REVIEWS_COLLECTION).Any())
                        {
                            // Inserting Review into MongoDB
                            mongoDB.Insert <AppReview> (review, Consts.REVIEWS_COLLECTION);
                        }
                        else
                        {
                            Console.WriteLine("Duplicated Review. Skipping App");

                            // When this happens, there are no more reviews to be parsed
                            shouldContinueParsing = false; // Skipping this apps processing
                        }
                    }

                    // Hiccup to avoid Blocking problems
                    Console.WriteLine("Parsed Reviews: " + appStatus.reviews);
                    Thread.Sleep(pauseGenerator.Next(14000, 21000));

                    if (!shouldContinueParsing)
                    {
                        break;
                    }
                }
            }

PeopleData:

            Console.WriteLine("\n\n => Processing People Data");

            Console.WriteLine("\nSimulating Google Login Using Selenium.");
            using (var firefoxDriver = new FirefoxDriver())
            {
                // Navigating to Dummy Url - One that is known to ask for a login
                firefoxDriver.Navigate().GoToUrl("https://play.google.com/store/people/details?id=101242565951396343093");

                // Reaching Login Fields
                var loginField    = firefoxDriver.FindElementById("Email");
                var passwordField = firefoxDriver.FindElementById("Passwd");
                var btnSignIn     = firefoxDriver.FindElementById("signIn");

                // Sending Credentials to the browser (placeholders - replace before running)
                loginField.SendKeys("YOUREMAIL");
                passwordField.SendKeys("YOURPASSWORD");
                btnSignIn.Click();

                // Reviewer urls are ignored until this one shows up; processing starts at it (inclusive)
                string lastPeople     = "https://play.google.com/store/people/details?id=115037241907660526856";
                bool   shouldcontinue = false;

                // Processing Reviewers Data
                foreach (string peopleUrl in mongoDB.FindPeopleUrls())
                {
                    // Skipping until last link
                    if (peopleUrl == lastPeople)
                    {
                        shouldcontinue = true;
                    }

                    if (!shouldcontinue)
                    {
                        continue;
                    }

                    // Navigating To the Reviewer Page
                    firefoxDriver.Navigate().GoToUrl(peopleUrl);

                    // Reading the html of the Reviewer page loaded by the browser
                    string reviewerPage = firefoxDriver.PageSource;

                    // Extracting Reviewer Data from the Page
                    ReviewerPageData reviewerData = dataParser.ParsePeopleData(reviewerPage);

                    // Adding Url to the model
                    reviewerData.reviewerUrl = peopleUrl;

                    // Inserting it to the database - If no previous record of this Reviewer is found
                    if (!mongoDB.IsReviewerOnDatabase(peopleUrl))
                    {
                        mongoDB.Insert <ReviewerPageData> (reviewerData, "ReviewersData");
                    }
                }
            }

            // End of Processing + Console Feedback
            Console.WriteLine("\n\n == Processing Summary ==");

            foreach (var status in _appStatus.Values)
            {
                // Message
                string cMessage = "=> App : {0} - Status {1} - Reviews : {2}";

                Console.WriteLine(String.Format(cMessage, status.appName, status.status, status.reviews));
            }

            Console.ReadLine();
        }
コード例 #23
0
        /// <summary>
        /// Posts a search for <paramref name="searchField"/> to the Play Store and keeps requesting
        /// further result pages (using the paging token found on each response), adding every app url
        /// that is neither processed nor already queued to the Mongo "QUEUE" collection.
        /// Paging ends when no token is found, when an url repeats, when the parser reports no
        /// results, or when the number of failed requests exceeds Consts.MAX_REQUEST_ERRORS.
        /// </summary>
        /// <param name="searchField">Search term formatted into Consts.CRAWL_URL</param>
        /// <param name="shouldUseProxies">When true, the proxy returned by ProxiesLoader.GetWebProxy is set on the request object</param>
        private static void CrawlStore(string searchField, bool shouldUseProxies)
        {
            _logger.Warn("Crawling Search Term : [ " + searchField + " ]");

            // Urls seen so far on this search; a repeated url means the paging wrapped around
            HashSet <String> seenUrls       = new HashSet <String> ();
            bool             pagingFinished = false;

            // Extracts the "pagToken" out of the Play Store response
            Regex tokenPattern = new Regex(@"GAEi+.+\:S\:.{11}\\42", RegexOptions.Compiled);

            // Last html received from the store
            string html;

            // MongoDB access, with the index on "Url" ensured before any lookup
            MongoDBWrapper mongoDB = new MongoDBWrapper();
            string         address = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, address, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);
            mongoDB.EnsureIndex("Url");

            PlayStoreParser parser = new PlayStoreParser();

            // Queues an url unless it was already processed or is already waiting on the queue
            Action <string> queueIfUnknown = appUrl =>
            {
                if (mongoDB.AppProcessed(Consts.APP_URL_PREFIX + appUrl) || mongoDB.AppQueued(appUrl))
                {
                    return;
                }

                mongoDB.AddToQueue(appUrl);
                Thread.Sleep(250);  // Hiccup
            };

            using (WebRequests server = new WebRequests())
            {
                // Request configuration
                server.Headers.Add(Consts.ACCEPT_LANGUAGE);
                server.Host      = Consts.HOST;
                server.UserAgent = Consts.GITHUBURL;
                server.Encoding  = "utf-8";

                if (shouldUseProxies)
                {
                    server.Proxy = ProxiesLoader.GetWebProxy();
                }

                // First page of results
                string crawlUrl = String.Format(Consts.CRAWL_URL, searchField);
                html = server.Post(crawlUrl, Consts.INITIAL_POST_DATA);

                foreach (string url in parser.ParseAppUrls(html))
                {
                    seenUrls.Add(url);
                    queueIfUnknown(url);
                }

                // Following pages, one request per paging token
                int failedRequests = 0;
                do
                {
                    var tokenMatch = tokenPattern.Match(html);

                    // Without a token there is no next page to ask for
                    if (!tokenMatch.Success)
                    {
                        break;
                    }

                    // Cleaning up the raw match into the token expected on the post data
                    string pagToken = tokenMatch.Value.Replace(":S:", "%3AS%3A").Replace("\\42", String.Empty).Replace(@"\\u003d", String.Empty);
                    string postData = String.Format(Consts.POST_DATA, pagToken);

                    html = server.Post(crawlUrl, postData);

                    if (server.StatusCode == System.Net.HttpStatusCode.OK)
                    {
                        foreach (string url in parser.ParseAppUrls(html))
                        {
                            // HashSet.Add returns false for an url that was already seen
                            if (!seenUrls.Add(url))
                            {
                                pagingFinished = true;
                                break;
                            }

                            queueIfUnknown(url);
                        }
                    }
                    else
                    {
                        _logger.Error("Http Error" + " - Status Code [ " + server.StatusCode + " ]");
                        failedRequests++;
                    }
                }  while (!pagingFinished && parser.AnyResultFound(html) && failedRequests <= Consts.MAX_REQUEST_ERRORS);
            }
        }
コード例 #24
0
        /// <summary>
        /// Entry point of the recorder worker. Loops forever: dequeues app messages from the SQS
        /// queue, deserializes each one into an AppleStoreAppModel, inserts it on MongoDB when it
        /// is not there yet, and deletes the message from the queue whether or not recording succeeded.
        /// While the queue is empty the wait between polls doubles each time, and restarts after 12 doublings.
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Creating needed Instances
            _logger = new LogWrapper();

            // Loading Configuration
            _logger.LogMessage("Loading Configurations from App.config");
            LoadConfiguration();

            // Initializing Queue
            _logger.LogMessage("Initializing Queue");
            AWSSQSHelper appsDataQueue = new AWSSQSHelper(_appsDataQueueName, _maxMessagesPerDequeue, _awsKey, _awsKeySecret);

            // Creating MongoDB Instance
            _logger.LogMessage("Loading MongoDB / Creating Instances");

            MongoDBWrapper mongoDB    = new MongoDBWrapper();
            string         serverAddr = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, serverAddr, 10000, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Setting Error Flag to No Error ( 0 )
            System.Environment.ExitCode = 0;

            // Exponent of the current empty-queue wait (wait = 2^fallbackWaitTime seconds)
            int fallbackWaitTime = 1;

            _logger.LogMessage("Started Processing App Urls");

            do
            {
                try
                {
                    // Dequeueing messages from the Queue
                    if (!appsDataQueue.DeQueueMessages())
                    {
                        Thread.Sleep(_hiccupTime);  // Hiccup
                        continue;
                    }

                    // Checking for no message received, and false positives situations
                    if (!appsDataQueue.AnyMessageReceived())
                    {
                        // If no message was found, increases the wait time
                        int waitTime;
                        if (fallbackWaitTime <= 12)
                        {
                            // Exponential increase on the wait time, truncated after 12 retries
                            waitTime = Convert.ToInt32(Math.Pow(2, fallbackWaitTime) * 1000);
                        }
                        else // Reseting Wait after 12 fallbacks
                        {
                            waitTime         = 2000;
                            fallbackWaitTime = 0;
                        }

                        fallbackWaitTime++;

                        // Sleeping before next try - waitTime is in milliseconds, the message reports seconds
                        Console.WriteLine("Fallback (seconds) => " + (waitTime / 1000));
                        Thread.Sleep(waitTime);
                        continue;
                    }

                    // Reseting fallback time
                    fallbackWaitTime = 1;

                    // Iterating over dequeued Messages
                    foreach (var appDataMessage in appsDataQueue.GetDequeuedMessages())
                    {
                        try
                        {
                            // Deserializing message
                            var appData = AppleStoreAppModel.FromJson(appDataMessage.Body);

                            // Checking for duplicates
                            if (!mongoDB.IsAppOnDatabase <AppleStoreAppModel> (appData.url))
                            {
                                // Recording App Data
                                mongoDB.Insert <AppleStoreAppModel> (appData);
                            }
                        }
                        catch (Exception ex)
                        {
                            _logger.LogMessage(ex.Message, "App Recording", BDC.BDCCommons.TLogEventLevel.Error);
                        }
                        finally
                        {
                            // Deleting the message, even when recording failed, so it is not delivered again
                            appsDataQueue.DeleteMessage(appDataMessage);
                        }
                    }
                }
                catch (Exception ex)
                {
                    _logger.LogMessage(ex);

                    // Pause before the next iteration so a persistent failure does not
                    // turn the endless loop into a busy spin that floods the log
                    Thread.Sleep(_hiccupTime);
                }
            } while (true);
        }
コード例 #25
0
        /// <summary>
        /// Entry point. Reads from MongoDB the apps whose "Uploaded" attribute is not true and sends
        /// them to Keen.IO in batches of 1000. When the number of events sent differs from the number
        /// processed, a second pass re-reads the still pending apps and sends them one at a time.
        /// </summary>
        /// <param name="args">Command line arguments (currently not used).</param>
        static void Main(string[] args)
        {
            // Loading Keen.IO Keys and Misc. from Config File
            _keenIOProjectID = ConfigurationManager.AppSettings["keenIOProjectID"];
            _keenIOMasterKey = ConfigurationManager.AppSettings["keenIOMasterKey"];
            _keenIOWriteKey  = ConfigurationManager.AppSettings["keenIOWriteKey"];
            _keenIOReadKey   = ConfigurationManager.AppSettings["keenIOReadKey"];
            _bucketName      = ConfigurationManager.AppSettings["keenIOBucketName"];

            // Configuring MongoDB Wrapper for connection and queries
            MongoDBWrapper mongoDB           = new MongoDBWrapper();
            string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Creating Keen.IO Variables
            var projectSettings = new ProjectSettingsProvider(_keenIOProjectID, _keenIOMasterKey, _keenIOWriteKey, _keenIOReadKey);
            var keenClient      = new KeenClient(projectSettings);

            var  eventsToSend   = new List <AppModel>();
            long totalProcessed = 0;
            long totalSent      = 0;

            DateTime start = DateTime.Now;

            // From This point on, you can change your code to reflect your own "Reading" logic
            // What I've done is simply read the records from the MongoDB database and Upload them to Keen.IO

            // NOTE(review): the guard below is commented out, so this flag normalisation runs on every
            // execution - confirm whether it was meant to run only when "reset" is passed.
            // if(args.Length != 0 && args[0] == "reset")
            {
                int count = 0;

                foreach (var currentApp in mongoDB.FindMatch <AppModel>(Query.NE("Uploaded", true)))
                {
                    mongoDB.SetUpdated(currentApp.Url, false);
                    ++count;

                    if ((count % 100) == 0)
                    {
                        Console.WriteLine("Reset update for {0}", count);
                    }
                }
            }

            // First pass: batches of 1000 events
            foreach (var currentApp in mongoDB.FindMatch <AppModel> (Query.NE("Uploaded", true)))
            {
                // Add first, flush when full: the app that completes a batch is part of it
                // (flushing before adding silently dropped one app per batch)
                eventsToSend.Add(currentApp);
                if (eventsToSend.Count < 1000)
                {
                    continue;
                }

                var sent = SendEventsToKeep(keenClient, eventsToSend, mongoDB);

                totalProcessed += eventsToSend.Count;
                totalSent      += sent;

                Console.WriteLine("processed {0} events took {1}: ({2} events per sec)", totalProcessed, DateTime.Now - start, ((double)totalProcessed) / (DateTime.Now - start).TotalSeconds);

                eventsToSend.Clear();
            }

            // Flushing the last, partially filled batch of the first pass
            if (eventsToSend.Count > 0)
            {
                var sent = SendEventsToKeep(keenClient, eventsToSend, mongoDB);

                totalProcessed += eventsToSend.Count;
                totalSent      += sent; // counted too, otherwise the totals below could never match

                eventsToSend.Clear();   // nothing from this pass may leak into the retry pass
            }

            Console.WriteLine("processed {0} events took {1}: ({2} events per sec)", totalProcessed, DateTime.Now - start, ((double)totalProcessed) / (DateTime.Now - start).TotalSeconds);

            // Second pass: some events were not accepted, so the pending apps are sent one by one
            if (totalProcessed != totalSent)
            {
                totalProcessed = 0;
                totalSent      = 0;

                foreach (var currentApp in mongoDB.FindMatch <AppModel>(Query.NE("Uploaded", true)))
                {
                    eventsToSend.Add(currentApp);

                    var sent = SendEventsToKeep(keenClient, eventsToSend, mongoDB);

                    totalProcessed += eventsToSend.Count;
                    totalSent      += sent;

                    Console.WriteLine("processed {0} events took {1}: ({2} events per sec)", totalProcessed, DateTime.Now - start, ((double)totalProcessed) / (DateTime.Now - start).TotalSeconds);

                    eventsToSend.Clear();
                }
            }
        }
コード例 #26
0
        /// <summary>
        /// Entry point. Iterates over the apps returned by a fixed MongoDB query, requests their
        /// review pages from the Play Store and inserts every review whose permalink is not yet
        /// present in the reviews collection.
        /// </summary>
        /// <param name="args">
        /// Exactly three integers, in this order: "AppsToProcess", "ReviewsPagePerApp" and "AppsToSkip".
        /// </param>
        static void Main(string[] args)
        {
            // Configuring Log Object
            Logger logger = LogManager.GetCurrentClassLogger();

            // Parsing Arguments
            logger.Info("Checking for Arguments");

            if (args == null || args.Length != 3)
            {
                // Single message string: passing the description as a second argument would make the logger
                // treat it as a format argument of a template without placeholders.
                // NOTE(review): assumes an NLog-style Logger - confirm.
                logger.Fatal("Arguments Fatal - Incorrect number of arguments received. Expected three: AppsToProcess, ReviewsPagePerApp and AppsToSkip.");
                return; // Halts.
            }

            logger.Info("Reading Arguments");

            // Reading actual arguments received (Int32.Parse throws on non-numeric input)
            _arguments.Add("AppsToProcess", Int32.Parse(args[0]));
            _arguments.Add("ReviewsPagePerApp", Int32.Parse(args[1]));
            _arguments.Add("AppsToSkip", Int32.Parse(args[2]));

            // Building MongoDB Query - This query specifies which applications you want to parse out the reviews
            // For more regarding MongoDB Queries, check the documentation on the project wiki page
            //var mongoQuery = Query.EQ ("Instalations", "1,000,000 - 5,000,000");
            var mongoQuery = Query.EQ("Category", "/store/apps/category/EDUCATION");

            logger.Info("Configuring MonboDB Client");

            // Creating instance of Mongo Handler for the main collection
            MongoDBWrapper mongoClient       = new MongoDBWrapper();
            string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoClient.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            logger.Info("Iterating over Apps");

            // Creating Play Store Parser
            PlayStoreParser parser = new PlayStoreParser();

            // Iterating over Query Results for the App Ids
            foreach (var appRecord in mongoClient.FindMatch<AppModel>(mongoQuery, _arguments["AppsToProcess"], _arguments["AppsToSkip"]))
            {
                // Extracting app ID from URL
                string appId = appRecord.Url.Replace(Consts.PLAY_STORE_PREFIX, String.Empty);

                // Console Feedback
                logger.Info("Processing App [ " + appRecord.Name + " ] ");

                // Never set to true at the moment: the code that would set it is commented out below
                bool shouldSkipApp = false;

                // Iterating over Review Pages up to the max received as argument
                for (int currentPage = 1; currentPage <= _arguments["ReviewsPagePerApp"]; currentPage++)
                {
                    // Checking for the need to skip this app in case of duplicated review
                    if (shouldSkipApp)
                    {
                        break;
                    }

                    try
                    {
                        // Page Feedback
                        logger.Info("\tCurrent Page: " + currentPage);

                        // Issuing Request for Reviews
                        string response = ReviewsWrapper.GetAppReviews(appId, currentPage);

                        // Checking for Blocking Situation
                        if (String.IsNullOrEmpty(response))
                        {
                            logger.Info("Blocked by Play Store. Sleeping process for 10 minutes before retrying.");

                            // Thread Wait for 10 Minutes
                            Thread.Sleep(10 * 60 * 1000);

                            // Actually retrying the same page (once). Previously the empty response fell
                            // through to "response.Length" and raised a NullReferenceException.
                            response = ReviewsWrapper.GetAppReviews(appId, currentPage);

                            if (String.IsNullOrEmpty(response))
                            {
                                logger.Info("Still no response after retrying. Moving to the next page.");
                                continue;
                            }
                        }

                        // Checking for "No Reviews" app
                        if (response.Length < 50)
                        {
                            logger.Info("No Reviews for this app. Skipping");
                            break;
                        }

                        // Normalizing Response to Proper HTML
                        response = ReviewsWrapper.NormalizeResponse(response);

                        // Iterating over Parsed Reviews
                        foreach (var review in parser.ParseReviews(response))
                        {
                            // Adding App Data to the review
                            review.appID   = appId;
                            review.appName = appRecord.Name;
                            review.appURL  = appRecord.Url;

                            // Adding processing timestamp to the model
                            review.timestamp = DateTime.Now;

                            // Building Query to check for duplicated review
                            var duplicatedReviewQuery = Query.EQ("permalink", review.permalink);

                            // Checking for duplicated review before inserting it
                            if (mongoClient.FindMatch<AppReview>(duplicatedReviewQuery, 1, 0, Consts.REVIEWS_COLLECTION).Count() == 0)
                            {
                                // Inserting Review into MongoDB
                                mongoClient.Insert<AppReview>(review, Consts.REVIEWS_COLLECTION);
                            }
                            else
                            {
                                // Only this review is skipped; skipping the whole app is currently disabled
                                logger.Info("Duplicated Review - Review already parsed. Skipping review");
                                //shouldSkipApp = true;
                                //break;
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        // A failing page is logged and the loop moves on to the next page
                        logger.Error(ex);
                    }
                }
            }
        }
コード例 #27
0
ファイル: Recorder.cs プロジェクト: W1N3/Apple-Store-Crawler
        /// <summary>
        /// Entry point of the recorder. Endlessly dequeues app-data messages from the SQS queue,
        /// deserializes them into <c>AppleStoreAppModel</c> records and batch-inserts the records
        /// into MongoDB once the buffer reaches a multiple of the batch size.
        /// </summary>
        /// <param name="args">Not read by this method.</param>
        static void Main (string[] args)
        {
            // Loading Configuration
            LogSetup.InitializeLog ("Apple_Store_Recorder.log", "info");
            _logger = LogManager.GetCurrentClassLogger ();

            // Loading Config
            _logger.Info ("Loading Configurations from App.config");
            LoadConfiguration ();

            // Initializing Queue
            _logger.Info ("Initializing Queue");
            AWSSQSHelper appsDataQueue = new AWSSQSHelper (_appsDataQueueName, _maxMessagesPerDequeue, _awsKey, _awsKeySecret);

            // Creating MongoDB Instance
            _logger.Info ("Loading MongoDB / Creating Instances");

            MongoDBWrapper mongoDB = new MongoDBWrapper ();
            string serverAddr      = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, serverAddr, 10000, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Setting Error Flag to No Error ( 0 )
            System.Environment.ExitCode = 0;

            // Exponent of the back-off wait used while the queue is empty
            int fallbackWaitTime = 1;

            // Buffers of records waiting to be inserted and of their originating messages
            List<AppleStoreAppModel> recordsBuffer  = new List<AppleStoreAppModel> ();
            List<Message>            messagesBuffer = new List<Message> ();

            // Insert Batch Size
            int batchSize = 1000;

            _logger.Info ("Started Recording App Data");

            do
            {
                try
                {
                    // Dequeueing messages from the Queue
                    if (!appsDataQueue.DeQueueMessages ())
                    {
                        Thread.Sleep (_hiccupTime); // Hiccup
                        continue;
                    }

                    // Checking for no message received, and false positives situations
                    if (!appsDataQueue.AnyMessageReceived ())
                    {
                        // If no message was found, increases the wait time (value is in milliseconds)
                        int waitTime;
                        if (fallbackWaitTime <= 12)
                        {
                            // Exponential increase on the wait time, truncated after 12 retries
                            waitTime = Convert.ToInt32 (Math.Pow (2, fallbackWaitTime) * 1000);
                        }
                        else // Reseting Wait after 12 fallbacks
                        {
                            waitTime         = 2000;
                            fallbackWaitTime = 0;
                        }

                        fallbackWaitTime++;

                        // Sleeping before next try. The label says "seconds", so the millisecond
                        // value is converted before printing (it is always a multiple of 1000).
                        Console.WriteLine ("Fallback (seconds) => " + (waitTime / 1000));
                        Thread.Sleep (waitTime);
                        continue;
                    }

                    // Reseting fallback time
                    fallbackWaitTime = 1;

                    // Iterating over dequeued Messages
                    foreach (var appDataMessage in appsDataQueue.GetDequeuedMessages ())
                    {
                        try
                        {
                            // Deserializing message
                            var appData = AppleStoreAppModel.FromJson (appDataMessage.Body);

                            // Dumping "Url" to "_id"
                            appData._id = appData.url;

                            // Adding it to the buffer of records to be recorded
                            recordsBuffer.Add (appData);

                            // Adding message to the buffer of messages to be deleted
                            messagesBuffer.Add (appDataMessage);

                            // Is it time to batch insert ?
                            if ((recordsBuffer.Count % batchSize) == 0)
                            {
                                // Batch Insertion
                                mongoDB.BatchInsert<AppleStoreAppModel> (recordsBuffer);

                                // Logging Feedback
                                _logger.Info ("\tApps Recorded : " + recordsBuffer.Count);

                                // Deleting Messages
                                messagesBuffer.ForEach ( (msg) => appsDataQueue.DeleteMessage (msg));

                                _logger.Info ("\tMessages Deleted: " + messagesBuffer.Count);

                                // Clearing Buffers
                                recordsBuffer.Clear ();
                                messagesBuffer.Clear ();
                            }
                        }
                        catch (Exception ex)
                        {
                            _logger.Error (ex);
                        }
                        finally
                        {
                            // Deleting the message, whether or not it was processed successfully.
                            // NOTE(review): this removes the message from the queue before its record
                            // has been batch-inserted, so buffered records are lost if the process
                            // stops or the insert fails - confirm this is intended.
                            appsDataQueue.DeleteMessage (appDataMessage);
                        }
                    }
                }
                catch (Exception ex)
                {
                    // Any failure of a whole dequeue cycle is logged and the loop keeps running
                    _logger.Error (ex);
                }

            } while (true);
        }
コード例 #28
0
 /// <summary>
 /// Releases this object's reference to the MongoDB wrapper by setting the field to null.
 /// </summary>
 public void Dispose() => _mongoDB = null;
コード例 #29
0
        ///  *** READ THIS BEFORE YOU START. ***
        ///  *** I MEAN IT, PLEASE, READ IT  ***
        /// 
        ///  This exporting helper will download ALL THE APPS found on the database, and
        ///  dump them to a CSV file (with headers).
        ///  
        ///  Note that, since the database is hosted on AWS, I will PAY (for the internet outbound traffic) if you execute a full database export,
        ///  so, if you are going to execute a full export, please, get in touch with me before running this project, or send me a donation
        ///  via paypal on [email protected]
        ///  
        ///  Also, be nice with the database.
        ///  
        ///  ** END OF WARNING ***
        /// <summary>
        /// Entry point. Appends a CSV header line followed by one line per app found on the database
        /// to the file received as argument, logging the number of processed apps every 10 seconds.
        /// </summary>
        /// <param name="args">Validated by <c>ValidateArgs</c>; the first element is the output file path.</param>
        static void Main(string[] args)
        {
            // Logs Counter
            int processedApps = 0;

            // Configuring Log Object Threshold
            LogWriter.Threshold = TLogEventLevel.Information;

            // Overriding LogWriter Event
            LogWriter.LogEvent += LogWriter_LogEvent;

            try
            {
                LogWriter.Info("Checking Arguments");

                // Periodic Log Timer. Held in a "using" block so it stays referenced while the export
                // runs and is stopped / released on every exit path.
                using (Timer loggingThread = new Timer(state =>
                {
                    LogWriter.Info("Processed Apps: " + processedApps);

                }, null, 10000, 10000))
                {
                    // Validating Arguments
                    if (!ValidateArgs(args))
                    {
                        LogWriter.Fatal("Invalid Args", "Args must have 1 element");
                        return;
                    }

                    LogWriter.Info("Checking Write Permissions on output Path");

                    // Validating Write Permissions on output path
                    if (!ValidateFilePermissions(args[0]))
                    {
                        LogWriter.Fatal("Insuficient Permissions", "Cannot write on path : " + args[0]);
                        return;
                    }

                    // Configuring MongoDB Wrapper
                    MongoDBWrapper mongoDB = new MongoDBWrapper();
                    string fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
                    mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

                    // Opening Output Stream (append mode, ISO-8859-1 encoding)
                    using (StreamWriter sWriter = new StreamWriter(args[0], true, Encoding.GetEncoding("ISO-8859-1")))
                    {
                        // Auto Flush Content
                        sWriter.AutoFlush = true;

                        // Writing Headers. Every fragment but the last ends with a comma so that no two
                        // column names get fused ("LastUpdateDate" and "AppSize" used to be).
                        String headersLine = "_id,Url,Name,Developer,IsTopDeveloper,DeveloperURL,PublicationDate,"
                                           + "Category,IsFree,Price,Reviewers,CoverImgUrl,Description,Score.Total,Score.Count,Score.FiveStars,"
                                           + "Score.FourStars,Score.ThreeStars,Score.TwoStars,Score.OneStars,LastUpdateDate,"
                                           + "AppSize,Instalations,CurrentVersion,MinimumOSVersion,ContentRating,HaveInAppPurchases,DeveloperEmail,DeveloperWebsite,DeveloperPrivacyPolicy";

                        sWriter.WriteLine(headersLine);

                        // Reading all apps from the database
                        foreach (AppModel app in mongoDB.FindAll<AppModel>())
                        {
                            try
                            {
                                // Writing line to File
                                sWriter.WriteLine(app.ToString());
                                processedApps++;
                            }
                            catch (Exception ex)
                            {
                                // A failing record is logged and the export carries on
                                LogWriter.Error(ex);
                            }
                        }
                    }
                }

                // Logging end of the Process
                LogWriter.Info("Finished Exporting Database");
            }
            finally
            {
                // Removing Event, also when leaving early because of invalid arguments
                LogWriter.LogEvent -= LogWriter_LogEvent;
            }
        }
コード例 #30
0
        /// <summary>
        /// Handles a confirmation request for a returning patient. Reads "localnoid", "fieldname",
        /// "confirmreponse", "computername" and "clinicarea" from the query string, loads the patient
        /// referenced by the local NoID and, depending on the field name, adds the patient to the
        /// pending queue.
        /// </summary>
        /// <remarks>
        /// Writes "yes" or "no" for "birthdate", "yes" for "failedchallenge", a "not implemented"
        /// message for "lastname" / "firstname" and a "no. Error occured..." message when an exception
        /// is caught. Nothing is written when no patient is found or the field name is not recognized.
        /// </remarks>
        /// <param name="context">Current HTTP context; the response is ended before returning.</param>
        public void ProcessRequest(HttpContext context)
        {
            context.Response.ContentType = "text/plain";
            try
            {
                foreach (String key in context.Request.QueryString.AllKeys)
                {
                    // A query string item without a name (e.g. "?value") is reported with a null key
                    if (key == null)
                    {
                        continue;
                    }

                    // Invariant lower-casing: parameter names must not depend on the server culture
                    switch (key.ToLowerInvariant())
                    {
                        case "localnoid":
                            _localNoID = context.Request.QueryString[key];
                            break;

                        case "fieldname":
                            _confirmFieldName = context.Request.QueryString[key];
                            break;

                        case "confirmreponse":
                            _confirmReponse = context.Request.QueryString[key];
                            break;

                        case "computername":
                            _computerName = context.Request.QueryString[key];
                            break;

                        case "clinicarea":
                            _clinicArea = context.Request.QueryString[key];
                            break;
                    }
                }

                MongoDBWrapper dbwrapper      = new MongoDBWrapper(NoIDMongoDBAddress, SparkMongoDBAddress);
                FhirClient     client         = new FhirClient(sparkEndpointAddress);
                string         sparkReference = dbwrapper.GetSparkID(_localNoID);
                string         sparkAddress   = sparkEndpointAddress.ToString() + "/Patient/" + sparkReference;
                Patient        pendingPatient = (Patient)client.Get(sparkAddress);

                if (pendingPatient != null)
                {
                    if (_confirmFieldName == "birthdate")
                    {
                        // The answer must match the stored birth date exactly
                        if (pendingPatient.BirthDate != null && _confirmReponse == pendingPatient.BirthDate)
                        {
                            QueuePendingPatient(dbwrapper, pendingPatient, sparkReference, "return");
                            context.Response.Write("yes");
                        }
                        else
                        {
                            context.Response.Write("no");
                        }
                    }
                    else if (_confirmFieldName == "lastname")
                    {
                        //TODO: implement lastname, use metaphone or just accept exact matches?
                        context.Response.Write("Error occurred.  " + _confirmFieldName + " is not implemented yet!");
                    }
                    else if (_confirmFieldName == "firstname")
                    {
                        //TODO: implement firstname, use root or just accept exact matches?
                        context.Response.Write("Error occurred.  " + _confirmFieldName + " is not implemented yet!");
                    }
                    else if (_confirmFieldName == "failedchallenge")
                    {
                        // Queued with the "return**" status instead of "return"
                        QueuePendingPatient(dbwrapper, pendingPatient, sparkReference, "return**");
                        context.Response.Write("yes");
                    }
                }
            }
            catch (Exception ex)
            {
                context.Response.Write("no. Error occured for LocalNoID = " + _localNoID + ".  UpdatePendingStatus::ProcessRequest: " + ex.Message);
            }
            context.Response.End();
        }

        /// <summary>
        /// Builds the session queue entry of a patient and adds it to the pending patients.
        /// </summary>
        /// <param name="dbwrapper">Database wrapper that receives the new entry.</param>
        /// <param name="pendingPatient">Patient being queued.</param>
        /// <param name="sparkReference">Spark ID obtained for the patient.</param>
        /// <param name="patientStatus">Status text forwarded to <c>Utilities.PatientToSessionQueue</c>.</param>
        private void QueuePendingPatient(MongoDBWrapper dbwrapper, Patient pendingPatient, string sparkReference, string patientStatus)
        {
            SessionQueue seq = Utilities.PatientToSessionQueue(pendingPatient, sparkReference, _localNoID, patientStatus, "pending");
            seq.SubmitDate          = DateTime.UtcNow;
            seq._id                 = StringUtilities.SHA256(DomainName + Guid.NewGuid().ToString() + NodeSalt);
            seq.SessionComputerName = _computerName;
            seq.ClinicArea          = _clinicArea;
            dbwrapper.AddPendingPatient(seq);
        }
コード例 #31
0
        /// <summary>
        /// Entry point. Repeatedly takes an app whose reviews were not visited yet, downloads the first
        /// page of its Play Store reviews, appends the reviews that are new to the app record and saves
        /// the record with the "Visited" or "Error" reviews status.
        /// </summary>
        /// <param name="args">
        /// Optional. When exactly one argument is received it is the path of a text file with the
        /// proxies to be used by the requests.
        /// </param>
        static void Main(string[] args)
        {
            // Configuring Log Object
            Logger logger = LogManager.GetCurrentClassLogger();

            // Control Variable (Bool - Should the process use proxies? )
            bool isUsingProxies = false;

            // Checking for the need to use proxies
            if (args != null && args.Length == 1)
            {
                // Setting flag to true
                isUsingProxies = true;

                // Loading proxies from .txt received as argument
                String fPath = args[0];

                // Sanity Check
                if (!File.Exists(fPath))
                {
                    logger.Fatal("Couldnt find proxies on path : " + fPath);
                    System.Environment.Exit(-100);
                }

                // Reading Proxies from File
                string[] fLines = File.ReadAllLines(fPath, Encoding.GetEncoding("UTF-8"));

                try
                {
                    // Actual Load of Proxies
                    ProxiesLoader.Load(fLines.ToList());
                }
                catch (Exception ex)
                {
                    logger.Fatal(ex);
                    System.Environment.Exit(-101);
                }
            }

            // MongoDB instance Creation
            logger.Info("Configuring MonboDB Client");

            // Creating instance of Mongo Handler for the main collection
            MongoDBWrapper mongoClient       = new MongoDBWrapper();
            string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoClient.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            logger.Info("Iterating over Apps");

            // Creating Play Store Parser
            PlayStoreParser parser = new PlayStoreParser();

            // App Model
            AppModel appRecord;

            // Finding all the "Apps" that didn't have the reviews visited yet
            while ((appRecord = mongoClient.FindAndModifyReviews()) != null)
            {
                // Control Variable - scoped to the current app, so that one failure does not
                // flag every following app with the "Error" status
                bool noError = true;

                // Extracting app ID from URL
                string appId = appRecord.Url.Replace(Consts.PLAY_STORE_PREFIX, String.Empty);

                // Console Feedback
                logger.Info("Processing App [ " + appRecord.Name + " ] ");

                try
                {
                    // Console Feedback
                    Console.Write("Reviews from : " + appRecord.Name);

                    // Issuing Request for Reviews
                    string response = ReviewsWrapper.GetAppReviews(appId, 1, isUsingProxies);

                    // Checking for Blocking Situation
                    if (String.IsNullOrEmpty(response))
                    {
                        logger.Info("Blocked by Play Store. Sleeping process for 10 seconds before retrying.");

                        // Thread Wait for 10 seconds
                        Thread.Sleep(TimeSpan.FromSeconds(10));

                        // Actually retrying (once). Previously the empty response fell through to
                        // "response.Length" and raised a NullReferenceException.
                        response = ReviewsWrapper.GetAppReviews(appId, 1, isUsingProxies);

                        if (String.IsNullOrEmpty(response))
                        {
                            logger.Error("No response from Play Store after retrying. Flagging app with Error status.");
                            Console.Write(" - No Response\n");
                            noError = false;
                            continue; // The "finally" block below still saves the record
                        }
                    }

                    // Checking for "No Reviews" app
                    if (response.Length < 50)
                    {
                        logger.Info("No Reviews for this app. Skipping");
                        Console.Write(" - No Reviews Found\n");
                        continue; // The "finally" block below still saves the record as "Visited"
                    }

                    // Normalizing Response to Proper HTML
                    response = ReviewsWrapper.NormalizeResponse(response);

                    // List of Reviews
                    List<AppReview> reviews = new List<AppReview>();

                    // Iterating over Parsed Reviews
                    foreach (var review in parser.ParseReviews(response))
                    {
                        // Adding App Data to the review
                        review.appID   = appId;
                        review.appName = appRecord.Name;
                        review.appURL  = appRecord.Url;

                        // Capture Timestamp to the model
                        review.timestamp = DateTime.Now;

                        // Adding reviews to the current list
                        reviews.Add(review);
                    }

                    // Any Review Found ?
                    if (reviews.Count > 0)
                    {
                        Console.Write(" - " + reviews.Count + " Reviews Found\n");

                        // Checking if there was any previous list of reviews
                        if (appRecord.Reviews == null)
                        {
                            appRecord.Reviews = reviews;
                        }
                        else // Previous List found - Appending only the new ones
                        {
                            foreach (var review in reviews)
                            {
                                if (!appRecord.Reviews.Any(t => t.permalink.Equals(review.permalink)))
                                {
                                    appRecord.Reviews.Add(review);
                                }
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    logger.Error(ex);
                    Console.ForegroundColor = ConsoleColor.Red;
                    Console.WriteLine("Error : " + ex.Message);
                    Console.ForegroundColor = ConsoleColor.White;
                    noError = false;
                }
                finally
                {
                    // Toggling back the "ReviewsStatus" attribute from the model
                    appRecord.ReviewsStatus = noError ? "Visited" : "Error";
                    mongoClient.SaveRecord<AppModel>(appRecord);
                }
            }
        }
コード例 #32
0
        /// <summary>
        /// Uploads a batch of apps to the "PlayStore2014" Keen.IO collection and flags each of them
        /// as updated on MongoDB.
        /// </summary>
        /// <param name="keenClient">Keen.IO client used for the upload.</param>
        /// <param name="eventsToSend">Apps to be uploaded.</param>
        /// <param name="mongoDB">Database wrapper used to flag the uploaded apps.</param>
        /// <returns>Number of apps in the batch, or 0 when any step raised an exception.</returns>
        private static int SendEventsToKeep(Keen.Core.KeenClient keenClient, List<AppModel> eventsToSend, MongoDBWrapper mongoDB)
        {
            int uploaded = 0;

            try
            {
                // Upload comes first: nothing is counted or flagged unless it succeeds
                keenClient.AddEvents("PlayStore2014", eventsToSend);

                _appsCounter += eventsToSend.Count;

                // Progress feedback whenever the running total is a multiple of 100
                bool isHundredMark = (_appsCounter % 100) == 0;
                if (isHundredMark)
                {
                    Console.WriteLine("Uploaded : " + _appsCounter);
                }

                // Flagging every uploaded app
                eventsToSend.ForEach(uploadedApp => mongoDB.SetUpdated(uploadedApp.Url));

                uploaded = eventsToSend.Count;
            }
            catch (Exception failure)
            {
                // Failure is only reported on the console; the result stays 0
                Console.WriteLine("\n\t" + failure.Message);
            }

            return uploaded;
        }
コード例 #33
0
ファイル: Crawler.cs プロジェクト: W1N3/GooglePlayAppsCrawler
        /// <summary>
        /// Executes a Search using the searchField as the search parameter, 
        /// paginates / scrolls the search results to the end adding all the url of apps
        /// it finds to a Mongo "QUEUE" collection
        /// </summary>
        /// <param name="searchField">Search term, formatted into the crawl url.</param>
        /// <param name="shouldUseProxies">When true, the requests go through a proxy obtained from <c>ProxiesLoader</c>.</param>
        private static void CrawlStore (string searchField, bool shouldUseProxies)
        {
            // Console Feedback
            _logger.Warn ("Crawling Search Term : [ " + searchField + " ]");

            // Hashset of urls used to keep track of what's been parsed already
            HashSet<String> foundUrls = new HashSet<String> ();

            // Control variable to avoid "Loop" on pagging
            bool isDonePagging = false;

            // Compiling Regular Expression used to parse the "pagToken" out of the Play Store
            Regex pagTokenRegex = new Regex (@"GAEi+.+\:S\:.{11}\\42", RegexOptions.Compiled);

            // HTML Response
            string response;

            // MongoDB Helper
            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB   = new MongoDBWrapper ();
            string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Ensuring the database has the proper index
            mongoDB.EnsureIndex ("Url");

            // Response Parser
            PlayStoreParser parser = new PlayStoreParser ();

            // Executing Web Requests
            using (WebRequests server = new WebRequests ())
            {
                // Creating Request Object
                server.Headers.Add (Consts.ACCEPT_LANGUAGE);
                server.Host      = Consts.HOST;
                server.UserAgent = Consts.GITHUBURL;
                server.Encoding  = "utf-8";

                // Checking for the need to use "HTTP Proxies"
                if (shouldUseProxies)
                {
                    server.Proxy = ProxiesLoader.GetWebProxy ();
                }

                // Executing Initial Request
                response = server.Post (String.Format (Consts.CRAWL_URL, searchField), Consts.INITIAL_POST_DATA);

                // Parsing Links out of Html Page (Initial Request)
                foreach (string url in parser.ParseAppUrls (response))
                {
                    // Keeping track of every url seen on the first page
                    foundUrls.Add (url);

                    // Checks whether the app have been already processed
                    // or is queued to be processed
                    if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
                    {
                        // Than, queue it :)
                        mongoDB.AddToQueue (url);
                        Thread.Sleep (250); // Hiccup
                    }
                }

                // Executing Requests for more Play Store Links
                int errorsCount = 0;
                do
                {
                    // Finding pagToken from HTML
                    var rgxMatch = pagTokenRegex.Match (response);

                    // If there's no match, there is no further page to request
                    if (!rgxMatch.Success)
                    {
                        break;
                    }

                    // Reading Match from Regex, and applying needed replacements
                    string pagToken = rgxMatch.Value.Replace (":S:", "%3AS%3A").Replace("\\42", String.Empty).Replace(@"\\u003d", String.Empty);

                    // Assembling new PostData with paging values
                    string postData = String.Format (Consts.POST_DATA, pagToken);

                    // Executing request for values
                    response = server.Post (String.Format (Consts.CRAWL_URL, searchField), postData);

                    // Checking Server Status
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        _logger.Error ("Http Error" + " - Status Code [ " + server.StatusCode + " ]");
                        errorsCount++;
                        continue; // Jumps to the loop condition, which checks the errors limit
                    }

                    // Parsing Links
                    foreach (string url in parser.ParseAppUrls (response))
                    {
                        // "Add" returns false for an url that was already seen, which means
                        // the paging started to repeat results
                        if (!foundUrls.Add (url))
                        {
                            isDonePagging = true;
                            break;
                        }

                        // Checks whether the app have been already processed
                        // or is queued to be processed
                        if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
                        {
                            // Than, queue it :)
                            mongoDB.AddToQueue (url);
                            Thread.Sleep (250); // Hiccup
                        }
                    }

                } while (!isDonePagging && parser.AnyResultFound (response) && errorsCount <= Consts.MAX_REQUEST_ERRORS);
            }
        }
コード例 #34
0
ファイル: Program.cs プロジェクト: uwtcat/PlayStoreCrawler
        /// <summary>
        /// Entry point of the reviews crawler: reads a page of apps from the main MongoDB
        /// collection and stores the reviews parsed for each of them in the reviews collection.
        /// </summary>
        /// <param name="args">
        /// Exactly three integers, in order: number of apps to process, maximum number of
        /// review pages to read per app, and number of apps to skip in the query results.
        /// </param>
        static void Main (string[] args)
        {
            // Configuring Log Object Threshold
            LogWriter.Threshold = TLogEventLevel.Information;
            LogWriter.LogEvent  += LogWriter_LogEvent;

            // Parsing Arguments
            LogWriter.Info ("Checking for Arguments");

            if (args == null || args.Length != 3)
            {
                // Three arguments are read below, so the hint must say "three"
                LogWriter.Fatal ("Arguments Fatal", "Incorrect number of arguments received. Try passing three.");
                return; // Halts.
            }

            LogWriter.Info ("Reading Arguments");

            // Reading actual arguments received. TryParse is used so that a non-numeric
            // argument is reported through the log instead of crashing with a FormatException
            int appsToProcess;
            int reviewsPagePerApp;
            int appsToSkip;

            if (!Int32.TryParse (args[0], out appsToProcess)
                || !Int32.TryParse (args[1], out reviewsPagePerApp)
                || !Int32.TryParse (args[2], out appsToSkip))
            {
                LogWriter.Fatal ("Arguments Fatal", "All three arguments must be integers.");
                return; // Halts.
            }

            _arguments.Add ("AppsToProcess", appsToProcess);
            _arguments.Add ("ReviewsPagePerApp", reviewsPagePerApp);
            _arguments.Add ("AppsToSkip", appsToSkip);

            // Building MongoDB Query - This query specifies which applications you want to parse out the reviews
            // For more regarding MongoDB Queries, check the documentation on the project wiki page
            //var mongoQuery = Query.EQ ("Instalations", "1,000,000 - 5,000,000");
            var mongoQuery = Query.EQ ("Category", "/store/apps/category/EDUCATION");

            LogWriter.Info ("Configuring MonboDB Client");

            // Creating instance of Mongo Handler for the main collection
            MongoDBWrapper mongoClient = new MongoDBWrapper ();
            string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoClient.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            LogWriter.Info ("Iterating over Apps");

            // App URL Prefix (must be removed in order to obtain the app ID)
            string playStorePrefix = "https://play.google.com/store/apps/details?id=";

            // Creating Play Store Parser
            PlayStoreParser parser = new PlayStoreParser ();

            // Iterating over Query Results for the App Ids
            foreach (var appRecord in mongoClient.FindMatch<AppModel>(mongoQuery, _arguments["AppsToProcess"], _arguments["AppsToSkip"]))
            {
                // Extracting app ID from URL
                string appId = appRecord.Url.Replace(playStorePrefix, String.Empty);

                // Console Feedback
                LogWriter.Info("Processing App [ " + appRecord.Name + " ] ");

                // Never set to true at the moment: the assignment in the duplicated-review
                // branch below is commented out
                bool shouldSkipApp = false;

                // Iterating over Review Pages up to the max received as argument
                for (int currentPage = 1; currentPage <= _arguments["ReviewsPagePerApp"]; currentPage++)
                {
                    // Checking for the need to skip this app in case of duplicated review
                    if (shouldSkipApp)
                        break;

                    try
                    {
                        // Page Feedback
                        LogWriter.Info("\tCurrent Page: " + currentPage);

                        // Issuing Request for Reviews
                        string response = GetAppReviews(appId, currentPage);

                        // Checking for Blocking Situation
                        if (String.IsNullOrEmpty(response))
                        {
                            LogWriter.Info("Blocked by Play Store. Sleeping process for 10 minutes before retrying.");

                            // Thread Wait for 10 Minutes
                            Thread.Sleep(10 * 60 * 1000);

                            // Actually retrying the same page once, as the message above announces
                            response = GetAppReviews(appId, currentPage);
                        }

                        // Checking for "No Reviews" app. The null/empty test comes first so that a
                        // response that is still missing after the retry ends this app's paging
                        // instead of throwing on response.Length
                        if (String.IsNullOrEmpty(response) || response.Length < 50)
                        {
                            LogWriter.Info("No Reviews for this app. Skipping");
                            break;
                        }

                        // Normalizing Response to Proper HTML
                        response = NormalizeResponse(response);

                        // Iterating over Parsed Reviews
                        foreach (var review in parser.ParseReviews(response))
                        {
                            // Adding App Data to the review
                            review.appID = appId;
                            review.appName = appRecord.Name;
                            review.appURL = appRecord.Url;

                            // Adding processing timestamp to the model
                            review.timestamp = DateTime.Now;

                            // Building Query to check for duplicated review
                            var duplicatedReviewQuery = Query.EQ("permalink", review.permalink);

                            // Checking for duplicated review before inserting it
                            if (mongoClient.FindMatch<AppReview>(duplicatedReviewQuery, 1, 0, Consts.REVIEWS_COLLECTION).Count() == 0)
                            {
                                // Inserting Review into MongoDB
                                mongoClient.Insert<AppReview>(review, Consts.REVIEWS_COLLECTION);
                            }
                            else
                            {
                                // NOTE(review): the log text says the app is skipped, but the skip
                                // itself is commented out, so the remaining reviews and pages are
                                // still processed - confirm which behavior is wanted
                                LogWriter.Info("Duplicated Review", "Review already parsed. Skipping App");
                                //shouldSkipApp = true;
                                //break;
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        // A failure on one page is logged and the next page is attempted
                        LogWriter.Error(ex);
                    }
                }
            }
        }
コード例 #35
0
        /// <summary>
        /// Entry point of the Keen.IO uploader: loads the Keen.IO keys from the configuration
        /// file, resets the update flag of every app that is not marked as uploaded, and then
        /// sends those apps to Keen.IO in batches. When fewer events were sent than processed,
        /// a second pass sends the remaining apps one at a time.
        /// </summary>
        /// <param name="args">Command line arguments (currently not read).</param>
        static void Main(string[] args)
        {
            // Loading Keen.IO Keys and Misc. from Config File
            _keenIOProjectID = ConfigurationManager.AppSettings["keenIOProjectID"];
            _keenIOMasterKey = ConfigurationManager.AppSettings["keenIOMasterKey"];
            _keenIOWriteKey  = ConfigurationManager.AppSettings["keenIOWriteKey"];
            _keenIOReadKey   = ConfigurationManager.AppSettings["keenIOReadKey"];
            _bucketName      = ConfigurationManager.AppSettings["keenIOBucketName"];

            // Configuring MongoDB Wrapper for connection and queries
            MongoDBWrapper mongoDB   = new MongoDBWrapper ();
            string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Creating Keen.IO Variables
            var projectSettings = new ProjectSettingsProvider (_keenIOProjectID, _keenIOMasterKey, _keenIOWriteKey, _keenIOReadKey);
            var keenClient      = new KeenClient (projectSettings);

            long totalProcessed = 0;
            long totalSent = 0;

            DateTime start = DateTime.Now;

            // From This point on, you can change your code to reflect your own "Reading" logic
            // What I've done is simply read the records from the MongoDB database and Upload them to Keen.IO

            // The guard below is commented out, so the reset runs on every execution
            // if(args.Length != 0 && args[0] == "reset")
            {
                int count = 0;

                foreach (var currentApp in mongoDB.FindMatch<AppModel>(Query.NE("Uploaded", true)))
                {
                    mongoDB.SetUpdated(currentApp.Url, false);
                    ++count;

                    if((count % 100) == 0)
                    {
                        Console.WriteLine("Reset update for {0}", count);
                    }
                }
            }

            // First pass: batches of 1000 events
            UploadPendingApps(keenClient, mongoDB, 1000, start, ref totalProcessed, ref totalSent);

            // Second pass: when some events were not sent, retry the apps that are still
            // not marked as uploaded, one event per request
            if(totalProcessed != totalSent)
            {
                totalProcessed = 0;
                totalSent = 0;

                UploadPendingApps(keenClient, mongoDB, 1, start, ref totalProcessed, ref totalSent);
            }
        }

        /// <summary>
        /// Reads every app that is not marked as uploaded and sends them in batches of
        /// <paramref name="batchSize"/>. Each app is added to the batch BEFORE the size check,
        /// so the record that fills a batch is sent with it instead of being dropped, and a
        /// fresh list is used per pass so no stale batch is sent twice.
        /// </summary>
        private static void UploadPendingApps(KeenClient keenClient, MongoDBWrapper mongoDB, int batchSize, DateTime start, ref long totalProcessed, ref long totalSent)
        {
            var eventsToSend = new List<AppModel>();

            foreach (var currentApp in mongoDB.FindMatch<AppModel>(Query.NE("Uploaded", true)))
            {
                eventsToSend.Add(currentApp);

                if (eventsToSend.Count >= batchSize)
                {
                    SendBatch(keenClient, mongoDB, eventsToSend, start, ref totalProcessed, ref totalSent);
                }
            }

            // Flushing the last, partially filled batch (nothing to do when it is empty)
            if (eventsToSend.Count > 0)
            {
                SendBatch(keenClient, mongoDB, eventsToSend, start, ref totalProcessed, ref totalSent);
            }
        }

        /// <summary>
        /// Sends one batch, adds its size to the processed counter and the value returned by
        /// SendEventsToKeep to the sent counter, prints the progress line and empties the batch.
        /// </summary>
        private static void SendBatch(KeenClient keenClient, MongoDBWrapper mongoDB, List<AppModel> batch, DateTime start, ref long totalProcessed, ref long totalSent)
        {
            var sent = SendEventsToKeep(keenClient, batch, mongoDB);

            totalProcessed += batch.Count;
            totalSent += sent;

            Console.WriteLine("processed {0} events took {1}: ({2} events per sec)", totalProcessed, DateTime.Now - start, ((double)totalProcessed) / (DateTime.Now - start).TotalSeconds);

            batch.Clear();
        }
コード例 #36
0
        /// <summary>
        /// Executes the initial crawl request against the Play Store and then pages / scrolls
        /// through the results, adding the url of every app that is neither processed nor
        /// already queued to the MongoDB queue. Paging stops when a page yields no results or
        /// when more than Consts.MAX_REQUEST_ERRORS requests have failed.
        /// </summary>
        /// <param name="searchField">
        /// Search term being crawled. NOTE(review): within this method it is only written to
        /// the console; the requests are built from Consts values alone - confirm that the
        /// term is meant to be part of the request.
        /// </param>
        private static void CrawlStore (string searchField)
        {
            // Console Feedback
            Console.WriteLine ("Crawling Search Term : [ " + searchField + " ]");

            // MongoDB Helper
            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB   = new MongoDBWrapper ();
            string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Response Parser
            PlayStoreParser parser = new PlayStoreParser ();

            // Executing Web Requests
            using (WebRequests server = new WebRequests ())
            {
                // Creating Request Object
                server.Host = Consts.HOST;

                // Executing Initial Request
                string response = server.Post (Consts.CRAWL_URL, Consts.INITIAL_POST_DATA);

                // Parsing Links out of Html Page (Initial Request)
                QueueUnseenApps (parser, response, mongoDB);

                // Executing Requests for more Play Store Links
                int initialSkip       = 48;
                int currentMultiplier = 1;
                int errorsCount       = 0;
                bool retrySamePage    = false;
                do
                {
                    retrySamePage = false;

                    // Assembling new PostData with paging values
                    string postData = String.Format (Consts.POST_DATA, (initialSkip * currentMultiplier));

                    // Executing request for values
                    response = server.Post (Consts.CRAWL_URL, postData);

                    // Checking Server Status
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        LogWriter.Error ("Http Error", "Status Code [ " + server.StatusCode + " ]");
                        errorsCount++;

                        // The multiplier is not incremented, so the same page is requested again.
                        // The flag keeps the failed response away from the parser in the loop
                        // condition, where it could end the paging (or throw) on an error body
                        retrySamePage = true;
                        continue;
                    }

                    // Parsing Links
                    QueueUnseenApps (parser, response, mongoDB);

                    // Incrementing Paging Multiplier
                    currentMultiplier++;

                }  while ((retrySamePage || parser.AnyResultFound (response)) && errorsCount <= Consts.MAX_REQUEST_ERRORS);
            }
        }

        /// <summary>
        /// Parses the app urls out of a response page and queues each one that is neither
        /// processed nor already queued, printing one console line per url.
        /// </summary>
        private static void QueueUnseenApps (PlayStoreParser parser, string response, MongoDBWrapper mongoDB)
        {
            foreach (string url in parser.ParseAppUrls (response))
            {
                // Checks whether the app have been already processed
                // or is queued to be processed
                if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
                {
                    // Console Feedback
                    Console.WriteLine (" . Queued App");

                    // Than, queue it :)
                    mongoDB.AddToQueue (url);
                }
                else
                {
                    // Console Feedback
                    Console.WriteLine (" . Duplicated App. Skipped");
                }
            }
        }
コード例 #37
0
        /// <summary>
        /// Entry point. The method is written as three phases followed by a summary:
        /// (1) download and store the data of every app url listed in the input file,
        /// (2) download and store the reviews of those apps, and
        /// (3) log in through a Selenium-driven Firefox and store the reviewers' pages.
        /// As the code stands, an unconditional <c>goto PeopleData</c> jumps straight to
        /// phase (3), so phases (1) and (2) are not executed.
        /// </summary>
        /// <param name="args">Exactly one argument: path of a text file with one app url per line.</param>
        static void Main (string[] args)
        {
            // Checking for Input Parameters
            if (args == null || args.Length != 1)
            {
                Console.WriteLine ("Incorrect number of arguments received. Expected One");
                System.Environment.Exit (-100);
            }

            // Human Readable Variable
            string inputFile = args[0];

            // Checking if the Input file received exists
            if (!File.Exists (inputFile))
            {
                Console.WriteLine (String.Format("Received input file does not exist : {0}", inputFile));
                System.Environment.Exit (-101);
            }

            // App Status 
            _appStatus = new Dictionary<String, AppStatusModel> ();

            // Creating Instance of Database Manager
            MongoDBWrapper mongoDB   = new MongoDBWrapper ();
            string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Creating Instance of Parser
            PlayStoreParser dataParser = new PlayStoreParser ();

            // Single generator shared by every "hiccup" wait of the reviews phase
            Random hiccupRandom = new Random ();

            // NOTE(review): this jump skips the app data and reviews phases entirely, which
            // leaves _appStatus empty for the summary at the end. It looks like a leftover of
            // a manually resumed run - confirm before removing it.
            goto PeopleData;

            using (WebRequests httpClient = new WebRequests ())
            {
                // Minor Configuration of the Http Client - Ensures that the requests response will be in english
                // By doing so, we have no problems parsing the dates to their proper formats
                httpClient.Headers.Add (Consts.ACCEPT_LANGUAGE);
                httpClient.Host     = Consts.HOST;
                httpClient.Encoding = "utf-8";
                httpClient.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;

                // Iterating over File Lines (App Urls) - To Extract Data, Not The Reviews Yet.
                foreach (string appUrl in File.ReadAllLines (inputFile))
                {
                    // Logging Progress
                    Console.WriteLine ("\n => Processing App : " + appUrl);

                    // Executing Http Get Request for the Apps's Data - retried up to _maxRetries times
                    String appDataResponse = String.Empty;
                    int currentRetry       = 0;

                    do
                    {
                        // Http Get
                        appDataResponse = httpClient.Get (appUrl);

                        // Retry only while the response is empty AND retries are left. With "||"
                        // an empty response looped forever and a good one was fetched again
                    } while (String.IsNullOrWhiteSpace (appDataResponse) && ++currentRetry <= _maxRetries);

                    // Sanity Check
                    if (String.IsNullOrWhiteSpace (appDataResponse))
                    {
                        Console.WriteLine ("\t\t.Error - Failed to find page of app : " + appUrl + ". Skipping it");
                        continue;
                    }

                    Console.WriteLine("\t\t.Page Found. Firing Parser");

                    // Parsing App Data
                    AppModel appData = dataParser.ParseAppPage (appDataResponse, appUrl);

                    // Checking If this app is on the database already
                    string statusLabel;
                    if (mongoDB.AppProcessed (appUrl))
                    {
                        Console.WriteLine ("\t\t.Previous Version of App Found. Updating It");
                        mongoDB.UpdateRecord (appData, "Url", appData.Url);
                        statusLabel = "Updated";
                    }
                    else
                    {
                        Console.WriteLine ("\t\t.No Previous Version of the App Found. Adding to Database");
                        mongoDB.Insert<AppModel> (appData);
                        statusLabel = "Inserted";
                    }

                    // Updating App Status. The indexer is used instead of Add so that a url
                    // listed twice in the input file overwrites its entry instead of throwing
                    _appStatus[appData.Url] = new AppStatusModel ()
                    {
                        appId   = appData.Url.Replace (Consts.PLAY_STORE_PREFIX, String.Empty),
                        appUrl  = appData.Url,
                        appName = appData.Name,
                        status  = statusLabel
                    };
                }
            }

            Reviews:
            // Next Phase: Parse Reviews of those Apps
            Console.WriteLine ("\n => Parsing Complete. Obtaining Reviews");

            // Iterating again over app urls to parse the reviews from this app
            foreach (string appUrl in File.ReadAllLines (inputFile))
            {
                // Apps skipped by the previous phase have no status entry: skip their reviews
                // instead of failing with a KeyNotFoundException
                AppStatusModel currentStatus;
                if (!_appStatus.TryGetValue (appUrl, out currentStatus))
                {
                    Console.WriteLine ("No app data recorded for : " + appUrl + ". Skipping its reviews");
                    continue;
                }

                // Reaching App Id
                string appID = currentStatus.appId;

                // Reviews-Break-Parsing Flag
                bool shouldContinueParsing = true;

                // Parsing Review Pages from the apps
                for (int currentPage = 1; /* no stop condition */; currentPage++)
                {
                    // Getting Reviews Data Bundle
                    string reviewsData = ReviewsWrapper.GetAppReviews (appID, currentPage);

                    // Checking for Blocking Situation
                    if (String.IsNullOrEmpty (reviewsData))
                    {
                        Console.WriteLine("Blocked by Play Store. Sleeping process for 10 minutes before retrying.");

                        // Thread Wait for 10 Minutes
                        Thread.Sleep (10 * 60 * 1000);

                        // Actually retrying the same page once, as the message above announces
                        reviewsData = ReviewsWrapper.GetAppReviews (appID, currentPage);
                    }

                    // Checking for "No Reviews" app. The null/empty test comes first so that a
                    // response that is still missing after the retry ends this app's paging
                    // instead of throwing on reviewsData.Length
                    if (String.IsNullOrEmpty (reviewsData) || reviewsData.Length < 50)
                    {
                        Console.WriteLine ("No Reviews left for this app. Skipping");
                        break;
                    }

                    // Normalizing Response to Proper HTML
                    reviewsData = ReviewsWrapper.NormalizeResponse (reviewsData);

                    // Iterating over Parsed Reviews
                    foreach (var review in dataParser.ParseReviews (reviewsData))
                    {
                        // Adding App Data to the review
                        review.appID   = currentStatus.appId;
                        review.appName = currentStatus.appName;
                        review.appURL  = currentStatus.appUrl;

                        // Incrementing Reviews Count for this app
                        currentStatus.reviews++;

                        // Adding Review Object to Database
                        review.timestamp = DateTime.Now;

                        // Building Query to check for duplicated review
                        var duplicatedReviewQuery = Query.EQ ("permalink", review.permalink);

                        // Checking for duplicated review before inserting it
                        if (mongoDB.FindMatch<AppReview> (duplicatedReviewQuery, 1, 0, Consts.REVIEWS_COLLECTION).Count () == 0)
                        {
                            // Inserting Review into MongoDB
                            mongoDB.Insert<AppReview> (review, Consts.REVIEWS_COLLECTION);
                        }
                        else
                        {
                            Console.WriteLine ("Duplicated Review. Skipping App");

                            // When this happens, there are no more reviews to be parsed.
                            // The rest of the current page is still walked; paging stops after it
                            shouldContinueParsing = false; // Skipping this apps processing
                        }
                    }

                    // Hiccup to avoid Blocking problems
                    Console.WriteLine ("Parsed Reviews: " + currentStatus.reviews);
                    Thread.Sleep (hiccupRandom.Next (14000, 21000));

                    if (!shouldContinueParsing)
                    {
                        break;
                    }
                }
            }

            PeopleData:

            Console.WriteLine ("\n\n => Processing People Data");

            Console.WriteLine ("\nSimulating Google Login Using Selenium.");
            using (var firefoxDriver = new FirefoxDriver ())
            {
                // Navigating to Dummy Url - One that I Know that well be asked for a login
                firefoxDriver.Navigate ().GoToUrl ("https://play.google.com/store/people/details?id=101242565951396343093");

                // Reaching Login Fields
                var loginField    = firefoxDriver.FindElementById ("Email");
                var passwordField = firefoxDriver.FindElementById ("Passwd");
                var btnSignIn     = firefoxDriver.FindElementById ("signIn");

                // Sending Credentials to the browser (placeholders to be replaced before running)
                loginField.SendKeys ("YOUREMAIL");
                passwordField.SendKeys ("YOURPASSWORD");
                btnSignIn.Click ();

                // Resume point: every url returned before this one is skipped. If this url is
                // never returned, no reviewer is processed at all
                string lastPeople = "https://play.google.com/store/people/details?id=115037241907660526856";
                bool shouldcontinue = false;

                // Processing Reviewers Data
                foreach (string peopleUrl in mongoDB.FindPeopleUrls ())
                {
                    // Skipping until last link
                    if (peopleUrl == lastPeople)
                    {
                        shouldcontinue = true;
                    }

                    if (!shouldcontinue) continue;

                    // Navigating To the Reviewer Page
                    firefoxDriver.Navigate ().GoToUrl (peopleUrl);

                    // Executing Get Request for the Reviewer page on Google Play
                    string reviewerPage = firefoxDriver.PageSource;

                    // Extracting Reviewer Data from the Page
                    ReviewerPageData reviewerData = dataParser.ParsePeopleData (reviewerPage);

                    // Adding Url to the model
                    reviewerData.reviewerUrl = peopleUrl;

                    // Inserting it to the database - If no previous record of this Reviewer is found
                    if (!mongoDB.IsReviewerOnDatabase (peopleUrl))
                    {
                        mongoDB.Insert<ReviewerPageData> (reviewerData, "ReviewersData");
                    }
                }
            }

            // End of Processing + Console Feedback
            Console.WriteLine ("\n\n == Processing Summary ==");

            foreach (var status in _appStatus.Select (t => t.Value))
            {
                // Message
                string cMessage = "=> App : {0} - Status {1} - Reviews : {2}";

                Console.WriteLine (String.Format (cMessage, status.appName, status.status, status.reviews));
            }

            Console.ReadLine ();
        }
コード例 #38
0
ファイル: Worker.cs プロジェクト: uwtcat/PlayStoreCrawler
        /// <summary>
        /// Entry point of the worker piece of the process
        /// Notice that you can run as many workers as you want to in order to make the crawling faster
        /// </summary>
        /// <param name="args"></param>
        static void Main(string[] args)
        {
            // Configuring Log Object Threshold
            LogWriter.Threshold = TLogEventLevel.Information;
            LogWriter.Info ("Worker Started");

            // Parser
            PlayStoreParser parser = new PlayStoreParser();

            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB   = new MongoDBWrapper();
            string fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Creating Instance of Web Requests Server
            WebRequests server = new WebRequests ();
            
            QueuedApp app;

            // Retry Counter (Used for exponential wait increasing logic)
            int retryCounter = 0;

            // Iterating Over MongoDB Records while no document is found to be processed                
            while ((app = mongoDB.FindAndModify ()) != null)
            {
                try
                {
                    // Building APP URL
                    string appUrl = Consts.APP_URL_PREFIX + app.Url;

                    // Checking if this app is on the database already
                    if (mongoDB.AppProcessed(appUrl))
                    {
                        // Console Feedback, Comment this line to disable if you want to
                        Console.WriteLine("Duplicated App, skipped.");

                        // Delete it from the queue and continues the loop
                        mongoDB.RemoveFromQueue (app.Url);
                        continue;
                    }

                    // Vu
                    // Check if the app does not meet criteria
                    if (app.NotMeetCrit)
                    {
                        Console.WriteLine("App Not meet Criteria, Skipped.");
                    }

                    // Configuring server and Issuing Request
                    server.Headers.Add (Consts.ACCEPT_LANGUAGE);
                    server.Host              = Consts.HOST;
                    server.Encoding          = "utf-8";
                    server.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;
                    string response          = server.Get (appUrl);

                    // Flag Indicating Success while processing and parsing this app
                    bool ProcessingWorked = true;

                    // Sanity Check
                    if (String.IsNullOrEmpty (response) || server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        LogWriter.Info ("Error opening app page : " + appUrl);
                        ProcessingWorked = false;
                        
                        // Renewing WebRequest Object to get rid of Cookies
                        server = new WebRequests ();

                        // Inc. retry counter
                        retryCounter++;

                        Console.WriteLine ("Retrying:" + retryCounter);

                        // Checking for maximum retry count
                        double waitTime;
                        if (retryCounter >= 7)
                        {
                            waitTime = TimeSpan.FromMinutes (35).TotalMilliseconds;

                            // Removing App from the database (this the app page may have expired)
                            mongoDB.RemoveFromQueue (app.Url);

                            Process.Start ("PlayStoreWorker.exe");
                            Process.GetCurrentProcess ().Kill ();
                        }
                        else
                        {
                            // Calculating next wait time ( 2 ^ retryCounter seconds)
                            waitTime = TimeSpan.FromSeconds (Math.Pow (2, retryCounter)).TotalMilliseconds;
                        }

                        // Hiccup to avoid google blocking connections in case of heavy traffic from the same IP
                        Thread.Sleep (Convert.ToInt32 (waitTime));
                    }
                    else
                    {
                        // Reseting retry counter
                        retryCounter = 0;

                        // Parsing Useful App Data
                        AppModel parsedApp = parser.ParseAppPage (response, appUrl);

                        // Vu
                        // Here is where insert the app into the ProcessedApps Database.
                        // Attemp to check for the condition base on number of instalation and rating

                        // First split the string into the string array
                        string[] installations;
                        string[] separators = new string[] { " - " };
                        // Getting the Installation number for the current app
                        installations = parsedApp.Instalations.Split(separators, StringSplitOptions.RemoveEmptyEntries);
                        installations[0] = installations[0].Replace(",", "");   // replace the "," in the number of installations
                        installations[1] = installations[1].Replace(",", "");
                        long install_num = 0;
                        try {
                            install_num = Convert.ToInt64(installations[0]);
                        }
                        catch (OverflowException) {
                            Console.WriteLine("{0} is outside the range of the Int64 type.");
                        }
                        catch (FormatException) {
                            Console.WriteLine("The {0} value '{1}' is not recognizable");
                        }
                        
                        bool removed = false;
                        // Getting the rating for the current app
                        double rating = parsedApp.Score.Total;

                        // Getting the developer name ( company name)
                        string developer = parsedApp.Developer;
                                               
                        // if the installation number is less than 1000,000 
                        // OR rating less than 3 stars
                        // OR appName is empty
                        // -> skip the app

                        string appName = parsedApp.Name;
                        if (install_num < 1000000 || rating < 3.5 || appName == "" || appName == null)
                        {
                            Console.WriteLine("Cannot add app <" + appName + "> -- NOT MEET CRITERIA");
                            // TODO: Update the NotMeetCriteria
                            // Removing App from the database
                            mongoDB.RemoveFromQueue(app.Url);
                            removed = true;
                        }
                        // Inserting App into MONGO_COLLECTION collection
                        // if the Insert func return false, then print a message indicates that
                        if (ProcessingWorked && !mongoDB.Insert<AppModel>(parsedApp) && !removed)
                        {
                            Console.WriteLine("Cannot add app <" + appName + "> -- FAIL TO ADD TO Database");
                            ProcessingWorked = false;
                        }

                        // If processing failed, do not remove the app from the database, instead, keep it and flag it as not busy 
                        // so that other workers can try to process it later
                        if (!ProcessingWorked)
                        {
                            mongoDB.ToggleBusyApp(app, false);
                        }
                        else // On the other hand, if processing worked, removes it from the database
                        {
                            // Console Feedback, Comment this line to disable if you want to
                            if (!removed)
                            {
                                Console.WriteLine("Inserted App : " + parsedApp.Name);
                                 mongoDB.RemoveFromQueue(app.Url);
                            }
                            else
                            {
                                Console.WriteLine("Removed App : " + parsedApp.Name);
                            }                           
                        }


                        // Vu
                        // TRY TO NOT DOWNLOAD THE RELATED APPS
                        /*
                        // Counters for console feedback only
                        int extraAppsCounter = 0, newExtraApps = 0;

                        // Parsing "Related Apps" and "More From Developer" Apps (URLS Only)
                        foreach (string extraAppUrl in parser.ParseExtraApps (response))
                        {
                            // Incrementing counter of extra apps
                            extraAppsCounter++;

                            // Assembling Full app Url to check with database
                            string fullExtraAppUrl = Consts.APP_URL_PREFIX + extraAppUrl;

                            // Checking if the app was either processed or queued to be processed already
                            if ((!mongoDB.AppProcessed (fullExtraAppUrl)) && (!mongoDB.IsAppOnQueue(extraAppUrl)))
                            {
                                // Incrementing counter of inserted apps
                                newExtraApps++;

                                // Adds it to the queue of apps to be processed
                                mongoDB.AddToQueue (extraAppUrl);
                            }
                        }

                        // Console Feedback
                        Console.WriteLine ("Queued " + newExtraApps + " / " + extraAppsCounter + " related apps");
                        
                        */

                        // Hiccup (used to minimize blocking issues)
                        Thread.Sleep (300);
                    }
                }
                catch (Exception ex)
                {
                    LogWriter.Error (ex);
                }
                finally
                {
                    try
                    {
                        // Toggles Busy status back to false
                        mongoDB.ToggleBusyApp(app, false);
                    }
                    catch (Exception ex)
                    {
                        // Toggle Busy App may raise an exception in case of lack of internet connection, so, i must use this
                        // "inner catch" to avoid it from happenning
                        LogWriter.Error (ex);
                    }
                }
            }
        }
コード例 #39
0
        /// <summary>
        /// Entry point of the worker piece of the process.
        /// Notice that you can run as many workers as you want to in order to make the crawling faster.
        /// </summary>
        /// <remarks>
        /// Loops for as long as <c>mongoDB.FindAndModify()</c> returns a queued app. For each one the app page
        /// is downloaded, parsed and inserted into MongoDB, the queue entry is removed and every related app
        /// that is neither processed nor queued yet is added to the queue.
        /// Failed downloads wait 2^retryCounter seconds; once the counter reaches 11 the queue entry is removed
        /// and the worker waits 35 minutes.
        /// </remarks>
        /// <param name="args">Command line arguments (not used)</param>
        static void Main(string[] args)
        {
            // Configuring Log Object Threshold
            LogWriter.Threshold = TLogEventLevel.Information;
            LogWriter.Info("Worker Started");

            // Parser
            PlayStoreParser parser = new PlayStoreParser();

            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB = new MongoDBWrapper();
            string fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Creating Instance of Web Requests Server
            WebRequests server = new WebRequests();

            // App currently being processed (taken from the queue)
            QueuedApp app;

            // Retry Counter (Used for exponential wait increasing logic)
            int retryCounter = 0;

            // Iterating over queued apps until the queue returns no more documents to be processed
            while ((app = mongoDB.FindAndModify()) != null)
            {
                try
                {
                    // Building APP URL
                    string appUrl = Consts.APP_URL_PREFIX + app.Url;

                    // Checking if this app is on the database already
                    if (mongoDB.AppProcessed(appUrl))
                    {
                        // Console Feedback, Comment this line to disable if you want to
                        Console.WriteLine("Duplicated App, skipped.");

                        // Delete it from the queue and continues the loop
                        mongoDB.RemoveFromQueue(app.Url);
                        continue;
                    }

                    // Configuring server and Issuing Request
                    server.Headers.Add(Consts.ACCEPT_LANGUAGE);
                    server.Host = Consts.HOST;
                    server.Encoding = "utf-8";
                    server.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;
                    string response = server.Get(appUrl);

                    // Flag Indicating Success while processing and parsing this app
                    bool ProcessingWorked = true;

                    // Sanity Check
                    if (String.IsNullOrEmpty(response) || server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        LogWriter.Info("Error opening app page : " + appUrl);
                        ProcessingWorked = false;

                        // Renewing WebRequest Object to get rid of Cookies
                        server = new WebRequests();

                        // Inc. retry counter
                        retryCounter++;

                        Console.WriteLine("Retrying:" + retryCounter);

                        // Checking for maximum retry count
                        double waitTime;
                        if (retryCounter >= 11)
                        {
                            waitTime = TimeSpan.FromMinutes(35).TotalMilliseconds;

                            // Removing App from the queue (the app page may have expired).
                            // The queue is addressed by app.Url everywhere else in this method, so the
                            // un-prefixed url must be used here as well (the prefixed appUrl would not match).
                            mongoDB.RemoveFromQueue(app.Url);
                        }
                        else
                        {
                            // Calculating next wait time ( 2 ^ retryCounter seconds)
                            waitTime = TimeSpan.FromSeconds(Math.Pow(2, retryCounter)).TotalMilliseconds;
                        }

                        // Hiccup to avoid google blocking connections in case of heavy traffic from the same IP
                        Thread.Sleep(Convert.ToInt32(waitTime));
                    }
                    else
                    {
                        // Reseting retry counter
                        retryCounter = 0;

                        // Parsing Useful App Data
                        AppModel parsedApp = parser.ParseAppPage(response, appUrl);

                        // Inserting App into Mongo DB Database
                        if (!mongoDB.Insert<AppModel>(parsedApp))
                        {
                            ProcessingWorked = false;
                        }

                        // If the processing failed, do not remove the app from the queue, instead, keep it and flag it as not busy
                        // so that other workers can try to process it later
                        if (!ProcessingWorked)
                        {
                            mongoDB.ToggleBusyApp(app, false);
                        }
                        else // On the other hand, if processing worked, removes it from the queue
                        {
                            // Console Feedback, Comment this line to disable if you want to
                            Console.WriteLine("Inserted App : " + parsedApp.Name);

                            mongoDB.RemoveFromQueue(app.Url);
                        }

                        // Counters for console feedback only
                        int extraAppsCounter = 0, newExtraApps = 0;

                        // Parsing "Related Apps" and "More From Developer" Apps (URLS Only)
                        foreach (string extraAppUrl in parser.ParseExtraApps(response))
                        {
                            // Incrementing counter of extra apps
                            extraAppsCounter++;

                            // Assembling Full app Url to check with database
                            string fullExtraAppUrl = Consts.APP_URL_PREFIX + extraAppUrl;

                            // Checking if the app was either processed or queued to be processed already
                            if ((!mongoDB.AppProcessed(fullExtraAppUrl)) && (!mongoDB.IsAppOnQueue(extraAppUrl)))
                            {
                                // Incrementing counter of inserted apps
                                newExtraApps++;

                                // Adds it to the queue of apps to be processed
                                mongoDB.AddToQueue(extraAppUrl);
                            }
                        }

                        // Console Feedback
                        Console.WriteLine("Queued " + newExtraApps + " / " + extraAppsCounter + " related apps");
                    }
                }
                catch (Exception ex)
                {
                    LogWriter.Error(ex);
                }
                finally
                {
                    try
                    {
                        // Toggles Busy status back to false
                        mongoDB.ToggleBusyApp(app, false);
                    }
                    catch (Exception ex)
                    {
                        // Toggle Busy App may raise an exception in case of lack of internet connection, so this
                        // "inner catch" keeps that failure from terminating the worker loop
                        LogWriter.Error(ex);
                    }
                }
            }
        }
コード例 #40
0
        /// <summary>
        /// Entry point of the worker piece of the process.
        /// Notice that you can run as many workers as you want to in order to make the crawling faster.
        /// </summary>
        /// <remarks>
        /// Loops for as long as <c>mongoDB.FindAndModify()</c> returns a queued app. For each one the app page
        /// is downloaded (optionally through a proxy), parsed and upserted into MongoDB by its "Url" key,
        /// the queue entry is removed and the related apps that are neither processed nor queued are queued.
        /// Failed downloads wait 2 seconds when proxies are in use; otherwise 2^retryCounter seconds,
        /// or 20 minutes once the counter reaches 8.
        /// </remarks>
        /// <param name="args">
        /// Optional: when exactly one argument is given it is treated as the path of a text file with the
        /// proxies to load. The process exits with code -100 if that file does not exist and -101 if the
        /// proxies cannot be loaded.
        /// </param>
        static void Main(string[] args)
        {
            // Configuring Log Object
            LogSetup.InitializeLog("PlayStoreWorker.log", "info");
            Logger logger = LogManager.GetCurrentClassLogger();

            logger.Info("Worker Started");

            // Control Variable (Bool - Should the process use proxies? )
            bool isUsingProxies = false;

            // Checking for the need to use proxies
            if (args != null && args.Length == 1)
            {
                // Setting flag to true
                isUsingProxies = true;

                // Loading proxies from .txt received as argument
                String fPath = args[0];

                // Sanity Check
                if (!File.Exists(fPath))
                {
                    logger.Fatal("Couldnt find proxies on path : " + fPath);
                    System.Environment.Exit(-100);
                }

                // Reading Proxies from File
                string[] fLines = File.ReadAllLines(fPath, Encoding.GetEncoding("UTF-8"));

                try
                {
                    // Actual Load of Proxies
                    ProxiesLoader.Load(fLines.ToList());
                }
                catch (Exception ex)
                {
                    logger.Fatal(ex);
                    System.Environment.Exit(-101);
                }
            }

            // Parser
            PlayStoreParser parser = new PlayStoreParser();

            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB           = new MongoDBWrapper();
            string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            /*
             * // populate min downloaded & max downloaded
             * int count = 0;
             * var apps = mongoDB.FindAll<AppModel>();
             * foreach(var a in apps)
             * {
             *  a.FillMinAndMaxInstalls();
             *  ++count;
             *
             *  if((count % 100) == 0)
             *  {
             *      Console.WriteLine("updated {0}", count);
             *  }
             *
             *  if (!mongoDB.UpsertKeyEq<AppModel>(a, "Url", a.Url))
             *  {
             *      Console.WriteLine("UpsertKey failed");
             *  }
             * }
             */

            // Creating Instance of Web Requests Server
            WebRequests server = new WebRequests();

            // Queued App Model
            QueuedApp app;

            // Retry Counter (Used for exponential wait increasing logic)
            int retryCounter = 0;

            // Iterating over queued apps until the queue returns no more documents to be processed
            while ((app = mongoDB.FindAndModify()) != null)
            {
                try
                {
                    // Building APP URL
                    string appUrl = app.Url;

                    // Sanity check of app page url: only prepend the prefix when the queued url has no "http" in it
                    if (app.Url.IndexOf("http", StringComparison.OrdinalIgnoreCase) < 0)
                    {
                        appUrl = Consts.APP_URL_PREFIX + app.Url;
                    }

                    // Checking if this app is on the database already
                    if (mongoDB.AppProcessed(appUrl))
                    {
                        // Console Feedback, Comment this line to disable if you want to
                        logger.Info("Duplicated App, skipped.");

                        // Delete it from the queue and continues the loop
                        mongoDB.RemoveFromQueue(app.Url);
                        continue;
                    }

                    // Configuring server and Issuing Request
                    server.Headers.Add(Consts.ACCEPT_LANGUAGE);
                    server.Host              = Consts.HOST;
                    server.UserAgent         = Consts.GITHUBURL;
                    server.Encoding          = "utf-8";
                    server.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;

                    // Checking for the need to use "HTTP Proxies"
                    if (isUsingProxies)
                    {
                        server.Proxy = ProxiesLoader.GetWebProxy();
                    }

                    // Issuing HTTP Request
                    string response = server.Get(appUrl);

                    // Flag Indicating Success while processing and parsing this app
                    bool ProcessingWorked = true;

                    // Sanity Check
                    if (String.IsNullOrEmpty(response) || server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        logger.Info("Error opening app page : " + appUrl);
                        ProcessingWorked = false;

                        // Keeping the status of the failed request: the WebRequests object is replaced right below,
                        // and the "not found" check further down must look at this request, not at the new object
                        var failedStatusCode = server.StatusCode;

                        if (isUsingProxies)
                        {
                            ProxiesLoader.IncrementCurrentProxy();
                        }

                        // Renewing WebRequest Object to get rid of Cookies
                        server = new WebRequests();

                        // Fallback time variable (milliseconds)
                        double waitTime;

                        // Checking which "Waiting Logic" to use - If there are proxies being used, there's no need to wait too much
                        // If there are no proxies in use, on the other hand, the process must wait more time
                        if (isUsingProxies)
                        {
                            // Waits two seconds everytime
                            waitTime = TimeSpan.FromSeconds(2).TotalMilliseconds;
                        }
                        else
                        {
                            // Increments retry counter
                            retryCounter++;

                            // Checking for maximum retry count
                            if (retryCounter >= 8)
                            {
                                waitTime = TimeSpan.FromMinutes(20).TotalMilliseconds;
                            }
                            else
                            {
                                // Calculating next wait time ( 2 ^ retryCounter seconds)
                                waitTime = TimeSpan.FromSeconds(Math.Pow(2, retryCounter)).TotalMilliseconds;
                            }
                        }

                        // Hiccup to avoid google blocking connections in case of heavy traffic from the same IP
                        // (waitTime is held in milliseconds, so it is converted before being reported as seconds)
                        logger.Info("======================================================");
                        logger.Info("\n\tFallback : " + TimeSpan.FromMilliseconds(waitTime).TotalSeconds + " Seconds");
                        Thread.Sleep(Convert.ToInt32(waitTime));

                        // If The Status code is "ZERO" (it means 404) - App must be removed from "Queue"
                        if (failedStatusCode == 0)
                        {
                            // Console Feedback
                            logger.Info("\tApp Not Found (404) - " + app.Url);

                            mongoDB.RemoveFromQueue(app.Url);
                        }
                        logger.Info("======================================================");
                    }
                    else
                    {
                        // Reseting retry counter
                        retryCounter = 0;

                        // Parsing Useful App Data
                        AppModel parsedApp = parser.ParseAppPage(response, appUrl);

                        // Normalizing URLs
                        if (!String.IsNullOrWhiteSpace(parsedApp.DeveloperPrivacyPolicy))
                        {
                            parsedApp.DeveloperPrivacyPolicy = parsedApp.DeveloperPrivacyPolicy.Replace("https://www.google.com/url?q=", String.Empty);
                        }

                        if (!String.IsNullOrWhiteSpace(parsedApp.DeveloperWebsite))
                        {
                            parsedApp.DeveloperNormalizedDomain = parser.NormalizeDomainName(parsedApp.DeveloperWebsite);
                        }

                        // Full urls (prefix already applied) of the apps related to this one
                        List <String> relatedApps = new List <String> ();

                        // Avoiding Exceptions caused by "No Related Apps" situations - Must be treated differently
                        try
                        {
                            // Parsing "Related Apps" and "More From Developer" Apps (URLS Only)
                            foreach (string extraAppUrl in parser.ParseExtraApps(response))
                            {
                                relatedApps.Add(Consts.APP_URL_PREFIX + extraAppUrl);
                            }

                            // Adding "Related Apps" to Apps Model
                            parsedApp.RelatedUrls = relatedApps.Distinct().ToArray();
                        }
                        catch
                        {
                            // Best effort: any failure while parsing related apps is reported and the app is still recorded
                            logger.Info("\tNo Related Apps Found. Skipping");
                        }

                        // Inserting App into Mongo DB Database
                        if (!mongoDB.UpsertKeyEq <AppModel>(parsedApp, "Url", appUrl))
                        {
                            ProcessingWorked = false;
                        }

                        // If the processing failed, do not remove the app from the queue, instead, keep it and flag it as not busy
                        // so that other workers can try to process it later
                        if (!ProcessingWorked)
                        {
                            mongoDB.ToggleBusyApp(app, false);
                        }
                        else // On the other hand, if processing worked, removes it from the queue
                        {
                            // Console Feedback, Comment this line to disable if you want to
                            Console.ForegroundColor = ConsoleColor.Red;
                            logger.Info("Inserted App : " + parsedApp.Name);
                            Console.ForegroundColor = ConsoleColor.White;

                            mongoDB.RemoveFromQueue(app.Url);
                        }

                        // Counters for console feedback only
                        int extraAppsCounter = 0, newExtraApps = 0;

                        // Queueing the "Related Apps" and "More From Developer" Apps collected above
                        foreach (string extraAppUrl in relatedApps)
                        {
                            // Incrementing counter of extra apps
                            extraAppsCounter++;

                            // Assembling Full app Url to check with database
                            // (ordinal search: urls are identifiers, not culture-dependent text)
                            string fullExtraAppUrl;
                            if (extraAppUrl.IndexOf("https://play.google.com/", StringComparison.Ordinal) >= 0)
                            {
                                fullExtraAppUrl = extraAppUrl;
                            }
                            else
                            {
                                fullExtraAppUrl = Consts.APP_URL_PREFIX + extraAppUrl;
                            }

                            // Checking if the app was either processed or queued to be processed already
                            if ((!mongoDB.AppProcessed(fullExtraAppUrl)) && (!mongoDB.IsAppOnQueue(extraAppUrl)))
                            {
                                // Incrementing counter of inserted apps
                                newExtraApps++;

                                // Adds it to the queue of apps to be processed
                                mongoDB.AddToQueue(extraAppUrl);
                            }
                        }

                        // Console Feedback
                        logger.Info("Queued " + newExtraApps + " / " + extraAppsCounter + " related apps");
                    }
                }
                catch (Exception ex)
                {
                    logger.Error(ex);
                }
                finally
                {
                    try
                    {
                        // Toggles Busy status back to false
                        mongoDB.ToggleBusyApp(app, false);
                    }
                    catch (Exception ex)
                    {
                        // Toggle Busy App may raise an exception in case of lack of internet connection, so this
                        // "inner catch" keeps that failure from terminating the worker loop
                        logger.Error(ex);
                    }
                }
            }
        }
コード例 #41
0
        /// <summary>
        /// Entry point of the recorder: dequeues app data messages from the SQS queue, deserializes them
        /// into <c>AppleStoreAppModel</c> records and writes them to MongoDB in batches.
        /// </summary>
        /// <remarks>
        /// Runs forever. Every dequeued message is deleted from the source queue and its body is copied to
        /// the "DeadLetter" queue, whether or not it could be deserialized. Buffered records are written
        /// when the buffer reaches the batch size, and also when the queue reports no messages, so a
        /// partial batch is not left unwritten while the queue is idle.
        /// </remarks>
        /// <param name="args">Command line arguments (not used)</param>
        static void Main(string[] args)
        {
            // Loading Configuration
            LogSetup.InitializeLog("Apple_Store_Recorder.log", "info");
            _logger = LogManager.GetCurrentClassLogger();

            // Loading Config
            _logger.Info("Loading Configurations from App.config");
            LoadConfiguration();

            // Initializing Queue
            _logger.Info("Initializing Queue");
            AWSSQSHelper appsDataQueue = new AWSSQSHelper(_appsDataQueueName, _maxMessagesPerDequeue, _awsKey, _awsKeySecret);
            AWSSQSHelper backup        = new AWSSQSHelper("DeadLetter", _maxMessagesPerDequeue, _awsKey, _awsKeySecret);

            // Creating MongoDB Instance
            _logger.Info("Loading MongoDB / Creating Instances");

            MongoDBWrapper mongoDB    = new MongoDBWrapper();
            string         serverAddr = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, serverAddr, 10000, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Setting Error Flag to No Error ( 0 )
            System.Environment.ExitCode = 0;

            // Initializing Control Variables (exponent of the idle wait time)
            int fallbackWaitTime = 1;

            // Buffer of Messages to be recorded
            List <AppleStoreAppModel> recordsBuffer  = new List <AppleStoreAppModel> ();
            List <Message>            messagesBuffer = new List <Message> ();

            // Insert Batch Size
            int batchSize = 1000;

            _logger.Info("Started Recording App Data");

            do
            {
                try
                {
                    // Dequeueing messages from the Queue
                    if (!appsDataQueue.DeQueueMessages())
                    {
                        Thread.Sleep(_hiccupTime);  // Hiccup
                        continue;
                    }

                    // Checking for no message received, and false positives situations
                    if (!appsDataQueue.AnyMessageReceived())
                    {
                        // The queue is idle: record whatever is still buffered, otherwise a partial batch
                        // would stay in memory until enough new messages show up to complete it
                        if (recordsBuffer.Count > 0)
                        {
                            try
                            {
                                FlushBuffers(mongoDB, appsDataQueue, recordsBuffer, messagesBuffer);
                            }
                            catch (Exception ex)
                            {
                                // A failed flush must not skip the wait below; it is retried on the next idle cycle
                                _logger.Error(ex);
                            }
                        }

                        // If no message was found, increases the wait time (milliseconds)
                        int waitTime;
                        if (fallbackWaitTime <= 12)
                        {
                            // Exponential increase on the wait time, truncated after 12 retries
                            waitTime = Convert.ToInt32(Math.Pow(2, fallbackWaitTime) * 1000);
                        }
                        else // Reseting Wait after 12 fallbacks
                        {
                            waitTime         = 2000;
                            fallbackWaitTime = 0;
                        }

                        fallbackWaitTime++;

                        // Sleeping before next try (waitTime is always a whole number of seconds)
                        Console.WriteLine("Fallback (seconds) => " + (waitTime / 1000));
                        Thread.Sleep(waitTime);
                        continue;
                    }

                    // Reseting fallback time
                    fallbackWaitTime = 1;

                    // Iterating over dequeued Messages
                    foreach (var appDataMessage in appsDataQueue.GetDequeuedMessages())
                    {
                        try
                        {
                            // Deserializing message
                            var appData = AppleStoreAppModel.FromJson(appDataMessage.Body);

                            // Dumping "Url" to "_id"
                            appData._id = appData.url;

                            // Adding it to the buffer of records to be recorded
                            recordsBuffer.Add(appData);

                            // Adding message to the buffer of messages to be deleted
                            messagesBuffer.Add(appDataMessage);

                            // Is it time to batch insert ?
                            // ">=" (rather than an exact multiple) so that a flush that failed is retried on the
                            // next message instead of only after another full batch has been buffered
                            if (recordsBuffer.Count >= batchSize)
                            {
                                FlushBuffers(mongoDB, appsDataQueue, recordsBuffer, messagesBuffer);
                            }
                        }
                        catch (Exception ex)
                        {
                            _logger.Error(ex);
                        }
                        finally
                        {
                            // Deleting the message and keeping a copy of its body on the backup queue
                            appsDataQueue.DeleteMessage(appDataMessage);
                            backup.EnqueueMessage(appDataMessage.Body);
                        }
                    }
                }
                catch (Exception ex)
                {
                    _logger.Error(ex);
                }
            } while (true);
        }

        /// <summary>
        /// Batch-inserts the buffered records, deletes the buffered messages from the queue and clears both buffers.
        /// The buffers are left untouched when the insertion or a deletion throws.
        /// </summary>
        /// <param name="mongoDB">Wrapper used for the batch insertion</param>
        /// <param name="appsDataQueue">Queue the buffered messages were received from</param>
        /// <param name="recordsBuffer">Records waiting to be recorded</param>
        /// <param name="messagesBuffer">Messages the buffered records were read from</param>
        private static void FlushBuffers(MongoDBWrapper mongoDB, AWSSQSHelper appsDataQueue, List <AppleStoreAppModel> recordsBuffer, List <Message> messagesBuffer)
        {
            // Batch Insertion
            mongoDB.BatchInsert <AppleStoreAppModel> (recordsBuffer);

            // Logging Feedback
            _logger.Info("\tApps Recorded : " + recordsBuffer.Count);

            // Deleting Messages
            messagesBuffer.ForEach((msg) => appsDataQueue.DeleteMessage(msg));

            _logger.Info("\tMessages Deleted: " + messagesBuffer.Count);

            // Clearing Buffers
            recordsBuffer.Clear();
            messagesBuffer.Clear();
        }
コード例 #42
0
        /// <summary>
        /// Entry point of the worker piece of the process
        /// Notice that you can run as many workers as you want to in order to make the crawling faster
        /// </summary>
        /// <param name="args"></param>
        static void Main(string[] args)
        {
            // Worker entry point. Pulls queued apps one at a time through MongoDBWrapper.FindAndModify,
            // downloads each app page, stores the parsed AppModel and queues every related app url that
            // is neither processed nor queued yet. Returns as soon as FindAndModify yields null.
            // "args" is not read.

            // Configuring Log Object Threshold
            LogWriter.Threshold = TLogEventLevel.Information;
            LogWriter.Info("Worker Started");

            // Parser
            PlayStoreParser parser = new PlayStoreParser();

            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB           = new MongoDBWrapper();
            string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Creating Instance of Web Requests Server
            WebRequests server = new WebRequests();

            // Number of consecutive failed page requests (drives the exponential wait below).
            // It is reset only when a page is fetched successfully.
            int retryCounter = 0;

            try
            {
                QueuedApp app;

                // Keeps going for as long as the queue hands out another app to process
                while ((app = mongoDB.FindAndModify()) != null)
                {
                    try
                    {
                        // Building APP URL
                        string appUrl = Consts.APP_URL_PREFIX + app.Url;

                        // Checking if this app is on the database already
                        if (mongoDB.AppProcessed(appUrl))
                        {
                            // Console Feedback, Comment this line to disable if you want to
                            Console.WriteLine("Duplicated App, skipped.");

                            // Delete it from the queue and continues the loop
                            mongoDB.RemoveFromQueue(app.Url);
                            continue;
                        }

                        // Configuring server and Issuing Request
                        // NOTE(review): the header is added on every iteration, even when the same server
                        // instance is reused - confirm WebRequests.Headers tolerates repeated adds.
                        server.Headers.Add(Consts.ACCEPT_LANGUAGE);
                        server.Host              = Consts.HOST;
                        server.Encoding          = "utf-8";
                        server.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;
                        string response = server.Get(appUrl);

                        // Sanity Check
                        if (String.IsNullOrEmpty(response) || server.StatusCode != System.Net.HttpStatusCode.OK)
                        {
                            LogWriter.Info("Error opening app page : " + appUrl);

                            // Renewing WebRequest Object to get rid of Cookies.
                            // The replaced instance is disposed (after its successor exists) so that
                            // failed requests do not pile up undisposed WebRequests objects.
                            WebRequests staleServer = server;
                            server = new WebRequests();
                            ((IDisposable)staleServer).Dispose();

                            // Inc. retry counter
                            retryCounter++;

                            Console.WriteLine("Retrying:" + retryCounter);

                            // Checking for maximum retry count
                            double waitTime;
                            if (retryCounter >= 11)
                            {
                                waitTime = TimeSpan.FromMinutes(35).TotalMilliseconds;

                                // Removing App from the queue (the app page may have expired)
                                mongoDB.RemoveFromQueue(app.Url);
                            }
                            else
                            {
                                // Calculating next wait time ( 2 ^ retryCounter seconds)
                                waitTime = TimeSpan.FromSeconds(Math.Pow(2, retryCounter)).TotalMilliseconds;
                            }

                            // Hiccup to avoid google blocking connections in case of heavy traffic from the same IP
                            Thread.Sleep(Convert.ToInt32(waitTime));
                        }
                        else
                        {
                            // Reseting retry counter
                            retryCounter = 0;

                            // Parsing Useful App Data
                            AppModel parsedApp = parser.ParseAppPage(response, appUrl);

                            if (mongoDB.Insert<AppModel>(parsedApp))
                            {
                                // Console Feedback, Comment this line to disable if you want to
                                Console.WriteLine("Inserted App : " + parsedApp.Name);

                                // The app was stored, so it can leave the queue
                                mongoDB.RemoveFromQueue(app.Url);
                            }
                            else
                            {
                                // The insert failed: keep the app on the queue and flag it as not busy
                                // so that other workers can try to process it later
                                mongoDB.ToggleBusyApp(app, false);
                            }

                            // Counters for console feedback only
                            int extraAppsCounter = 0, newExtraApps = 0;

                            // Parsing "Related Apps" and "More From Developer" Apps (URLS Only)
                            foreach (string extraAppUrl in parser.ParseExtraApps(response))
                            {
                                // Incrementing counter of extra apps
                                extraAppsCounter++;

                                // Assembling Full app Url to check with database
                                string fullExtraAppUrl = Consts.APP_URL_PREFIX + extraAppUrl;

                                // Checking if the app was either processed or queued to be processed already
                                if ((!mongoDB.AppProcessed(fullExtraAppUrl)) && (!mongoDB.IsAppOnQueue(extraAppUrl)))
                                {
                                    // Incrementing counter of inserted apps
                                    newExtraApps++;

                                    // Adds it to the queue of apps to be processed
                                    mongoDB.AddToQueue(extraAppUrl);
                                }
                            }

                            // Console Feedback
                            Console.WriteLine("Queued " + newExtraApps + " / " + extraAppsCounter + " related apps");
                        }
                    }
                    catch (Exception ex)
                    {
                        // A failure on one app must not stop the worker: log it and move on
                        LogWriter.Error(ex);
                    }
                    finally
                    {
                        try
                        {
                            // Toggles Busy status back to false
                            mongoDB.ToggleBusyApp(app, false);
                        }
                        catch (Exception ex)
                        {
                            // ToggleBusyApp may itself throw (e.g. when the connection is down), so it
                            // gets its own catch to keep the exception from escaping the loop
                            LogWriter.Error(ex);
                        }
                    }
                }
            }
            finally
            {
                // Releasing whichever WebRequests instance is current when the worker stops
                ((IDisposable)server).Dispose();
            }
        }
コード例 #43
0
        ///  *** READ THIS BEFORE YOU START. ***
        ///  *** I MEAN IT, PLEASE, READ IT  ***
        ///
        ///  This exporting helper will download ALL THE APPS found in the database and
        ///  dump them to a CSV file (with headers).
        ///
        ///  Note that, since the database is hosted on AWS, I will PAY (for the outbound internet traffic) if you execute a full database export,
        ///  so, if you are going to execute a full export, please get in touch with me before running this project, or send me a donation
        ///  via PayPal to [email protected]
        ///
        ///  Also, be nice with the database.
        ///
        ///  ** END OF WARNING ***

        static void Main(string[] args)
        {
            // Exporter entry point. Validates the arguments and the output path given in args[0], then
            // appends a CSV header line plus one line per app returned by the query to that file,
            // logging the running count every 10 seconds.

            // Logs Counter
            int processedApps = 0;

            // Configuring Log Object Threshold
            LogWriter.Threshold = TLogEventLevel.Information;

            // Overriding LogWriter Event
            LogWriter.LogEvent += LogWriter_LogEvent;

            try
            {
                LogWriter.Info("Checking Arguments");

                // Periodic Log Timer (first tick after 10 seconds, then every 10 seconds).
                // Scoping it with "using" keeps the timer referenced for the whole export and releases
                // it on every exit path, early returns included.
                using (Timer loggingTimer = new Timer(state =>
                {
                    LogWriter.Info("Processed Apps: " + processedApps);
                }, null, 10000, 10000))
                {
                    // Validating Arguments
                    if (!ValidateArgs(args))
                    {
                        LogWriter.Fatal("Invalid Args", "Args must have 1 element");
                        return;
                    }

                    LogWriter.Info("Checking Write Permissions on output Path");

                    // Validating Write Permissions on output path
                    if (!ValidateFilePermissions(args[0]))
                    {
                        LogWriter.Fatal("Insuficient Permissions", "Cannot write on path : " + args[0]);
                        return;
                    }

                    // Configuring MongoDB Wrapper
                    MongoDBWrapper mongoDB           = new MongoDBWrapper();
                    string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

                    mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

                    // Opening Output Stream (append mode)
                    // NOTE(review): because the file is opened for appending, the header line is written
                    // again when the file already has content - confirm this is intended.
                    using (StreamWriter sWriter = new StreamWriter(args[0], true, Encoding.GetEncoding("ISO-8859-1")))
                    {
                        // Auto Flush Content
                        sWriter.AutoFlush = true;

                        // Writing Headers. Every fragment but the last ends with a comma so that no two
                        // column names get fused when the fragments are concatenated.
                        String headersLine = "_id,Url,ReferenceDate,Name,Developer,IsTopDeveloper,DeveloperURL,PublicationDate,"
                                             + "Category,IsFree,Price,Reviewers,CoverImgUrl,Description,Score.Total,Score.Count,Score.FiveStars,"
                                             + "Score.FourStars,Score.ThreeStars,Score.TwoStars,Score.OneStars,LastUpdateDate,"
                                             + "AppSize,Instalations,CurrentVersion,MinimumOSVersion,ContentRating,HaveInAppPurchases,DeveloperEmail,DeveloperWebsite,DeveloperPrivacyPolicy";

                        sWriter.WriteLine(headersLine);

                        // Example of MongoDB Query Construction
                        // Queries for records which have the attribute "IsTopDeveloper" equal to "false"
                        var mongoQuery = Query.EQ("IsTopDeveloper", false);

                        // Reading the matching apps from the database
                        // USAGE: CHANGE FindMatch to FindAll if you want to export all the records from the database
                        // NOTE(review): the arguments 10 and 0 look like a limit and a skip - confirm against MongoDBWrapper.
                        foreach (AppModel app in mongoDB.FindMatch<AppModel>(mongoQuery, 10, 0))
                        {
                            try
                            {
                                // Writing line to File
                                sWriter.WriteLine(app.ToString());
                                processedApps++;
                            }
                            catch (Exception ex)
                            {
                                // One bad record must not abort the export
                                LogWriter.Error(ex);
                            }
                        }
                    }
                }

                // Logging end of the Process (not reached on the early returns above)
                LogWriter.Info("Finished Exporting Database");
            }
            finally
            {
                // Removing Event, whichever way the method is left
                LogWriter.LogEvent -= LogWriter_LogEvent;
            }
        }
コード例 #44
0
        /// <summary>
        /// Executes a search using the searchField as the search parameter,
        /// paginates / scrolls the search results to the end, adding the url of every app
        /// it finds that is not yet processed or queued to the queue (via mongoDB.AddToQueue)
        /// </summary>
        /// <param name="searchField"></param>
        private static void CrawlStore(string searchField)
        {
            // Console Feedback
            Console.WriteLine("Crawling Search Term : [ " + searchField + " ]");

            // Compiling Regular Expression used to parse the "pagToken" out of the Play Store
            Regex pagTokenRegex = new Regex(@"GAEi+.+\:S\:.{11}\\42", RegexOptions.Compiled);

            // HTML Response
            string response;

            // MongoDB Helper
            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB           = new MongoDBWrapper();
            string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Ensuring the database has the proper index
            mongoDB.EnsureIndex("Url");

            // Response Parser
            PlayStoreParser parser = new PlayStoreParser();

            // Executing Web Requests
            using (WebRequests server = new WebRequests())
            {
                // Creating Request Object
                server.Host = Consts.HOST;

                // Executing Initial Request
                // NOTE(review): searchField is inserted into the url as-is - confirm callers pass it already url-encoded.
                response = server.Post(String.Format(Consts.CRAWL_URL, searchField), Consts.INITIAL_POST_DATA);

                // Queueing the links found on the first page (Initial Request)
                QueueUnseenAppUrls(mongoDB, parser, response);

                // Executing Requests for more Play Store Links.
                // Paging is driven by the "pagToken" found in the previous response.
                int errorsCount = 0;
                do
                {
                    // Finding pagToken from HTML
                    var rgxMatch = pagTokenRegex.Match(response);

                    // Without a token there is no next page to ask for
                    if (!rgxMatch.Success)
                    {
                        break;
                    }

                    // Reading Match from Regex, and applying needed replacements
                    string pagToken = rgxMatch.Value.Replace(":S:", "%3AS%3A").Replace("\\42", String.Empty).Replace(@"\\u003d", String.Empty);

                    // Assembling new PostData with paging values
                    string postData = String.Format(Consts.POST_DATA, pagToken);

                    // Executing request for values
                    response = server.Post(String.Format(Consts.CRAWL_URL, searchField), postData);

                    // Checking Server Status
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        LogWriter.Error("Http Error", "Status Code [ " + server.StatusCode + " ]");
                        errorsCount++;

                        // "continue" jumps straight to the loop condition below, which is evaluated
                        // against the failed response.
                        // NOTE(review): a retry therefore only happens if that response still counts as
                        // "results found" and still contains a pagToken - confirm this is intended.
                        continue;
                    }

                    // Queueing the links found on this page
                    QueueUnseenAppUrls(mongoDB, parser, response);
                }  while (parser.AnyResultFound(response) && errorsCount <= Consts.MAX_REQUEST_ERRORS);
            }
        }

        /// <summary>
        /// Parses the app urls out of a search response and adds to the queue every url
        /// that is neither processed nor queued yet, pausing 250 ms after each addition
        /// </summary>
        /// <param name="mongoDB">Configured database wrapper used for the duplicate checks and the queue</param>
        /// <param name="parser">Parser used to read the app urls out of the response</param>
        /// <param name="response">Body of the search response to parse</param>
        private static void QueueUnseenAppUrls(MongoDBWrapper mongoDB, PlayStoreParser parser, string response)
        {
            foreach (string url in parser.ParseAppUrls(response))
            {
                // Checks whether the app have been already processed
                // or is queued to be processed
                if ((!mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued(url)))
                {
                    // Console Feedback
                    Console.WriteLine(" . Queued App");

                    // Then, queue it :)
                    mongoDB.AddToQueue(url);
                    Thread.Sleep(250);  // Hiccup
                }
                else
                {
                    // Console Feedback
                    Console.WriteLine(" . Duplicated App. Skipped");
                }
            }
        }
コード例 #45
0
        /// <summary>
        /// Executes a search using the searchField as the search parameter,
        /// paginates / scrolls the search results to the end, adding the url of every app
        /// it finds that is not yet processed or queued to the queue (via mongoDB.AddToQueue)
        /// </summary>
        /// <param name="searchField"></param>
        private static void CrawlStore(string searchField)
        {
            // Announce which term is being crawled.
            // NOTE(review): searchField is only echoed to the console in this method; both requests
            // post to Consts.CRAWL_URL unformatted - confirm the term is not meant to be part of the url.
            Console.WriteLine("Crawling Search Term : [ " + searchField + " ]");

            // Database access: wrapper configured from the project constants
            MongoDBWrapper database      = new MongoDBWrapper();
            string         mongoEndpoint = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            database.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, mongoEndpoint, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Turns response bodies into app urls
            PlayStoreParser storeParser = new PlayStoreParser();

            using (WebRequests http = new WebRequests())
            {
                http.Host = Consts.HOST;

                // First page of results
                string html = http.Post(Consts.CRAWL_URL, Consts.INITIAL_POST_DATA);

                foreach (string appUrl in storeParser.ParseAppUrls(html))
                {
                    // Known means: already processed, or already waiting on the queue
                    bool alreadyKnown = database.AppProcessed(Consts.APP_URL_PREFIX + appUrl) || database.AppQueued(appUrl);

                    if (alreadyKnown)
                    {
                        Console.WriteLine(" . Duplicated App. Skipped");
                        continue;
                    }

                    Console.WriteLine(" . Queued App");
                    database.AddToQueue(appUrl);
                }

                // Following pages: the value sent is pageSize * pageIndex, and pageIndex only
                // advances after a successful request, so a failed page is requested again
                int pageSize       = 48;
                int pageIndex      = 1;
                int failedRequests = 0;
                do
                {
                    int skip = pageSize * pageIndex;

                    html = http.Post(Consts.CRAWL_URL, String.Format(Consts.POST_DATA, skip));

                    if (http.StatusCode == System.Net.HttpStatusCode.OK)
                    {
                        foreach (string appUrl in storeParser.ParseAppUrls(html))
                        {
                            bool alreadyKnown = database.AppProcessed(Consts.APP_URL_PREFIX + appUrl) || database.AppQueued(appUrl);

                            if (alreadyKnown)
                            {
                                Console.WriteLine(" . Duplicated App. Skipped");
                                continue;
                            }

                            Console.WriteLine(" . Queued App");
                            database.AddToQueue(appUrl);
                        }

                        // Move on to the next page
                        pageIndex++;
                    }
                    else
                    {
                        // Failed request: record it and fall through to the loop condition
                        LogWriter.Error("Http Error", "Status Code [ " + http.StatusCode + " ]");
                        failedRequests++;
                    }
                }  while (storeParser.AnyResultFound(html) && failedRequests <= Consts.MAX_REQUEST_ERRORS);
            }
        }