/// <summary>
        /// Command handler that downloads the profile page behind a <see cref="FacebookLinkModel"/>
        /// and fills the model with whatever can be parsed from it: real name, profile image,
        /// the raw and parsed "intro" details, current/origin city, schools, spouse and jobs.
        /// </summary>
        /// <remarks>
        /// The method is <c>async void</c> because it is invoked as a command handler; an exception
        /// thrown by the request or by a parser is therefore not observable by the caller.
        /// <c>IsLoading</c> is always reset, even when such an exception occurs.
        /// </remarks>
        /// <param name="args">
        /// The <see cref="FacebookLinkModel"/> to populate. Any other value (including <c>null</c>)
        /// is ignored and leaves the view model untouched.
        /// </param>
        public async void FacebookScrapeExecuted(object args)
        {
            // Nothing to scrape unless the command parameter really is a link model.
            FacebookLinkModel facebookLinkModel = args as FacebookLinkModel;
            if (facebookLinkModel == null)
            {
                return;
            }

            // Display the loading screen
            IsLoading = true;

            try
            {
                SelectedFacebookLinkModel = facebookLinkModel;

                // Make a call to the target profile url and get the source
                var source = await ExternalBrowser.CallExternalBrowser(facebookLinkModel.TargetUrl);

                string realName = FacebookStuff.GetRealNameFromProfilePage(source);
                facebookLinkModel.ProfileImage = FacebookStuff.GetProfilePhotoFromProfilePage(source);

                // Raw detail fragments from the profile page; treat "nothing found" as an empty list.
                List<string> details = FacebookStuff.GetIntroFromAuthenticatedProfilePage(source);
                if (details == null)
                {
                    details = new List<string>();
                }

                facebookLinkModel.TargetDetails = details;

                List<string> parsedDetails = new List<string>();
                facebookLinkModel.ParsedDetails = parsedDetails;

                if (realName != null)
                {
                    string firstName;
                    string lastName;

                    if (TrySplitPersonName(realName, out firstName, out lastName))
                    {
                        facebookLinkModel.FirstName = firstName;
                        facebookLinkModel.LastName  = lastName;
                    }
                    else
                    {
                        // A single word: all we can say is that it is the first name.
                        facebookLinkModel.FirstName = realName.Trim();
                    }
                }

                foreach (string detail in details)
                {
                    if (detail == null)
                    {
                        continue;
                    }

                    // Extract the display text once and reuse it for every check below.
                    string text = FacebookStuff.GetTextFromSingleDetailAuthenticated(detail);

                    if (!String.IsNullOrEmpty(text) && !parsedDetails.Contains(text))
                    {
                        parsedDetails.Add(text);
                    }

                    ApplyProfileDetail(facebookLinkModel, detail, text);
                }
            }
            finally
            {
                // Hide the loading screen even if the request or a parser threw.
                IsLoading = false;
            }
        }

        /// <summary>
        /// Classifies one raw profile detail by the keywords it contains and copies its text into
        /// the matching fields of <paramref name="model"/>. A detail may match several categories.
        /// </summary>
        /// <param name="model">The link model being populated.</param>
        /// <param name="detail">The raw detail fragment; used only for keyword matching.</param>
        /// <param name="text">The text extracted from <paramref name="detail"/>; may be null.</param>
        private static void ApplyProfileDetail(FacebookLinkModel model, string detail, string text)
        {
            string city;
            string state;

            // "lives in" / "current city" marks the current city detail
            if (ContainsIgnoreCase(detail, "lives in") || ContainsIgnoreCase(detail, "current city"))
            {
                if (TrySplitCityState(text, out city, out state))
                {
                    model.CurrentCity = city;
                    if (state != null)
                    {
                        model.CurrentState = state;
                    }
                }
            }

            // "studied" marks the college detail
            if (ContainsIgnoreCase(detail, "studied"))
            {
                model.College = text;
            }

            // "went to" marks the high school detail
            if (ContainsIgnoreCase(detail, "went to"))
            {
                model.HighSchool = text;
            }

            // "from" marks the origin city and state.
            // NOTE(review): "from" is a very loose match and will also hit unrelated details.
            if (ContainsIgnoreCase(detail, "from"))
            {
                if (TrySplitCityState(text, out city, out state))
                {
                    model.OriginCity = city;
                    if (state != null)
                    {
                        model.OriginState = state;
                    }
                }
            }

            // "married to" lists a spouse
            if (ContainsIgnoreCase(detail, "married to"))
            {
                model.MarriedTo = text;

                string spouseFirstName;
                string spouseLastName;

                if (TrySplitPersonName(text, out spouseFirstName, out spouseLastName))
                {
                    model.SpouseFirstName = spouseFirstName;
                    model.SpouseLastName  = spouseLastName;
                }
            }

            // This is the magic string for the little suitcase image next to the job detail text
            // It's a bad identifier and will likely break
            if (ContainsIgnoreCase(detail, "sx_9deefd"))
            {
                if (model.Jobs == null)
                {
                    model.Jobs = new List<string>();
                }

                // NOTE(review): Jobs is never reset here, so scraping the same model twice
                // appends the same jobs again - confirm whether that is intended.
                model.Jobs.Add(text);
            }
        }

        /// <summary>
        /// Splits a display name into first and last name. "Last, First" (comma form) takes
        /// priority; otherwise the first and last space-separated words are used, so middle
        /// names and initials are skipped.
        /// </summary>
        /// <returns>
        /// <c>true</c> when both out values were set; <c>false</c> when the name is null, empty
        /// or a single word.
        /// </returns>
        private static bool TrySplitPersonName(string fullName, out string firstName, out string lastName)
        {
            firstName = null;
            lastName  = null;

            if (String.IsNullOrEmpty(fullName))
            {
                return false;
            }

            if (fullName.IndexOf(',') >= 0)
            {
                // Split always yields at least two parts when a comma is present.
                string[] commaParts = fullName.Split(',');
                lastName  = commaParts[0].Trim();
                firstName = commaParts[1].Trim();
                return true;
            }

            string[] words = fullName.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            if (words.Length < 2)
            {
                return false;
            }

            firstName = words[0];
            lastName  = words[words.Length - 1];
            return true;
        }

        /// <summary>
        /// Splits "City, State" text on the first two comma-separated parts. Without a comma the
        /// whole text is the city and <paramref name="state"/> is left null.
        /// </summary>
        /// <returns><c>false</c> when <paramref name="cityState"/> is null; otherwise <c>true</c>.</returns>
        private static bool TrySplitCityState(string cityState, out string city, out string state)
        {
            city  = null;
            state = null;

            if (cityState == null)
            {
                return false;
            }

            if (cityState.IndexOf(',') < 0)
            {
                city = cityState.Trim();
                return true;
            }

            string[] parts = cityState.Split(',');
            city  = parts[0].Trim();
            state = parts[1].Trim();
            return true;
        }

        /// <summary>
        /// Culture-independent, case-insensitive substring test.
        /// </summary>
        private static bool ContainsIgnoreCase(string haystack, string needle)
        {
            return haystack.IndexOf(needle, StringComparison.OrdinalIgnoreCase) >= 0;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Command handler that searches Facebook for every selected database result and then
        /// scores each candidate profile that the search produced.
        /// </summary>
        /// <remarks>
        /// The scan runs in two phases, each awaited as a whole with <c>Task.WhenAll</c>:
        /// <list type="number">
        /// <item>one search request per selected target; every successful result becomes an entry in <c>TargetLinks</c>;</item>
        /// <item>one profile request per candidate username; the response is scored against <c>SelectedPerson</c>.</item>
        /// </list>
        /// <c>Sort()</c> is called once, after every candidate has been scored.
        /// <para>
        /// Async lambdas must not be handed to <c>Parallel.ForEach</c>: they compile to
        /// <c>async void</c>, so the loop returns at the first await and exceptions bypass the
        /// surrounding <c>try</c>. Starting the requests as tasks and awaiting them gives the
        /// same concurrency without manual request counting or busy-waiting.
        /// </para>
        /// <para>
        /// A request that fails is logged and skipped; it does not abort the rest of the scan.
        /// The method is <c>async void</c> because it is invoked as a command handler.
        /// </para>
        /// </remarks>
        /// <param name="param">Command parameter; not used.</param>
        public async void ScanFacebookExecuted(object param)
        {
            try
            {
                IEnumerable <DBLinkModel> selected = DatabaseSearchResults.Where(pm => pm.IsSelected);

                // Only search for targets that do not already have a link with usernames.
                // The list is materialised now because the filter reads TargetLinks, which is
                // cleared just below.
                // NOTE(review): the existing links of the skipped targets are removed by Clear()
                // and never re-added (this matches the earlier behaviour) - confirm whether they
                // should be kept and re-scored instead.
                List <DBLinkModel> targets = selected
                    .Where(pm => !TargetLinks.Any(tl => tl.id == pm.Id && tl.usernames != null && tl.usernames.Count > 0))
                    .ToList();

                TargetLinks.Clear();
                TopTargets.Clear();

                // Phase 1: start one search per target; ToList() kicks off all requests up front.
                var searchTasks = targets.Select(async target =>
                {
                    Console.WriteLine("Running search on target: " + target.firstname + " " + target.lastname);

                    try
                    {
                        // foreach voter db target, try and see if there are any facebook users in ohio with that name
                        string source = await ExternalBrowser.CallExternalBrowser(FacebookStuff.FormatQueryURL(target), true);

                        return FacebookStuff.ExtractUsernamesFromSource(target, source);
                    }
                    catch (Exception err)
                    {
                        // One failed search must not take the whole scan down.
                        Console.WriteLine("Search failed for target: " + target.firstname + " " + target.lastname + " (" + err.Message + ")");
                        return null;
                    }
                }).ToList();

                var links = (await Task.WhenAll(searchTasks)).Where(link => link != null).ToList();

                // TargetLinks is only touched here, outside the concurrent lambdas.
                foreach (var link in links)
                {
                    TargetLinks.Add(link);
                }

                // Phase 2: start one profile request per candidate username and score the response.
                var scoreTasks = links
                    .Where(link => link.usernames != null)
                    .SelectMany(link => link.usernames.Select(async username =>
                    {
                        try
                        {
                            // Get the source for this possible facebook match
                            string profileSource = await ExternalBrowser.CallExternalBrowser(username.ProfileLink);

                            // Parse and score the source
                            username.ConfidenceScore = RelationshipStuff.GetConfidenceLevel(SelectedPerson, profileSource);
                            username.UserModelLinkId = link.id;
                            username.FullName        = link.name;
                        }
                        catch (Exception err)
                        {
                            // Leave this candidate unscored and carry on with the others.
                            Console.WriteLine("Scoring failed for profile: " + username.ProfileLink + " (" + err.Message + ")");
                        }
                    }))
                    .ToList();

                await Task.WhenAll(scoreTasks);

                // As before, only report completion and sort when at least one candidate was processed.
                if (scoreTasks.Count > 0)
                {
                    Console.WriteLine("SCAN COMPLETED AT: " + DateTime.Now.ToString());
                    Sort();
                }
            }
            catch (Exception err)
            {
                // Best-effort scan: log and give up rather than crash the application.
                Console.WriteLine(err.Message);
            }
        }