// Applies the criteria's pattern to its data and collects the matched text.
// Without parts, every full match is returned as-is. With parts, each part's
// pattern is run against the matched text and the whole text of that part's
// first hit (group 0) is collected instead of the parent match.
public List<string> Scrape(ScrapeCriteria scrapeCriteria)
        {
            var results = new List<string>();

            foreach (Match hit in Regex.Matches(scrapeCriteria.Data, scrapeCriteria.Regex, scrapeCriteria.RegexOption))
            {
                string hitText = hit.Value; // same text as hit.Groups[0].Value

                // Nothing to drill into: keep the full match and move on
                if (!scrapeCriteria.Parts.Any())
                {
                    results.Add(hitText);
                    continue;
                }

                // Narrow the parent match down with each part's own pattern
                foreach (var part in scrapeCriteria.Parts)
                {
                    Match partHit = Regex.Match(hitText, part.Regex, part.RegexOption);

                    if (partHit.Success)
                    {
                        results.Add(partHit.Value);
                    }
                }
            }

            return results;
        }
        // Matches the criteria's pattern against its data. If the criteria has
        // parts, capture group 1 of each part's first hit inside a match is
        // collected; otherwise the full match text is collected.
        public List<string> Scrape(ScrapeCriteria scrapeCriteria)
        {
            var collected = new List<string>();

            foreach (Match found in Regex.Matches(scrapeCriteria.Data, scrapeCriteria.Regex, scrapeCriteria.RegexOption))
            {
                string wholeMatch = found.Groups[0].Value;

                if (scrapeCriteria.Parts.Count != 0)
                {
                    // Distill the match through every part pattern
                    foreach (ScrapeCriteriaPart part in scrapeCriteria.Parts)
                    {
                        Match partResult = Regex.Match(wholeMatch, part.Regex, part.RegexOption);

                        if (!partResult.Success)
                        {
                            continue;
                        }

                        collected.Add(partResult.Groups[1].Value);
                    }
                }
                else
                {
                    // No parts configured, so the match itself is the result
                    collected.Add(wholeMatch);
                }
            }

            return collected;
        }
Exemple #3
0
        // Returns the text scraped from scrapeCriteria.Data: the full match when
        // no parts are configured, otherwise capture group 1 of every part
        // pattern that succeeds against the match.
        public List<string> Scrape(ScrapeCriteria scrapeCriteria)
        {
            var output = new List<string>();
            MatchCollection topLevel = Regex.Matches(scrapeCriteria.Data, scrapeCriteria.Regex, scrapeCriteria.RegexOption);

            foreach (Match current in topLevel)
            {
                if (scrapeCriteria.Parts.Any())
                {
                    // Go one level deeper: run each part pattern on the matched text
                    output.AddRange(
                        scrapeCriteria.Parts
                            .Select(part => Regex.Match(current.Groups[0].Value, part.Regex, part.RegexOption))
                            .Where(inner => inner.Success)
                            .Select(inner => inner.Groups[1].Value));
                }
                else
                {
                    // No deeper level requested
                    output.Add(current.Groups[0].Value);
                }
            }

            return output;
        }
Exemple #4
0
        // Runs the criteria's regex over its data and gathers results: whole
        // matches when there are no parts, or group 1 of each successful part
        // match when there are.
        public List<string> Scrape(ScrapeCriteria scrapeCriteria)
        {
            var gathered = new List<string>();
            var allMatches = Regex.Matches(scrapeCriteria.Data, scrapeCriteria.Regex, scrapeCriteria.RegexOption);

            foreach (Match outer in allMatches)
            {
                string outerText = outer.Value; // equivalent to outer.Groups[0].Value

                if (!scrapeCriteria.Parts.Any())
                {
                    gathered.Add(outerText);
                    continue;
                }

                foreach (var part in scrapeCriteria.Parts)
                {
                    Match inner = Regex.Match(outerText, part.Regex, part.RegexOption);

                    if (!inner.Success)
                    {
                        continue;
                    }

                    gathered.Add(inner.Groups[1].Value);
                }
            }

            return gathered;
        }
        // Scrapes the criteria's data with its regex and returns the scraped strings.
        // Full matches are returned when the criteria has no parts; otherwise
        // capture group 1 of each part pattern that matches is returned.
        public List<string> Scrape(ScrapeCriteria ScrapeCriteria)
        {
            // Local alias so the parameter is not confused with the type of the same name
            var criteria = ScrapeCriteria;
            var results = new List<string>();

            foreach (Match hit in Regex.Matches(criteria.Data, criteria.Regex, criteria.RegExOption))
            {
                if (criteria.Parts.Any())
                {
                    // Parts specified: extract group 1 of each part from the matched text
                    foreach (var part in criteria.Parts)
                    {
                        Match partHit = Regex.Match(hit.Groups[0].Value, part.Regex, part.RegexOption);

                        if (partHit.Success)
                        {
                            results.Add(partHit.Groups[1].Value);
                        }
                    }
                }
                else
                {
                    // No parts specified: the matched text itself is the result
                    results.Add(hit.Groups[0].Value);
                }
            }

            return results;
        }
Exemple #6
0
        // Scrapes scrapeCriteria.Data in up to two levels: the first level is the
        // criteria's own pattern, the optional second level applies every part
        // pattern to the first-level match and keeps capture group 1.
        public List<string> Scrape(ScrapeCriteria scrapeCriteria)
        {
            var elements = new List<string>();
            MatchCollection firstLevel = Regex.Matches(scrapeCriteria.Data, scrapeCriteria.Regex, scrapeCriteria.RegexOption);

            foreach (Match levelOne in firstLevel)
            {
                // Groups[0] holds the complete text of the first-level match
                string levelOneText = levelOne.Groups[0].Value;
                bool drillDown = scrapeCriteria.Parts.Any();

                if (drillDown)
                {
                    foreach (var part in scrapeCriteria.Parts)
                    {
                        Match levelTwo = Regex.Match(levelOneText, part.Regex, part.RegexOption);

                        if (levelTwo.Success)
                        {
                            elements.Add(levelTwo.Groups[1].Value);
                        }
                    }
                }
                else
                {
                    elements.Add(levelOneText);
                }
            }

            return elements;
        }
Exemple #7
0
        // Scrapes by regular-expression matching.
        // Takes one ScrapeCriteria and returns the list of scraped strings.
        // Matching is done with the Regex engine.

        public List<string> Scrape(ScrapeCriteria scrapeCriteria)
        {
            var scraped = new List<string>();

            // Regex.Matches receives the data, the pattern and the options from the criteria
            foreach (Match topMatch in Regex.Matches(scrapeCriteria.Data, scrapeCriteria.Regex, scrapeCriteria.RegexOption))
            {
                string topText = topMatch.Groups[0].Value;

                // A criteria without parts contributes the match text directly
                if (!scrapeCriteria.Parts.Any())
                {
                    scraped.Add(topText);
                    continue;
                }

                // Otherwise each part pattern is matched against the match text
                // and capture group 1 of a successful part match is added
                foreach (var part in scrapeCriteria.Parts)
                {
                    Match subMatch = Regex.Match(topText, part.Regex, part.Regexoption);

                    if (subMatch.Success)
                    {
                        scraped.Add(subMatch.Groups[1].Value);
                    }
                }
            }

            // The completed list
            return scraped;
        }
Exemple #8
0
        // Collects scraped strings for the criteria: full matches when it has no
        // parts, otherwise capture group 1 of each part pattern that matches.
        public List<string> Scrape(ScrapeCriteria scrapeCriteria)
        {
            var found = new List<string>();
            var matches = Regex.Matches(scrapeCriteria.Data, scrapeCriteria.Regex, scrapeCriteria.RegexOption);

            foreach (Match candidate in matches)
            {
                if (scrapeCriteria.Parts.Any())
                {
                    foreach (var part in scrapeCriteria.Parts)
                    {
                        var partMatch = Regex.Match(candidate.Value, part.Regex, part.RegexOptions);

                        if (!partMatch.Success)
                        {
                            continue;
                        }

                        // Example of what the groups hold for a link-text pattern:
                        //   Groups[0] -> ">2007 Toyota Highlander very low miles</a>"  (entire part match)
                        //   Groups[1] -> "2007 Toyota Highlander very low miles"       (captured text, kept)
                        found.Add(partMatch.Groups[1].Value);
                    }
                }
                else
                {
                    found.Add(candidate.Value); // candidate.Value is the text of Groups[0]
                }
            }

            return found;
        }
Exemple #9
0
        // Searches scrapeCriteria.Data for the criteria's pattern (with its
        // options) and returns either the full matches or, when parts are
        // configured, capture group 1 of every part pattern that matches.
        public List<string> Scrape(ScrapeCriteria scrapeCriteria)
        {
            var scraped = new List<string>();

            var dataMatches = Regex.Matches(
                scrapeCriteria.Data,
                scrapeCriteria.Regex,
                scrapeCriteria.RegexOption);

            foreach (Match dataMatch in dataMatches)
            {
                if (scrapeCriteria.Parts.Any())
                {
                    // Keep only the captured portion of each part that matches
                    scraped.AddRange(
                        from part in scrapeCriteria.Parts
                        let partMatch = Regex.Match(dataMatch.Groups[0].Value, part.Regex, part.RegexOption)
                        where partMatch.Success
                        select partMatch.Groups[1].Value);
                }
                else
                {
                    scraped.Add(dataMatch.Groups[0].Value);
                }
            }

            return scraped;
        }
        // Creates a ScrapeCriteria populated from the builder's current
        // _data, _regex, _regexOptions and _parts fields.
        public ScrapeCriteria Build()
        {
            return new ScrapeCriteria
            {
                Data        = _data,
                Regex       = _regex,
                RegexOption = _regexOptions,
                Parts       = _parts
            };
        }
Exemple #11
0
        // Produces a new ScrapeCriteria carrying the data, regex, regexOption
        // and parts values currently held by this builder.
        public ScrapeCriteria Build()
        {
            var built = new ScrapeCriteria
            {
                Data        = data,
                Regex       = regex,
                RegexOption = regexOption,
                Parts       = parts
            };

            return built;
        }
Exemple #12
0
        // Scrape entry point that returns a list of scraped strings.
        // The scraping logic has not been written yet, so the criteria is not
        // read and the returned list is always empty.
        public List<string> Scrape(ScrapeCriteria scrapeCriteria)
        {
            // List that will hold the scraped elements
            var results = new List<string>();

            // TODO: body of the scraping operation goes here

            return results;
        }
        // Returns a new ScrapeCriteria initialized from the builder's
        // _data, _regex, _regexOption and _parts fields.
        public ScrapeCriteria Build()
        {
            return new ScrapeCriteria
            {
                Data = _data,
                Regex = _regex,
                RegexOptions = _regexOption,
                parts = _parts
            };
        }
        // Assembles the ScrapeCriteria described by the builder's current
        // _data, _regex, _regexOption and _parts fields.
        public ScrapeCriteria Build()
        {
            ScrapeCriteria criteria = new ScrapeCriteria { Data = _data, Regex = _regex, RegexOption = _regexOption, Parts = _parts };
            return criteria;
        }
Exemple #15
0
        // Downloads a YouTube page for the given query and turns the regex hits
        // on that page into VideoItem results.
        //
        // searchQuery: a query without spaces whose length is not 11 is handed to
        //              urlBuilder to produce the URL; anything else is sent to the
        //              YouTube search results page.
        //              NOTE(review): 11 is presumably the length of a video id - confirm.
        // Returns:     one VideoItem per hit whose capture group 1 is exactly 11
        //              characters long, built from (group 2, group 1) in that order.
        // Throws:      whatever WebClient.DownloadString throws (e.g. WebException);
        //              the exception propagates with its original stack trace.
        public static List <VideoItem> scrapeYoutube(String searchQuery)
        {
            List <VideoItem> results = new List <VideoItem>();

            using (WebClient webClient = new WebClient())
            {
                string url;
                if (!searchQuery.Contains(' ') && searchQuery.Length != 11)
                {
                    url = urlBuilder(searchQuery);
                }
                else
                {
                    // Escape the query so characters such as '#', '&' or '+' stay part
                    // of the search term instead of truncating or altering the URL.
                    url = "https://www.youtube.com/results?search_query=" + Uri.EscapeDataString(searchQuery);
                }

                // Deliberately no try/catch here: the previous `catch (Exception ex) { throw ex; }`
                // added nothing and reset the stack trace of the download failure.
                string data = webClient.DownloadString(url);

                ScrapeCriteria  scrapeCriteria = new ScrapeCriteria(data, Constants.pattern);
                MatchCollection matches        = scraper.scrape(scrapeCriteria);
                foreach (Match element in matches)
                {
                    // Re-match the hit text in Singleline mode to read its capture groups
                    Match match = Regex.Match(element.Groups[0].Value, Constants.pattern, RegexOptions.Singleline);

                    // Only hits whose group 1 is 11 characters long become results
                    if (match.Success && match.Groups[1].Value.Length == 11)
                    {
                        results.Add(new VideoItem(match.Groups[2].Value, match.Groups[1].Value));
                    }
                }
            }
            return(results);
        }
        // Appends the scrape result(s) for a single match to scrappedElements:
        // the full match text when the criteria has no parts, otherwise capture
        // group 1 of every part pattern that matches inside the match text.
        private void ScrapeByRegex(ScrapeCriteria scrapeCriteria, List<string> scrappedElements, Match match)
        {
            string matchText = match.Groups[0].Value;

            if (!scrapeCriteria.Parts.Any())
            {
                scrappedElements.Add(matchText);
                return;
            }

            foreach (var part in scrapeCriteria.Parts)
            {
                Match partMatch = Regex.Match(matchText, part.Regex, part.RegexOptions);

                if (!partMatch.Success)
                {
                    continue;
                }

                scrappedElements.Add(partMatch.Groups[1].Value);
            }
        }
        // Matches the criteria's pattern against its data and lets ScrapeByRegex
        // add the result(s) of every match to the returned list. Any exception
        // has its message passed to Logger.Error and is then rethrown unchanged.
        public List<string> Scrape(ScrapeCriteria scrapeCriteria)
        {
            try
            {
                List<string> results = new List<string>();

                foreach (Match hit in Regex.Matches(scrapeCriteria.Data, scrapeCriteria.Regex, scrapeCriteria.RegexOptions))
                {
                    ScrapeByRegex(scrapeCriteria, results, hit);
                }

                return results;
            }
            catch (Exception failure)
            {
                Logger.Error(failure.Message);
                throw; // bare throw keeps the original stack trace
            }
        }
Exemple #18
0
 // Returns every match of the criteria's pattern within its data, using the
 // criteria's regex options.
 public MatchCollection scrape(ScrapeCriteria scrapeCriteria)
 {
     MatchCollection allMatches = Regex.Matches(scrapeCriteria.Data, scrapeCriteria.Regex, scrapeCriteria.regexOptions);

     return allMatches;
 }