예제 #1
0
        /// <summary>
        /// Extracts rows of key/value data from <paramref name="html"/> using the supplied patterns.
        /// The page type is resolved first via <c>GetPageType</c>; only pages whose type is
        /// <c>RegexType.List</c> produce rows here.
        /// </summary>
        /// <param name="html">Markup handed to the page-type detection and to the selection patterns.</param>
        /// <param name="regexPatterns">Full pattern set; filtered in this method by type and parent id.</param>
        /// <returns>
        /// <c>null</c> when no page type is detected; otherwise a list (possibly empty) holding
        /// one dictionary per row returned by <c>ListPageProcessor</c>.
        /// </returns>
        public List <Dictionary <string, string> > ProcessHtml(string html, List <RegexPattern> regexPatterns)
        {
            RegexInfo pageTypeMatch = GetPageType(html, regexPatterns, 0);

            // Kept as-is for backward compatibility: callers may test for null to detect "unknown page".
            if (pageTypeMatch == null)
            {
                return(null);
            }

            var results = new List <Dictionary <string, string> >();

            // Only list pages are handled; every other page type yields an empty result.
            // (Original author's note: this dispatch "can use reflection".)
            if (!pageTypeMatch.RegexType.Equals(RegexType.List))
            {
                return(results);
            }

            // Selection patterns that belong to the detected page type.
            var selectionRegexs = regexPatterns.Where(r => r.Type.Equals(RegexType.Selection) && r.ParentId.Equals(pageTypeMatch.RegexId)).ToList();
            var selections      = GetMatchedResults(html, selectionRegexs);

            // GetMatchedResults is null-checked elsewhere in this file, so treat null as "no selections".
            if (selections == null)
            {
                return(results);
            }

            foreach (var selection in selections)
            {
                var rows = ListPageProcessor(selection.Value, regexPatterns, pageTypeMatch);

                // AddRange throws ArgumentNullException on a null collection; skip such results.
                if (rows != null)
                {
                    results.AddRange(rows);
                }
            }
            return(results);
        }
예제 #2
0
        /// <summary>
        /// Builds one dictionary per matched item in a list-page fragment.
        /// Item patterns (children of <paramref name="pageTypeMatch"/>) are matched against
        /// <paramref name="html"/>; each item's text is then run through the detail patterns
        /// that are children of each item pattern. Matches from the page's global patterns
        /// are appended to every item's details.
        /// </summary>
        /// <param name="html">Markup fragment to extract items from.</param>
        /// <param name="regexPatterns">Full pattern set; filtered in this method by type and parent id.</param>
        /// <param name="pageTypeMatch">Detected page type whose <c>RegexId</c> selects the global and item patterns.</param>
        /// <returns>
        /// A list (never null) of dictionaries. When a key occurs more than once, the first
        /// occurrence wins; item details precede global matches, so they take priority.
        /// </returns>
        public List <Dictionary <string, string> > ListPageProcessor(string html, List <RegexPattern> regexPatterns, RegexInfo pageTypeMatch)
        {
            var results       = new List <Dictionary <string, string> >();
            var globalResults = new List <KeyValuePair <string, string> >();

            // ToList() never returns null, so only the emptiness check is meaningful.
            var globalRegexes = regexPatterns.Where(r => r.Type.Equals(RegexType.Global) && r.ParentId.Equals(pageTypeMatch.RegexId)).ToList();

            if (globalRegexes.Count > 0)
            {
                var matchedGlobals = GetMatchedResults(html, globalRegexes);

                // Keep the empty default if the helper yields null, so the Count check below is safe.
                if (matchedGlobals != null)
                {
                    globalResults = matchedGlobals;
                }
            }

            var regexes = regexPatterns.Where(r => r.ParentId.Equals(pageTypeMatch.RegexId) && r.Type.Equals(RegexType.Item)).ToList();

            var items = GetMatchedResults(html, regexes);

            // Nothing to iterate when the helper reports no item matches.
            if (items == null)
            {
                return(results);
            }

            foreach (var regex in regexes)
            {
                // Detail patterns attached to this particular item pattern.
                var detailRegexes = regexPatterns.Where(r => r.Type.Equals(RegexType.Detail) && r.ParentId.Equals(regex.Id)).ToList();

                foreach (var item in items)
                {
                    var itemDetails = GetMatchedResults(item.Value, detailRegexes);
                    if (itemDetails == null || itemDetails.Count == 0)
                    {
                        continue;
                    }

                    if (globalResults.Count > 0)
                    {
                        // Appended after the item's own details so those win on duplicate keys.
                        itemDetails.AddRange(globalResults);
                    }

                    // Collapse duplicate keys (first occurrence kept) before building the dictionary,
                    // since ToDictionary throws on repeated keys.
                    results.Add(itemDetails.GroupBy(f => f.Key).Select(g => g.First()).ToDictionary(x => x.Key, x => x.Value));
                }
            }
            return(results);
        }