/// <summary>
/// Determines the page type of <paramref name="html"/> and, when it is a list page,
/// extracts one key/value dictionary per item found inside every matched selection.
/// </summary>
/// <param name="html">Raw HTML of the page to process.</param>
/// <param name="regexPatterns">All configured patterns; the ones relevant to the detected page type are picked by <c>ParentId</c> and <c>Type</c>.</param>
/// <returns>
/// <c>null</c> when no page type could be matched; otherwise the extracted rows
/// (empty when the page type is not <c>RegexType.List</c> or nothing matched).
/// </returns>
public List<Dictionary<string, string>> ProcessHtml(string html, List<RegexPattern> regexPatterns)
{
    RegexInfo pageTypeMatch = GetPageType(html, regexPatterns, 0);
    if (pageTypeMatch == null)
    {
        // Kept as null (not an empty list) so existing callers can still tell "unknown page" apart.
        return null;
    }

    var results = new List<Dictionary<string, string>>();

    // Only list pages are handled here; the dispatch by page type could use reflection instead.
    if (!pageTypeMatch.RegexType.Equals(RegexType.List))
    {
        return results;
    }

    var selectionRegexes = regexPatterns
        .Where(r => r.Type.Equals(RegexType.Selection) && r.ParentId.Equals(pageTypeMatch.RegexId))
        .ToList();

    var selections = GetMatchedResults(html, selectionRegexes);
    if (selections == null)
    {
        // ListPageProcessor already treats a null result from GetMatchedResults as "no matches"; do the same here.
        return results;
    }

    foreach (var selection in selections)
    {
        var rows = ListPageProcessor(selection.Value, regexPatterns, pageTypeMatch);
        if (rows != null)
        {
            // AddRange throws on a null argument, so skip selections that yielded nothing.
            results.AddRange(rows);
        }
    }

    return results;
}
/// <summary>
/// Extracts rows from a list-page fragment: every item matched by the page type's
/// <c>RegexType.Item</c> patterns is run through the <c>RegexType.Detail</c> patterns of each
/// item pattern, and the resulting key/value pairs are merged with the page-level
/// <c>RegexType.Global</c> matches into one dictionary per item.
/// </summary>
/// <param name="html">HTML fragment to scan for items.</param>
/// <param name="regexPatterns">All configured patterns; filtered here by <c>Type</c> and <c>ParentId</c>.</param>
/// <param name="pageTypeMatch">The matched page type whose <c>RegexId</c> is the parent of the global and item patterns.</param>
/// <returns>One dictionary per item that produced at least one detail match; empty when nothing matched.</returns>
public List<Dictionary<string, string>> ListPageProcessor(string html, List<RegexPattern> regexPatterns, RegexInfo pageTypeMatch)
{
    var results = new List<Dictionary<string, string>>();

    // Page-level values that are appended to every extracted row.
    var globalResults = new List<KeyValuePair<string, string>>();
    var globalRegexes = regexPatterns
        .Where(r => r.Type.Equals(RegexType.Global) && r.ParentId.Equals(pageTypeMatch.RegexId))
        .ToList();
    if (globalRegexes.Count > 0)
    {
        var matchedGlobals = GetMatchedResults(html, globalRegexes);
        if (matchedGlobals != null)
        {
            globalResults = matchedGlobals;
        }
    }

    var itemRegexes = regexPatterns
        .Where(r => r.ParentId.Equals(pageTypeMatch.RegexId) && r.Type.Equals(RegexType.Item))
        .ToList();

    var items = GetMatchedResults(html, itemRegexes);
    if (items == null)
    {
        // A null result from GetMatchedResults is treated as "no matches", same as for the detail matches below.
        return results;
    }

    foreach (var itemRegex in itemRegexes)
    {
        var detailRegexes = regexPatterns
            .Where(r => r.Type.Equals(RegexType.Detail) && r.ParentId.Equals(itemRegex.Id))
            .ToList();

        foreach (var item in items)
        {
            var itemDetails = GetMatchedResults(item.Value, detailRegexes);
            if (itemDetails == null || itemDetails.Count == 0)
            {
                continue;
            }

            // Item details come first so that, on duplicate keys, they win over the global values.
            // Concat is used instead of AddRange so the list returned by GetMatchedResults is not mutated.
            var row = itemDetails
                .Concat(globalResults)
                .GroupBy(pair => pair.Key)
                .Select(group => group.First())
                .ToDictionary(pair => pair.Key, pair => pair.Value);

            results.Add(row);
        }
    }

    return results;
}