/// <summary>
/// Parses every node in <paramref name="elements"/> with <c>ParseSingleNodeContent</c>
/// and collects the distinct results.
/// </summary>
/// <param name="elements">Nodes to parse; may be null or empty.</param>
/// <param name="details">Supplies the logger and is forwarded to <c>ParseSingleNodeContent</c>.</param>
/// <returns>
/// A new list holding the parsed values with duplicates removed; empty when no nodes were supplied.
/// </returns>
public static List<T> ParseContent<T>(HtmlNodeCollection elements, IParseDetails<T> details)
{
    var parsed = new List<T>();

    // Nothing selected: hand back the empty list without touching the logger.
    if (elements == null || elements.Count <= 0)
    {
        return parsed;
    }

    details.Logger.Debug("Selected node count: {0}", elements.Count);

    foreach (var node in elements)
    {
        try
        {
            parsed.Add(ParseSingleNodeContent(node, details));
        }
        catch (Exception failure)
        {
            // A node that fails to parse is logged and skipped so the remaining nodes are still processed.
            details.Logger.Error(failure.ToString());
        }
    }

    // Return a list of the results with the duplicates culled.
    return parsed.Distinct().ToList();
}
/// <summary>
/// Builds a validated URI from the attribute named by <c>details.AttributeName</c> on
/// <paramref name="element"/>.
/// </summary>
/// <param name="element">Node whose attribute holds the address.</param>
/// <param name="details">Supplies the attribute name and the logger.</param>
/// <param name="host">Scheme and host used to complete a value that starts with a single "/".</param>
/// <returns>A <c>DMR11.Core.Net.ValidatedUri</c> created from the (possibly completed) value.</returns>
/// <exception cref="ArgumentException">
/// The element does not carry the requested attribute, so there is nothing to build a URI from.
/// </exception>
public static Uri CreateUriFromElementAttributeValue<T>(HtmlNode element, IParseDetails<T> details, Uri host)
{
    var value = element.GetAttributeValue(details.AttributeName, null);
    details.Logger.Debug("Creating validated URI from \"{0}\"", value);

    // The attribute lookup falls back to null; fail with a descriptive error instead of a
    // NullReferenceException from the string checks below.
    if (value == null)
    {
        throw new ArgumentException(
            string.Concat("The element has no \"", details.AttributeName, "\" attribute to create a URI from."),
            "element");
    }

    // A single leading slash is completed with the host's scheme and host name; values that
    // start with "//" are passed through unchanged. The length check keeps a bare "/" from
    // indexing past the end of the string.
    var startsWithSingleSlash =
        value.StartsWith("/", StringComparison.Ordinal) &&
        (value.Length == 1 || value[1] != '/');

    if (startsWithSingleSlash)
    {
        value = string.Concat(host.Scheme, "://", host.Host, value);
    }

    return new DMR11.Core.Net.ValidatedUri(value);
}
/// <summary>
/// Reads a value from <paramref name="element"/>, optionally rewrites it through a
/// regex/replace pair, and converts the outcome with <paramref name="postParse"/>.
/// </summary>
/// <param name="element">Node the value is read from.</param>
/// <param name="details">Supplies the attribute name passed to <c>GetElementValue</c>.</param>
/// <param name="parseRegex">
/// Pattern matched against the trimmed value; only used when <paramref name="parseReplace"/> is also non-blank.
/// </param>
/// <param name="parseReplace">
/// Expression evaluated with <c>EvaluateVariable</c> against <paramref name="hostVariables"/> after a successful match.
/// </param>
/// <param name="postParse">Converts the final string into the result.</param>
/// <param name="hostVariables">
/// Receives one <c>regex__&lt;group&gt;</c> entry per regex group name on a successful match,
/// overwriting any existing entry with the same key.
/// </param>
/// <returns>
/// The converted value, or <c>default(T)</c> when the element value is blank or the pattern does not match.
/// </returns>
public static T SectionGenericParseAction<T>(HtmlNode element, IParseDetails<T> details, string parseRegex, string parseReplace, Func<string, T> postParse, Dictionary<string, string> hostVariables)
{
    var raw = GetElementValue(element, details.AttributeName);
    if (string.IsNullOrWhiteSpace(raw))
    {
        return default(T);
    }

    // Remove any surrounding whitespace or newlines from the value.
    var trimmed = raw.Trim();

    // Without both a pattern and a replacement the trimmed value is converted as-is.
    if (string.IsNullOrWhiteSpace(parseRegex) || string.IsNullOrWhiteSpace(parseReplace))
    {
        return postParse(trimmed);
    }

    var pattern = new Regex(parseRegex, RegexOptions.Singleline | RegexOptions.Compiled | RegexOptions.IgnoreCase);
    var found = pattern.Match(trimmed);
    if (!found.Success)
    {
        return default(T);
    }

    // Register group values.
    foreach (var groupName in pattern.GetGroupNames())
    {
        // Todo: Use section name as well before overwriting the original value.
        // > regex__pages_conflicting_name
        // > regex__page_conflicting_name
        var variableKey = string.Concat("regex__", groupName);
        var variableValue = found.Groups[groupName].Value;

        // The dictionary indexer adds the key when missing and overwrites it otherwise.
        hostVariables[variableKey] = variableValue;
    }

    return postParse(EvaluateVariable(parseReplace, hostVariables));
}
/// <summary>
/// Extracts a title from <paramref name="element"/>: its inner text when the configured
/// attribute name is the <c>$(__inner_text)</c> marker, otherwise the value of that attribute.
/// </summary>
/// <param name="element">Node the title is read from.</param>
/// <param name="parseDetails">Supplies the attribute name (or the inner-text marker).</param>
/// <returns>The inner text or attribute value; null when the named attribute is absent.</returns>
public string TitleParseAction(HtmlNode element, IParseDetails<string> parseDetails)
{
    // Todo: Replace with correct function call.
    // The marker is a fixed token rather than user-facing text, so it is compared ordinally:
    // a culture-sensitive ignore-case comparison can fail on the letter "i" under Turkish-style
    // casing rules.
    if (string.Equals(parseDetails.AttributeName, "$(__inner_text)", StringComparison.OrdinalIgnoreCase))
    {
        return element.InnerText;
    }

    return element.GetAttributeValue(parseDetails.AttributeName, null);
}
/// <summary>
/// Loads <paramref name="html"/> into a document, selects the nodes matching
/// <c>details.XPath</c> and parses them.
/// </summary>
/// <param name="html">Markup to load.</param>
/// <param name="details">Supplies the XPath and is forwarded to the node-collection overload.</param>
/// <returns>The list produced by the node-collection overload of <c>ParseContent</c>.</returns>
public static List<T> ParseContent<T>(string html, IParseDetails<T> details)
{
    var document = new HtmlDocument();
    document.LoadHtml(html);

    var selected = document.DocumentNode.SelectNodes(details.XPath);
    return ParseContent(selected, details);
}
/// <summary>
/// Creates a chapter from <paramref name="element"/>: the title is the node's inner text and
/// the address comes from <c>Parsing.CreateUriFromElementAttributeValue</c>.
/// </summary>
/// <param name="element">Node describing the chapter.</param>
/// <param name="parseDetails">
/// Parse details; cast to <c>Core.Helper.ChapterParseDetails</c> to read its host variables.
/// </param>
/// <returns>The new chapter with the parse details' host variables copied onto it.</returns>
public IChapter ChapterParseAction(HtmlNode element, IParseDetails<IChapter> parseDetails)
{
    var chapterUri = Parsing.CreateUriFromElementAttributeValue(element, parseDetails, Address);
    var result = new ChapterDistill(element.InnerText, chapterUri, this.HostData, Log);

    var chapterDetails = (Core.Helper.ChapterParseDetails)parseDetails;

    // Iterate over a snapshot of the source variables while writing into the chapter's collection.
    foreach (var variable in chapterDetails.HostVariables.ToList())
    {
        if (!result.HostVariables.ContainsKey(variable.Key))
        {
            result.HostVariables.Add(variable.Key, variable.Value);
        }
        else
        {
            result.HostVariables[variable.Key] = variable.Value;
        }
    }

    return result;
}
/// <summary>
/// Invokes <c>details.ParseAction</c> on <paramref name="element"/> and returns its result.
/// </summary>
/// <param name="element">Node handed to the parse action.</param>
/// <param name="details">Supplies the parse action and is passed to it as its second argument.</param>
/// <returns>The non-null value produced by the parse action.</returns>
/// <exception cref="ArgumentNullException">
/// No parse action is configured, or the parse action returned null.
/// </exception>
public static T ParseSingleNodeContent<T>(HtmlNode element, IParseDetails<T> details)
{
    if (details.ParseAction == null)
    {
        // Same exception type as before, now with a message so logged failures are diagnosable.
        throw new ArgumentNullException("details", "No parse action is configured on the supplied parse details.");
    }

    T parsed = details.ParseAction(element, details);
    if (parsed == null)
    {
        throw new ArgumentNullException("details", "The parse action returned null for the supplied element.");
    }

    return parsed;
}
/// <summary>
/// Instance convenience wrapper: forwards to <c>Parsing.GenericParseAction</c>, supplying this
/// object's <c>HostVariables</c>.
/// </summary>
/// <param name="element">Node to parse.</param>
/// <param name="details">Parse details forwarded unchanged.</param>
/// <param name="section">Host section forwarded unchanged.</param>
/// <param name="postParse">Conversion delegate forwarded unchanged.</param>
/// <returns>Whatever <c>Parsing.GenericParseAction</c> returns.</returns>
public T GenericParseAction<T>(HtmlNode element, IParseDetails<T> details, IHostSection section, Func<string, T> postParse)
{
    T parsed = Parsing.GenericParseAction(element, details, section, postParse, HostVariables);
    return parsed;
}
/// <summary>
/// Creates a decorator that wraps the supplied parse details.
/// </summary>
/// <param name="parseDetails">The parse details instance to decorate; stored as given.</param>
public ParseDetailsDecorator(IParseDetails<T> parseDetails)
{
    _decoratedParseDetails = parseDetails;
}
/// <summary>
/// Creates the decorator by passing <paramref name="parseDetails"/> to the base constructor.
/// </summary>
/// <param name="parseDetails">The parse details instance to decorate.</param>
public VariableParseDetailsDecorator(IParseDetails<T> parseDetails)
    : base(parseDetails)
{
}
/// <summary>
/// Instance convenience wrapper: forwards to <c>Parsing.SectionGenericParseAction</c>,
/// supplying this object's <c>HostVariables</c>.
/// </summary>
/// <param name="element">Node to parse.</param>
/// <param name="details">Parse details forwarded unchanged.</param>
/// <param name="parseRegex">Pattern forwarded unchanged.</param>
/// <param name="parseReplace">Replacement expression forwarded unchanged.</param>
/// <param name="postParse">Conversion delegate forwarded unchanged.</param>
/// <returns>Whatever <c>Parsing.SectionGenericParseAction</c> returns.</returns>
public T SectionGenericParseAction<T>(HtmlNode element, IParseDetails<T> details, string parseRegex, string parseReplace, Func<string, T> postParse)
{
    T parsed = Parsing.SectionGenericParseAction(element, details, parseRegex, parseReplace, postParse, HostVariables);
    return parsed;
}
/// <summary>
/// Creates a chapter for an already-known address; the title is obtained from
/// <c>ParseChapterTitle</c>.
/// </summary>
/// <param name="chapterUri">Address of the chapter.</param>
/// <param name="element">Node passed to <c>ParseChapterTitle</c>.</param>
/// <param name="parseDetails">
/// Parse details; cast to <c>Core.Helper.ChapterParseDetails</c> to read its host variables.
/// </param>
/// <param name="html">Optional markup passed to <c>ParseChapterTitle</c>; defaults to null.</param>
/// <returns>The new chapter with the parse details' host variables copied onto it.</returns>
public IChapter ChapterParseActionUriSupplied(Uri chapterUri, HtmlNode element, IParseDetails<IChapter> parseDetails, string html = null)
{
    var title = ParseChapterTitle(element, html);
    var result = new ChapterDistill(title, chapterUri, this.HostData, Log);

    var chapterDetails = (Core.Helper.ChapterParseDetails)parseDetails;

    // Iterate over a snapshot of the source variables while writing into the chapter's collection.
    foreach (var variable in chapterDetails.HostVariables.ToList())
    {
        if (!result.HostVariables.ContainsKey(variable.Key))
        {
            result.HostVariables.Add(variable.Key, variable.Value);
        }
        else
        {
            result.HostVariables[variable.Key] = variable.Value;
        }
    }

    return result;
}
/// <summary>
/// Selects the nodes matching <c>details.XPath</c> relative to <paramref name="element"/>
/// and parses them.
/// </summary>
/// <param name="element">Node the XPath is evaluated against.</param>
/// <param name="details">Supplies the XPath and is forwarded to <c>ParseContent</c>.</param>
/// <returns>The list produced by the node-collection overload of <c>ParseContent</c>.</returns>
public static List<T> ParseContentFromNode<T>(HtmlNode element, IParseDetails<T> details)
{
    return ParseContent(element.SelectNodes(details.XPath), details);
}
/// <summary>
/// Parses chapters out of <paramref name="html"/> by delegating to <c>ParseContent</c>.
/// </summary>
/// <param name="html">Markup to parse.</param>
/// <param name="details">Chapter parse details forwarded unchanged.</param>
/// <returns>The chapters returned by <c>ParseContent</c>.</returns>
public static List<IChapter> ParseChapters(string html, IParseDetails<IChapter> details)
{
    List<IChapter> chapters = ParseContent(html, details);
    return chapters;
}
/// <summary>
/// Parses addresses out of <paramref name="html"/> by delegating to <c>ParseContent</c>.
/// </summary>
/// <param name="html">Markup to parse.</param>
/// <param name="details">URI parse details forwarded unchanged.</param>
/// <returns>The URIs returned by <c>ParseContent</c>.</returns>
public static List<Uri> ParseAddresses(string html, IParseDetails<Uri> details)
{
    List<Uri> addresses = ParseContent(html, details);
    return addresses;
}
// TODO: Refactor into a more palatable method, please...
/// <summary>
/// Unpacks the regex and replacement from <paramref name="section"/> and forwards everything to
/// <c>SectionGenericParseAction</c>.
/// </summary>
/// <param name="element">Node to parse.</param>
/// <param name="details">Parse details forwarded unchanged.</param>
/// <param name="section">Supplies <c>ParseRegex</c> and <c>ParseReplace</c>.</param>
/// <param name="postParse">Conversion delegate forwarded unchanged.</param>
/// <param name="hostVariables">Variable dictionary forwarded unchanged.</param>
/// <returns>Whatever <c>SectionGenericParseAction</c> returns.</returns>
public static T GenericParseAction<T>(HtmlNode element, IParseDetails<T> details, IHostSection section, Func<string, T> postParse, Dictionary<string, string> hostVariables)
{
    var sectionRegex = section.ParseRegex;
    var sectionReplace = section.ParseReplace;

    return SectionGenericParseAction(element, details, sectionRegex, sectionReplace, postParse, hostVariables);
}