Exemplo n.º 1
0
        /// <summary>
        /// Parses every node in <paramref name="elements"/> and returns the distinct results.
        /// </summary>
        /// <param name="elements">The selected nodes; may be null or empty.</param>
        /// <param name="details">Supplies the logger and is forwarded to <c>ParseSingleNodeContent</c>.</param>
        /// <returns>
        /// A new list holding the parsed values with duplicates removed. The list is empty
        /// when no nodes were supplied.
        /// </returns>
        public static List <T> ParseContent <T>(HtmlNodeCollection elements, IParseDetails <T> details)
        {
            // Nothing selected: hand back an empty list without touching the logger.
            if (elements == null || elements.Count <= 0)
            {
                return new List <T>();
            }

            details.Logger.Debug("Selected node count: {0}", elements.Count);

            var parsed = new List <T>();

            foreach (var node in elements)
            {
                try
                {
                    parsed.Add(ParseSingleNodeContent(node, details));
                }
                catch (Exception failure)
                {
                    // A node that fails to parse is logged and skipped so the remaining nodes are still processed.
                    details.Logger.Error(failure.ToString());
                }
            }

            // Cull duplicates before returning.
            return parsed.Distinct().ToList();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Reads the attribute named by <c>details.AttributeName</c> from <paramref name="element"/>
        /// and wraps its value in a <c>DMR11.Core.Net.ValidatedUri</c>.
        /// </summary>
        /// <remarks>
        /// A root-relative value (a single leading '/', including the bare "/") is prefixed with the
        /// scheme and host of <paramref name="host"/>. Values that start with "//" and all other values
        /// are passed to the validated URI unchanged.
        /// </remarks>
        /// <param name="element">The node whose attribute is read.</param>
        /// <param name="details">Supplies the attribute name and the logger.</param>
        /// <param name="host">The address used to complete root-relative values.</param>
        /// <returns>The validated URI built from the attribute value.</returns>
        /// <exception cref="ArgumentException">
        /// The attribute lookup returned null (the fallback value passed to <c>GetAttributeValue</c>).
        /// </exception>
        public static Uri CreateUriFromElementAttributeValue <T>(HtmlNode element, IParseDetails <T> details, Uri host)
        {
            var value = element.GetAttributeValue(details.AttributeName, null);

            details.Logger.Debug("Creating validated URI from \"{0}\"", value);

            if (value == null)
            {
                // Previously this surfaced as a NullReferenceException on StartsWith; fail with a message instead.
                throw new ArgumentException(
                    string.Format("The element has no value for attribute \"{0}\".", details.AttributeName),
                    "element");
            }

            // Root-relative means one leading slash that is not followed by a second slash.
            // The length check keeps the bare "/" from indexing past the end of the string.
            var isRootRelative = value.Length > 0 &&
                                 value[0] == '/' &&
                                 (value.Length == 1 || value[1] != '/');

            if (isRootRelative)
            {
                // NOTE(review): host.Host omits any non-default port — confirm whether host.Authority is intended.
                value = string.Concat(host.Scheme, "://", host.Host, value);
            }

            return(new DMR11.Core.Net.ValidatedUri(value));
        }
Exemplo n.º 3
0
        /// <summary>
        /// Extracts a value from <paramref name="element"/>, optionally runs it through a regex/replace
        /// rule, and converts the outcome with <paramref name="postParse"/>.
        /// </summary>
        /// <param name="element">The node the value is read from via <c>GetElementValue</c>.</param>
        /// <param name="details">Supplies the attribute name handed to <c>GetElementValue</c>.</param>
        /// <param name="parseRegex">Pattern applied to the trimmed value; the rule is skipped when blank.</param>
        /// <param name="parseReplace">Template evaluated via <c>EvaluateVariable</c>; the rule is skipped when blank.</param>
        /// <param name="postParse">Converts the final string into the result.</param>
        /// <param name="hostVariables">Receives one "regex__&lt;group&gt;" entry per regex group on a successful match.</param>
        /// <returns>
        /// The converted value, or <c>default(T)</c> when the extracted value is blank or the
        /// pattern does not match.
        /// </returns>
        public static T SectionGenericParseAction <T>(HtmlNode element, IParseDetails <T> details, string parseRegex, string parseReplace, Func <string, T> postParse, Dictionary <string, string> hostVariables)
        {
            var raw = GetElementValue(element, details.AttributeName);

            if (string.IsNullOrWhiteSpace(raw))
            {
                return default(T);
            }

            // Remove any surrounding whitespace or newlines from the value.
            var trimmed = raw.Trim();

            // The regex rule only applies when both the pattern and the replacement are present.
            var hasRule = !string.IsNullOrWhiteSpace(parseRegex) &&
                          !string.IsNullOrWhiteSpace(parseReplace);

            if (!hasRule)
            {
                return postParse(trimmed);
            }

            var pattern = new Regex(parseRegex, RegexOptions.Singleline | RegexOptions.Compiled | RegexOptions.IgnoreCase);
            var hit     = pattern.Match(trimmed);

            if (!hit.Success)
            {
                return default(T);
            }

            // Register group values, overwriting any earlier value stored under the same key.
            foreach (var groupName in pattern.GetGroupNames())
            {
                // Todo: Use section name as well before overwriting the original value.
                // > regex__pages_conflicting_name
                // > regex__page_conflicting_name
                hostVariables["regex__" + groupName] = hit.Groups[groupName].Value;
            }

            return postParse(EvaluateVariable(parseReplace, hostVariables));
        }
Exemplo n.º 4
0
        /// <summary>
        /// Reads the title from <paramref name="element"/>.
        /// </summary>
        /// <param name="element">The node that carries the title.</param>
        /// <param name="parseDetails">
        /// Supplies the attribute name. The special token "$(__inner_text)" (matched case-insensitively)
        /// selects the node's inner text instead of an attribute.
        /// </param>
        /// <returns>
        /// The inner text, or the attribute value; the attribute lookup is given null as its fallback.
        /// </returns>
        public string TitleParseAction(HtmlNode element, IParseDetails <string> parseDetails)
        {
            // Todo: Replace with correct function call.
            // The token is an identifier, not linguistic text, so it is compared ordinally. A culture-sensitive
            // comparison can fail to match 'i'/'I' under cultures such as tr-TR.
            if (string.Equals(parseDetails.AttributeName, "$(__inner_text)", StringComparison.OrdinalIgnoreCase))
            {
                return element.InnerText;
            }

            return element.GetAttributeValue(parseDetails.AttributeName, null);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Loads <paramref name="html"/> into a document, selects the nodes matching
        /// <c>details.XPath</c>, and parses them.
        /// </summary>
        /// <param name="html">The markup to load.</param>
        /// <param name="details">Supplies the XPath and is forwarded to the node-collection overload.</param>
        /// <returns>The list produced by the node-collection overload of <c>ParseContent</c>.</returns>
        public static List <T> ParseContent <T>(string html, IParseDetails <T> details)
        {
            var document = new HtmlDocument();
            document.LoadHtml(html);

            var selected = document.DocumentNode.SelectNodes(details.XPath);

            return ParseContent(selected, details);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Builds a chapter from <paramref name="element"/>: the URI comes from the element's attribute
        /// and the chapter title from its inner text.
        /// </summary>
        /// <param name="element">The node describing the chapter.</param>
        /// <param name="parseDetails">
        /// Parse details; cast to <c>Core.Helper.ChapterParseDetails</c> to read its host variables.
        /// </param>
        /// <returns>The new chapter with the host variables copied onto it.</returns>
        public IChapter ChapterParseAction(HtmlNode element, IParseDetails <IChapter> parseDetails)
        {
            var chapterUri = Parsing.CreateUriFromElementAttributeValue(element, parseDetails, Address);
            var chapter    = new ChapterDistill(element.InnerText, chapterUri, this.HostData, Log);

            var chapterDetails = (Core.Helper.ChapterParseDetails)parseDetails;

            // Iterate over a snapshot and add-or-overwrite each variable on the chapter.
            foreach (var variable in chapterDetails.HostVariables.ToList())
            {
                chapter.HostVariables[variable.Key] = variable.Value;
            }

            return chapter;
        }
Exemplo n.º 7
0
        /// <summary>
        /// Runs <c>details.ParseAction</c> on a single node and returns its result.
        /// </summary>
        /// <param name="element">The node handed to the parse action.</param>
        /// <param name="details">Supplies the parse action and is passed back into it.</param>
        /// <returns>The non-null value produced by the parse action.</returns>
        /// <exception cref="ArgumentNullException">
        /// <c>details.ParseAction</c> is not set, or the parse action returned null.
        /// </exception>
        public static T ParseSingleNodeContent <T>(HtmlNode element, IParseDetails <T> details)
        {
            if (details.ParseAction == null)
            {
                throw new ArgumentNullException("details", "IParseDetails.ParseAction must be set before a node can be parsed.");
            }

            T obj = details.ParseAction(element, details);

            if (obj == null)
            {
                // The exception type is kept for existing callers; the message tells this case apart from a missing action.
                throw new ArgumentNullException(null, "The parse action returned null for the supplied node.");
            }

            return(obj);
        }
Exemplo n.º 8
0
 /// <summary>
 /// Instance convenience wrapper that forwards to <c>Parsing.GenericParseAction</c>,
 /// supplying this instance's <c>HostVariables</c>.
 /// </summary>
 /// <param name="element">The node to parse.</param>
 /// <param name="details">Parse details forwarded unchanged.</param>
 /// <param name="section">Host section forwarded unchanged.</param>
 /// <param name="postParse">Converter forwarded unchanged.</param>
 /// <returns>Whatever <c>Parsing.GenericParseAction</c> returns.</returns>
 public T GenericParseAction <T>(HtmlNode element, IParseDetails <T> details, IHostSection section, Func <string, T> postParse)
 {
     T parsed = Parsing.GenericParseAction(element, details, section, postParse, HostVariables);

     return parsed;
 }
Exemplo n.º 9
0
 /// <summary>
 /// Creates a decorator around the supplied parse details.
 /// </summary>
 /// <param name="parseDetails">The instance stored as the decorated parse details.</param>
 public ParseDetailsDecorator(IParseDetails <T> parseDetails)
 {
     _decoratedParseDetails = parseDetails;
 }
Exemplo n.º 10
0
 /// <summary>
 /// Creates the decorator; the supplied parse details are handed straight to the base constructor.
 /// </summary>
 /// <param name="parseDetails">The parse details passed to the base constructor.</param>
 public VariableParseDetailsDecorator(IParseDetails <T> parseDetails)
     : base(parseDetails)
 {
     // No additional initialisation.
 }
Exemplo n.º 11
0
 /// <summary>
 /// Instance convenience wrapper that forwards to <c>Parsing.SectionGenericParseAction</c>,
 /// supplying this instance's <c>HostVariables</c>.
 /// </summary>
 /// <param name="element">The node to parse.</param>
 /// <param name="details">Parse details forwarded unchanged.</param>
 /// <param name="parseRegex">Pattern forwarded unchanged.</param>
 /// <param name="parseReplace">Replacement template forwarded unchanged.</param>
 /// <param name="postParse">Converter forwarded unchanged.</param>
 /// <returns>Whatever <c>Parsing.SectionGenericParseAction</c> returns.</returns>
 public T SectionGenericParseAction <T>(HtmlNode element, IParseDetails <T> details, string parseRegex, string parseReplace, Func <string, T> postParse)
 {
     T parsed = Parsing.SectionGenericParseAction(element, details, parseRegex, parseReplace, postParse, HostVariables);

     return parsed;
 }
Exemplo n.º 12
0
        /// <summary>
        /// Builds a chapter for an already-known URI; the title is obtained from <c>ParseChapterTitle</c>.
        /// </summary>
        /// <param name="chapterUri">The chapter's address.</param>
        /// <param name="element">The node passed to <c>ParseChapterTitle</c>.</param>
        /// <param name="parseDetails">
        /// Parse details; cast to <c>Core.Helper.ChapterParseDetails</c> to read its host variables.
        /// </param>
        /// <param name="html">Optional markup passed to <c>ParseChapterTitle</c>.</param>
        /// <returns>The new chapter with the host variables copied onto it.</returns>
        public IChapter ChapterParseActionUriSupplied(Uri chapterUri, HtmlNode element, IParseDetails <IChapter> parseDetails, string html = null)
        {
            var title   = ParseChapterTitle(element, html);
            var chapter = new ChapterDistill(title, chapterUri, this.HostData, Log);

            var chapterDetails = (Core.Helper.ChapterParseDetails)parseDetails;

            // Iterate over a snapshot and add-or-overwrite each variable on the chapter.
            foreach (var variable in chapterDetails.HostVariables.ToList())
            {
                chapter.HostVariables[variable.Key] = variable.Value;
            }

            return chapter;
        }
Exemplo n.º 13
0
        /// <summary>
        /// Selects the nodes under <paramref name="element"/> that match <c>details.XPath</c>
        /// and parses them.
        /// </summary>
        /// <param name="element">The node the XPath is evaluated against.</param>
        /// <param name="details">Supplies the XPath and is forwarded to <c>ParseContent</c>.</param>
        /// <returns>The list produced by the node-collection overload of <c>ParseContent</c>.</returns>
        public static List <T> ParseContentFromNode <T>(HtmlNode element, IParseDetails <T> details)
        {
            return ParseContent(element.SelectNodes(details.XPath), details);
        }
Exemplo n.º 14
0
 /// <summary>
 /// Parses chapters out of <paramref name="html"/> by delegating to <c>ParseContent</c>.
 /// </summary>
 /// <param name="html">The markup to parse.</param>
 /// <param name="details">Chapter parse details forwarded unchanged.</param>
 /// <returns>The list returned by <c>ParseContent</c>.</returns>
 public static List <IChapter> ParseChapters(string html, IParseDetails <IChapter> details)
 {
     List <IChapter> chapters = ParseContent(html, details);

     return chapters;
 }
Exemplo n.º 15
0
 /// <summary>
 /// Parses addresses out of <paramref name="html"/> by delegating to <c>ParseContent</c>.
 /// </summary>
 /// <param name="html">The markup to parse.</param>
 /// <param name="details">URI parse details forwarded unchanged.</param>
 /// <returns>The list returned by <c>ParseContent</c>.</returns>
 public static List <Uri> ParseAddresses(string html, IParseDetails <Uri> details)
 {
     List <Uri> addresses = ParseContent(html, details);

     return addresses;
 }
Exemplo n.º 16
0
        // TODO: Refactor into a more palatable method, please...

        /// <summary>
        /// Unpacks the regex and replacement from <paramref name="section"/> and forwards
        /// everything to <c>SectionGenericParseAction</c>.
        /// </summary>
        /// <param name="element">The node to parse.</param>
        /// <param name="details">Parse details forwarded unchanged.</param>
        /// <param name="section">Supplies <c>ParseRegex</c> and <c>ParseReplace</c>.</param>
        /// <param name="postParse">Converter forwarded unchanged.</param>
        /// <param name="hostVariables">Variable dictionary forwarded unchanged.</param>
        /// <returns>Whatever <c>SectionGenericParseAction</c> returns.</returns>
        public static T GenericParseAction <T>(HtmlNode element, IParseDetails <T> details, IHostSection section, Func <string, T> postParse, Dictionary <string, string> hostVariables)
        {
            var pattern     = section.ParseRegex;
            var replacement = section.ParseReplace;

            return SectionGenericParseAction(element, details, pattern, replacement, postParse, hostVariables);
        }