Ejemplo n.º 1
0
        /// <summary>
        /// Builds the <see cref="TagRemover"/> for this cleaner and loads it with
        /// the tag signatures listed in the method body.
        /// </summary>
        /// <param name="next">Passed through to the TagRemover constructor.</param>
        /// <param name="formatter">Passed through to the TagRemover constructor.</param>
        /// <returns>The TagRemover with its <c>Tags</c> list assigned.</returns>
        protected override TagRemover GetTagRemover(TextProcessor next, ITagFormatter formatter)
        {
            TagRemover remover = new TagRemover(next, formatter);

            //  Entries are kept in their configured order. A signature whose
            //  second argument is empty has no closing tag string.
            remover.Tags = new List<HtmlTag>
            {
                new HtmlTag("<script", "</script>"),
                new HtmlTag("<style", "</style>"),
                new HtmlTag("<link", ""),
                new HtmlTag("<path", "</path>"),
                new HtmlTag("<meta", ""),
                new HtmlTag("<iframe", "</iframe>"),
                new HtmlTag("<svg", "</svg>"),
                new HtmlTag("<sup", "</sup>"),
                new HtmlTag("<input", ""),
                new HtmlTag("<label", "</label>"),
                new HtmlTag("<form", "</form>"),
                new HtmlTag("<noscript", "</noscript>"),
                new HtmlTag("<nav", "</nav>"),
                new HtmlTag("<!DOCTYPE", ""),
                new HtmlTag("<button", "</button>"),
                new HtmlTag("<aside", "</aside>"),
                //  Conditional comments first, then plain comments.
                new HtmlTag("<!--[if", "<![endif]-->"),
                new HtmlTag("<!--", ""),
            };

            return remover;
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Initializes the element from its tag signatures, the text and a formatter.
 /// </summary>
 /// <param name="startTag">Opening tag signature; should not include the closing <c>&gt;</c>.</param>
 /// <param name="endTag">Value stored in <c>EndTag</c>.</param>
 /// <param name="text">Value stored in <c>Text</c>.</param>
 /// <param name="formatter">Value stored in <c>_formatter</c>.</param>
 public HtmlElement(
     string startTag,
     string endTag,
     string text,
     ITagFormatter formatter)
 {
     //  Assignment order is intentionally left as it was.
     Text = text;
     StartTag = startTag;
     EndTag = endTag;
     _formatter = formatter;
 }
        /// <summary>
        /// Requests the page at <paramref name="url"/>, runs it through the HTML
        /// cleaner created for that URL and copies the formatter's output stream
        /// into a file located under the controller's content path.
        /// </summary>
        /// <param name="url">Address of the page to process; also used to derive the file name.</param>
        /// <returns>
        /// The result of <c>_pdfController.GetContentUri</c> for the derived file name.
        /// It is returned even when the formatter supplied no output stream and
        /// therefore no file was written.
        /// </returns>
        public string GetUrlToPdf(string url)
        {
            //  The injector picks the cleaner implementation based on the URL.
            IHtmlCleaner cleaner = new HtmlCleanerInjector(
                new BaseInjectorConfig(),
                new WebCleanerConfigSerializer(_pdfController.Server)).CreateHtmlCleaner(url);

            //  Fetch the page and feed it to the cleaner; the value returned
            //  by Process is not needed here.
            string page = HtmlCleanerApp.MakeRequest(url);
            _ = cleaner.Process(page);

            //  The document has to be closed before its output is read.
            ITagFormatter tagFormatter = cleaner.GetFormatter();
            tagFormatter.CloseDocument();

            using (MemoryStream output = tagFormatter.GetOutputStream())
            {
                string fileName = _pdfController.UrlToFileName(url);
                string filePath = _pdfController.GetContentPath(fileName);

                if (output != null)
                {
                    using (FileStream target = System.IO.File.Create(filePath))
                    {
                        //  Rewind so the whole buffer is copied.
                        output.Seek(0, SeekOrigin.Begin);
                        output.CopyTo(target);
                    }
                }

                return _pdfController.GetContentUri(fileName);
            }
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Initializes the element from its tag signatures, the text, a formatter
 /// and a ready-made attribute dictionary.
 /// </summary>
 /// <param name="startTag">Opening tag signature; should not include the closing <c>&gt;</c>.</param>
 /// <param name="endTag">Value stored in <c>EndTag</c>.</param>
 /// <param name="text">Value stored in <c>Text</c>.</param>
 /// <param name="formatter">Value stored in <c>_formatter</c>.</param>
 /// <param name="attributes">Dictionary stored in <c>_attributes</c> as given (not copied).</param>
 public HtmlElement(
     string startTag,
     string endTag,
     string text,
     ITagFormatter formatter,
     Dictionary<string, string> attributes)
 {
     //  Assignment order is intentionally left as it was.
     Text = text;
     StartTag = startTag;
     EndTag = endTag;
     _formatter = formatter;
     _attributes = attributes;
 }
Ejemplo n.º 5
0
 /// <summary>
 /// Creates a paragraph extractor that starts out skipped and targets the
 /// <c>&lt;p&gt;</c> tag.
 /// </summary>
 /// <param name="next">Forwarded to the base constructor.</param>
 /// <param name="formatter">Forwarded to the base constructor.</param>
 public ParagraphExtractor(TextProcessor next, ITagFormatter formatter)
     : base(next, formatter)
 {
     //  Skipped by default. According to the original author's note the
     //  extractor can be switched on from the configuration file, and doing
     //  so has to stay consistent with the other parts of the chain.
     Skipped = true;

     //  Paragraph tag used unless a different one is assigned later.
     Tag = new HtmlTag("<p", "</p>");
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Builds the <see cref="TagRemover"/> for this cleaner and loads it with
        /// the tag signatures listed in the method body.
        /// </summary>
        /// <param name="next">Passed through to the TagRemover constructor.</param>
        /// <param name="formatter">Passed through to the TagRemover constructor.</param>
        /// <returns>The TagRemover with its <c>Tags</c> list assigned.</returns>
        protected override TagRemover GetTagRemover(TextProcessor next, ITagFormatter formatter)
        {
            TagRemover remover = new TagRemover(next, formatter);

            //  Entries are kept in their configured order. A signature whose
            //  second argument is empty has no closing tag string.
            remover.Tags = new List<HtmlTag>
            {
                new HtmlTag("<script", "</script>"),
                new HtmlTag("<style", "</style>"),
                new HtmlTag("<link", ""),
                new HtmlTag("<path", "</path>"),
                new HtmlTag("<meta", ""),
                new HtmlTag("<svg", "</svg>"),
                new HtmlTag("<sup", "</sup>"),
                new HtmlTag("<label", "</label>"),
                new HtmlTag("<input", ""),
                new HtmlTag("<img", ""),
                new HtmlTag("<iframe", "</iframe>"),
                new HtmlTag("<footer", "</footer>"),
                new HtmlTag("<form", "</form>"),
                new HtmlTag("<noscript", "</noscript>"),
                new HtmlTag("<nav", "</nav>"),
                new HtmlTag("<!DOCTYPE", ""),

                //  Advertising block and the divs inside it. These are listed
                //  in the reverse of their nesting order, which the original
                //  author describes as the best option for this primitive parser.
                new HtmlTag("<div id=\"atatags", "</div>"),
                new HtmlTag("<div style=\"", "</div>"),
                new HtmlTag("<div class=\"wpa-notice", "</div>"),
                new HtmlTag("<div class=\"u", "</div>"),
                new HtmlTag("<div class=\"wpa", "</div>"),

                //  Sharing buttons, grouped by the quoting style of the markup.
                new HtmlTag("<div class=\"sd-content", "</div>"),
                new HtmlTag("<div class=\"robots-nocontent", "</div>"),
                new HtmlTag("<div class=\"sharedaddy", "</div>"),

                new HtmlTag("<div class=\'likes-", "</div>"),
                new HtmlTag("<div class=\'sharedaddy", "</div>"),

                new HtmlTag("<div id=\'jp-relatedposts", "</div>"),
                new HtmlTag("<div id=\"jp-post-flair", "</div>"),

                new HtmlTag("<div class=\"wpcnt", "</div>"),

                //  Remaining tags.
                new HtmlTag("<button", "</button>"),
                new HtmlTag("<br", ""),
                new HtmlTag("<aside", "</aside>"),

                //  Conditional comments first, then plain comments.
                new HtmlTag("<!--[if", "<![endif]-->"),
                new HtmlTag("<!--", ""),
            };

            return remover;
        }
 /// <summary>
 /// Creates the service from its collaborators; every argument is required.
 /// </summary>
 /// <param name="dateParser">Stored in <c>_dateParser</c>.</param>
 /// <param name="daySummaryService">Stored in <c>_daySummaryService</c>.</param>
 /// <param name="tagFormatter">Stored in <c>_tagFormatter</c>.</param>
 /// <param name="appOutput">Stored in <c>_appOutput</c>.</param>
 /// <exception cref="ArgumentNullException">
 /// Thrown for the first argument, in declaration order, that is null.
 /// </exception>
 public ConsoleAppService(
     IDateParser dateParser,
     IDaySummaryService daySummaryService,
     ITagFormatter tagFormatter,
     IAppOutput appOutput)
 {
     //  Validation happens in declaration order, so the reported parameter
     //  name is always that of the first missing dependency.
     if (dateParser is null)
     {
         throw new ArgumentNullException(nameof(dateParser));
     }

     if (daySummaryService is null)
     {
         throw new ArgumentNullException(nameof(daySummaryService));
     }

     if (tagFormatter is null)
     {
         throw new ArgumentNullException(nameof(tagFormatter));
     }

     if (appOutput is null)
     {
         throw new ArgumentNullException(nameof(appOutput));
     }

     _dateParser = dateParser;
     _daySummaryService = daySummaryService;
     _tagFormatter = tagFormatter;
     _appOutput = appOutput;
 }
Ejemplo n.º 8
0
 /// <summary>
 /// Creates a logger and stores the supplied values in the members of the
 /// same name. No argument is validated.
 /// </summary>
 /// <param name="owner">Stored in <c>owner</c>.</param>
 /// <param name="tags">Stored in <c>tags</c> as given (the array is not copied).</param>
 /// <param name="tagFormatter">Stored in <c>tagFormatter</c>.</param>
 /// <param name="defaultTagProvider">Stored in <c>defaultTagProvider</c>.</param>
 public UnityLogger(
     string owner,
     ITag[] tags,
     ITagFormatter tagFormatter,
     IDefaultTagProvider defaultTagProvider)
 {
     //  Members are assigned in parameter order.
     this.owner = owner;
     this.tags = tags;
     this.tagFormatter = tagFormatter;
     this.defaultTagProvider = defaultTagProvider;
 }
Ejemplo n.º 9
0
        /// <summary>
        /// Creates the HTML cleaner configured for the given URL.
        /// </summary>
        /// <remarks>
        /// Configuration items are examined in list order and the first one whose
        /// <c>urlPrefix</c> occurs anywhere in <paramref name="url"/> is used: its
        /// cleaner type is instantiated with the configuration serializer and
        /// receives a fresh instance of the configured formatter type. When no
        /// item matches, a <c>UniversalHtmlCleaner</c> is returned.
        /// </remarks>
        /// <param name="url">Address of the page that is going to be cleaned.</param>
        /// <returns>The cleaner selected for the URL.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when a configuration item matches but the configured formatter
        /// or cleaner type cannot be resolved, or the created instance does not
        /// implement the expected interface.
        /// </exception>
        public IHtmlCleaner CreateHtmlCleaner(string url)
        {
            System.Collections.Generic.List <HtmlCleanerConfigItem> list = _config.GetCleanerList();

            //  Resolved up front, exactly as before. A null result is only an
            //  error once a configuration item actually needs the formatter.
            string formatterTypeName = _config.GetFormatterType();
            Type   formatterType     = Type.GetType(formatterTypeName);

            foreach (HtmlCleanerConfigItem item in list)
            {
                if (!url.Contains(item.urlPrefix))
                {
                    continue;
                }

                Type cleanerType = Type.GetType(item.htmlCleanerType);

                //  Type.GetType returns null for names it cannot resolve. Fail
                //  here with the offending name instead of letting Activator
                //  throw an ArgumentNullException that does not mention it.
                if (formatterType == null)
                {
                    throw new InvalidOperationException(
                        "Formatter type '" + formatterTypeName + "' could not be resolved.");
                }

                ITagFormatter formatter = Activator.CreateInstance(formatterType) as ITagFormatter;
                if (formatter == null)
                {
                    //  Previously a null formatter was silently handed to the cleaner.
                    throw new InvalidOperationException(
                        "Formatter type '" + formatterTypeName + "' does not implement ITagFormatter.");
                }

                if (cleanerType == null)
                {
                    throw new InvalidOperationException(
                        "HTML cleaner type '" + item.htmlCleanerType + "' could not be resolved.");
                }

                IHtmlCleaner cleaner = Activator.CreateInstance(cleanerType, new object[] { _configSerializer }) as IHtmlCleaner;
                if (cleaner == null)
                {
                    //  Previously this surfaced as a NullReferenceException on SetFormatter.
                    throw new InvalidOperationException(
                        "HTML cleaner type '" + item.htmlCleanerType + "' does not implement IHtmlCleaner.");
                }

                cleaner.SetFormatter(formatter);
                return(cleaner);
            }

            //  Default HTML parser.
            //  NOTE(review): unlike the configured cleaners above, SetFormatter is
            //  not called on this instance here; confirm that UniversalHtmlCleaner
            //  provides its own formatter.
            return(new UniversalHtmlCleaner(_configSerializer));
        }
Ejemplo n.º 10
0
            /// <summary>
            /// Searches text for any of tags from the list and returns corresponding element.
            /// </summary>
            /// <remarks>
            /// The text is scanned from the beginning for <c>&lt;</c> characters. At each
            /// one the tag signatures are tried in list order, and the first signature
            /// whose start tag matches at that position (ordinal, case-sensitive)
            /// determines the result. The returned element has had <c>FindNext</c>
            /// called once and carries the attributes named by the matching tag.
            /// </remarks>
            /// <param name="tags">Tag signatures to look for; earlier entries take precedence at the same position.</param>
            /// <param name="text">Text to search.</param>
            /// <param name="formatter">Formatter passed to the created element.</param>
            /// <returns>The element for the first matching signature, or <c>null</c> when none matches.</returns>
            public static HtmlElement FindAny(List <HtmlTag> tags, string text, ITagFormatter formatter)
            {
                int bracketPos = 0;

                while ((bracketPos = text.IndexOf('<', bracketPos)) != -1)
                {
                    //  Compares with tag signatures.
                    foreach (HtmlTag t in tags)
                    {
                        string startTag = t.StartTag;

                        //  Compare in place: no substring of the remaining text is
                        //  allocated per bracket, and the comparison is ordinal
                        //  rather than dependent on the current culture.
                        if (string.CompareOrdinal(text, bracketPos, startTag, 0, startTag.Length) != 0)
                        {
                            continue;
                        }

                        //  Tag has been found in the list.
                        HtmlElement htmlElement = new HtmlElement(startTag, t.EndTag, text, formatter);
                        //  Properly initializes internal state.
                        _ = htmlElement.FindNext();
                        //  Parses attributes.
                        foreach (string attributeName in t.AttributeNames)
                        {
                            htmlElement.AddAttribute(attributeName, htmlElement.GetAttr(attributeName));
                        }

                        return(htmlElement);
                    }

                    //  No signature starts here; continue after this bracket.
                    bracketPos++;
                }

                return(null);
            }
Ejemplo n.º 11
0
 /// <summary>
 /// Creates the processor; both arguments are forwarded to the base
 /// constructor and nothing else is initialized here.
 /// </summary>
 /// <param name="next">Forwarded to the base constructor.</param>
 /// <param name="formatter">Forwarded to the base constructor.</param>
 public InnerTextProcessor(TextProcessor next, ITagFormatter formatter)
     : base(next, formatter)
 {
 }
Ejemplo n.º 12
0
 /// <summary>
 /// Supplies the InnerTextProcessor instance; implemented by derived classes.
 /// </summary>
 /// <param name="next">Text processor argument; presumably the next processing
 /// object in the chain (TODO confirm against the implementations).</param>
 /// <param name="formatter">Tag formatter argument; presumably the formatter the
 /// returned processor should use (TODO confirm against the implementations).</param>
 /// <returns>An InnerTextProcessor.</returns>
 protected abstract InnerTextProcessor GetInnerTextProcessor(TextProcessor next, ITagFormatter formatter);
Ejemplo n.º 13
0
 /// <summary>
 /// Creates the remover; both arguments are forwarded to the base
 /// constructor and nothing else is initialized here.
 /// </summary>
 /// <param name="next">Forwarded to the base constructor.</param>
 /// <param name="formatter">Forwarded to the base constructor.</param>
 public SpecialHtmlRemover(TextProcessor next, ITagFormatter formatter)
     : base(next, formatter)
 {
 }
Ejemplo n.º 14
0
 /// <summary>
 /// Replaces the formatter held by this instance.
 /// </summary>
 /// <param name="formatter">New value for <c>_formatter</c>; stored as given, without validation.</param>
 public void SetFormatter(ITagFormatter formatter) => _formatter = formatter;
Ejemplo n.º 15
0
        /// <summary>
        /// Builds the <see cref="InnerTextProcessor"/> for this cleaner and loads
        /// it with the tag signatures listed in the method body.
        /// </summary>
        /// <param name="next">Passed through to the InnerTextProcessor constructor.</param>
        /// <param name="formatter">Passed through to the InnerTextProcessor constructor.</param>
        /// <returns>The InnerTextProcessor with its <c>Tags</c> list assigned.</returns>
        protected override InnerTextProcessor GetInnerTextProcessor(TextProcessor next, ITagFormatter formatter)
        {
            InnerTextProcessor processor = new InnerTextProcessor(next, formatter);

            //  Entries are kept in their configured order. Where one signature
            //  is a prefix of another (for example "<p" and "<pre"), the longer
            //  one is listed first.
            processor.Tags = new List<HtmlTag>
            {
                new HtmlTag("<ul", "</ul>"),
                new HtmlTag("<title", "</title>"),
                new HtmlTag("<strong", "</strong>"),
                new HtmlTag("<span", "</span>"),
                new HtmlTag("<small", "</small>"),
                new HtmlTag("<pre", "</pre>"),
                new HtmlTag("<p", "</p>"),
                new HtmlTag("<main", "</main>"),
                new HtmlTag("<li", "</li>"),
                new HtmlTag("<html", "</html>"),
                new HtmlTag("<header", "</header>"),
                new HtmlTag("<head", "</head>"),
                new HtmlTag("<h4", "</h4>"),
                //  NOTE(review): "<h3" appears twice; the duplicate is kept so
                //  the list contents stay exactly as they were.
                new HtmlTag("<h3", "</h3>"),
                new HtmlTag("<h3", "</h3>"),
                new HtmlTag("<h2", "</h2>"),
                new HtmlTag("<h1", "</h1>"),
                new HtmlTag("<footer", "</footer>"),
                new HtmlTag("<em", "</em>"),
                new HtmlTag("<div", "</div>"),
                new HtmlTag("<code", "</code>"),
                new HtmlTag("<body", "</body>"),
                new HtmlTag("<article", "</article>"),
            };

            return processor;
        }
Ejemplo n.º 16
0
 /// <summary>
 /// Creates and initializes domain-specific instance of TagRemover.
 /// </summary>
 /// <param name="next">Next processing object in the chain.</param>
 /// <param name="formatter">Tag formatter argument; presumably the formatter the
 /// created TagRemover should use (TODO confirm against the implementations).</param>
 /// <returns>Instance of TagRemover specific for the domain supported
 /// by inherited class.</returns>
 protected abstract TagRemover GetTagRemover(TextProcessor next, ITagFormatter formatter);
Ejemplo n.º 17
0
 /// <summary>
 /// Creates the formatter stage; both arguments are forwarded to the base
 /// constructor and nothing else is initialized here.
 /// </summary>
 /// <param name="next">Forwarded to the base constructor.</param>
 /// <param name="formatter">Forwarded to the base constructor.</param>
 public TextFormatter(TextProcessor next, ITagFormatter formatter)
     : base(next, formatter)
 {
 }
Ejemplo n.º 18
0
 /// <summary>
 /// Initializes the processor with its successor and its formatter.
 /// Neither argument is validated.
 /// </summary>
 /// <param name="next">Stored in <c>Next</c>.</param>
 /// <param name="formatter">Stored in <c>_formatter</c>.</param>
 public TextProcessor(
     TextProcessor next,
     ITagFormatter formatter)
 {
     Next = next;
     _formatter = formatter;
 }
Ejemplo n.º 19
0
 /// <summary>
 /// Builds a <see cref="ParagraphExtractor"/> that is not skipped and uses
 /// the <c>&lt;article&gt;</c> tag.
 /// </summary>
 /// <param name="next">Passed through to the ParagraphExtractor constructor.</param>
 /// <param name="formatter">Passed through to the ParagraphExtractor constructor.</param>
 /// <returns>The configured ParagraphExtractor.</returns>
 protected override ParagraphExtractor GetParagraphExtractor(TextProcessor next, ITagFormatter formatter)
 {
     ParagraphExtractor extractor = new ParagraphExtractor(next, formatter);

     //  Both values are assigned after construction, in this order.
     extractor.Skipped = false;
     extractor.Tag = new HtmlTag("<article", "</article>");

     return extractor;
 }
Ejemplo n.º 20
0
 /// <summary>
 /// Creates and initializes domain-specific instance of ParagraphExtractor.
 /// </summary>
 /// <param name="next">Next processing object in the chain.</param>
 /// <param name="formatter">Tag formatter argument; presumably the formatter the
 /// created ParagraphExtractor should use (TODO confirm against the implementations).</param>
 /// <returns>Instance of ParagraphExtractor specific for the domain supported
 /// by inherited class.</returns>
 protected abstract ParagraphExtractor GetParagraphExtractor(TextProcessor next, ITagFormatter formatter);
Ejemplo n.º 21
0
        /// <summary>
        /// Builds the <see cref="InnerTextProcessor"/> for this cleaner and loads
        /// it with the tag signatures listed in the method body.
        /// </summary>
        /// <param name="next">Passed through to the InnerTextProcessor constructor.</param>
        /// <param name="formatter">Passed through to the InnerTextProcessor constructor.</param>
        /// <returns>The InnerTextProcessor with its <c>Tags</c> list assigned.</returns>
        protected override InnerTextProcessor GetInnerTextProcessor(TextProcessor next, ITagFormatter formatter)
        {
            InnerTextProcessor processor = new InnerTextProcessor(next, formatter);

            //  Entries are kept in their configured order. Where one signature
            //  is a prefix of another (for example "<u" and "<ul"), the longer
            //  one is listed first.
            processor.Tags = new List<HtmlTag>
            {
                new HtmlTag("<ul", "</ul>"),
                new HtmlTag("<u", "</u>"),

                //  Table-related tags.
                new HtmlTag("<td", "</td>"),
                new HtmlTag("<tr", "</tr>"),
                new HtmlTag("<tbody", "</tbody>"),
                new HtmlTag("<table", "</table>"),

                //  Remaining tags.
                new HtmlTag("<time", "</time>"),
                new HtmlTag("<title", "</title>"),
                new HtmlTag("<strong", "</strong>"),
                new HtmlTag("<span", "</span>"),
                new HtmlTag("<small", "</small>"),
                new HtmlTag("<pre", "</pre>"),
                new HtmlTag("<p", "</p>"),
                new HtmlTag("<main", "</main>"),
                new HtmlTag("<li", "</li>"),
                new HtmlTag("<html", "</html>"),
                new HtmlTag("<header", "</header>"),
                new HtmlTag("<head", "</head>"),
                new HtmlTag("<h4", "</h4>"),
                //  NOTE(review): "<h3" appears twice; the duplicate is kept so
                //  the list contents stay exactly as they were.
                new HtmlTag("<h3", "</h3>"),
                new HtmlTag("<h3", "</h3>"),
                new HtmlTag("<h2", "</h2>"),
                new HtmlTag("<h1", "</h1>"),
                new HtmlTag("<footer", "</footer>"),
                new HtmlTag("<em", "</em>"),
                new HtmlTag("<div", "</div>"),
                new HtmlTag("<code", "</code>"),
                new HtmlTag("<body", "</body>"),
                new HtmlTag("<blockquote", "</blockquote>"),
                //  The only entry that also names an attribute ("href").
                new HtmlTag("<a", "</a>", new string[] { "href" }),
            };

            return processor;
        }
Ejemplo n.º 22
0
 /// <summary>
 /// Returns a <see cref="ParagraphExtractor"/> built from the two arguments,
 /// with no settings changed after construction.
 /// </summary>
 /// <param name="next">Passed through to the ParagraphExtractor constructor.</param>
 /// <param name="formatter">Passed through to the ParagraphExtractor constructor.</param>
 /// <returns>The newly created ParagraphExtractor.</returns>
 protected override ParagraphExtractor GetParagraphExtractor(TextProcessor next, ITagFormatter formatter)
     => new ParagraphExtractor(next, formatter);