/// <summary>
/// Requests the page at <paramref name="url"/>, runs it through the HTML cleaner chosen for
/// that URL, and copies the formatter's output stream into a file under the controller's
/// content path (presumably a PDF, judging by the member names).
/// </summary>
/// <param name="url">Address of the page to process.</param>
/// <returns>
/// The result of <c>_pdfController.GetContentUri</c> for the file name derived from
/// <paramref name="url"/>. The value is returned even when the formatter supplied no
/// output stream and therefore no file was written.
/// </returns>
public string GetUrlToPdf(string url)
        {
            //  Build the injector and let it pick the cleaner for this URL.
            var injectorConfig   = new BaseInjectorConfig();
            var configSerializer = new WebCleanerConfigSerializer(_pdfController.Server);
            IHtmlCleaner cleaner = new HtmlCleanerInjector(injectorConfig, configSerializer).CreateHtmlCleaner(url);

            //  Fetch the page and feed it to the cleaner; the value returned by Process is not used.
            string pageSource = HtmlCleanerApp.MakeRequest(url);
            _ = cleaner.Process(pageSource);

            //  Close the document before asking the formatter for its output.
            ITagFormatter tagFormatter = cleaner.GetFormatter();
            tagFormatter.CloseDocument();

            using (MemoryStream output = tagFormatter.GetOutputStream())
            {
                string fileName   = _pdfController.UrlToFileName(url);
                string targetPath = _pdfController.GetContentPath(fileName);

                //  The formatter may hand back no stream; in that case nothing is written.
                if (output != null)
                {
                    using (FileStream target = System.IO.File.Create(targetPath))
                    {
                        //  Rewind so the copy starts at the first byte.
                        output.Position = 0;
                        output.CopyTo(target);
                    }
                }

                return _pdfController.GetContentUri(fileName);
            }
        }
Пример #2
0
        /// <summary>
        /// Creates the provider and stores the cleaner and loader it was given.
        /// </summary>
        /// <param name="cleaner">HTML cleaner to store; must not be null.</param>
        /// <param name="loader">HTML loader to store; must not be null.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="cleaner"/> or <paramref name="loader"/> is null.
        /// </exception>
        public ArticleProvider(IHtmlCleaner cleaner, IHtmlLoader loader)
        {
            // nameof keeps the reported parameter name in sync with the signature on rename.
            if (cleaner == null)
            {
                throw new ArgumentNullException(nameof(cleaner));
            }
            if (loader == null)
            {
                throw new ArgumentNullException(nameof(loader));
            }

            _cleaner = cleaner;
            _loader  = loader;
        }
        /// <summary>
        /// Stores the supplied collaborators and fills <c>WhiteList</c> with a fixed set of
        /// short tokens.
        /// </summary>
        /// <param name="htmlCleaner">Cleaner instance kept in <c>_htmlCleaner</c>.</param>
        /// <param name="keywordOperation">Keyword operation kept in <c>_keywordOperation</c>.</param>
        public WebSiteOperation(IHtmlCleaner htmlCleaner, IKeywordOperation keywordOperation)
        {
            _htmlCleaner = htmlCleaner;
            _keywordOperation = keywordOperation;

            // NOTE(review): the entries look like web-related file extensions
            // (without the leading dot) — confirm against the code that reads WhiteList.
            WhiteList = new List<string>
            {
                "php", "xps", "aspx", "axd", "chm", "do", "jhtml", "jnlp", "json", "mht",
                "gg", "gsp", "adr", "css", "mvc", "pac", "url", "xul", "_eml", "!bt",
                "asp", "att", "cer", "cfm", "con", "htc", "htm", "html", "js", "jsf",
                "jsp", "mhtml", "nzb", "rss", "vbd", "web", "wsdl", "xfdl", "aex", "pem",
                "wrf", "xbel", "alx", "ap", "ascx", "asr", "dap", "dml", "dwt", "email",
                "mai", "phtml", "shtml", "wgt", "wml", "xhtml", "crl", "pando", "pfc", "qbo"
            };
        }
Пример #4
0
        /// <summary>
        /// Creates the HTML cleaner configured for <paramref name="url"/>.
        /// </summary>
        /// <remarks>
        /// The configured cleaner list is scanned in order; the first item whose
        /// <c>urlPrefix</c> occurs anywhere in <paramref name="url"/> wins. Its cleaner type is
        /// instantiated with the config serializer as the single constructor argument, and a new
        /// instance of the configured formatter type is attached to it. When no item matches, a
        /// <c>UniversalHtmlCleaner</c> is returned and the configured formatter type is not used.
        /// </remarks>
        /// <param name="url">URL the cleaner is selected for.</param>
        /// <returns>The cleaner for the first matching item, or the default universal cleaner.</returns>
        /// <exception cref="InvalidOperationException">
        /// A URL matched but the configured formatter or cleaner type name could not be resolved.
        /// </exception>
        /// <exception cref="InvalidCastException">
        /// The instantiated formatter or cleaner does not implement the expected interface.
        /// </exception>
        public IHtmlCleaner CreateHtmlCleaner(string url)
        {
            System.Collections.Generic.List <HtmlCleanerConfigItem> list = _config.GetCleanerList();
            string formatterTypeName = _config.GetFormatterType();
            Type   formatterType     = Type.GetType(formatterTypeName);

            foreach (HtmlCleanerConfigItem item in list)
            {
                if (!url.Contains(item.urlPrefix))
                {
                    continue;
                }

                // Type.GetType returns null for an unknown name; report the offending name
                // instead of letting Activator fail with an anonymous ArgumentNullException.
                Type cleanerType = Type.GetType(item.htmlCleanerType);
                if (cleanerType == null)
                {
                    throw new InvalidOperationException($"Cannot resolve HTML cleaner type '{item.htmlCleanerType}'.");
                }

                // Validated only here so the default path below behaves as before
                // even when the formatter type is misconfigured.
                if (formatterType == null)
                {
                    throw new InvalidOperationException($"Cannot resolve tag formatter type '{formatterTypeName}'.");
                }

                // Direct casts: a type that does not implement the interface fails right here
                // with an InvalidCastException rather than producing a null that is passed on
                // or dereferenced later.
                ITagFormatter formatter = (ITagFormatter)Activator.CreateInstance(formatterType);
                IHtmlCleaner  cleaner   = (IHtmlCleaner)Activator.CreateInstance(cleanerType, new object[] { _configSerializer });
                cleaner.SetFormatter(formatter);
                return cleaner;
            }

            //  Default HTML parser.
            return new UniversalHtmlCleaner(_configSerializer);
        }
 /// <summary>
 /// Stores the three collaborators passed in; no validation or other work is performed.
 /// </summary>
 /// <param name="tagAndPointDal">Kept in <c>_tagAndPointDal</c>.</param>
 /// <param name="htmlClearer">HTML cleaner, kept in <c>_htmlCleaner</c>.</param>
 /// <param name="wordToExcludeDal">Kept in <c>_wordToExcludeDal</c>.</param>
 public KeywordOperation(ITagAndPointDal tagAndPointDal, IHtmlCleaner htmlClearer, IWordToExcludeDal wordToExcludeDal)
 {
     this._tagAndPointDal = tagAndPointDal;
     this._wordToExcludeDal = wordToExcludeDal;
     this._htmlCleaner = htmlClearer;
 }