/// <summary>
/// Requests the content at <paramref name="url"/>, runs it through the HTML cleaner
/// chosen for that URL, and copies the formatter's output stream into a file under
/// the controller's content path.
/// </summary>
/// <param name="url">Address of the page to fetch and process.</param>
/// <returns>
/// The content URI reported by <c>_pdfController</c> for the file name derived from
/// <paramref name="url"/>. The URI is returned even when the formatter yields no
/// output stream (in which case no file is written here).
/// </returns>
public string GetUrlToPdf(string url)
{
    // Build the injector from its two collaborators (config first, then serializer).
    BaseInjectorConfig injectorConfig = new BaseInjectorConfig();
    WebCleanerConfigSerializer configSerializer = new WebCleanerConfigSerializer(_pdfController.Server);
    HtmlCleanerInjector cleanerFactory = new HtmlCleanerInjector(injectorConfig, configSerializer);

    // Pick the cleaner that matches this URL.
    IHtmlCleaner cleaner = cleanerFactory.CreateHtmlCleaner(url);

    // Fetch the page and push it through the cleaner; the result of Process is not used.
    string responseText = HtmlCleanerApp.MakeRequest(url);
    _ = cleaner.Process(responseText);

    // Finalize the document before asking the formatter for its output.
    ITagFormatter tagFormatter = cleaner.GetFormatter();
    tagFormatter.CloseDocument();

    using (MemoryStream output = tagFormatter.GetOutputStream())
    {
        string targetName = _pdfController.UrlToFileName(url);
        string targetPath = _pdfController.GetContentPath(targetName);

        // Nothing to persist: hand back the URI without touching the file system.
        if (output == null)
        {
            return _pdfController.GetContentUri(targetName);
        }

        using (FileStream target = System.IO.File.Create(targetPath))
        {
            // Rewind so the whole buffer is copied, not just what follows the current position.
            output.Seek(0, SeekOrigin.Begin);
            output.CopyTo(target);
        }

        return _pdfController.GetContentUri(targetName);
    }
}
/// <summary>
/// Initializes the provider with the cleaner and loader it will use.
/// </summary>
/// <param name="cleaner">HTML cleaner stored in <c>_cleaner</c>; must not be null.</param>
/// <param name="loader">HTML loader stored in <c>_loader</c>; must not be null.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="cleaner"/> or <paramref name="loader"/> is null.
/// </exception>
public ArticleProvider(IHtmlCleaner cleaner, IHtmlLoader loader)
{
    // nameof keeps the reported parameter name in sync with the signature on rename.
    if (cleaner == null)
    {
        throw new ArgumentNullException(nameof(cleaner));
    }

    if (loader == null)
    {
        throw new ArgumentNullException(nameof(loader));
    }

    _cleaner = cleaner;
    _loader = loader;
}
/// <summary>
/// Stores the supplied collaborators and populates <c>WhiteList</c> with a fixed
/// set of extension-like strings.
/// </summary>
/// <param name="htmlCleaner">Cleaner assigned to <c>_htmlCleaner</c>.</param>
/// <param name="keywordOperation">Keyword operation assigned to <c>_keywordOperation</c>.</param>
public WebSiteOperation(IHtmlCleaner htmlCleaner, IKeywordOperation keywordOperation)
{
    _htmlCleaner = htmlCleaner;
    _keywordOperation = keywordOperation;

    // Entries are kept in their original order; do not sort or de-duplicate.
    WhiteList = new List<string>
    {
        "php", "xps", "aspx", "axd", "chm", "do", "jhtml", "jnlp", "json", "mht",
        "gg", "gsp", "adr", "css", "mvc", "pac", "url", "xul", "_eml", "!bt",
        "asp", "att", "cer", "cfm", "con", "htc", "htm", "html", "js", "jsf",
        "jsp", "mhtml", "nzb", "rss", "vbd", "web", "wsdl", "xfdl", "aex", "pem",
        "wrf", "xbel", "alx", "ap", "ascx", "asr", "dap", "dml", "dwt", "email",
        "mai", "phtml", "shtml", "wgt", "wml", "xhtml", "crl", "pando", "pfc", "qbo"
    };
}
/// <summary>
/// Creates the HTML cleaner configured for the given URL.
/// </summary>
/// <remarks>
/// The configured cleaner items are examined in list order and the first one whose
/// <c>urlPrefix</c> occurs anywhere in <paramref name="url"/> wins. Its cleaner type is
/// instantiated via reflection with <c>_configSerializer</c> as the single constructor
/// argument and is given a new instance of the configured formatter type.
/// When no item matches, a <see cref="UniversalHtmlCleaner"/> is returned; no formatter
/// is assigned to it by this method.
/// </remarks>
/// <param name="url">URL used to select a cleaner.</param>
/// <returns>The matching cleaner, or the default <see cref="UniversalHtmlCleaner"/>.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when an item matches but the configured formatter or cleaner type cannot be
/// resolved, or the created instance does not implement the expected interface.
/// </exception>
public IHtmlCleaner CreateHtmlCleaner(string url)
{
    System.Collections.Generic.List<HtmlCleanerConfigItem> list = _config.GetCleanerList();

    // Resolved once up front; only validated when a match actually needs a formatter,
    // so a broken formatter setting does not affect the default-cleaner path.
    var formatterTypeName = _config.GetFormatterType();
    Type formatterType = Type.GetType(formatterTypeName);

    foreach (HtmlCleanerConfigItem item in list)
    {
        if (!url.Contains(item.urlPrefix))
        {
            continue;
        }

        Type cleanerType = Type.GetType(item.htmlCleanerType);

        if (formatterType == null)
        {
            throw new InvalidOperationException(
                $"Formatter type '{formatterTypeName}' could not be resolved.");
        }

        // 'as' yields null on a type mismatch; fail here with a clear message instead of
        // handing a null formatter to the cleaner.
        ITagFormatter formatter = Activator.CreateInstance(formatterType) as ITagFormatter;
        if (formatter == null)
        {
            throw new InvalidOperationException(
                $"Formatter type '{formatterTypeName}' does not implement {nameof(ITagFormatter)}.");
        }

        if (cleanerType == null)
        {
            throw new InvalidOperationException(
                $"HTML cleaner type '{item.htmlCleanerType}' could not be resolved.");
        }

        IHtmlCleaner cleaner = Activator.CreateInstance(cleanerType, new object[] { _configSerializer }) as IHtmlCleaner;
        if (cleaner == null)
        {
            throw new InvalidOperationException(
                $"HTML cleaner type '{item.htmlCleanerType}' does not implement {nameof(IHtmlCleaner)}.");
        }

        cleaner.SetFormatter(formatter);
        return cleaner;
    }

    // Default HTML parser.
    return new UniversalHtmlCleaner(_configSerializer);
}
/// <summary>
/// Stores the supplied dependencies in the corresponding private fields.
/// No validation is performed on the arguments.
/// </summary>
/// <param name="tagAndPointDal">Assigned to <c>_tagAndPointDal</c>.</param>
/// <param name="htmlClearer">HTML cleaner assigned to <c>_htmlCleaner</c>.</param>
/// <param name="wordToExcludeDal">Assigned to <c>_wordToExcludeDal</c>.</param>
public KeywordOperation(
    ITagAndPointDal tagAndPointDal,
    IHtmlCleaner htmlClearer,
    IWordToExcludeDal wordToExcludeDal)
{
    // Data-access dependencies.
    this._tagAndPointDal = tagAndPointDal;
    this._wordToExcludeDal = wordToExcludeDal;

    // HTML processing dependency.
    this._htmlCleaner = htmlClearer;
}