// Verifies that HtmlProcessor.CreateInlineEditingRegion wraps whatever is written inside the
// region in a div that carries the provider, type and id data attributes, and that the div is closed.
public void HtmlProcessor_CreateInlineEditingRegion_IsDummyContentwrappedIntoInlineEditingRegion()
{
    // Arrange: dummy data which will be set to the related attributes inside the region div tag.
    string providerName = "dummyProvider";
    string type = "dummyType";
    var id = Guid.NewGuid();
    string dummyContent = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit";
    var providerAttribute = "data-sf-provider";
    var typeAttribute = "data-sf-type";
    var idAttribute = "data-sf-id";

    string outPut;

    // The writer is disposable, so it is scoped to the part of the test that produces the markup.
    using (TextWriter writer = new StringWriter())
    {
        // Act: create the HTML region and write the dummy content inside it.
        var htmlProcessor = new HtmlProcessor();
        using (htmlProcessor.CreateInlineEditingRegion(writer, providerName, type, id))
        {
            writer.WriteLine(dummyContent);
        }

        outPut = writer.ToString();
    }

    // Assert: parse the generated HTML and check that the content is wrapped into a div tag
    // which has the attributes required by InlineEditing, with the expected values.
    using (HtmlParser parser = new HtmlParser(outPut))
    {
        var chunk = parser.ParseNext();
        Assert.IsNotNull(chunk);

        // The first chunk must be a div with the required attributes.
        Assert.IsTrue(chunk.TagName.Equals("div", StringComparison.InvariantCultureIgnoreCase));
        Assert.IsTrue(chunk.HasAttribute(idAttribute));
        Assert.IsTrue(chunk.HasAttribute(providerAttribute));
        Assert.IsTrue(chunk.HasAttribute(typeAttribute));

        // The required attributes must carry the values passed to CreateInlineEditingRegion.
        Assert.AreEqual<string>(providerName, chunk.GetParamValue(providerAttribute));
        Assert.AreEqual<string>(type, chunk.GetParamValue(typeAttribute));
        Assert.AreEqual<string>(id.ToString(), chunk.GetParamValue(idAttribute));

        // Walk the remaining chunks: remember the last text chunk and the very last chunk.
        string content = null;
        HtmlChunk nextChunk = null;
        while ((nextChunk = parser.ParseNext()) != null)
        {
            chunk = nextChunk;
            if (nextChunk.Type == HtmlChunkType.Text)
            {
                content = nextChunk.GenerateHtml();
            }
        }

        // Without this guard a region with no text chunk would surface as a NullReferenceException
        // on the StartsWith call below instead of as a readable assertion failure.
        Assert.IsNotNull(content, "No text chunk was found inside the inline editing region.");

        // The region inner content must be what was written.
        Assert.IsTrue(content.StartsWith(dummyContent, StringComparison.InvariantCultureIgnoreCase));

        // The region must be properly closed: the last chunk is the closing div.
        Assert.IsTrue(chunk.TagName.Equals("div", StringComparison.InvariantCultureIgnoreCase));
        Assert.IsTrue(chunk.Type == HtmlChunkType.CloseTag);
    }
}
// Checks that GetStringContent wraps the "EditableContent" property of the dummy widget in the
// expected wrapper tag carrying the field and field-type attributes with the expected values.
public void GetStringContent_WithInlineEditingAttribute_TextElementProperlyCreated()
{
    // Arrange
    var processor = new HtmlProcessor();
    var widgetModel = new DummyWidgetModel
    {
        EditableContent = this.dummyContent,
        NonEditableContent = this.dummyContent
    };
    var expectedFieldName = "DummyWidget";
    var expectedFieldType = "LongText";

    // Act
    string markup = processor.GetStringContent(widgetModel, "EditableContent");

    // Assert
    using (var parser = new HtmlParser(markup))
    {
        HtmlChunk openingChunk = parser.ParseNext();
        Assert.IsNotNull(openingChunk);

        // The opening chunk must be the wrapper tag and expose both attributes.
        bool isWrapperTag = openingChunk.TagName.Equals(this.htmlWrapperTag, StringComparison.Ordinal);
        Assert.IsTrue(isWrapperTag, "There is no wrapper div appended to the property representation.");
        Assert.IsTrue(openingChunk.HasAttribute(this.fieldAttribute), "The field attribute is not appended correctly.");
        Assert.IsTrue(openingChunk.HasAttribute(this.fieldTypeAttribute), "The field type attribute is not appended correctly.");

        // Both attributes must carry the expected values.
        Assert.AreEqual(expectedFieldName, openingChunk.GetParamValue(this.fieldAttribute), "The value of the field attribute is not correct.");
        Assert.AreEqual(expectedFieldType, openingChunk.GetParamValue(this.fieldTypeAttribute), "The value of the fieldType attribute is not correct.");

        this.AssertContentAndCloseTag(parser);
    }
}
// Copies the test input to a temp file, runs HtmlProcessor.ProcessFile on the copy and checks that
// the processed file is byte-for-byte equal to the expected file.
public void HtmlProcessor_ProcessFile_HtmlFile_SuccessedProcessedFile()
{
    HtmlProcessor htmlProcessor = new HtmlProcessor();

    // Build the paths segment by segment so the directory separator is chosen by the platform.
    const string testName = "HtmlProcessor_ProcessFile_HtmlFile_SuccessedProcessedFile";
    string testDirectory = Path.Combine(Directory.GetCurrentDirectory(), "TestFiles", testName);

    string testFile = Path.Combine(testDirectory, testName + ".html");
    tempFile = Path.Combine(testDirectory, "Expected_" + testName + "_Temp.html");
    string expectedFile = Path.Combine(testDirectory, "Expected_" + testName + ".html");

    // Work on a copy so the original input file is never modified by the processor.
    File.Copy(testFile, tempFile, true);

    htmlProcessor.ProcessFile(tempFile);

    var tempFileBytes = File.ReadAllBytes(tempFile);
    var expectedFileBytes = File.ReadAllBytes(expectedFile);

    // Compare the contents element by element: passing two arrays to Assert.AreEqual compares
    // references in MSTest, which can never succeed for two separately read files.
    Assert.IsTrue(
        System.Linq.Enumerable.SequenceEqual(expectedFileBytes, tempFileBytes),
        "The processed file content differs from the expected file content.");
}
// Serves a previously downloaded resource identified by `parameter`.
// HTML resources are passed through PageModifier first; everything else is returned as stored.
// Sets StatusCode to 404 when the resource is unknown or not downloaded.
public override Task Process(string parameter, HttpListenerContext context)
{
    var modifiedUrl = HtmlProcessor.SimplifyUrl(parameter);
    var resource = ServerContext.ResourceManager.GetResource(modifiedUrl);

    if (resource == null || !resource.IsDownloaded)
    {
        StatusCode = 404;
        return Task.CompletedTask;
    }

    // Media types are case-insensitive protocol tokens, so compare ordinally and ignore case
    // instead of using the culture-sensitive default of StartsWith(string).
    var isHtml = resource.ContentType != null
        && resource.ContentType.StartsWith("text/html", System.StringComparison.OrdinalIgnoreCase);

    if (isHtml)
    {
        using var stream = resource.Get();
        var pm = new PageModifier(stream, modifiedUrl);
        pm.Process();
        ResultStream = pm.GetResult();
    }
    else
    {
        ResultStream = resource.Get();
    }

    ContentType = resource.ContentType;
    return Task.CompletedTask;
}
/// <summary>
/// HTML helper which renders a model property together with the meta data required by InlineEditing.
/// </summary>
/// <param name="helper">The HTML helper being extended.</param>
/// <param name="model">The object which contains the property.</param>
/// <param name="propName">Name of the property.</param>
/// <returns>
/// The string produced by <c>HtmlProcessor.GetStringContent</c> for the property,
/// wrapped in an <c>MvcHtmlString</c>.
/// </returns>
public static IHtmlString TextField(this HtmlHelper helper, object model, string propName)
{
    var markup = new HtmlProcessor().GetStringContent(model, propName);
    var wrappedMarkup = new System.Web.Mvc.MvcHtmlString(markup);
    return wrappedMarkup;
}
// Creates the HtmlProcessor under test with an empty asset list and ".JPG" as the only
// blacklisted extension.
public void Setup()
{
    var assets = new List<AssetModel>();
    var blacklistedExtensions = new List<string> { ".JPG" };

    _htmlProcessor = new HtmlProcessor
    {
        AssetList = assets,
        BlacklistedExtensions = blacklistedExtensions
    };
}
// Builds the handler under test from substitute collaborators and a mapper configured
// with MappingProfile.
public DraftPreviewQueryTests()
{
    _dateProvider = Substitute.For<IDateProvider>();

    var codeFormatter = Substitute.For<ICodeFormatter>();
    var imageProcessor = Substitute.For<IImageProcessor>();
    _htmlProcessor = new HtmlProcessor(codeFormatter, imageProcessor);

    var mapperConfiguration = new MapperConfiguration(cfg => cfg.AddProfile<MappingProfile>());
    var mapper = mapperConfiguration.CreateMapper();

    _handler = new Handler(_dateProvider, _htmlProcessor, mapper);
}
// Checks HtmlProcessor.GetOutsideTagContent for input with one and with two anchor tags.
public void TestGetTextOutsideTag()
{
    // One anchor.
    var outsideSingleAnchor = HtmlProcessor.GetOutsideTagContent(
        "text1 <a href='http://www.ya.ru'>yandex</a> text2",
        "a");
    Assert.AreEqual("text1text2", outsideSingleAnchor);

    // Two anchors.
    var outsideTwoAnchors = HtmlProcessor.GetOutsideTagContent(
        "text1 <a href='http://www.ya.ru'>yandex</a> text2 <a href='http://www.google.ru'>google</a> text3",
        "a");
    Assert.AreEqual("text1text2text3", outsideTwoAnchors);
}
// Checks HtmlProcessor.GetTagContent for one anchor (whose text contains quote characters)
// and for two anchors.
public void TestGetTagContent()
{
    // One anchor whose inner text ends with a single and a double quote.
    var singleAnchorContent = HtmlProcessor.GetTagContent(
        "text <a href='http://www.ya.ru'>yandex'\"</a> text",
        "a");
    Assert.AreEqual("yandex'\"", singleAnchorContent);

    // Two anchors.
    var twoAnchorsContent = HtmlProcessor.GetTagContent(
        "text <a href='http://www.ya.ru'>yandex</a> text <a href='http://www.google.ru'>google</a>",
        "a");
    Assert.AreEqual("yandexgoogle", twoAnchorsContent);
}
// Initializes the window and binds the root layout to a BrowserViewModel backed by a new HtmlProcessor.
public MainWindow()
{
    InitializeComponent();

    rootLayout.DataContext = new BrowserViewModel(new HtmlProcessor());
}
// Runs SgmlTest1 for two named samples using the processor configured by SgmlSetup,
// then reports completion on the console.
public void SgmlTest()
{
    HtmlProcessor processor = SgmlSetup();

    SgmlTest1(processor, "jessica-alba", 182);
    SgmlTest1(processor, "bruce-willis", 183);

    Console.WriteLine("Complete");
}
/// <summary>
/// HTML helper which opens an InlineEditing region. It should be added once at the top of the page,
/// so that the whole region supports InlineEditing.
/// </summary>
/// <param name="htmlHelper">The HTML helper whose view context writer receives the region.</param>
/// <param name="providerName">Name of the provider.</param>
/// <param name="type">The type.</param>
/// <param name="id">The identifier.</param>
/// <returns>The <c>HtmlRegion</c> returned by <c>HtmlProcessor.CreateInlineEditingRegion</c>.</returns>
public static HtmlRegion InlineEditingRegion(this HtmlHelper htmlHelper, string providerName, string type, Guid id)
{
    var viewWriter = htmlHelper.ViewContext.Writer;
    var region = new HtmlProcessor().CreateInlineEditingRegion(viewWriter, providerName, type, id);
    return region;
}
// Redirects the links of the given HTML relative to baseUri using an HtmlProcessor bound to
// webServer, and returns the resulting content.
public static string RewriteLinksInHtmlForLocalProxy(Uri baseUri, string html)
{
    var processor = new HtmlProcessor(html, webServer);
    processor.RedirectLinks(baseUri);
    return processor.GetContent();
}
// Generates the preview for the currently selected cmdlet in the view chosen by the
// HtmlChecked / XmlChecked / HtmlSourceChecked flags and switches the visible control accordingly.
// Does nothing when no cmdlet is selected.
async void GenerateOutput(Object obj)
{
    CmdletObject cmd = Tab.EditorContext.CurrentCmdlet;
    ModuleObject module = Tab.Module;
    if (cmd == null)
    {
        return;
    }

    BusyControlVisible = Visibility.Visible;
    RtbVisible = Visibility.Collapsed;
    WebBrowserVisible = Visibility.Collapsed;

    try
    {
        if (HtmlChecked)
        {
            // Format into a local first so HtmlText is published once, with the final document.
            var htmlView = await HtmlProcessor.GenerateHtmlView(cmd, module);
            HtmlText = String.Format(Properties.Resources.HtmlTemplate, cmd.Name, htmlView, cmd.ExtraHeader, cmd.ExtraFooter);
            RtbVisible = Visibility.Collapsed;
            WebBrowserVisible = Visibility.Visible;
            return;
        }

        IEnumerable<XmlToken> data = new List<XmlToken>();
        if (XmlChecked)
        {
            if (module.UpgradeRequired)
            {
                Utils.MsgBox("Warning", "The module is offline and requires upgrade. Upgrade the project to allow XML view.", MessageBoxButton.OK, MessageBoxImage.Warning);
                return;
            }

            List<CmdletObject> cmdlets = new List<CmdletObject> { cmd };
            StringBuilder SB = new StringBuilder();
            await XmlProcessor.XmlGenerateHelp(SB, cmdlets, null, module.IsOffline);
            data = XmlTokenizer.LoopTokenize(SB.ToString());
        }
        else if (HtmlSourceChecked)
        {
            data = await HtmlProcessor.GenerateHtmlSourceHelp(cmd, module);
        }

        // Render the (possibly empty) token stream as a colorized flow document.
        Paragraph para = new Paragraph();
        para.Inlines.AddRange(ColorizeSource(data));
        Document = new FlowDocument();
        Document.Blocks.Add(para);
        WebBrowserVisible = Visibility.Collapsed;
        RtbVisible = Visibility.Visible;
    }
    finally
    {
        // Every exit path, including a failure while generating, must hide the busy indicator;
        // otherwise an exception would leave it visible indefinitely.
        BusyControlVisible = Visibility.Collapsed;
    }
}
// Downloads the first page of token transfers for the hard-coded contract from etherscan.io,
// parses the first HTML table of the response and maps every non-header row to a Transfer.
public List<Transfer> GetTransfers()
{
    const string tokenAddress = "0x46b9ad944d1059450da1163511069c718f699d31";
    const string tokenUrl = "https://etherscan.io/token/" + tokenAddress;

    var transfers = new List<Transfer>();

    ServicePointManager.Expect100Continue = true;
    ServicePointManager.DefaultConnectionLimit = 9999;
    ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12;
    // NOTE: the server certificate is deliberately validated by the platform. The previous
    // process-wide "accept any certificate" callback exposed every HTTPS call of the process
    // to man-in-the-middle attacks.

    HttpWebRequest req = (HttpWebRequest)WebRequest.Create("https://etherscan.io/token/generic-tokentxns2?contractAddress=" + tokenAddress + "&mode=&m=normal&p=1");

    // Let the framework advertise and undo gzip/deflate, so the body is readable whichever
    // encoding (or none) the server picks.
    req.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
    req.Headers.Add("Accept-Language", "en-US");
    req.Headers.Add("Cache-Control", "max-age=0");
    // User-Agent has to be set through the property; a header named "UserAgent" is not the user agent.
    req.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/18.17763";
    req.ContentType = "text/html; charset=utf-8";
    req.Referer = tokenUrl + "#balances";
    req.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3";
    req.Host = "etherscan.io";
    req.Method = "GET";
    req.Timeout = 10000;

    using (var resp = req.GetResponse())
    using (var str = resp.GetResponseStream())
    using (var sr = new StreamReader(str))
    {
        string responseString = sr.ReadToEnd();
        HtmlProcessor proc = new HtmlProcessor(responseString);
        DataTable table = proc.HtmlTables[0];

        foreach (DataRow row in table.Rows)
        {
            // ItemArray builds a fresh copy on every access, so take it once per row.
            object[] cells = row.ItemArray;

            // Skip the header row.
            if (cells[0].ToString() == "Txn Hash")
            {
                continue;
            }

            transfers.Add(new Transfer()
            {
                Hash = cells[0].ToString().Replace("/tx", "https://etherscan.io/tx").Replace("_parent", "_blank"),
                Age = cells[1].ToString(),
                From = cells[2].ToString().Replace(tokenAddress, tokenUrl).Replace("_parent", "_blank"),
                To = cells[4].ToString().Replace(tokenAddress, tokenUrl).Replace("_parent", "_blank"),
                Quantity = cells[5].ToString()
            });
        }
    }

    return transfers;
}
// Round-trips three HTML samples through HtmlEncode and HtmlProcessor.ProcessEncodedHtml and
// expects each sample back unchanged (after the Replace call applied to the result).
public void Test_HtmlProcessor()
{
    string html = @"<h3>Добре дошли!</h3><p>Добре дошли в системата за електронни услуги на библиотеката на Нов български университет!</p><p>Информация за видовете услуги може да намерите в <a target=""_blank"" href=""http://nbu.bg/index.php?l=116"">сайта на библиотеката</a>.</p><p>За допълнителна информация:</p><ul><li>имейл: [email protected]</li><li>тел.: 02/8110296</li><li>скайп: NBU_Biblioteka</li></ul>Намерете ни в <a href=""https://google.com"">Google</a> сега!";
    string htmlWithoutAnchors = @"<h3>Добре дошли!</h3><p>Добре дошли в системата за електронни услуги на библиотеката на Нов български университет!</p><p>Информация за видовете услуги може да намерите</p>";
    string htmlWithEveryAnchor = "<p>Go to <a target=\"_self\" href=\"http://google.com\">google</a></p>are<a href=\"https://google.com\">Google</a>are<a target=\"_blank\" href=\"https://google.com\">Google</a>are<a target=\"_self\" href=\"https://google.com\">Google</a>are<a target=\"_parent\" href=\"https://google.com\">Google</a>are<a target=\"_top\" href=\"https://google.com\">Google</a>";

    // Each sample is encoded, processed and compared with itself, in declaration order.
    string[] samples = { html, htmlWithoutAnchors, htmlWithEveryAnchor };
    foreach (string sample in samples)
    {
        string encoded = System.Web.HttpUtility.HtmlEncode(sample);
        string processed = HtmlProcessor.ProcessEncodedHtml(encoded).Replace(" ", " ");
        Assert.AreEqual(sample, processed);
    }
}
// Initializes every member of the view model with a default instance.
public BlogViewModel()
{
    // Settings and paging.
    ProjectSettings = new ProjectSettings();
    Paging = new PaginationSettings();

    // Lookup tables, initially empty.
    Categories = new Dictionary<string, int>();
    Archives = new Dictionary<string, int>();

    // Helpers.
    filter = new HtmlProcessor();
    cryptoHelper = new CryptoHelper();

    // Editor and routing defaults.
    EditorSettings = new EditorModel();
    BlogRoutes = new DefaultBlogRoutes();
}
// Writes the named resource through an HtmlProcessor with an empty TagHelperContainer and
// expects the output to equal the input.
public void NoChangesTest(string resourceName)
{
    // Arrange
    var input = GetResourceContent(resourceName);
    var sink = new StringWriter();
    var processor = new HtmlProcessor(sink, new TagHelperContainer());

    // Act
    processor.Write(input);

    // Assert
    var output = sink.GetStringBuilder().ToString();
    output.Should().Be(input);
}
// Creates the view model around the given processor, wires up the Go and Stop commands and
// sets the initial URL. Throws ArgumentNullException when htmlProcessor is null.
public BrowserViewModel(HtmlProcessor htmlProcessor)
{
    if (htmlProcessor == null)
    {
        throw new ArgumentNullException(nameof(htmlProcessor));
    }

    _htmlProcessor = htmlProcessor;
    _url = "http://github.com";

    _goCommand = new RelayCommand(OnGoCommand, CanExecuteGoCommand);
    _stopCommand = new RelayCommand(OnStopCommand, CanExecuteStopCommand);
}
// Checks HtmlProcessor.GetTagAttributeValues for href values of anchor tags: a regular anchor,
// a self-closing anchor with mismatched quotes, and a document with two anchors.
public void TestGetTagAttributeValues()
{
    // Regular anchor, single-quoted value.
    var regularAnchor = HtmlProcessor.GetTagAttributeValues("text <a href='http://www.ya.ru'>yandex</a> text", "a", "href");
    Assert.AreEqual(@"http://www.ya.ru", regularAnchor[0]);

    // Self-closing anchor, spaces around '=' and mismatched quote characters.
    var selfClosingAnchor = HtmlProcessor.GetTagAttributeValues(@"text <a href = ""http://www.ya.ru' /> text", "a", "href");
    Assert.AreEqual(@"http://www.ya.ru", selfClosingAnchor[0]);

    // Two anchors: the values are expected in document order.
    string html = @"text <a href='http://www.ya1.ru'>yandex</a> text <a href = 'http://www.ya2.ru' /> text";

    var firstLookup = HtmlProcessor.GetTagAttributeValues(html, "a", "href");
    Assert.AreEqual(@"http://www.ya1.ru", firstLookup[0]);

    var secondLookup = HtmlProcessor.GetTagAttributeValues(html, "a", "href");
    Assert.AreEqual(@"http://www.ya2.ru", secondLookup[1]);
}
// Verifies HtmlProcessor.GetStringContent on the dummy widget model: "NonEditableContent" must be
// returned unchanged, while "EditableContent" must be wrapped in a div carrying the field and
// field-type data attributes, containing the content, and properly closed.
public void HtmlProcessor_GetText_TextElelementProperlyCreatedForInlineEditng()
{
    // Arrange
    string dummyContent = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit";
    var htmlProcessor = new HtmlProcessor();
    var dummyWidgetModel = new DummyWidgetModel { EditableContent = dummyContent, NonEditableContent = dummyContent };

    string fieldName = "DummyWidget";
    string type = "LongText";
    var fieldAttribute = "data-sf-field";
    var typeAttribute = "data-sf-ftype";

    // Act
    var inlineeditingAwareContent = htmlProcessor.GetStringContent(dummyWidgetModel, "EditableContent");
    var nonInlineeditingAwareContent = htmlProcessor.GetStringContent(dummyWidgetModel, "NonEditableContent");

    // Assert: the non-editable property is passed through untouched.
    Assert.AreEqual<string>(dummyContent, nonInlineeditingAwareContent);

    using (HtmlParser parser = new HtmlParser(inlineeditingAwareContent))
    {
        var chunk = parser.ParseNext();
        Assert.IsNotNull(chunk);

        // The first chunk must be a div with the required attributes.
        Assert.IsTrue(chunk.TagName.Equals("div", StringComparison.InvariantCultureIgnoreCase));
        Assert.IsTrue(chunk.HasAttribute(fieldAttribute));
        Assert.IsTrue(chunk.HasAttribute(typeAttribute));

        // The required attributes must carry the expected values.
        Assert.AreEqual<string>(fieldName, chunk.GetParamValue(fieldAttribute));
        Assert.AreEqual<string>(type, chunk.GetParamValue(typeAttribute));

        // Walk the remaining chunks: remember the last text chunk and the very last chunk.
        string content = null;
        HtmlChunk nextChunk = null;
        while ((nextChunk = parser.ParseNext()) != null)
        {
            chunk = nextChunk;
            if (nextChunk.Type == HtmlChunkType.Text)
            {
                content = nextChunk.GenerateHtml();
            }
        }

        // Without this guard a wrapper with no text chunk would surface as a NullReferenceException
        // on the StartsWith call below instead of as a readable assertion failure.
        Assert.IsNotNull(content, "No text chunk was found inside the wrapper element.");

        // The inner content must be what the model held.
        Assert.IsTrue(content.StartsWith(dummyContent, StringComparison.InvariantCultureIgnoreCase));

        // The wrapper must be properly closed: the last chunk is the closing div.
        Assert.IsTrue(chunk.TagName.Equals("div", StringComparison.InvariantCultureIgnoreCase));
        Assert.IsTrue(chunk.Type == HtmlChunkType.CloseTag);
    }
}
// Feeds the input to an HtmlProcessor one character at a time, with tag helpers registered for
// "c" and "xn:div", and compares the written output with the expected text.
public void TransformTest(String input, string expected)
{
    // arrange
    var output = new StringWriter();
    var tagHelpers = new TagHelperContainer();
    tagHelpers.Register<TagC>("c");
    tagHelpers.Register<TagWithNs>("xn:div");
    var processor = new HtmlProcessor(output, tagHelpers);

    // act
    foreach (var character in input)
    {
        processor.Write(character);
    }

    // assert
    var actual = output.GetStringBuilder().ToString();
    Assert.AreEqual(expected, actual);
}
// Writes the input as a byte buffer through an HtmlProcessor that has TestTagHelper registered
// for "foo", and compares the written output with the expected text.
public void ActionLinkTest(String input, string expected)
{
    // arrange
    var output = new StringWriter();
    var tagHelpers = new TagHelperContainer();
    tagHelpers.Register<TestTagHelper>("foo");
    var processor = new HtmlProcessor(output, tagHelpers);

    // The bytes are produced with the writer's own encoding.
    var buffer = output.Encoding.GetBytes(input);

    // act
    processor.Write(buffer, 0, buffer.Length);

    // assert
    var actual = output.GetStringBuilder().ToString();
    Assert.AreEqual(expected, actual);
}
// Renders the given HTML with an ImageRenderer for "the-post" and runs the rendered markup
// through an HtmlProcessor built on substitutes: the code formatter echoes its second argument
// and the image processor always returns "minImage".
private async Task<string> Publish(string html)
{
    // Code formatter stub: returns the second argument unchanged.
    _codeFormatter = Substitute.For<ICodeFormatter>();
    _codeFormatter
        .FormatAsync(Arg.Any<string>(), Arg.Any<string>())
        .Returns(call => Task.FromResult((string)call[1]));

    // Image processor stub: always yields the same marker value.
    _imageProcessor = Substitute.For<IImageProcessor>();
    _imageProcessor
        .Minimize(Arg.Any<string>())
        .Returns("minImage");

    _processor = new HtmlProcessor(_codeFormatter, _imageProcessor);

    var rendered = new ImageRenderer("the-post").Render(html);
    var processed = await _processor.ProcessAsync(rendered.Html);
    return processed;
}
// Checks that GetStringContent returns the "NonEditableContent" property of the dummy widget unchanged.
public void GetStringContent_WithoutInlineEditingAttribute_PreservesContent()
{
    // Arrange
    var processor = new HtmlProcessor();
    var widgetModel = new DummyWidgetModel
    {
        EditableContent = this.dummyContent,
        NonEditableContent = this.dummyContent
    };

    // Act
    var actualContent = processor.GetStringContent(widgetModel, "NonEditableContent");

    // Assert
    Assert.AreEqual(this.dummyContent, actualContent, "The content is not preserved correctly.");
}
// Loads the URL entry with the given id, extracts HtmlProcessor.PlanText from its HTML content
// and renders it with the "Messages/_HtmlString" view (as a partial view for AJAX requests).
public ActionResult ReadUrl(Guid id)
{
    var model = _urlManager.GetById(id);
    model.LoadFull();

    HtmlProcessor processor = new HtmlProcessor(model.HtmlContent);

    // Typed as object on purpose: with a string second argument, View(viewName, content) binds to
    // the View(string viewName, string masterName) overload, which would treat the text as a
    // master/layout name and pass no model to the view.
    object content = processor.PlanText;

    if (Request.IsAjaxRequest())
    {
        return PartialView("Messages/_HtmlString", content);
    }

    return View("Messages/_HtmlString", content);
}
// Stores the injected services in the corresponding fields; performs no validation.
public RssChannelProvider(
    IProjectService projectService,
    IBlogService blogService,
    IHttpContextAccessor contextAccessor,
    IUrlHelperFactory urlHelperFactory,
    IActionContextAccessor actionContextAccesor,
    HtmlProcessor htmlProcessor)
{
    // Content services.
    this.projectService = projectService;
    this.blogService = blogService;
    this.htmlProcessor = htmlProcessor;

    // Request and URL infrastructure.
    this.contextAccessor = contextAccessor;
    this.actionContextAccesor = actionContextAccesor;
    this.urlHelperFactory = urlHelperFactory;
}
// Checks HtmlProcessor.StripHtml on two inputs:
//  1. a short snippet made of a script block, a style block, text with an anchor and an HTML comment,
//     for which " text yandex text " is expected;
//  2. a captured <head> element of a Yandex page (title, link tags, a large style block, an IE
//     conditional comment and a script block), for which " Яндекс " is expected.
// The large verbatim literal below is the test fixture and spans several physical lines; its bytes,
// including the line breaks inside it, must not be altered.
public void TestStripHtml() { Assert.AreEqual(" text yandex text ", HtmlProcessor.StripHtml(@"<script language='javascript'>js</script><style type=""text/css"">css</style>text <a href='http://www.ya.ru'>yandex</a> text <!-- comments -->")); string yandex = @"<head><title>Яндекс</title><link rel=""shortcut icon"" href=""http://img.yandex.net/i/favicon.ico""><link rel=""alternate"" type=""application/rss+xml"" title=""Новости Яндекса"" href=""http://company.yandex.ru/news/news.rss""><link rel=""alternate"" type=""application/rss+xml"" title=""Блог Яндекса"" href=""http://company.yandex.ru/blog/index.rss""><link rel=""search"" href=""http://www.yandex.ru/opensearch.xml"" title=""Яндекс"" type=""application/opensearchdescription+xml""><style type=""text/css"">*{font-size:100%;margin:0}body,table,th,td,ul,ol,li{padding:0;border-collapse:collapse;list-style:none;vertical-align:top}h1,b{font-weight:400}input{vertical-align:middle}body{font:.8em Arial,sans-serif;background:#fff url(http://www.tns-counter.ru/V13a****yandex_ru/ru/CP1251/tmsec=yandex_main/0)}h2 a:link,#head .m a,#news h2 a:visited,#body h2 a:visited,#head a,#tabs a,#mail p a,#right h2 a:link,#right h2 a:visited,#foot .kbd{color:#000}a:link,#adv h2 a:link{color:#1A3DC1}a{text-decoration:underline}a:hover{color:#f00!important}img{border:0}#sample{cursor:pointer;text-decoration:none;border-bottom:1px dashed #000}i{position:absolute;width:16px;height:16px;margin-left:-19px;background:url(http://img.yandex.net/i/icons.png) no-repeat}ol li{position:relative;padding-bottom:.3em;line-height:1.2em}ol u{position:absolute;z-index:1;left:-1.4em;text-decoration:none}#head{width:100%;margin-bottom:10px;background:#e8e9ec}#head td{font:85% Verdana,sans-serif;padding:5px 10px 7px 0}#head .s{width:18%;padding-left:27px}#head .s a{float:left;color:#ee7b00}#head .s i{background-position:-255px}#head .h{width:34%}#head .h a{color:#808080}#head .m{width:1px}#head .u{text-align:right}#head .user,#head .user 
*{font-weight:bold;text-decoration:underline}#head .user b{color:#f00}#head .exit{color:#E03A3A}#neck{width:100%}#neck th{width:18%}#neck .news td{vertical-align:bottom}#news{width:50%}#news h2,#news ul,#news ul li,#news ul a,#news .p{float:left}#news h2{padding-bottom:5px}#news ul{padding-left:1em}#news ul li{margin:0 .5em 0 -.5em;padding:0 .5em 4px}#news.all li.all,#news.reg li.reg{background:#fff68d url(http://img.yandex.net/i/arr-news-chooser.gif) no-repeat 50% 100%}#js #news ul a{text-decoration:none;color:#000;border-bottom:1px dashed #000}#js #news ul a:hover{border-bottom-color:#f00}#news.all div.reg,#news.reg div.all{display:none}#js #news.all li.all a,#js #news.reg li.reg a{cursor:default;color:#ee7b00!important;border:0}#news .p{margin-left:12px;padding-left:1.5em;color:#FF5223}#news .p i{background-position:0}#news ol{clear:left;margin-right:15px}#news .hot u,#news .hot strong span{text-decoration:none;color:#ee7b00}#news .hot strong,#news .hot b{font-weight:normal;position:absolute;top:0;right:100%;margin-right:.2em;padding:0 2.55em .15em .5em;background:#fff68d url(http://img.yandex.net/i/arr.png) no-repeat 100% 50%;text-decoration:underline;color:#fff68d}#news .hot b{z-index:2;left:-3.2em;height:1.2em;background:none}#adv{width:32%}#adv table{width:80%;margin-bottom:.2em}#neck #adv td{vertical-align:middle}#adv img{margin-right:7px}#adv .c{width:100%}#adv h2{font:140% Arial,sans-serif;margin:0 0 .25em 0}#adv div{font-size:0;width:160px}h1 img{font:3.9em serif;margin-top:2px}#form{margin:12px 9% 12px 0;background:#fc0}#form .arr{padding:6px 26px 6px 8px;background:url(http://img.yandex.net/i/arr.png) no-repeat 100% 50%}[id]#form table{width:100%}#form th{width:100%;height:33px;padding-right:5px}#form th input{width:100%}#form .tune{padding-right:5px;line-height:18px}#form .ua{position:relative;margin:-9px 0 3px}#form .l{float:left;padding-right:1em}#form .r{float:right}#form .r a:link{color:#666}[id]#tabs{width:100%}#tabs th,#tabs 
td{width:auto;height:auto;padding:5px 8px 6px}#tabs td{background:#fff}#tabs .all{width:100%;padding-left:1.5em}#tabs .all a{color:#1a3dc1} #body{position:relative;width:100%;margin-top:-4px}#body .l,#body .r{float:left}#body:lang(ru) .l,#left:lang(ru) .r{overflow:hidden}#body .l{margin-right:5%}#body span{white-space:nowrap}em,#catalog strong a{font:100% Arial,sans-serif}#right h2{margin:1.2em 0 .1em}.info a:link,#adresa a:link,#rates a:link{color:#37496d}#banner{margin:6px 0 11px}#banner div{margin-bottom:-19px}#foot{font-size:85%;clear:left;margin:1.7em 0 0 0;padding-top:10px;border-top:1px solid #C5C8D0}#foot li{position:absolute;overflow:hidden;padding:0 0 1.6em}#foot .mob:link{color:#393}#foot .d{left:2%;width:31%}#foot .d img{float:left;margin:-1.05em 1.1em 0 0}#foot .d div{margin-left:90px;padding-left:1.1em}#foot .i{left:35%;width:15%}#foot .i div{margin:.15em 0 1em}#foot .s{left:52%;width:26%}#foot .c{left:80%;overflow:visible;width:18%}#foot i{background-position:-16px}#foot .c div{margin:.15em 0} #region{float:left;width:100%}h2#area{font:140% Arial,sans-serif;clear:left;margin:0}#weather{margin:.3em 0 .1em}#weather *{display:inline}#weather dl{margin-left:.5em}#weather img,#weather b{position:relative;top:8px}#weather b{font-size:140%;vertical-align:top}#weather dd{margin-right:.6em}.l #weather h2{display:block;margin:.4em 0 .55em}.l #weather dl{margin:0}.info{overflow:hidden}#right .info h2 em a{color:#393}.info ul{color:#666}#right .info li{margin-top:.4em}.info li a,.info li span{margin-right:.3em;white-space:normal}.info i{margin-left:-16px;background-image:url(http://img.yandex.net/i/i-tick.gif)}#tv ul{padding-left:3em}#tv ul em{position:absolute;margin-left:-3em}#tv dt{margin:.6em 0 -.3em}#traffic h2{margin:.9em 0 .55em}#traffic i{width:21px;height:18px;margin:.3em 0 0 2px}#traffic.rd i{background-position:-192px}#traffic.yw i{background-position:-213px}#traffic.gn i{background-position:-234px}#traffic 
b{font-size:140%;position:relative;left:28px}#traffic.rd em a{color:#ea0503}#traffic.yw em a{color:#c90}#traffic.gn em a{color:#067e06}#adresa{color:#666}#adresa div{margin-top:.3em}#rates{float:left;clear:left}#rates .r{clear:none}#rates h2{margin:1.5em 0 .3em}#rates th,#rates td{font-weight:400;width:auto;padding:0 .4em .2em 0;text-align:left}#rates td{text-align:right;color:#666}#rates strong{color:#000}#rates a{white-space:nowrap}#left{float:left;clear:left;width:44%;margin:.35em 1% 0 0}.wide #left{width:56.5%}#left .l{width:45%;margin-right:0}#left .r{width:50%;margin-bottom:1.5em}#right{float:left;width:52%}.wide #right{width:39%}#right .l{width:50%}#right .r{width:45%}#mail,#fotki{width:178px;margin:0 auto 1.5em;padding:0 20px}[id^=mail]#mail,[id^=fotki]#fotki{width:138px}#mail{margin-top:6px}#mail form{padding-top:.85em;background:#d4dff3 url(http://img.yandex.net/i/icons.png) no-repeat -272px 0}#mail .f,#mail p{font:85% Verdana,sans-serif;background:#d4dff3}#mail.form .f{padding:0 1em}#mail .f div{margin:.5em 0}#mail .i{position:relative;height:3.2em}#js #mail .i{height:2.1em}#mail .i label,#mail .i input{position:absolute;left:0;width:116px;padding:.2em 0}#mail .i input{font:100% Verdana,sans-serif;margin-top:1.6em;padding:.1em 0;text-align:center}#js #mail .i input{margin:0}#js #mail .i label{z-index:2;cursor:text;color:#999}#js #mail .t{margin-top:-.3em}#mail.form .f a:link{color:#666}#mail ul{padding:.1em .5em 1em}#mail li{margin:.6em 0 0 19px;text-align:left;line-height:1.2em}#mail ul a:link{color:#1f3250}#mail .user{font:120%/1.15em Arial,sans-serif;margin-left:0}#mail .user b{height:1%;margin-left:-.3em}#mail .none{color:#8396b6}#mail .mail i{background-position:-32px}#mail .count{margin-top:.3em}#mail .none i{background-position:-48px}#mail .post i{background-position:-64px}#mail p{margin:1em 0 0;padding:.3em 0 .55em;background:#fff}#fotki h2{position:relative;left:9px;display:table;margin:0 auto}#fotki i{background-position:-80px}#fotki 
table,#fotki div{margin:.3em auto 0;background:url(http://img.yandex.net/i/bg-fotki.png)}#fotki div{display:inline;float:left;margin:0 0 -1px 5px;padding-top:9px;background-position:100% 0}#fotki div div{margin:0;padding:0 5px 0 0;background-position:100% 100%}#fotki div div div{position:relative;left:-5px;padding:0;background-position:0 100%}#fotki div a{position:relative;top:-6px;left:5px;display:block}#fotki .k{font-weight:400}#fotki img{border:6px solid #fff}.list{position:relative;left:-1.4em}.list h2,.list ul,.list ol{padding-left:1.4em}#catalog{margin:0 0 1.5em}#catalog h2,#catalog li{margin:0 1em .4em 0}#catalog b{height:1%;margin-left:-.3em}#catalog div a{font-size:85%;color:#393}#catalog div i{background:url(http://img.yandex.net/i/i-tick.gif) no-repeat 100% 0}#blogs h2,#blogs ol{margin-bottom:.4em}#services div{margin:0 0 .7em 21px;color:#666}#services i{margin-left:-21px}#market i{background-position:-96px}#moikrug i{background-position:-144px}#games i{background-position:-112px}#cards i{background-position:-128px}#narod i{background-position:-160px}#money i{background-position:-176px}#direct{margin-top:3.2em!important}#direct i{background-position:-411px}#partners i{background-position:-427px} .no-bar #bar{display:none}#bar{font-size:.89em;margin:0 0 1px;line-height:1.5em;white-space:nowrap;background:#c8e4f4 no-repeat}#bar div{min-height:18px;padding-left:60px}#bar.ie6{background:#ffb700 url(http://img.yandex.net/i/bar-ie6-bg.gif) repeat-x}#bar.ff div{background:url(http://img.yandex.net/i/bar-ff.png) no-repeat}#bar.ie6 div{padding-left:90px;background:url(http://img.yandex.net/i/bar-ie6.png) no-repeat}#bar.ie div{background:url(http://img.yandex.net/i/bar-ie.png) no-repeat}#bar a{margin:0 .7em;cursor:pointer}#bar.ie6 a{color:#fff}#bar .x{float:right;margin:0 10px;padding-left:10px;color:#666;background:url(http://img.yandex.net/i/bar-x.gif) no-repeat 0 65%}#bar.ie6 .x{color:#fff82e;background-image:url(http://img.yandex.net/i/bar-ie6-x.gif)} 
</style><!--[if IE]><style>body{font-size:80%}a i,#news .hot a,#sample{cursor:hand}ol u{top:.1em}#head .s a{float:none}#news .hot b{backgr\ound:red;filter:progid:DXImageTransform.Microsoft.Alpha(opacity=0)}#news ul li{display:inline}#news .hot strong,#news .hot b{padding-bottom:.2em}#adv img{display:block}#form div{height:1%} #body .l,#left .r{position:relative;overflow:hidden}#body .r,#right{clear:right}#region,#mail form,#mail.mail p,#foot{float:none;height:1%}#js #mail .t{margin-top:-.7em}#mail .user{margin-left:1px}#catalog i{margin-top:1px}#bar div{height:18px}#bar a{cursor:hand} </style><![endif]--><script type=""text/javascript"">g_sid=207731200829025087149;if(self.parent.frames.length!=0){self.parent.location=document.location}function p(p){new Image().src=location.protocol+'//clck.yandex.ru/click/dtype=clck/lid='+p+'/sid='+g_sid+'/*'}var g_tabs;function tabs(shadowTabs,p,c1,c2){if(!g_tabs&&document.getElementById&&(typeof(encodeURIComponent)=='function')){g_tabs=document.getElementById(""tabs"");if(g_tabs){var input=document.forms[0].getElementsByTagName(""input"")[0];g_tabs=g_tabs.getElementsByTagName(""a"");var t=input.value.replace(/^\s+|\s+$/g,"");if(t!=''&&(shadowTabs!=0)){var img=new Image();img.onload=function(){for(var i=1,p=1;i<7;p*=2,i++){if(!(img.height&p)){g_tabs[i].style.color=""#999""}}};img.src=""http://www.yandex.ru/yandsearch?rpt=tabsonly&img=1&text=""+encodeURIComponent(t)}setInterval(function(){var t=input.value.replace(/^\s+|\s+$/g,"");for(var i=0,l=g_tabs.length-1;i<l;i++){var url="";var a=g_tabs[i];var counter=c1;if(t!=""){var h=a.host.split(""."")[0];if(h==""www""){url=""yandsearch?text=""}else if(h==""market""){url=""search.xml?clid=505&cvredirect=1&text="";}else if(h==""news""){url=""yandsearch?rpt=nnews&grhow=clutop&text=""}else if(h==""images""){url=""yandsearch?stype=image&text=""}else if(h==""blogs""){url=""search.xml?how=tm&rd=2&text=""}else 
if(h==""maps""){url=""?text=""}else{url=""search.xml?text=""}url+=encodeURIComponent(t);counter=c2}else if(h==""market""){url=""?clid=505&cvredirect=1&text=""}var tmp=a.protocol+'//'+a.host+'/'+url;if(a.href!=tmp){a.href=tmp}if(counter&&!a.onclick){a.onclick=function(){return r(this,'stred/pid='+p+'/cid='+counter)}}}},200)}}}function sample(a,name){name=name||""text"";var f=document.forms[0];if(!f.nl){var z=document.createElement(""input"");z.name=""nl"";z.type=""hidden"";z.value=1;a.appendChild(z)}f[name].value=a.innerText?a.innerText:a.firstChild.nodeValue;f[name].focus()}function clean(a){var p=a.previousSibling;if(p){a.onblur=function(){if(!a.value){p.style.top=""}};p.style.top=""-9999px""}}if(document.documentElement){document.documentElement.id=""js""}function $(i){return document.all?document.all[i]:document.getElementById(i)}function init(){if(!document.all){onunload=init};setTimeout(function(){document.forms[0].text.focus()},1);tabs(0)}function setHome(a,url){a.style.behavior='url(#default#homepage)';a.setHomePage(url||'http://www.yandex.ru');return false}function cleanLogin(){$('t').checked=0;var l=$('l');var p=$('s');var f=function(){if(l.value)clean(l);if(p.value)clean(p)};setInterval(f,100)}function rate(a,href){window.open(href||a.href,'popup','width=560,height=420,status=yes,menubar=no,resizable=yes,scrollbars=1').focus();return false}function news(a){a.blur();init();$(""news"").className=a.parentNode.className;return false} </script></head>"; Assert.AreEqual(" Яндекс ", HtmlProcessor.StripHtml(yandex)); }
/// <summary>
/// Creates and configures the <see cref="HtmlProcessor"/> used by the SGML tests.
/// </summary>
/// <remarks>
/// The filter runs in <c>DenyByDefault</c> mode with the default element options
/// <c>AllowContent | UseTypography</c>, the <c>xmlns</c> and <c>ns</c> attributes are
/// marked as denied, and the RegularContent/Links/Images/Abstract/Youtube rule sets
/// are chained onto that rule. Special-symbol encoding is switched off and
/// <c>Typographics.Replaces()</c> is invoked.
/// </remarks>
/// <returns>The configured processor.</returns>
public HtmlProcessor SgmlSetup()
{
    var processor = new HtmlProcessor();

    // Filtering rules.
    processor.Filter.Mode = HtmlFilteringMode.DenyByDefault;
    processor.Filter.DefaultOptions =
        HtmlElementOptions.AllowContent | HtmlElementOptions.UseTypography;
    processor.Filter
        .Treat(rule => rule.Attributes("xmlns", "ns").As(HtmlAttributeOptions.Denied))
        .RegularContent()
        .Links()
        .Images()
        .Abstract()
        .Youtube();

    // Typography rules.
    processor.Typographics.EncodeSpecialSymbols = false;
    processor.Typographics.Replaces();

    return processor;
}
/// <summary>
/// Builds the fixture for the draft-save command tests: a test database context,
/// a mock file system with its image context, a mapper created from
/// <c>MappingProfile</c>, substituted date/image/code-formatting services,
/// and finally the handler under test.
/// </summary>
/// <param name="output">Test output helper handed to the <c>TestContext</c>.</param>
public DraftSaveCommandTests(ITestOutputHelper output)
{
    // Database
    _context = new TestContext(output);
    var db = _context.GetDb();

    // File system and images
    _fs = new MockFileSystem();
    _imageContext = new ImageContext(_fs);

    // Object mapping
    var mapperConfiguration = new MapperConfiguration(config => config.AddProfile<MappingProfile>());
    var mapper = mapperConfiguration.CreateMapper();

    // Clock substitute, fixed to the moment the fixture is constructed
    _dateProvider = Substitute.For<IDateProvider>();
    _dateProvider.Now.Returns(DateTime.Now);

    // HTML processing with substituted collaborators
    _imageProcessor = Substitute.For<IImageProcessor>();
    var codeFormatter = Substitute.For<ICodeFormatter>();
    _htmlProcessor = new HtmlProcessor(codeFormatter, _imageProcessor);

    _handler = new Handler(db, _imageContext, mapper, _dateProvider, _htmlProcessor);
}
/// <summary>
/// Feeds <paramref name="input"/> through an <see cref="HtmlProcessor"/> that has a
/// <c>TestTagHelper</c> registered under the name "foo" and verifies the text that
/// ends up in the underlying writer.
/// </summary>
/// <param name="input">Markup handed to the processor.</param>
/// <param name="expected">Text expected in the writer afterwards.</param>
public void ActionLinkTest(String input, string expected)
{
    // arrange
    var output = new StringWriter();
    var helpers = new TagHelperContainer();
    helpers.Register<TestTagHelper>("foo");
    var processor = new HtmlProcessor(output, helpers);

    // The processor consumes raw bytes, so encode the input with the writer's own encoding.
    byte[] payload = output.Encoding.GetBytes(input);

    // act
    processor.Write(payload, 0, payload.Length);

    // assert
    Assert.AreEqual(expected, output.ToString());
}
/// <summary>
/// Verifies that <c>GetFilename</c> returns "something" for the URL
/// "https://owain.codes/something/".
/// </summary>
public void Get_Filename_From_Url()
{
    // Arrange
    var pageUrl = "https://owain.codes/something/";
    var siteDomain = "https://www.owain.codes/";
    var expectedFilename = "something";
    var processor = new HtmlProcessor(pageUrl, siteDomain);

    // Act
    var actualFilename = processor.GetFilename(pageUrl);

    // Assert
    Assert.AreEqual(expectedFilename, actualFilename);
    TestContext.WriteLine(actualFilename);
}
/// <summary>
/// Downloads http://www.google.com, runs the response stream through an
/// <see cref="HtmlProcessor"/> and checks that at least one link was extracted.
/// </summary>
/// <remarks>Performs a real HTTP request, so it needs network access.</remarks>
public void HtmlProcessorer()
{
    var uri = new Uri("http://www.google.com");

    var request = (HttpWebRequest)WebRequest.Create(uri);
    request.Method = "GET";
    request.CookieContainer = new CookieContainer();

    using (var response = (HttpWebResponse)request.GetResponse())
    {
        // Use the URI that actually answered when it differs from the requested one.
        var respondingUri = response.ResponseUri;
        if (!respondingUri.Equals(uri))
        {
            uri = respondingUri;
        }

        var pageUrl = uri.ToString();
        var body = response.GetResponseStream();
        var processor = new HtmlProcessor(pageUrl, body, new TldParser());

        Assert.IsTrue(!processor.Links.IsNull());
        Assert.IsTrue(processor.Links.Count > 0);
    }
}
/// <summary>
/// Worker loop of a single collector. It repeatedly asks the pool manager for the next
/// link, downloads the page and, when the page links to at least one other domain,
/// saves the page and its outgoing links through the repository and stores those
/// links back into the pool.
/// </summary>
/// <param name="s">
/// The collector's <c>TaskState</c>, passed as <see cref="object"/>; it is cast
/// unconditionally, so any other type throws <see cref="InvalidCastException"/>.
/// </param>
/// <remarks>
/// The loop ends when the pool manager returns a null/empty link or cancellation is
/// requested on <c>state.CancelToken</c>. Progress is reported through
/// <c>state.ProgressHandler</c> using one shared event-args instance.
/// </remarks>
static void CollectorProcessor(object s)
{
    TaskState state = (TaskState)s;
    CollectorProcessorEventArgs progress = new CollectorProcessorEventArgs();
    progress.CollectorID = state.CollectorID;

    // Pages whose load check takes 4 seconds or longer are skipped.
    TimeSpan targetTime = new TimeSpan(0, 0, 0, 4);

    string link = state.PoolManagerHandler(state.Pool, state.VisitedUrls, state.VisitedDomains);
    while (!link.IsNullOrEmpty() && !state.CancelToken.IsCancellationRequested)
    {
        Uri uri = link.ToUri();

        // to be changed
        // TODO: isAllowedCrawl = state.MongoPersistence.isToCrawl(uri.ToString());
        bool isAllowedCrawl = true;

        // The null check must come before uri is dereferenced; previously
        // uri.ToString() was evaluated first, so a link that did not convert
        // to a Uri would throw instead of being skipped.
        bool isPageLoadAllowed = uri != null && HtmlPageLoadCheck(uri.ToString()) < targetTime;

        if (isPageLoadAllowed && isAllowedCrawl)
        {
            HtmlProcessor.LinkInfo currentUrlLinkInfo =
                new HtmlProcessor.LinkInfo((state.TldParser)) { Href = uri.ToString() };

            progress.Message = string.Format("{0} (fetching)", uri);
            state.ProgressHandler(progress);

            // The wait handle is only needed while a download is in flight; dispose it
            // afterwards so a long crawl does not pile up undisposed event handles.
            // NOTE(review): assumes WebDownloader invokes the callback at most once
            // per Download() - confirm against WebDownloader.
            using (ManualResetEvent done = new ManualResetEvent(false))
            {
                Dictionary<string, object> properties = new Dictionary<string, object>();
                properties.Add("TldParser", state.TldParser);
                properties.Add("State", done);

                WebDownloader web = new WebDownloader(uri, properties, ea =>
                {
                    try
                    {
                        progress.Message = string.Format(
                            "{0} [{1}] ({2})",
                            uri.ToUriShort(),
                            ea.Status,
                            (ea.Exception.IsNull()
                                ? "responded: " + ea.Stream.Length + " bytes"
                                : "exception: " + ea.Exception.Message));
                        state.ProgressHandler(progress);

                        currentUrlLinkInfo.Status = (int)ea.Status;

                        if (ea.Stream.IsNull())
                        {
                            // Nothing to parse: record the link with its status only.
                            state.Repository.SaveLink(uri.ToString(), string.Empty, string.Empty, currentUrlLinkInfo);
                        }
                        else
                        {
                            HtmlProcessor processor = new HtmlProcessor(
                                uri.ToString(),
                                ea.Stream,
                                ((TldParser)ea.Properties["TldParser"]));

                            progress.Message = string.Format("{0} (found={1})", uri, processor.Links.Count);
                            state.ProgressHandler(progress);

                            // Check if there is an external link. At most the first 401
                            // links are inspected before giving up.
                            bool hasExternalLink = false;
                            int countPage = 0;
                            foreach (var l in processor.Links)
                            {
                                if (l.Domain != currentUrlLinkInfo.Domain)
                                {
                                    hasExternalLink = true;
                                    break;
                                }

                                countPage++;
                                if (countPage > 400)
                                {
                                    break;
                                }
                            }

                            // There is at least one external link
                            if (hasExternalLink)
                            {
                                // Save the current link
                                state.Repository.SaveLink(uri.ToString(), string.Empty, string.Empty, currentUrlLinkInfo);
                                state.VisitedUrls.Add(uri.ToString());
                                state.VisitedDomains.Add(uri.ToString());

                                // Save the kids
                                int linkCounter = 1;
                                processor.Links.ForEach(l =>
                                {
                                    progress.Message = string.Format(
                                        "{0} (processing={1} of {2})",
                                        uri,
                                        linkCounter,
                                        processor.Links.Count);
                                    state.ProgressHandler(progress);

                                    // TODO: honour robots rules, e.g.
                                    // state.Robots.IsAllowed(string.Empty, l.Href.ToUri())
                                    state.VisitedUrls.Add(l.Href);
                                    state.VisitedDomains.Add(l.Href);
                                    state.Pool.Store(l.Href);

                                    // NOTE(review): if LinkInfo is a reference type these two
                                    // assignments alias (and mutate) currentUrlLinkInfo and l
                                    // rather than copying them - confirm the intended semantics.
                                    HtmlProcessor.LinkInfo childLinkInfo = currentUrlLinkInfo;
                                    childLinkInfo.AnchorText = l.AnchorText;
                                    childLinkInfo.AnchorRel = l.AnchorRel;
                                    childLinkInfo.AnchorKind = l.AnchorKind;

                                    HtmlProcessor.LinkInfo backLinkInfo = l;
                                    backLinkInfo.AnchorText = string.Empty;
                                    backLinkInfo.AnchorRel = string.Empty;
                                    backLinkInfo.AnchorKind = string.Empty;

                                    // Forward link (page -> child) and back link (child <- page).
                                    state.Repository.SaveLink(uri.ToString(), l.Href, string.Empty, childLinkInfo);
                                    state.Repository.SaveLink(l.Href, string.Empty, uri.ToString(), backLinkInfo);

                                    state.VisitedDomains.Remove(l.Href);

                                    ++linkCounter;
                                });
                            }

                            progress.Message = string.Format("{0} [DONE]", uri);
                            state.ProgressHandler(progress);
                        }
                    }
                    finally
                    {
                        // Always release the response stream and wake the waiting collector,
                        // even when processing throws; otherwise done.WaitOne() below would
                        // block this collector forever.
                        try
                        {
                            if (!ea.Stream.IsNull())
                            {
                                ea.Stream.Close();
                            }
                        }
                        finally
                        {
                            ((ManualResetEvent)ea.Properties["State"]).Set();
                        }
                    }
                });

                web.Download();
                done.WaitOne();
            }

            // Remove from the buffer so that collectors can crawl again the domain
            state.VisitedDomains.Remove(link);
        }

        // Fetch next link
        link = state.PoolManagerHandler(state.Pool, state.VisitedUrls, state.VisitedDomains);
    }

    progress.Message = "NO URL IN THE POOL";
    state.ProgressHandler(progress);
}
/// <summary>
/// Runs the processor that matches the file's extension and writes the result
/// into the site folder. Files with any other extension are ignored.
/// </summary>
/// <param name="contentFolder">Folder handed to the processor as the content source.</param>
/// <param name="siteFolder">Folder handed to the processor as the site output.</param>
/// <param name="fi">The content file to process; its extension selects the processor.</param>
private static void ApplyTemplate(string contentFolder, string siteFolder, FileInfo fi)
{
    if (Verbose)
    {
        Console.WriteLine("Processing "+ fi.Extension + " " + fi.Name);
    }

    switch (fi.Extension)
    {
        case ".html":
        {
            var html = new HtmlProcessor();
            html.Consume(contentFolder, siteFolder, fi.Name, fi.Extension);
            FileUtils.WriteFile(html.SiteFile, html.Template, html.Content);
            // HTML pages are additionally passed to the search tagger.
            SearchProcessor.TagSearchFile(html.Content, contentFolder, siteFolder, fi);
            break;
        }

        case ".xml":
        {
            string title = BijouUtils.ParsePageTitle(siteFolder+"/bogus.xxx");
            var xml = new XmlProcessor();
            xml.XslArgs = XmlProcessor.BuildXsltArgumentList(title);
            xml.Consume(contentFolder, siteFolder, fi.Name, fi.Extension);
            // XML output is written without a template.
            FileUtils.WriteFile(xml.SiteFile, xml.Content);
            break;
        }

        case ".csv":
        {
            var csv = new CsvProcessor();
            csv.Consume(contentFolder, siteFolder, fi.Name, fi.Extension);
            FileUtils.WriteFile(csv.SiteFile, csv.Template, csv.Content);
            break;
        }

        case ".md":
        {
            var markdown = new MdProcessor();
            markdown.Consume(contentFolder, siteFolder, fi.Name, fi.Extension);
            FileUtils.WriteFile(markdown.SiteFile, markdown.Template, markdown.Content);
            break;
        }

        case ".rss":
        {
            var rss = new RssProcessor();
            rss.Consume(contentFolder, siteFolder, fi.Name, fi.Extension);
            FileUtils.WriteFile(rss.SiteFile, rss.Template, rss.Content);
            FileUtils.HtmlClone(contentFolder, siteFolder, fi.Name, fi.Extension, rss.Clone);
            break;
        }

        case ".ics":
        {
            var ics = new IcsProcessor();
            ics.Consume(contentFolder, siteFolder, fi.Name, fi.Extension);
            FileUtils.WriteFile(ics.SiteFile, ics.Template, ics.Content);
            FileUtils.HtmlClone(contentFolder, siteFolder, fi.Name, fi.Extension, ics.Clone);
            break;
        }
    }
}
/// <summary>
/// Console test run: for every collector created from the pool, downloads the
/// collector's link, extracts the links of the page and, for each link the robots
/// service allows, stores it in the pool and saves it through the repository.
/// Status text is written on the console row <c>WORK_AREA_TOP + SeqNo</c>.
/// </summary>
/// <param name="WORK_AREA_TOP">First console row of the status area.</param>
/// <param name="parallelCount">Passed to <c>CreateCollectorPool</c>.</param>
/// <param name="repository">Receives the forward and back link records.</param>
/// <param name="robots">Decides whether a found link may be stored.</param>
/// <param name="pool">Pool the collectors are created from and new links are stored in.</param>
/// <param name="history">Visited-URL history; the page URL is added for every stored link.</param>
/// <param name="tldParser">Handed to the downloader properties and read back in the callback.</param>
/// <remarks>
/// <c>Download()</c> is not followed by any wait here; the wait-handle code this
/// method once had is disabled in the original.
/// </remarks>
private static void TestRun1(int WORK_AREA_TOP, int parallelCount, Repository repository, RobotService robots, CollectorPool pool, VisitedUrls history, TldParser tldParser)
{
    CreateCollectorPool(pool, history, parallelCount).ForEach(collector =>
    {
        var downloadProperties = new Dictionary<string, object>
        {
            { "TldParser", tldParser }
        };

        Uri uri = collector.Link.ToUri();
        WriteXY(0, WORK_AREA_TOP + collector.SeqNo, "{0} (fetching)", uri.ToUriShort());

        WebDownloader downloader = new WebDownloader(uri, downloadProperties, ea =>
        {
            // Summarise the outcome of the request for the status row.
            string outcome;
            if (ea.Exception.IsNull())
            {
                outcome = "responded: " + ea.Stream.Length + " bytes";
            }
            else
            {
                outcome = "exception: " + ea.Exception.Message;
            }

            WriteXY(0, WORK_AREA_TOP + collector.SeqNo, "{0} [{1}] ({2})", uri.ToUriShort(), ea.Status, outcome);

            if (ea.Stream.IsNull())
            {
                // No body to parse; pause before finishing this callback.
                Thread.Sleep(5000);
                return;
            }

            var processor = new HtmlProcessor(
                uri.ToString(),
                ea.Stream,
                (TldParser)ea.Properties["TldParser"]);

            WriteXY(0, WORK_AREA_TOP + collector.SeqNo, "{0} (found={1})", uri.ToUriShort(), processor.Links.Count);

            int stored = 0;
            int position = 1;
            processor.Links.ForEach(found =>
            {
                // Keep the status row short: show at most 10 characters of the domain.
                var domainLabel = found.Domain.Length > 10 ? found.Domain.Substring(0, 10) : found.Domain;

                WriteXY(0, WORK_AREA_TOP + collector.SeqNo,
                    "{0} (found={1}, prc={2} {3} ({4}))",
                    uri.ToUriShort(),
                    processor.Links.Count,
                    domainLabel,
                    found.Tld,
                    position);

                if (robots.IsAllowed(string.Empty, found.Href.ToUri()))
                {
                    ++stored;
                    pool.Store(found.Href);
                    // Forward link (page -> found) and back link (found <- page).
                    repository.SaveLink(uri.ToString(), found.Href, string.Empty, found);
                    repository.SaveLink(found.Href, string.Empty, uri.ToString(), found);
                    history.Add(uri.ToString());
                }

                ++position;
            });

            WriteXY(0, WORK_AREA_TOP + collector.SeqNo,
                "{0} (found={1}, added={2} links) [DONE]",
                uri.ToUriShort(),
                processor.Links.Count,
                stored);

            ea.Stream.Close();
        });

        downloader.Download();
    });
}
/// <summary>
/// Post-processes a textual HTTP response before it is handed on: lets
/// <c>TextResponseReceived</c> subscribers rewrite the text, applies the transformations
/// selected by <c>configuration.Mode</c>, rewrites HTML links / CSS <c>url(...)</c>
/// references, and replaces the response content with the resulting text.
/// </summary>
/// <param name="response">The response to process; <c>null</c> is ignored.</param>
/// <param name="requestId">Identifier of the request, used for tracing only.</param>
/// <returns>A task that completes when the response content has been replaced.</returns>
/// <remarks>
/// Only <c>text/*</c>, <c>application/json</c> and <c>application/xhtml+xml</c> responses
/// are touched; every other response is left as it is.
/// </remarks>
private async Task OnResponseReceivedAsync(HttpResponseMessage response, Guid requestId)
{
    if (response == null)
    {
        return;
    }

    var content = response.Content;
    if (content == null || content.Headers.ContentType == null)
    {
        return;
    }

    var contentType = content.Headers.ContentType;
    var mediaTypeParts = contentType.MediaType.Split(new[] { '/' }, 2);
    var mediaType = mediaTypeParts.Length > 0 ? mediaTypeParts[0].ToLowerInvariant() : null;
    var mediaSubType = mediaTypeParts.Length > 1 ? mediaTypeParts[1].ToLowerInvariant() : null;

    var isTextual = mediaType == "text"
        || (mediaType == "application" && (mediaSubType == "json" || mediaSubType == "xhtml+xml"));
    if (!isTextual)
    {
        return;
    }

    var htmlContent = await response.Content.ReadAsStringAsync();

    // raise TextResponseReceived event and allow subscribers to modify the response
    var responseReceived = TextResponseReceivedRegistrationTokenTable.InvocationList;
    if (responseReceived != null)
    {
        var eventArgs = new ResponseReceivedEventArgs(response.RequestMessage.RequestUri, contentType.MediaType, htmlContent);
        Trace.Verbose(requestId, "Raising TextResponseReceived event for URI: {0}", response.RequestMessage.RequestUri);
        await RunThroughDispatcherAsync(() => responseReceived(this, eventArgs));
        htmlContent = eventArgs.Content;
    }

    if (configuration.Mode == DefenseMode.PoneyAugmentedProtectionAnalyzer)
    {
        htmlContent = htmlContent.Replace("background-image", "pony-image");
    }

    if (mediaSubType == "html" || mediaSubType == "xhtml+xml")
    {
        var processor = new HtmlProcessor(htmlContent, this, baseUri);
        processor.RedirectLinks(response.RequestMessage.RequestUri);

        // Host names are identifiers, not linguistic text: compare them ordinally so the
        // result does not depend on the current culture's casing rules.
        var isWhiteListed = configuration.WhiteList.Any(
            u => string.Equals(baseUri.Authority, u, StringComparison.OrdinalIgnoreCase));

        if (!isWhiteListed)
        {
            if (configuration.Mode == DefenseMode.NoSlangAnalyzer || configuration.Mode == DefenseMode.PoneyAugmentedProtectionAnalyzer)
            {
                await processor.PonyfyTextsAsync();
            }

            if (configuration.Mode == DefenseMode.PoneyAugmentedProtectionAnalyzer)
            {
                processor.ChangeImages();
                processor.RemoveFlashAndObjects();
                processor.ChangeVideos();
            }
        }

        // Inject preload scripts in HTML content
        foreach (var preloadScript in PreloadScripts.OrderByDescending(p => p.Priority))
        {
            processor.InjectHtml(preloadScript.Script);
        }

        htmlContent = processor.GetContent();
    }
    else if (mediaSubType == "css")
    {
        // Route every url(...) reference of the stylesheet through the proxy.
        htmlContent = Regex.Replace(
            htmlContent,
            @"(?<=url\((?<quote>['""])?)(?<url>[^'""]+?)(?=(\k<quote>)?\))",
            (match) =>
            {
                var rewriteUri = BuildCurrentProxyUri(response.RequestMessage.RequestUri, match.Groups["url"].Value);
                return rewriteUri.ToString();
            });
    }

    response.Content.Dispose();
    response.Content = new StringContent(htmlContent);

    // StringContent(string) encodes the text as UTF-8. Keep the original media type and
    // parameters but advertise the charset that matches the new bytes; restoring the
    // origin's charset (e.g. windows-1251) would make the client mis-decode the body.
    contentType.CharSet = "utf-8";
    response.Content.Headers.ContentType = contentType;
}