Ejemplo n.º 1
0
        /// <summary>
        /// Checks the InnerText reported for the body element, for the div with id 'two',
        /// and for the node selected by <c>text()</c> relative to the body.
        /// </summary>
        public void TestInnerText()
        {
            XmlLightDocument parsed = new HtmlLightDocument(document);
            XmlLightElement bodyNode = parsed.SelectSingleNode("/html/body");

            // Whole-body text, compared after Normalize has been applied.
            string wholeBodyText = Normalize(bodyNode.InnerText);
            Assert.AreEqual("this is > cdata! Hi, this is content.", wholeBodyText);

            // The nested div is compared verbatim, without normalization.
            XmlLightElement secondDiv = bodyNode.SelectSingleNode(".//div[@id='two']");
            Assert.AreEqual("Hi", secondDiv.InnerText);

            XmlLightElement textNode = bodyNode.SelectSingleNode("text()");
            string textNodeValue = Normalize(textNode.InnerText);
            Assert.AreEqual("this is > cdata!", textNodeValue);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds an html/body pair in code on an empty document and checks the normalized
        /// InnerXml of that document.
        /// </summary>
        public void TestDocToXml()
        {
            HtmlLightDocument emptyDoc = new HtmlLightDocument();
            XmlLightElement htmlNode = new XmlLightElement(emptyDoc, "html");
            XmlLightElement bodyNode = new XmlLightElement(htmlNode, "body");

            bodyNode.IsEmpty = false;
            bodyNode.Attributes.Add("id", "bdy");

            string rendered = Normalize(emptyDoc.InnerXml);
            Assert.AreEqual("<html> <body id=\"bdy\"> </body> </html>", rendered);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Checks that the sibling following the head element is a comment node whose
        /// InnerXml is the full comment markup.
        /// </summary>
        public void TestComments()
        {
            XmlLightDocument parsed = new HtmlLightDocument(document);
            XmlLightElement headNode = parsed.SelectSingleNode("/html/head");
            XmlLightElement afterHead = headNode.NextSibling;

            Assert.IsTrue(afterHead.IsComment);
            Assert.AreEqual("<!-- comments included -->", afterHead.InnerXml);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Builds one HTML help page covering every visible command, writes it to
        /// "HttpClone.Help.html" in the temp directory and opens it with Process.Start.
        /// </summary>
        /// <param name="_ci">The interpreter; it is cast to <c>CommandInterpreter</c>.</param>
        public void HtmlHelp(ICommandInterpreter _ci)
        {
            CommandInterpreter ci = (CommandInterpreter)_ci;
            HtmlLightDocument doc = new HtmlLightDocument(ci.GetHtmlHelp("help"));
            XmlLightElement e = doc.SelectRequiredNode("/html/body/h1[2]");
            XmlLightElement body = e.Parent;
            int i = body.Children.IndexOf(e);

            // Remove the second <h1> and everything after it; the generated content below replaces it.
            body.Children.RemoveRange(i, body.Children.Count - i);

            // Group the visible commands by category and sort the groups once; both the index
            // and the detail section walk the same ordered sequence.
            IGrouping<string, ICommand>[] categories = ci.Commands
                .Where(c => c.Visible)
                .ToLookup(c => c.Category ?? "Unk")
                .OrderBy(g => g.Key)
                .ToArray();

            string generated;
            using (StringWriter sw = new StringWriter())
            {
                // Command index
                sw.WriteLine("<html><body>");
                sw.WriteLine("<h1>All Commands:</h1>");
                sw.WriteLine("<blockquote><ul>");

                foreach (IGrouping<string, ICommand> group in categories)
                {
                    // Category and command names are encoded like the description so that
                    // markup characters in them cannot break the generated page.
                    sw.WriteLine("<li><a href=\"#{0}\">{0}</a></li>", HttpUtility.HtmlEncode(group.Key));
                    sw.WriteLine("<ul>");
                    foreach (ICommand cmd in group)
                    {
                        sw.WriteLine("<li><a href=\"#{0}\">{0}</a> - {1}</li>",
                                     HttpUtility.HtmlEncode(cmd.DisplayName),
                                     HttpUtility.HtmlEncode(cmd.Description));
                    }
                    sw.WriteLine("</ul>");
                }
                sw.WriteLine("</ul></blockquote>");

                // Command Help
                foreach (IGrouping<string, ICommand> group in categories)
                {
                    sw.WriteLine("<h2><a name=\"{0}\"></a>{0} Commands:</h2>", HttpUtility.HtmlEncode(group.Key));
                    sw.WriteLine("<blockquote>");
                    foreach (ICommand cmd in group)
                    {
                        // Copy the <h3> of the command's own help page plus the node two siblings after it.
                        e = new HtmlLightDocument(ci.GetHtmlHelp(cmd.DisplayName)).SelectRequiredNode("/html/body/h3");
                        sw.WriteLine("<a name=\"{0}\"></a>", HttpUtility.HtmlEncode(cmd.DisplayName));
                        sw.WriteLine(e.InnerXml);
                        sw.WriteLine(e.NextSibling.NextSibling.InnerXml);
                    }
                    sw.WriteLine("</blockquote>");
                }

                generated = sw.ToString();
            }

            // Parse the generated fragment and append its body children to the trimmed help page.
            e = new HtmlLightDocument(generated).SelectRequiredNode("/html/body");
            body.Children.AddRange(e.Children);

            string html = body.Parent.InnerXml;
            string path = Path.Combine(Path.GetTempPath(), "HttpClone.Help.html");

            File.WriteAllText(path, html);
            System.Diagnostics.Process.Start(path);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Writes the parsed HTML document to a temp file, validates that file against the
        /// XHTML 1.0 Transitional specification, re-reads it as an XmlLightDocument and checks
        /// that both documents report the same InnerXml.
        /// </summary>
        public void TestParseDocument()
        {
            XmlLightDocument original = new HtmlLightDocument(document);

            using (TempFile scratch = new TempFile())
            {
                // The writer is closed before the file is validated and read back.
                using (TextWriter output = new StreamWriter(scratch.Open()))
                {
                    original.WriteXml(output);
                }

                XhtmlValidation validator = new XhtmlValidation(XhtmlDTDSpecification.XhtmlTransitional_10);
                validator.Validate(scratch.TempPath);

                XmlLightDocument reparsed = new XmlLightDocument(scratch.ReadAllText());
                Assert.AreEqual(original.InnerXml, reparsed.InnerXml);
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Builds one HTML help page covering every visible command, writes it to
        /// "HttpClone.Help.html" in the temp directory and opens it with Process.Start.
        /// </summary>
        /// <param name="_ci">The interpreter; it is cast to <c>CommandInterpreter</c>.</param>
        public void HtmlHelp(ICommandInterpreter _ci)
        {
            CommandInterpreter ci = (CommandInterpreter)_ci;
            HtmlLightDocument doc = new HtmlLightDocument(ci.GetHtmlHelp("help"));
            XmlLightElement e = doc.SelectRequiredNode("/html/body/h1[2]");
            XmlLightElement body = e.Parent;
            int i = body.Children.IndexOf(e);

            // Remove the second <h1> and everything after it; the generated content below replaces it.
            body.Children.RemoveRange(i, body.Children.Count - i);

            // Group the visible commands by category and sort the groups once; both the index
            // and the detail section walk the same ordered sequence.
            IGrouping<string, ICommand>[] categories = ci.Commands
                .Where(c => c.Visible)
                .ToLookup(c => c.Category ?? "Unk")
                .OrderBy(g => g.Key)
                .ToArray();

            string generated;
            using (StringWriter sw = new StringWriter())
            {
                // Command index
                sw.WriteLine("<html><body>");
                sw.WriteLine("<h1>All Commands:</h1>");
                sw.WriteLine("<blockquote><ul>");

                foreach (IGrouping<string, ICommand> group in categories)
                {
                    // Category and command names are encoded like the description so that
                    // markup characters in them cannot break the generated page.
                    sw.WriteLine("<li><a href=\"#{0}\">{0}</a></li>", HttpUtility.HtmlEncode(group.Key));
                    sw.WriteLine("<ul>");
                    foreach (ICommand cmd in group)
                    {
                        sw.WriteLine("<li><a href=\"#{0}\">{0}</a> - {1}</li>",
                                     HttpUtility.HtmlEncode(cmd.DisplayName),
                                     HttpUtility.HtmlEncode(cmd.Description));
                    }
                    sw.WriteLine("</ul>");
                }
                sw.WriteLine("</ul></blockquote>");

                // Command Help
                foreach (IGrouping<string, ICommand> group in categories)
                {
                    sw.WriteLine("<h2><a name=\"{0}\"></a>{0} Commands:</h2>", HttpUtility.HtmlEncode(group.Key));
                    sw.WriteLine("<blockquote>");
                    foreach (ICommand cmd in group)
                    {
                        // Copy the <h3> of the command's own help page plus the node two siblings after it.
                        e = new HtmlLightDocument(ci.GetHtmlHelp(cmd.DisplayName)).SelectRequiredNode("/html/body/h3");
                        sw.WriteLine("<a name=\"{0}\"></a>", HttpUtility.HtmlEncode(cmd.DisplayName));
                        sw.WriteLine(e.InnerXml);
                        sw.WriteLine(e.NextSibling.NextSibling.InnerXml);
                    }
                    sw.WriteLine("</blockquote>");
                }

                generated = sw.ToString();
            }

            // Parse the generated fragment and append its body children to the trimmed help page.
            e = new HtmlLightDocument(generated).SelectRequiredNode("/html/body");
            body.Children.AddRange(e.Children);

            string html = body.Parent.InnerXml;
            string path = Path.Combine(Path.GetTempPath(), "HttpClone.Help.html");

            File.WriteAllText(path, html);
            System.Diagnostics.Process.Start(path);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Fetches the target link and looks for a <c>link rel="pingback"</c> element in the
        /// head of the returned HTML.
        /// </summary>
        /// <param name="pingbackApi">
        /// Receives the result of parsing the href as an absolute URI; null on every failure path.
        /// </param>
        /// <returns>
        /// The result of <c>Uri.TryCreate</c> for the discovered href, or false when the request,
        /// the content type, the document or the link element is unusable.
        /// </returns>
        public bool TryGetPingbackFromHtml(out Uri pingbackApi)
        {
            HttpRequestUtil request = new HttpRequestUtil(_targetLink);

            if (request.Get(_targetLink.PathAndQuery) != System.Net.HttpStatusCode.OK)
            {
                LogError(String.Format("GET {0}: {1}/{2}", _targetLink, (int)request.StatusCode, request.StatusCode));
                pingbackApi = null;
                return false;
            }

            if (!request.ContentType.StartsWith("text/html", StringComparison.OrdinalIgnoreCase))
            {
                LogError("Invalid content-type, expected text/html, found: " + request.ContentType);
                pingbackApi = null;
                return false;
            }

            try
            {
                string markup = Encoding.UTF8.GetString(request.Content);
                HtmlLightDocument page = new HtmlLightDocument(markup);
                XmlLightElement linkTag = page.SelectSingleNode("/html/head/link[@rel='pingback']");
                string href;

                if (linkTag == null)
                {
                    LogError("Unable to locate <link rel=\"pingback\" ... in header.");
                }
                else if (!linkTag.Attributes.TryGetValue("href", out href))
                {
                    LogError("Link for rel=pingback is missing the href attribute.");
                }
                else
                {
                    LogInfo("Found rel=pingback: " + href);
                    return Uri.TryCreate(href, UriKind.Absolute, out pingbackApi);
                }
            }
            catch (Exception error)
            {
                // Any failure while decoding or inspecting the page is logged and reported as "not found".
                LogError(error.Message);
            }

            pingbackApi = null;
            return false;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Turns a page's HTML into the search template: adds a stylesheet link for "search.css",
        /// blanks the text inputs of the configured form, and calls InsertTag for the configured
        /// terms and result locations.
        /// </summary>
        /// <param name="html">The HTML to parse as the starting document.</param>
        /// <returns>The modified document as written by WriteUnformatted.</returns>
        private string CreateTemplate(string html)
        {
            HtmlLightDocument template = new HtmlLightDocument(html);

            // Add css link:
            XmlLightElement head = template.SelectRequiredNode("/html/head");
            XmlLightElement stylesheet = new XmlLightElement(head, "link");

            stylesheet.Attributes["type"] = "text/css";
            stylesheet.Attributes["rel"] = "stylesheet";
            stylesheet.Attributes["href"] = new Uri(_baseUri, "search.css").AbsoluteUri;

            // All configured XPaths below are evaluated relative to this node.
            XmlLightElement root = template.Root;
            XmlLightElement scope = _config.Searching.XPathBase != null
                ? root.SelectRequiredNode(_config.Searching.XPathBase.XPath)
                : root;

            if (_config.Searching.FormXPath != null)
            {
                XmlLightElement searchForm = scope.SelectRequiredNode(_config.Searching.FormXPath.XPath);
                foreach (XmlLightElement input in searchForm.Select(".//input[@type='text']"))
                {
                    input.Attributes["value"] = String.Empty;
                }
            }

            if (_config.Searching.TermsXPath != null)
            {
                InsertTag(scope,
                          _config.Searching.TermsXPath.XPath,
                          _config.Searching.TermsXPath.ReplaceOption,
                          "search-terms");
            }

            if (_config.Searching.ResultXPath != null)
            {
                InsertTag(scope,
                          _config.Searching.ResultXPath.XPath,
                          _config.Searching.ResultXPath.ReplaceOption,
                          "search-result");
            }

            using (StringWriter output = new StringWriter())
            {
                template.WriteUnformatted(output);
                return output.ToString();
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Runs the same XPath against a System.Xml document loaded from the light document's
        /// navigator and against the light document itself, then checks that element names in
        /// SelectSingleNode are matched case-sensitively.
        /// </summary>
        public void TestXPath()
        {
            XmlDocument reference = new XmlDocument();
            XmlLightDocument light = new HtmlLightDocument(document);
            string attributePath = "/html/body[@id='one' and @class='cls']/../body/div[@id='two' and text() = 'Hi']/@id";

            // The framework's own XmlDocument must find the attribute too.
            string exported = light.CreateNavigator().InnerXml;
            reference.LoadXml(exported);
            Assert.IsNotNull(reference.SelectSingleNode(attributePath));

            XPathNavigator match = light.CreateNavigator().SelectSingleNode(attributePath);

            Assert.IsNotNull(match);
            bool isAttribute = match.NodeType == XPathNodeType.Attribute;
            Assert.IsTrue(isAttribute);
            Assert.AreEqual("id", match.Name);
            Assert.AreEqual("two", match.Value);

            XmlLightElement wrongCase = light.SelectSingleNode("/html/Head");
            Assert.IsNull(wrongCase);

            XmlLightElement rightCase = light.SelectSingleNode("/html/head");
            Assert.IsNotNull(rightCase);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Exercises sibling movement, namespace movement, NameTable/BaseURI and MoveToId on the
        /// navigator positioned at the paragraph with class '1'.
        /// </summary>
        public void TestXmlNavigator()
        {
            XPathNavigator documentNav = new HtmlLightDocument(document).CreateNavigator();
            XPathNavigator cursor = documentNav.SelectSingleNode("/html/body//p[@class='1']");
            XPathNavigator start = cursor.Clone();

            // Step forward and back again; the cursor must end where it began.
            Assert.IsFalse(cursor.MoveToPrevious());
            Assert.IsTrue(cursor.MoveToNext());
            Assert.IsTrue(cursor.MoveToPrevious());
            Assert.IsTrue(cursor.IsSamePosition(start));

            Assert.IsFalse(cursor.MoveToFirstNamespace());
            Assert.IsFalse(cursor.MoveToNextNamespace());

            bool sharedNameTable = Object.ReferenceEquals(cursor.NameTable, start.NameTable);
            Assert.IsTrue(sharedNameTable);
            Assert.IsNotNull(cursor.BaseURI);
            Assert.AreEqual(cursor.BaseURI, start.BaseURI);

            // A failed MoveToId must leave the cursor on the body element.
            Assert.IsTrue(cursor.MoveToId("one"));
            Assert.AreEqual("body", cursor.Name);
            Assert.IsFalse(cursor.MoveToId("none-exists"));
            Assert.AreEqual("body", cursor.Name);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Parses HTML containing raw markup characters and a mix of entity references, writes
        /// it back out through an XmlWriter, and compares the output with a fixed expected string.
        /// </summary>
        public void TestHtmlEntityRef()
        {
            // Input: unescaped '&', '<' and '>' in an attribute and in text, plus named, numeric
            // and malformed entity references. The literal is whitespace-sensitive.
            string html   = @"<html>
            <body attrib=""this & that ><&nbsp;&#32;!"">
                this char '<' and this one '>' and this one '&' should be encoded.  
                We encoded ' &nbsp; ' and &Atilde; and '&#32;' and '&#x20;' all by ourselves.
                This in not valid xml &#xffffffff;, nor is &#123456789;, but we still allow it.
                This entity name will pass-through &unknown; this will not &whateverthatmeans;
                and nor will these &; &#; &h; &l t; &1two; &234; &#x00fg; &#-123;.
            </body>
            </html>";
            // Expected output: in the attribute the raw characters are escaped and the
            // &nbsp; / &#32; references appear as character 160 and a space; in the text the raw
            // characters are escaped, the references on the first lines are unchanged, and the
            // rejected forms have their '&' written as &amp;.
            string expect = @"<html><body attrib=""this &amp; that &gt;&lt;" + (Char)160 + @" !"">
                this char '&lt;' and this one '&gt;' and this one '&amp;' should be encoded.  
                We encoded ' &nbsp; ' and &Atilde; and '&#32;' and '&#x20;' all by ourselves.
                This in not valid xml &#xffffffff;, nor is &#123456789;, but we still allow it.
                This entity name will pass-through &unknown; this will not &amp;whateverthatmeans;
                and nor will these &amp;; &amp;#; &amp;h; &amp;l t; &amp;1two; &amp;234; &amp;#x00fg; &amp;#-123;.
            </body></html>";

            XmlLightDocument  doc      = new HtmlLightDocument(html);
            // No indentation, no newline handling and no XML declaration, so the writer adds
            // nothing of its own to the compared text.
            XmlWriterSettings settings = new XmlWriterSettings()
            {
                CheckCharacters    = true,
                Indent             = false,
                IndentChars        = "",
                NewLineChars       = "",
                NewLineHandling    = NewLineHandling.None,
                OmitXmlDeclaration = true,
                CloseOutput        = false
            };
            StringWriter sw  = new StringWriter();
            XmlWriter    wtr = XmlWriter.Create(sw, settings);

            doc.WriteXml(wtr);
            // Flush before reading the StringWriter so buffered output is included.
            wtr.Flush();
            string xml = sw.ToString();

            Assert.AreEqual(expect, xml);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Checks sibling links at the document boundaries, the tag/namespace/local names of the
        /// element with class '2', and the prefix split of a detached "a:b" element.
        /// </summary>
        public void TestXmlElement()
        {
            XmlLightDocument parsed = new HtmlLightDocument(document);

            // Neither the document nor its first/last child has a sibling beyond the edge.
            Assert.IsNull(parsed.PrevSibling);
            Assert.IsNull(parsed.Children[0].PrevSibling);
            Assert.IsNull(parsed.NextSibling);
            int lastIndex = parsed.Children.Count - 1;
            Assert.IsNull(parsed.Children[lastIndex].NextSibling);

            XmlLightElement second = parsed.SelectSingleNode("/html/body//*[@class='2']");

            Assert.IsNotNull(second);
            Assert.AreEqual("p", second.TagName);
            Assert.IsNotNull(second.PrevSibling);
            Assert.AreEqual("p", second.PrevSibling.TagName);

            Assert.AreEqual("", second.Namespace);
            Assert.AreEqual("p", second.LocalName);

            XmlLightElement prefixed = new XmlLightElement(null, "a:b");
            Assert.AreEqual("a", prefixed.Namespace);
            Assert.AreEqual("b", prefixed.LocalName);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Parses the same malformed markup into an HtmlLightDocument and, with the Xml attribute
        /// format, into an XmlLightDocument, and checks the differing results.
        /// </summary>
        public void TestParsers()
        {
            string malformed = "<html id=a ><body foo='bar' bar=\"foo\" />";

            HtmlLightDocument asHtml = new HtmlLightDocument();

            XmlLightParser.Parse(malformed, asHtml);
            Assert.AreEqual("html", asHtml.Root.TagName);
            Assert.AreEqual(1, asHtml.Root.Attributes.Count);
            Assert.AreEqual("a", asHtml.Root.Attributes["id"]);
            Assert.AreEqual(1, asHtml.Root.Children.Count);

            XmlLightElement htmlBody = asHtml.Root.Children[0];
            Assert.AreEqual("body", htmlBody.TagName);
            Assert.AreEqual("foo", htmlBody.Attributes["bar"]);
            Assert.AreEqual("bar", htmlBody.Attributes["foo"]);

            XmlLightDocument asXml = new XmlLightDocument();

            XmlLightParser.Parse(malformed, XmlLightParser.AttributeFormat.Xml, asXml);
            Assert.AreEqual(2, asXml.Root.Attributes.Count);
            //Not recognized: asXml.Root.Attributes["id"]
            Assert.AreEqual("body", asXml.Root.TagName);
            Assert.AreEqual("foo", asXml.Root.Attributes["bar"]);
            Assert.AreEqual("bar", asXml.Root.Attributes["foo"]);
        }
Ejemplo n.º 14
0
		/// <summary>
		/// Runs the same XPath against a System.Xml document loaded from the light document's
		/// navigator and against the light document itself, then checks that element names in
		/// SelectSingleNode are matched case-sensitively.
		/// </summary>
		public void TestXPath()
		{
			XmlDocument reference = new XmlDocument();
			XmlLightDocument light = new HtmlLightDocument(document);
			string attributePath = "/html/body[@id='one' and @class='cls']/../body/div[@id='two' and text() = 'Hi']/@id";

			// The framework's own XmlDocument must find the attribute too.
			string exported = light.CreateNavigator().InnerXml;
			reference.LoadXml(exported);
			Assert.IsNotNull(reference.SelectSingleNode(attributePath));

			XPathNavigator match = light.CreateNavigator().SelectSingleNode(attributePath);

			Assert.IsNotNull(match);
			bool isAttribute = match.NodeType == XPathNodeType.Attribute;
			Assert.IsTrue(isAttribute);
			Assert.AreEqual("id", match.Name);
			Assert.AreEqual("two", match.Value);

			XmlLightElement wrongCase = light.SelectSingleNode("/html/Head");
			Assert.IsNull(wrongCase);

			XmlLightElement rightCase = light.SelectSingleNode("/html/head");
			Assert.IsNotNull(rightCase);
		}
Ejemplo n.º 15
0
		/// <summary>
		/// Checks sibling links at the document boundaries, the tag/namespace/local names of the
		/// element with class '2', and the prefix split of a detached "a:b" element.
		/// </summary>
		public void TestXmlElement()
		{
			XmlLightDocument parsed = new HtmlLightDocument(document);

			// Neither the document nor its first/last child has a sibling beyond the edge.
			Assert.IsNull(parsed.PrevSibling);
			Assert.IsNull(parsed.Children[0].PrevSibling);
			Assert.IsNull(parsed.NextSibling);
			int lastIndex = parsed.Children.Count - 1;
			Assert.IsNull(parsed.Children[lastIndex].NextSibling);

			XmlLightElement second = parsed.SelectSingleNode("/html/body//*[@class='2']");

			Assert.IsNotNull(second);
			Assert.AreEqual("p", second.TagName);
			Assert.IsNotNull(second.PrevSibling);
			Assert.AreEqual("p", second.PrevSibling.TagName);

			Assert.AreEqual("", second.Namespace);
			Assert.AreEqual("p", second.LocalName);

			XmlLightElement prefixed = new XmlLightElement(null, "a:b");
			Assert.AreEqual("a", prefixed.Namespace);
			Assert.AreEqual("b", prefixed.LocalName);
		}
Ejemplo n.º 16
0
		/// <summary>
		/// Exercises sibling movement, namespace movement, NameTable/BaseURI and MoveToId on the
		/// navigator positioned at the paragraph with class '1'.
		/// </summary>
		public void TestXmlNavigator()
		{
			XPathNavigator documentNav = new HtmlLightDocument(document).CreateNavigator();
			XPathNavigator cursor = documentNav.SelectSingleNode("/html/body//p[@class='1']");
			XPathNavigator start = cursor.Clone();

			// Step forward and back again; the cursor must end where it began.
			Assert.IsFalse(cursor.MoveToPrevious());
			Assert.IsTrue(cursor.MoveToNext());
			Assert.IsTrue(cursor.MoveToPrevious());
			Assert.IsTrue(cursor.IsSamePosition(start));

			Assert.IsFalse(cursor.MoveToFirstNamespace());
			Assert.IsFalse(cursor.MoveToNextNamespace());

			bool sharedNameTable = Object.ReferenceEquals(cursor.NameTable, start.NameTable);
			Assert.IsTrue(sharedNameTable);
			Assert.IsNotNull(cursor.BaseURI);
			Assert.AreEqual(cursor.BaseURI, start.BaseURI);

			// A failed MoveToId must leave the cursor on the body element.
			Assert.IsTrue(cursor.MoveToId("one"));
			Assert.AreEqual("body", cursor.Name);
			Assert.IsFalse(cursor.MoveToId("none-exists"));
			Assert.AreEqual("body", cursor.Name);
		}
Ejemplo n.º 17
0
		/// <summary>
		/// Writes the parsed HTML document to a temp file, validates that file against the
		/// XHTML 1.0 Transitional specification, re-reads it as an XmlLightDocument and checks
		/// that both documents report the same InnerXml.
		/// </summary>
		public void TestParseDocument()
		{
			XmlLightDocument original = new HtmlLightDocument(document);

			using (TempFile scratch = new TempFile())
			{
				// The writer is closed before the file is validated and read back.
				using (TextWriter output = new StreamWriter(scratch.Open()))
				{
					original.WriteXml(output);
				}

				XhtmlValidation validator = new XhtmlValidation(XhtmlDTDSpecification.XhtmlTransitional_10);
				validator.Validate(scratch.TempPath);

				XmlLightDocument reparsed = new XmlLightDocument(scratch.ReadAllText());
				Assert.AreEqual(original.InnerXml, reparsed.InnerXml);
			}
		}
Ejemplo n.º 18
0
		/// <summary>
		/// Checks the InnerText reported for the body element, for the div with id 'two',
		/// and for the node selected by <c>text()</c> relative to the body.
		/// </summary>
		public void TestInnerText()
		{
			XmlLightDocument parsed = new HtmlLightDocument(document);
			XmlLightElement bodyNode = parsed.SelectSingleNode("/html/body");

			// Whole-body text, compared after Normalize has been applied.
			string wholeBodyText = Normalize(bodyNode.InnerText);
			Assert.AreEqual("this is > cdata! Hi, this is content.", wholeBodyText);

			// The nested div is compared verbatim, without normalization.
			XmlLightElement secondDiv = bodyNode.SelectSingleNode(".//div[@id='two']");
			Assert.AreEqual("Hi", secondDiv.InnerText);

			XmlLightElement textNode = bodyNode.SelectSingleNode("text()");
			string textNodeValue = Normalize(textNode.InnerText);
			Assert.AreEqual("this is > cdata!", textNodeValue);
		}
Ejemplo n.º 19
0
        /// <summary>
        /// Fetches the target link and looks for a <c>link rel="pingback"</c> element in the
        /// head of the returned HTML.
        /// </summary>
        /// <param name="pingbackApi">
        /// Receives the result of parsing the href as an absolute URI; null on every failure path.
        /// </param>
        /// <returns>
        /// The result of <c>Uri.TryCreate</c> for the discovered href, or false when the request,
        /// the content type, the document or the link element is unusable.
        /// </returns>
        public bool TryGetPingbackFromHtml(out Uri pingbackApi)
        {
            HttpRequestUtil request = new HttpRequestUtil(_targetLink);

            if (request.Get(_targetLink.PathAndQuery) != System.Net.HttpStatusCode.OK)
            {
                LogError(String.Format("GET {0}: {1}/{2}", _targetLink, (int)request.StatusCode, request.StatusCode));
                pingbackApi = null;
                return false;
            }

            if (!request.ContentType.StartsWith("text/html", StringComparison.OrdinalIgnoreCase))
            {
                LogError("Invalid content-type, expected text/html, found: " + request.ContentType);
                pingbackApi = null;
                return false;
            }

            try
            {
                string markup = Encoding.UTF8.GetString(request.Content);
                HtmlLightDocument page = new HtmlLightDocument(markup);
                XmlLightElement linkTag = page.SelectSingleNode("/html/head/link[@rel='pingback']");
                string href;

                if (linkTag == null)
                {
                    LogError("Unable to locate <link rel=\"pingback\" ... in header.");
                }
                else if (!linkTag.Attributes.TryGetValue("href", out href))
                {
                    LogError("Link for rel=pingback is missing the href attribute.");
                }
                else
                {
                    LogInfo("Found rel=pingback: " + href);
                    return Uri.TryCreate(href, UriKind.Absolute, out pingbackApi);
                }
            }
            catch (Exception error)
            {
                // Any failure while decoding or inspecting the page is logged and reported as "not found".
                LogError(error.Message);
            }

            pingbackApi = null;
            return false;
        }
Ejemplo n.º 20
0
		/// <summary>
		/// Builds an html/body pair in code on an empty document and checks the normalized
		/// InnerXml of that document.
		/// </summary>
		public void TestDocToXml()
		{
			HtmlLightDocument emptyDoc = new HtmlLightDocument();
			XmlLightElement htmlNode = new XmlLightElement(emptyDoc, "html");
			XmlLightElement bodyNode = new XmlLightElement(htmlNode, "body");

			bodyNode.IsEmpty = false;
			bodyNode.Attributes.Add("id", "bdy");

			string rendered = Normalize(emptyDoc.InnerXml);
			Assert.AreEqual("<html> <body id=\"bdy\"> </body> </html>", rendered);
		}
Ejemplo n.º 21
0
        /// <summary>
        /// Parses the same malformed markup into an HtmlLightDocument and, with the Xml attribute
        /// format, into an XmlLightDocument, and checks the differing results.
        /// </summary>
        public void TestParsers()
        {
            string malformed = "<html id=a ><body foo='bar' bar=\"foo\" />";

            HtmlLightDocument asHtml = new HtmlLightDocument();

            XmlLightParser.Parse(malformed, asHtml);
            Assert.AreEqual("html", asHtml.Root.TagName);
            Assert.AreEqual(1, asHtml.Root.Attributes.Count);
            Assert.AreEqual("a", asHtml.Root.Attributes["id"]);
            Assert.AreEqual(1, asHtml.Root.Children.Count);

            XmlLightElement htmlBody = asHtml.Root.Children[0];
            Assert.AreEqual("body", htmlBody.TagName);
            Assert.AreEqual("foo", htmlBody.Attributes["bar"]);
            Assert.AreEqual("bar", htmlBody.Attributes["foo"]);

            XmlLightDocument asXml = new XmlLightDocument();

            XmlLightParser.Parse(malformed, XmlLightParser.AttributeFormat.Xml, asXml);
            Assert.AreEqual(2, asXml.Root.Attributes.Count);
            //Not recognized: asXml.Root.Attributes["id"]
            Assert.AreEqual("body", asXml.Root.TagName);
            Assert.AreEqual("foo", asXml.Root.Attributes["bar"]);
            Assert.AreEqual("bar", asXml.Root.Attributes["foo"]);
        }
Ejemplo n.º 22
0
		/// <summary>
		/// Checks that the sibling following the head element is a comment node whose
		/// InnerXml is the full comment markup.
		/// </summary>
		public void TestComments()
		{
			XmlLightDocument parsed = new HtmlLightDocument(document);
			XmlLightElement headNode = parsed.SelectSingleNode("/html/head");
			XmlLightElement afterHead = headNode.NextSibling;

			Assert.IsTrue(afterHead.IsComment);
			Assert.AreEqual("<!-- comments included -->", afterHead.InnerXml);
		}
Ejemplo n.º 23
0
        /// <summary>
        /// Turns a page's HTML into the search template: adds a stylesheet link for "search.css",
        /// blanks the text inputs of the configured form, and calls InsertTag for the configured
        /// terms and result locations.
        /// </summary>
        /// <param name="html">The HTML to parse as the starting document.</param>
        /// <returns>The modified document as written by WriteUnformatted.</returns>
        private string CreateTemplate(string html)
        {
            HtmlLightDocument template = new HtmlLightDocument(html);

            // Add css link:
            XmlLightElement head = template.SelectRequiredNode("/html/head");
            XmlLightElement stylesheet = new XmlLightElement(head, "link");

            stylesheet.Attributes["type"] = "text/css";
            stylesheet.Attributes["rel"] = "stylesheet";
            stylesheet.Attributes["href"] = new Uri(_baseUri, "search.css").AbsoluteUri;

            // All configured XPaths below are evaluated relative to this node.
            XmlLightElement root = template.Root;
            XmlLightElement scope = _config.Searching.XPathBase != null
                ? root.SelectRequiredNode(_config.Searching.XPathBase.XPath)
                : root;

            if (_config.Searching.FormXPath != null)
            {
                XmlLightElement searchForm = scope.SelectRequiredNode(_config.Searching.FormXPath.XPath);
                foreach (XmlLightElement input in searchForm.Select(".//input[@type='text']"))
                {
                    input.Attributes["value"] = String.Empty;
                }
            }

            if (_config.Searching.TermsXPath != null)
            {
                InsertTag(scope,
                          _config.Searching.TermsXPath.XPath,
                          _config.Searching.TermsXPath.ReplaceOption,
                          "search-terms");
            }

            if (_config.Searching.ResultXPath != null)
            {
                InsertTag(scope,
                          _config.Searching.ResultXPath.XPath,
                          _config.Searching.ResultXPath.ReplaceOption,
                          "search-result");
            }

            using (StringWriter output = new StringWriter())
            {
                template.WriteUnformatted(output);
                return output.ToString();
            }
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Walks every stored content record and passes each indexable HTML page to AddToIndex,
        /// using the XPath settings of the search configuration to pick the title, blurb, date
        /// and indexed text.
        /// </summary>
        /// <exception cref="InvalidOperationException">The search configuration is missing.</exception>
        /// <exception cref="FormatException">The text of a page's date node could not be parsed.</exception>
        public void BuildIndex()
        {
            if (_config == null)
            {
                throw new InvalidOperationException("The <search> element is missing from the configuration.");
            }

            // Content hashes seen so far, so records with identical content are indexed only once.
            Dictionary <string, string> hashes = new Dictionary <string, string>(StringComparer.Ordinal);

            foreach (KeyValuePair <string, ContentRecord> item in _content)
            {
                // The search page and the template pages are never indexed.
                if (item.Key == SearchTemplate.SearchPath || item.Key == SearchTemplate.TemplatePath ||
                    item.Key == _config.TemplateUri)
                {
                    continue;
                }
                if (item.Value.HasContentStoreId == false)
                {
                    continue;
                }

                // Look the mime entry up once instead of once per condition.
                var mime = _mimeInfo[item.Value.MimeType];
                if (!mime.Indexed || mime.Type != ContentFormat.Html)
                {
                    continue;
                }
                if (item.Value.HasHashContents)
                {
                    if (hashes.ContainsKey(item.Value.HashContents))
                    {
                        continue;
                    }
                    hashes.Add(item.Value.HashContents, item.Key);
                }

                string            title = null, blurb = null, date = null;
                string            content = Encoding.UTF8.GetString(_content.ReadContent(item.Value, true));
                HtmlLightDocument xdoc = new HtmlLightDocument(content);
                XmlLightElement   found, selectFrom = _config.XPathBase == null ? xdoc.Root
                    : xdoc.SelectRequiredNode(_config.XPathBase.XPath);

                // A page is skipped as soon as any configured condition XPath matches.
                bool ignore = false;
                foreach (var xpath in _config.Conditions.SafeEnumeration())
                {
                    if (null != selectFrom.SelectSingleNode(xpath.XPath))
                    {
                        ignore = true;
                        break;
                    }
                }
                if (ignore)
                {
                    continue;
                }

                // Title comes from the configured XPath, or from the mime handler when none is configured.
                if (_config.TitlePath != null && selectFrom.TrySelectNode(_config.TitlePath.XPath, out found))
                {
                    title = found.InnerText.Trim();
                }
                else if (_config.TitlePath == null && false == _mimeInfo.TryGetTitle(item.Value.MimeType, content, out title))
                {
                    title = null;
                }
                if (String.IsNullOrEmpty(title))
                {
                    continue;
                }

                if (_config.BlubXPath != null)
                {
                    StringBuilder tmp = new StringBuilder();
                    foreach (XmlLightElement e in selectFrom.Select(_config.BlubXPath.XPath))
                    {
                        if (e.IsText)
                        {
                            tmp.Append(e.Value);
                        }
                        else
                        {
                            foreach (XmlLightElement txt in e.Select(".//text()"))
                            {
                                tmp.Append(txt.Value);
                            }
                        }
                    }
                    if (tmp.Length == 0)
                    {
                        // Nothing was collected above; fall back to the InnerText of the required node.
                        tmp.Append(selectFrom.SelectRequiredNode(_config.BlubXPath.XPath).InnerText);
                    }
                    blurb = tmp.ToString();
                }

                // Default to the record's creation date; a date found in the page overrides it.
                DateTime dtvalue = item.Value.DateCreated;
                if (_config.DateXPath != null && selectFrom.TrySelectNode(_config.DateXPath.XPath, out found))
                {
                    DateTime contentDate;
                    string   dtText = found.InnerText.Trim();
                    // Each DateTimeClean match loses its last two characters before parsing.
                    dtText = DateTimeClean.Replace(dtText, m => m.Value.Substring(0, m.Length - 2));

                    if (!String.IsNullOrEmpty(_config.DateXPath.DateFormat))
                    {
                        if (DateTime.TryParseExact(dtText, _config.DateXPath.DateFormat, CultureInfo.InvariantCulture,
                                                   DateTimeStyles.AllowWhiteSpaces, out contentDate))
                        {
                            dtvalue = contentDate;
                        }
                        else
                        {
                            throw new FormatException("Unable to parse date/time: " + dtText);
                        }
                    }
                    else if (DateTime.TryParse(dtText, CultureInfo.InvariantCulture, DateTimeStyles.AllowWhiteSpaces, out contentDate))
                    {
                        dtvalue = contentDate;
                    }
                    else
                    {
                        throw new FormatException("Unable to parse date/time: " + dtText);
                    }
                }
                // Format with the invariant culture, matching the parsing above, so the stored
                // value does not depend on the calendar or separators of the current culture.
                date = dtvalue.ToString("yyyy-MM-dd HH:mm:ss", CultureInfo.InvariantCulture);

                StringWriter indexed = new StringWriter();
                indexed.WriteLine(title);
                foreach (var xpath in _config.Indexed.SafeEnumeration())
                {
                    foreach (var indexItem in selectFrom.Select(xpath.XPath))
                    {
                        string innerText = indexItem.InnerText;
                        indexed.WriteLine(innerText);
                        indexed.WriteLine(NonAlphaNum.Replace(innerText, " "));//again, removing all special characters.
                    }
                }

                // Materialise the indexed text once; it feeds both the fallback blurb and the index.
                string text = indexed.ToString();

                if (String.IsNullOrEmpty(blurb))
                {
                    // The text starts with the title line, so skip past the title to get the blurb.
                    blurb = text.Substring(title.Length).Trim();
                }

                title = WhiteSpaces.Replace(TrimString(title, _config.TitlePath != null ? (uint)_config.TitlePath.MaxLength : BlurbLength), " ");
                blurb = WhiteSpaces.Replace(TrimString(blurb, _config.BlubXPath != null ? (uint)_config.BlubXPath.MaxLength : BlurbLength), " ");

                using (TextReader rdr = new StringReader(text))
                {
                    AddToIndex(item.Key, date, title, blurb, rdr);
                }
            }
        }
Ejemplo n.º 25
0
        /// <summary>
        /// Walks every record in the content store and adds each indexable HTML page to the
        /// search index (path, date, title, blurb and the text selected for indexing).
        /// </summary>
        /// <remarks>
        /// A record is skipped when it is one of the search/template pages, has no stored
        /// content, is not an indexed HTML mime type, duplicates the hash of an already
        /// processed record, matches one of the configured exclusion conditions, or yields
        /// no title.
        /// </remarks>
        /// <exception cref="InvalidOperationException">The search configuration is missing.</exception>
        /// <exception cref="FormatException">
        /// A date node was found via the configured date XPath but its text could not be parsed.
        /// </exception>
        public void BuildIndex()
        {
            if (_config == null)
                throw new InvalidOperationException("The <search> element is missing from the configuration.");

            // Content hash -> first path seen with that hash, so identical content is indexed only once.
            Dictionary<string, string> hashes = new Dictionary<string, string>(StringComparer.Ordinal);

            foreach (KeyValuePair<string, ContentRecord> item in _content)
            {
                // Never index the search page itself or the templates used to render it.
                if (item.Key == SearchTemplate.SearchPath || item.Key == SearchTemplate.TemplatePath
                    || item.Key == _config.TemplateUri)
                    continue;
                if (item.Value.HasContentStoreId == false)
                    continue;
                // Only mime types flagged as indexed AND formatted as HTML are considered.
                if (!_mimeInfo[item.Value.MimeType].Indexed || _mimeInfo[item.Value.MimeType].Type != ContentFormat.Html)
                    continue;
                if (item.Value.HasHashContents)
                {
                    if (hashes.ContainsKey(item.Value.HashContents))
                        continue;
                    hashes.Add(item.Value.HashContents, item.Key);
                }

                string title = null, blurb = null, date = null;
                string content = Encoding.UTF8.GetString(_content.ReadContent(item.Value, true));
                HtmlLightDocument xdoc = new HtmlLightDocument(content);
                // All subsequent XPath queries are evaluated relative to this node.
                XmlLightElement found, selectFrom = _config.XPathBase == null ? xdoc.Root
                    : xdoc.SelectRequiredNode(_config.XPathBase.XPath);

                // A page is excluded as soon as any configured condition XPath matches.
                bool ignore = false;
                foreach (var xpath in _config.Conditions.SafeEnumeration())
                {
                    if (null != selectFrom.SelectSingleNode(xpath.XPath))
                    {
                        ignore = true;
                        break;
                    }
                }
                if (ignore)
                    continue;

                // Title comes from the configured XPath when one is set; otherwise from the mime-type helper.
                if (_config.TitlePath != null && selectFrom.TrySelectNode(_config.TitlePath.XPath, out found))
                    title = found.InnerText.Trim();
                else if (_config.TitlePath == null && false == _mimeInfo.TryGetTitle(item.Value.MimeType, content, out title))
                    title = null;
                if (String.IsNullOrEmpty(title))
                    continue;

                if (_config.BlubXPath != null)
                {
                    // Concatenate the text of every node selected by the blurb XPath.
                    StringBuilder tmp = new StringBuilder();
                    foreach (XmlLightElement e in selectFrom.Select(_config.BlubXPath.XPath))
                    {
                        if (e.IsText)
                            tmp.Append(e.Value);
                        else
                        {
                            foreach (XmlLightElement txt in e.Select(".//text()"))
                                tmp.Append(txt.Value);
                        }
                    }
                    // Nothing collected: fall back to the inner text of the first matching node.
                    // NOTE(review): SelectRequiredNode presumably throws when no node matches - confirm.
                    if (tmp.Length == 0)
                        tmp.Append(selectFrom.SelectRequiredNode(_config.BlubXPath.XPath).InnerText);
                    blurb = tmp.ToString();
                }

                // Default to the record's creation date; a date found in the page overrides it.
                DateTime dtvalue = item.Value.DateCreated;
                if (_config.DateXPath != null && selectFrom.TrySelectNode(_config.DateXPath.XPath, out found))
                {
                    DateTime contentDate;
                    string dtText = found.InnerText.Trim();
                    // Drops the last two characters of every DateTimeClean match
                    // (presumably ordinal suffixes such as "1st" -> "1"; verify against the regex).
                    dtText = DateTimeClean.Replace(dtText, m => m.Value.Substring(0, m.Length - 2));

                    if (!String.IsNullOrEmpty(_config.DateXPath.DateFormat))
                    {
                        if (DateTime.TryParseExact(dtText, _config.DateXPath.DateFormat, CultureInfo.InvariantCulture,
                                               DateTimeStyles.AllowWhiteSpaces, out contentDate))
                            dtvalue = contentDate;
                        else
                            throw new FormatException("Unable to parse date/time: " + dtText);
                    }
                    else if (DateTime.TryParse(dtText, CultureInfo.InvariantCulture, DateTimeStyles.AllowWhiteSpaces, out contentDate))
                        dtvalue = contentDate;
                    else
                        throw new FormatException("Unable to parse date/time: " + dtText);
                }
                // Format with the invariant culture so the stored value does not depend on the
                // machine's calendar or time separator (dates are parsed invariantly above as well).
                date = dtvalue.ToString("yyyy-MM-dd HH:mm:ss", CultureInfo.InvariantCulture);

                // The indexed text always starts with the title line, followed by the selected content.
                StringWriter indexed = new StringWriter();
                indexed.WriteLine(title);
                foreach (var xpath in _config.Indexed.SafeEnumeration())
                {
                    foreach (var indexItem in selectFrom.Select(xpath.XPath))
                    {
                        string innerText = indexItem.InnerText;
                        indexed.WriteLine(innerText);
                        indexed.WriteLine(NonAlphaNum.Replace(innerText, " "));//again, removing all special characters.
                    }
                }

                // No explicit blurb: use the indexed text minus the leading title.
                if (String.IsNullOrEmpty(blurb))
                    blurb = indexed.ToString().Substring(title.Length).Trim();

                // Trim to the configured maximum length (or the default) and collapse whitespace runs.
                title = WhiteSpaces.Replace(TrimString(title, _config.TitlePath != null ? (uint)_config.TitlePath.MaxLength : BlurbLength), " ");
                blurb = WhiteSpaces.Replace(TrimString(blurb, _config.BlubXPath != null ? (uint)_config.BlubXPath.MaxLength : BlurbLength), " ");

                string text = indexed.ToString();

                using (TextReader rdr = new StringReader(text))
                    AddToIndex(item.Key, date, title, blurb, rdr);
            }
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Parses HTML containing a mix of valid, unknown and malformed entity references and
        /// checks the exact XML text produced when the document is written to an XmlWriter.
        /// </summary>
        public void TestHtmlEntityRef()
        {
            // Raw input: unescaped markup characters plus well-formed and malformed entity references.
            string html = @"<html>
            <body attrib=""this & that ><&nbsp;&#32;!"">
                this char '<' and this one '>' and this one '&' should be encoded.  
                We encoded ' &nbsp; ' and &Atilde; and '&#32;' and '&#x20;' all by ourselves.
                This in not valid xml &#xffffffff;, nor is &#123456789;, but we still allow it.
                This entity name will pass-through &unknown; this will not &whateverthatmeans;
                and nor will these &; &#; &h; &l t; &1two; &234; &#x00fg; &#-123;.
            </body>
            </html>";
            // Expected serialization; the attribute's &nbsp; is emitted as the literal character 160.
            string expect = @"<html><body attrib=""this &amp; that &gt;&lt;" + (Char)160 + @" !"">
                this char '&lt;' and this one '&gt;' and this one '&amp;' should be encoded.  
                We encoded ' &nbsp; ' and &Atilde; and '&#32;' and '&#x20;' all by ourselves.
                This in not valid xml &#xffffffff;, nor is &#123456789;, but we still allow it.
                This entity name will pass-through &unknown; this will not &amp;whateverthatmeans;
                and nor will these &amp;; &amp;#; &amp;h; &amp;l t; &amp;1two; &amp;234; &amp;#x00fg; &amp;#-123;.
            </body></html>";

            XmlLightDocument parsed = new HtmlLightDocument(html);

            // Writer configured to add no indentation, newlines or XML declaration of its own.
            XmlWriterSettings writerSettings = new XmlWriterSettings();
            writerSettings.CheckCharacters = true;
            writerSettings.Indent = false;
            writerSettings.IndentChars = "";
            writerSettings.NewLineChars = "";
            writerSettings.NewLineHandling = NewLineHandling.None;
            writerSettings.OmitXmlDeclaration = true;
            writerSettings.CloseOutput = false;

            StringWriter output = new StringWriter();
            XmlWriter xmlWriter = XmlWriter.Create(output, writerSettings);
            parsed.WriteXml(xmlWriter);
            // Flush so everything buffered by the XmlWriter reaches the StringWriter before reading it.
            xmlWriter.Flush();

            Assert.AreEqual(expect, output.ToString());
        }