Ejemplo n.º 1
0
        /// <summary>
        /// Parses <c>test2</c> and checks both string renderings of the result:
        /// the default <c>ToString()</c> against <c>test2Formated</c> and
        /// <c>ToString(false)</c> against the original input text.
        /// </summary>
        public void TestToString()
        {
            var parsed = HtmlDoc.Parse(test2);

            // Default rendering is compared with the pre-formatted fixture.
            var defaultText = parsed.ToString();
            Assert.AreEqual(test2Formated, defaultText);

            // Rendering with the flag set to false is compared with the input itself.
            var plainText = parsed.ToString(false);
            Assert.AreEqual(test2, plainText);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Learns, from two example rows, a program that extracts the surname relative to a
        /// table row (rather than a whole document), then runs it on the fifth row and prints
        /// the selector and text of every region it returns.
        /// </summary>
        public static void LearnSurnameWithRespectToTableRow()
        {
            string html = File.ReadAllText(@"..\..\SampleDocuments\sample-document-1.html");
            HtmlDoc document = HtmlDoc.Create(html);

            // Each example pairs a reference row with the cell expected from that row.
            WebRegion firstRow = document.GetRegion("tr:nth-child(1)");
            WebRegion firstRowCell = document.GetRegion("tr:nth-child(1) td:nth-child(2)");
            WebRegion secondRow = document.GetRegion("tr:nth-child(2)");
            WebRegion secondRowCell = document.GetRegion("tr:nth-child(2) td:nth-child(2)");
            var firstExample = new ExtractionExample<WebRegion>(firstRow, firstRowCell);
            var secondExample = new ExtractionExample<WebRegion>(secondRow, secondRowCell);

            // The second argument of LearnRegion is given an empty example sequence.
            var examples = new[] { firstExample, secondExample };
            var noExamples = Enumerable.Empty<ExtractionExample<WebRegion>>();
            Web.Program learned = Web.Learner.Instance.LearnRegion(examples, noExamples);
            if (learned == null)
            {
                // LearnRegion produced no program; nothing to run.
                return;
            }

            // Run the learned program on the 5th table row.
            WebRegion fifthRow = document.GetRegion("tr:nth-child(5)");
            foreach (WebRegion match in learned.Run(fifthRow))
            {
                Console.WriteLine("Learn surname with respect to table row: ");
                Console.WriteLine(match.GetSpecificSelector());
                Console.WriteLine(match.Text());
                Console.WriteLine();
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Loads <paramref name="_content"/> into <c>HtmlDoc</c> and stores the trimmed title
        /// text in <c>PageTitle</c>.
        /// </summary>
        /// <param name="_content">Markup handed to <c>HtmlDoc.LoadHtml</c>.</param>
        private void DownloadHtmlContent3(String _content)
        {
            HtmlDoc.LoadHtml(_content);

            HtmlNode titleNode = HtmlDoc.DocumentNode.SelectSingleNode("//head//title");

            string rawTitle;
            if (titleNode == null)
            {
                // No title element under head: fall back to the text of the whole document.
                rawTitle = HtmlDoc.DocumentNode.InnerText;
            }
            else
            {
                rawTitle = titleNode.InnerText;
            }

            PageTitle = rawTitle.Trim();
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Learns a program to extract the surname from a given table row (rather than a whole document),
        /// runs it on the fifth table row and prints the selector and text of the extracted region.
        /// Prints nothing when no program is learned or when the run yields no region.
        /// </summary>
        public static void LearnSurnameWithRespectToTableRow()
        {
            string    s   = File.ReadAllText(Path.Combine(_sampleDocs, "sample-document-1.html"));
            HtmlDoc   doc = HtmlDoc.Create(s);
            WebRegion referenceRegion1 = doc.GetRegion("tr:nth-child(1)");                 //1st table row
            WebRegion exampleRegion1   = doc.GetRegion("tr:nth-child(1) td:nth-child(2)"); //2nd cell in 1st table row
            WebRegion referenceRegion2 = doc.GetRegion("tr:nth-child(2)");                 //2nd table row
            WebRegion exampleRegion2   = doc.GetRegion("tr:nth-child(2) td:nth-child(2)"); //2nd cell in 2nd table row
            CorrespondingMemberEquals <WebRegion, WebRegion> exampleSpec1 = new CorrespondingMemberEquals <WebRegion, WebRegion>(referenceRegion1, exampleRegion1);
            CorrespondingMemberEquals <WebRegion, WebRegion> exampleSpec2 = new CorrespondingMemberEquals <WebRegion, WebRegion>(referenceRegion2, exampleRegion2);

            Web.RegionProgram prog = Web.RegionLearner.Instance.Learn(new[] { exampleSpec1, exampleSpec2 });
            if (prog == null)
            {
                return;
            }
            //run the program on 5th table row
            WebRegion fifthRowRegion = doc.GetRegion("tr:nth-child(5)"); //5th table row
            WebRegion region         = prog.Run(new [] { fifthRowRegion })?.SingleOrDefault();
            if (region == null)
            {
                // "?.SingleOrDefault()" yields null when Run returns null or an empty sequence;
                // dereferencing it below would throw, so stop here like the null-program case.
                return;
            }

            Console.WriteLine("Learn surname with respect to table row: ");
            Console.WriteLine(region.GetSpecificSelector());
            Console.WriteLine(region.Text());
            Console.WriteLine();
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Creates the node set for a document. The chained constructor receives a one-element
 /// array holding <c>doc.FirstElement</c>; when the document reports itself as empty,
 /// <c>_nodes</c> is overwritten with an empty array.
 /// </summary>
 /// <param name="doc">Document whose <c>FirstElement</c> and <c>IsEmpty</c> are read.</param>
 private DownloadedNodes(HtmlDoc doc) : this(new[] { doc.FirstElement })
 {
     if (!doc.IsEmpty)
     {
         return;
     }

     // No nodes in the root: replace whatever the chained constructor stored.
     _nodes = new HtmlElement[] { };
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Formats the loaded question document for display: removes the <c>style</c> and
        /// <c>class</c> attributes from <c>p</c> and <c>span</c> tags, makes sure <c>body</c>
        /// and <c>head</c> elements exist, appends a style sheet to the head and prepends a
        /// heading paragraph of the form "index/size.title" to the body.
        /// Does nothing when no document is loaded.
        /// </summary>
        /// <param name="color">CSS color value used for the body text.</param>
        /// <param name="title">Heading text; inserted into the markup as-is (not HTML-encoded).</param>
        /// <param name="index">Number shown before the slash in the heading.</param>
        /// <param name="size">Number shown after the slash in the heading.</param>
        public void formatQuestion(string color, string title, int index, int size)
        {
            if (HtmlDoc == null)
            {
                // RemoveAttr silently skips a missing document; do the same here instead of
                // failing with a NullReferenceException on the first query below.
                return;
            }

            this.RemoveAttr("p", "style", "class");
            this.RemoveAttr("span", "style", "class");

            var body = HtmlDoc.QuerySelector("body");

            if (body == null)
            {
                // No body element: wrap the whole current content in a new one.
                body           = HtmlDoc.CreateElement("body");
                body.InnerHtml = HtmlDoc.DocumentNode.InnerHtml;
                HtmlDoc.DocumentNode.RemoveAll();
                HtmlDoc.DocumentNode.AppendChild(body);
            }

            var css = new StringBuilder();

            css.AppendLine("body {" + $"color:{color};font-size:30px;font-weight:bold;line-height:1.5;font-family:'微软雅黑';" + "}");
            css.AppendLine("p {margin:0 0 20px 0;}");
            css.AppendLine("p:first-child {margin-bottom:50px;}");
            var style = HtmlDoc.CreateElement("style");

            style.InnerHtml = css.ToString();

            var head = HtmlDoc.QuerySelector("head");

            if (head == null)
            {
                head = HtmlDoc.CreateElement("head");
                // Insert relative to the body's actual parent: a body found by the selector
                // may be nested (e.g. under an html element) and then is not a direct child
                // of DocumentNode, which InsertBefore on DocumentNode would reject.
                body.ParentNode.InsertBefore(head, body);
            }
            head.AppendChild(style);

            body.InnerHtml = $"<p><span style=\"color:#226cfb\">{index}</span>/{size}.{title}</p>" + body.InnerHtml;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Learns a program to extract the first surname in the document from two examples
        /// from two different documents, runs it on the second document and prints the
        /// selector and text of the extracted region.
        /// Prints nothing when no program is learned or when the run yields no region.
        /// </summary>
        public static void LearnFirstSurnameInDocumentUsingMultipleExamples()
        {
            string    s1               = File.ReadAllText(Path.Combine(_sampleDocs, "sample-document-1.html"));
            HtmlDoc   doc1             = HtmlDoc.Create(s1);
            string    s2               = File.ReadAllText(Path.Combine(_sampleDocs, "sample-document-2.html"));
            HtmlDoc   doc2             = HtmlDoc.Create(s2);
            WebRegion referenceRegion1 = new WebRegion(doc1);
            WebRegion referenceRegion2 = new WebRegion(doc2);
            WebRegion exampleRegion1   = doc1.GetRegion("tr:nth-child(1) td:nth-child(2)"); //2nd cell in 1st table row of doc1
            WebRegion exampleRegion2   = doc2.GetRegion("tr:nth-child(1) td:nth-child(2)"); //2nd cell in 1st table row of doc2
            CorrespondingMemberEquals <WebRegion, WebRegion> exampleSpec1 = new CorrespondingMemberEquals <WebRegion, WebRegion>(referenceRegion1, exampleRegion1);
            CorrespondingMemberEquals <WebRegion, WebRegion> exampleSpec2 = new CorrespondingMemberEquals <WebRegion, WebRegion>(referenceRegion2, exampleRegion2);

            Web.RegionProgram prog = Web.RegionLearner.Instance.Learn(new[] { exampleSpec1, exampleSpec2 });
            if (prog == null)
            {
                return;
            }
            //run the program on the second document
            WebRegion region = prog.Run(new [] { referenceRegion2 })?.SingleOrDefault();
            if (region == null)
            {
                // "?.SingleOrDefault()" yields null when Run returns null or an empty sequence;
                // dereferencing it below would throw, so stop here like the null-program case.
                return;
            }

            Console.WriteLine("Learn first surname in document from multiple examples: ");
            Console.WriteLine(region.GetSpecificSelector());
            Console.WriteLine(region.Text());
            Console.WriteLine();
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Learns an extraction program from one example, serializes it to text, loads it back
        /// and prints the selector and text of every region the reloaded program returns for
        /// the whole-document region.
        /// </summary>
        public static void SerializeProgram()
        {
            string html = File.ReadAllText(Path.Combine(_sampleDocs, "sample-document-1.html"));
            HtmlDoc document = HtmlDoc.Create(html);

            WebRegion wholeDocument = new WebRegion(document);
            // Example output: 2nd cell in the 1st table row.
            WebRegion expectedCell = document.GetRegion("tr:nth-child(1) td:nth-child(2)");
            var example = new CorrespondingMemberEquals<WebRegion, WebRegion>(wholeDocument, expectedCell);

            Web.RegionProgram learned = Web.RegionLearner.Instance.Learn(new[] { example });
            if (learned != null)
            {
                // Round-trip the program through its serialized text form.
                string serialized = learned.Serialize();
                Web.RegionProgram restored = Web.Loader.Instance.Region.Load(serialized);

                IEnumerable<WebRegion> matches = restored.Run(new[] { wholeDocument });

                Console.WriteLine("Run first surname extraction program after serialization and deserialization: ");
                foreach (WebRegion match in matches)
                {
                    Console.WriteLine(match.GetSpecificSelector());
                    Console.WriteLine(match.Text());
                }
                Console.WriteLine();
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Parses <c>test3</c> and checks the values of its <c>http-equiv</c> and
        /// <c>content</c> attributes.
        /// </summary>
        public void TestAttributes()
        {
            var parsed = HtmlDoc.Parse(test3);

            var httpEquiv = parsed.Attributes["http-equiv"];
            Assert.AreEqual("Content-Type", httpEquiv);

            var content = parsed.Attributes["content"];
            Assert.AreEqual("text/html; charset=utf-8", content);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Learns a program that extracts the first surname in a document, using one example
        /// from each of two different documents, then runs it on the second document and
        /// prints the selector and text of every region it returns.
        /// </summary>
        public static void LearnFirstSurnameInDocumentUsingMultipleExamples()
        {
            string firstHtml = File.ReadAllText(@"..\..\SampleDocuments\sample-document-1.html");
            HtmlDoc firstDoc = HtmlDoc.Create(firstHtml);
            string secondHtml = File.ReadAllText(@"..\..\SampleDocuments\sample-document-2.html");
            HtmlDoc secondDoc = HtmlDoc.Create(secondHtml);

            // The reference region of each example is the whole document.
            WebRegion firstWhole = new WebRegion(firstDoc);
            WebRegion secondWhole = new WebRegion(secondDoc);

            // Expected output for each document: 2nd cell in its 1st table row.
            WebRegion firstCell = firstDoc.GetRegion("tr:nth-child(1) td:nth-child(2)");
            WebRegion secondCell = secondDoc.GetRegion("tr:nth-child(1) td:nth-child(2)");

            var firstExample = new ExtractionExample<WebRegion>(firstWhole, firstCell);
            var secondExample = new ExtractionExample<WebRegion>(secondWhole, secondCell);

            // The second argument of LearnRegion is given an empty example sequence.
            var examples = new[] { firstExample, secondExample };
            var noExamples = Enumerable.Empty<ExtractionExample<WebRegion>>();
            Web.Program learned = Web.Learner.Instance.LearnRegion(examples, noExamples);
            if (learned == null)
            {
                // LearnRegion produced no program; nothing to run.
                return;
            }

            // Run the learned program on the second document.
            foreach (WebRegion match in learned.Run(secondWhole))
            {
                Console.WriteLine("Learn first surname in document from multiple examples: ");
                Console.WriteLine(match.GetSpecificSelector());
                Console.WriteLine(match.Text());
                Console.WriteLine();
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Parses <c>test1</c> and checks that it has more than one descendant and that its
        /// <c>ul</c> element has exactly six <c>li</c> descendants.
        /// </summary>
        public void TestDescendants()
        {
            var root = HtmlDoc.Parse(test1);

            var descendantCount = root.Descendants().Count();
            Assert.IsTrue(descendantCount > 1);

            var listItemCount = root.Element("ul").Descendants("li").Count();
            Assert.AreEqual(6, listItemCount);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Downloads the page at <c>URL</c> into <c>URLHTML</c>, loads it into <c>HtmlDoc</c>,
        /// pushes the content to the hosting view when running as a UI form, and restarts the
        /// XPath search when a selection text is set.
        /// </summary>
        private async void VisitUrlAsync()
        {
            // Nothing to do while refresh is disabled or hasInit is still false.
            if (!enableRefresh || hasInit == false)
            {
                return;
            }

            URLHTML = await MainFrm.RunBusyWork(() =>
            {
                RequestManager.Instance.RequestCount++;
                HttpStatusCode code;
                return GetHtml(URL, out code);
            });

            // The downloaded text contains the redirect marker: show it to the user and stop.
            if (URLHTML.Contains("尝试自动重定向"))
            {
                var notice = "网站提示: " + URLHTML + "\n 通常原因是网站对请求合法性做了检查, 建议填写关键字对网页内容进行自动嗅探";
                var answer = MessageBox.Show(notice, "提示信息", MessageBoxButton.OK);
                if (answer == MessageBoxResult.OK)
                {
                    return;
                }
            }

            ControlExtended.SafeInvoke(() =>
            {
                HtmlDoc.LoadHtml(URLHTML);
                if (!MainDescription.IsUIForm)
                {
                    return;
                }

                // Locate the view registered for this model, if any.
                var dock = MainFrm as IDockableManager ?? ControlExtended.DockableManager;
                var control = dock?.ViewDictionary.FirstOrDefault(d => d.Model == this);
                if (control == null)
                {
                    return;
                }

                dynamic invoke = control.View;
                if (IsSuperMode == false)
                {
                    invoke.UpdateHtml(URLHTML);
                    OnPropertyChanged("HtmlDoc");
                }
                else
                {
                    // In super mode the view only receives a fixed notice instead of the page.
                    invoke.UpdateHtml("超级模式下内置浏览器不展示内容,请查看左侧的文本内容");
                }
            },
                                       name: "解析html文档");

            if (!string.IsNullOrWhiteSpace(selectText))
            {
                currentXPaths = HtmlDoc.SearchXPath(SelectText, () => IsAttribute).GetEnumerator();
                GetXPathAsync();
            }
            OnPropertyChanged("URLHTML");
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Downloads the page at <c>URL</c> into <c>URLHTML</c>, loads it into <c>HtmlDoc</c>,
        /// pushes the content to the hosting view when running as a UI form, and restarts the
        /// XPath search when a selection text is set. User-facing texts are looked up through
        /// <c>GlobalHelper.Get</c>.
        /// </summary>
        private async void VisitUrlAsync()
        {
            // Nothing to do while refresh is disabled or hasInit is still false.
            if (!enableRefresh || hasInit == false)
            {
                return;
            }

            URLHTML = await MainFrm.RunBusyWork(() =>
            {
                ConfigFile.GetConfig<DataMiningConfig>().RequestCount++;
                HttpStatusCode code;
                return GetHtml(URL, out code);
            }, title: GlobalHelper.Get("long_visit_web"));

            // The downloaded text contains the marker stored under key_671:
            // show it to the user and stop.
            if (URLHTML.Contains(GlobalHelper.Get("key_671")))
            {
                var notice = GlobalHelper.Get("key_672") + URLHTML + GlobalHelper.Get("key_673");
                var caption = GlobalHelper.Get("key_99");
                var answer = MessageBox.Show(notice, caption, MessageBoxButton.OK);
                if (answer == MessageBoxResult.OK)
                {
                    return;
                }
            }

            ControlExtended.SafeInvoke(() =>
            {
                HtmlDoc.LoadHtml(URLHTML);
                if (!MainDescription.IsUIForm)
                {
                    return;
                }

                // Locate the view registered for this model, if any.
                var dock = MainFrm as IDockableManager ?? ControlExtended.DockableManager;
                var control = dock?.ViewDictionary.FirstOrDefault(d => d.Model == this);
                if (control == null)
                {
                    return;
                }

                dynamic invoke = control.View;
                if (IsSuperMode == false)
                {
                    invoke.UpdateHtml(URLHTML);
                    OnPropertyChanged("HtmlDoc");
                }
                else
                {
                    // In super mode the view receives the text stored under key_674 instead of the page.
                    invoke.UpdateHtml(GlobalHelper.Get("key_674"));
                }
            },
                                       name: GlobalHelper.Get("key_675"));

            if (!string.IsNullOrWhiteSpace(selectText))
            {
                currentXPaths = HtmlDoc.SearchXPath(SelectText, () => IsAttribute).GetEnumerator();
                GetXPathAsync();
            }
            OnPropertyChanged("URLHTML");
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Parses <c>Data/simple.htm</c> and compares the string form of its root element
        /// with an empty string.
        /// </summary>
        //[TestMethod]
        public void MyTestMethod()
        {
            string s;

            // Dispose the stream deterministically, and open it read-only so the test does
            // not demand write access to the fixture file.
            using (var file = new FileStream("Data/simple.htm", FileMode.Open, FileAccess.Read))
            {
                var e = new HtmlDoc(file).RootElement;
                s = e.ToString();
            }

            Assert.AreEqual("", s);
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Parses the <c>test4</c> and <c>test44</c> fixtures and checks the descendant count
        /// of the first and the name of the element two levels into the second.
        /// </summary>
        public void TestMismatch()
        {
            var first = HtmlDoc.Parse(test4);
            Assert.AreEqual(6, first.Descendants().Count());

            var nested = HtmlDoc.Parse(test44).Element().Element();
            Assert.AreEqual("ul", nested.Name);

            // TODO: look up html tags, then re-enable the two checks below.
            //Assert.AreEqual<int>(3, e.Descendants().Count());
            //Assert.AreEqual<string>("ul", e.Descendants().Last().Name);
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Checks <c>Element(name)</c> lookups on the root element parsed from <c>stream</c>:
        /// "h2" is not found on the root, "hello" is, and "h2" is found under "hello".
        /// </summary>
        public void TestElement()
        {
            var root = new HtmlDoc(stream).RootElement;

            // "h2" is not returned when asked for on the root itself.
            HtmlElement missing = root.Element("h2");
            Assert.IsNull(missing);

            HtmlElement hello = root.Element("hello");
            Assert.AreEqual("hello", hello.Name);

            // The same "h2" lookup succeeds one level down.
            HtmlElement heading = hello.Element("h2");
            Assert.AreEqual("h2", heading.Name);
        }
Ejemplo n.º 17
0
 /// <summary>
 /// Rebuilds <c>CrawlItems</c> from the XPaths that <c>HtmlDoc.SearchXPath</c> returns for
 /// <c>SelectText</c>; each item stores the XPath and the inner text of the node it selects.
 /// Does nothing when <c>selectText</c> is null or whitespace.
 /// </summary>
 private void Search()
 {
     if (string.IsNullOrWhiteSpace(selectText))
     {
         return;
     }

     // Materialize the search results before clearing the existing items.
     var matchedPaths = HtmlDoc.SearchXPath(SelectText, () => true).ToList();

     CrawlItems.Clear();
     matchedPaths.Execute(path => CrawlItems.Add(new CrawlItem
     {
         XPath       = path,
         SampleData1 = HtmlDoc.DocumentNode.SelectSingleNodePlus(path, SelectorFormat.XPath).InnerText
     }));
 }
Ejemplo n.º 18
0
        /// <summary>
        /// Walks the element tree parsed from <c>stream</c> by name: expects no "h2" directly
        /// on the root, a "hello" element on the root, and an "h2" element inside "hello".
        /// </summary>
        public void TestElement()
        {
            var rootElement = new HtmlDoc(stream).RootElement;

            Assert.IsNull(rootElement.Element("h2"));

            HtmlElement helloElement = rootElement.Element("hello");
            Assert.AreEqual("hello", helloElement.Name);

            HtmlElement innerHeading = helloElement.Element("h2");
            Assert.AreEqual("h2", innerHeading.Name);
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Validates a CSS selector against a freshly created document and records a
        /// <c>BadCssSelector</c> error when validation fails. Null or empty selectors are ignored.
        /// </summary>
        /// <param name="selector">Selector text to validate.</param>
        /// <param name="lineInfo">Line information attached to the recorded error.</param>
        private void VerifyCssSelector(string selector, Semantic.LineInfo lineInfo)
        {
            if (string.IsNullOrEmpty(selector))
            {
                return;
            }

            HtmlDoc validator = Config.DomFactory.Create();
            if (validator.ValidateCss(selector))
            {
                return;
            }

            Errors.Add(new BadCssSelector(selector, lineInfo));
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Normalizes the post message, fills its attachments, parses the HTML-decoded message
        /// wrapped in a <c>div</c>, and adds the regrouped content to <paramref name="item"/>.
        /// </summary>
        /// <param name="post">Post whose <c>Message</c> is normalized in place and parsed.</param>
        /// <param name="item">Target that receives the regrouped content.</param>
        private void BuildContent(PostItem post, S1PostItem item)
        {
            post.Message = post.Message ?? "";

            // Workaround: repair "<imgwidth=" tags and drop line feeds before parsing.
            post.Message = post.Message.Replace("<imgwidth=", "<img width=").Replace("\n", "");

            FillAttachment(post);

            var decoded = S1Resource.HttpUtility.HtmlDecode(post.Message);
            var wrapped = new HtmlDoc(string.Format("<div>{0}</div>", decoded));
            var content = wrapped.RootElement;
            if (content == null)
            {
                return;
            }

            item.AddRange(SimpleParser.SimpleThreadParser.ReGroupContent(content));
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Parses <c>Data/simple.htm</c> and checks <c>FindElements</c>: exactly one table has
        /// width "98%" and cellpadding "7", and the document contains three tables in total.
        /// </summary>
        public void TestFindElement()
        {
            // Dispose the stream deterministically, and open it read-only so the test does
            // not demand write access to the fixture file. The queries below are evaluated
            // inside the using block so the stream is still open if parsing is deferred.
            using (var file = new FileStream("Data/simple.htm", FileMode.Open, FileAccess.Read))
            {
                var doc    = new HtmlDoc(file).RootElement;
                var tables = from table in doc.FindElements("table")
                             where table.Attributes["width"] == "98%"
                             where table.Attributes["cellpadding"] == "7"
                             select table;

                Assert.AreEqual(1, tables.Count());

                tables = from table in doc.FindElements()
                         where table.Name == "table"
                         select table;

                Assert.AreEqual(3, tables.Count());
            }
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Maps a persisted layout string back to the dock content it describes.
        /// Known window type names map to the matching window field; otherwise the string is
        /// expected to be "type,fileName,text" for the <c>HtmlDoc</c> document window.
        /// </summary>
        /// <param name="persistString">Persisted identifier of the dock content.</param>
        /// <returns>The matching dock content, or null when the string is not recognized.</returns>
        private IDockContent GetContentFromPersistString(string persistString)
        {
            if (persistString == typeof(HelpAndExplainWindow).ToString())
            {
                return m_helpExplorer;
            }
            if (persistString == typeof(PropertyWindow).ToString())
            {
                return m_propertyWindow;
            }
            if (persistString == typeof(ToolWindow).ToString())
            {
                return m_toolbox;
            }

            // Anything else must be a three-part document entry whose first part names HtmlDoc.
            string[] parts = persistString.Split(',');
            if (parts.Length != 3 || parts[0] != typeof(HtmlDoc).ToString())
            {
                return null;
            }

            // Always reuse the single editor document instance.
            m_docWindow = EditorDocument;

            string fileName = parts[1];
            if (fileName != string.Empty)
            {
                m_docWindow.FileName = fileName;
            }

            string caption = parts[2];
            if (caption != string.Empty)
            {
                m_docWindow.Text = caption;
            }

            // Without a file, or with the "New Document" caption, show the common help.
            bool untitled = String.IsNullOrEmpty(m_docWindow.FileName) || m_docWindow.Text.Equals("New Document");
            if (untitled)
            {
                ShowCommonHelp();
            }

            return m_docWindow;
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Builds <c>emotion_list.xml</c>: for every <c>img</c> in <c>Data/faceRank.htm</c> whose
        /// <c>src</c> equals the path of an entry in <c>EmotionParser.EmotionList</c> (initialized
        /// from <c>Data/face.htm</c>), an <c>img</c> element with that entry's Id and Path is
        /// written under a <c>Root</c> element. Sets <c>WelcomeTitle</c> to "Done" afterwards.
        /// </summary>
        private void GenerateXml()
        {
            XDocument doc  = new XDocument();
            XElement  root = new XElement("Root");

            doc.Add(root);

            // Both input streams are disposed deterministically and opened read-only.
            // All use of the parsed data stays inside the using block so the streams are
            // still open if parsing is deferred.
            using (var sourcePage = new FileStream("Data/face.htm", FileMode.Open, FileAccess.Read))
            using (var rankPage = new FileStream("Data/faceRank.htm", FileMode.Open, FileAccess.Read))
            {
                EmotionParser.Init(sourcePage);

                var ranks = new HtmlDoc(rankPage).RootElement.Descendants("img");

                foreach (var image in ranks)
                {
                    XElement e = null;
                    foreach (var item in EmotionParser.EmotionList)
                    {
                        if (image.Attributes["src"] == item.Value.Path)
                        {
                            // First emotion with the same path wins.
                            e = new XElement("img");
                            XAttribute idAttribute   = new XAttribute("Id", item.Value.Id);
                            XAttribute pathAttribute = new XAttribute("Path", item.Value.Path);
                            e.Add(idAttribute);
                            e.Add(pathAttribute);
                            System.Diagnostics.Debug.WriteLine(item.Value.Path);
                            break;
                        }
                    }
                    if (e != null)
                    {
                        root.Add(e);
                    }
                }
            }

            var settings = new XmlWriterSettings
            {
                Indent = true,
                NewLineOnAttributes = false
            };

            // Disposing the writer flushes it and releases the output file handle.
            using (var writer = XmlWriter.Create("emotion_list.xml", settings))
            {
                doc.WriteTo(writer);
            }

            WelcomeTitle = "Done";
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Cleans up the post message, fills its attachments and, when the HTML-decoded message
        /// (wrapped in a <c>div</c>) parses to a root element, appends the regrouped content
        /// to <paramref name="item"/>.
        /// </summary>
        /// <param name="post">Post whose <c>Message</c> is cleaned in place and parsed.</param>
        /// <param name="item">Target that receives the regrouped content.</param>
        private void BuildContent(PostItem post, S1PostItem item)
        {
            post.Message = post.Message ?? "";

            // Workaround for malformed "<imgwidth=" tags; line feeds are removed as well.
            post.Message = post.Message.Replace("<imgwidth=", "<img width=").Replace("\n", "");

            FillAttachment(post);

            var markup = string.Format("<div>{0}</div>", S1Resource.HttpUtility.HtmlDecode(post.Message));
            var rootElement = new HtmlDoc(markup).RootElement;

            if (rootElement != null)
            {
                var regrouped = SimpleParser.SimpleThreadParser.ReGroupContent(rootElement);
                item.AddRange(regrouped);
            }
        }
Ejemplo n.º 25
0
        /// <summary>
        /// Removes every node that matches the given tag from the document.
        /// Does nothing when no document is loaded.
        /// </summary>
        /// <param name="tag">Tag (selector) passed to <c>QuerySelectorAll</c>.</param>
        /// <returns>This helper instance, so calls can be chained.</returns>
        /// <exception cref="ArgumentException"><paramref name="tag"/> is empty.</exception>
        public HtmlDocHelper RemoveNode(string tag)
        {
            if (HtmlDoc == null)
            {
                return(this);
            }

            if (tag.IsEmpty())
            {
                // ArgumentException instead of the bare Exception: still caught by existing
                // catch (Exception) handlers, but identifies the failure as a bad argument.
                throw new ArgumentException("请指定参数.");
            }
            var nodes = HtmlDoc.QuerySelectorAll(tag);

            foreach (var item in nodes)
            {
                item.Remove();
            }
            return(this);
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Adds an attribute to every node that matches the given tag.
        /// Does nothing when no document is loaded.
        /// </summary>
        /// <param name="tag">Tag (selector) passed to <c>QuerySelectorAll</c>.</param>
        /// <param name="attr">Name of the attribute to add.</param>
        /// <param name="attrValue">Value of the attribute to add.</param>
        /// <returns>This helper instance, so calls can be chained.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="tag"/> or <paramref name="attr"/> is empty.
        /// </exception>
        public HtmlDocHelper AddAttr(string tag, string attr, string attrValue)
        {
            if (HtmlDoc == null)
            {
                return(this);
            }

            if (tag.IsEmpty() || attr.IsEmpty())
            {
                // ArgumentException instead of the bare Exception: still caught by existing
                // catch (Exception) handlers, but identifies the failure as a bad argument.
                throw new ArgumentException("请指定参数.");
            }
            var nodes = HtmlDoc.QuerySelectorAll(tag);

            foreach (var item in nodes)
            {
                item.Attributes.Add(attr, attrValue);
            }
            return(this);
        }
Ejemplo n.º 27
0
        /// <summary>
        /// Downloads the payload for the given wire, decodes it as UTF-8 and loads it into a
        /// new document created by <c>Config.DomFactory</c>.
        /// </summary>
        /// <param name="factory">Request factory passed to <c>CreateRequest</c>.</param>
        /// <param name="wire">Wire description passed to <c>CreateRequest</c>.</param>
        /// <param name="length">Receives the number of downloaded bytes (0 when the download is not a byte array).</param>
        /// <returns>The document loaded with the decoded text.</returns>
        private static HtmlDoc GetDocument(IHttpRequestFactory factory, IHttpWire wire, out int length)
        {
            var request = CreateRequest(factory, wire);

            // A download result that is not a byte array is treated as an empty payload.
            byte[] payload = (request.Download() as byte[]) ?? new byte[0];
            length = payload.Length;

            string html = Encoding.UTF8.GetString(payload, 0, payload.Length);

            HtmlDoc document = Config.DomFactory.Create();
            document.Load(html);
            return document;
        }
Ejemplo n.º 28
0
        /// <summary>
        /// Fetches the privacy page and returns the <c>value</c> attribute of its
        /// <c>input</c> element named "verify".
        /// </summary>
        /// <param name="client">Client used to request the privacy page.</param>
        /// <returns>The value of the "verify" input.</returns>
        /// <exception cref="S1UserException">The page contains no input named "verify".</exception>
        public static async Task<string> GetVerifyString(this S1WebClient client)
        {
            // DownloadString would just return cached data, so dummy data is posted
            // instead to bypass the cache.
            var privacyPage = await client.PostDataTaskAsync(new Uri(UserAction.PrivacyUrl));

            var root = new HtmlDoc(privacyPage).RootElement;
            var verifyInput = root.FindFirst("input", (element) => element.Attributes["name"] == "verify");
            if (verifyInput == null)
            {
                // No verify field: report whatever error the page carries.
                throw new S1UserException(ErrorParser.Parse(root));
            }

            return verifyInput.Attributes["value"];
        }
Ejemplo n.º 29
0
        /// <summary>
        /// Learns a program to extract the surname from a given table row (rather than a whole document)
        /// using a negative example, runs it on the first row and prints the selector and text of
        /// the extracted region.
        /// Prints nothing when no program is learned or when the run yields no region.
        /// </summary>
        public static void LearnSurnameWithRespectToTableRowUsingNegativeExample()
        {
            string    s   = File.ReadAllText(Path.Combine(_sampleDocs, "sample-document-1.html"));
            HtmlDoc   doc = HtmlDoc.Create(s);
            WebRegion referenceRegion1 = doc.GetRegion("tr:nth-child(1)"); //1st table row
            WebRegion referenceRegion2 = doc.GetRegion("tr:nth-child(2)"); //2nd table row
            // Positive example: 2nd cell of the 1st row. Negative example: 1st cell of the 2nd row.
            var       posExampleSpec   = new CorrespondingMemberEquals <WebRegion, WebRegion>(referenceRegion1, doc.GetRegion("tr:nth-child(1) td:nth-child(2)"));
            var       negExampleSpec   = new CorrespondingMemberDoesNotEqual <WebRegion, WebRegion>(referenceRegion2, doc.GetRegion("tr:nth-child(2) td:nth-child(1)"));

            Web.RegionProgram prog = Web.RegionLearner.Instance.Learn(new Constraint <IEnumerable <WebRegion>, IEnumerable <WebRegion> >[] { posExampleSpec, negExampleSpec });
            if (prog == null)
            {
                return;
            }
            WebRegion region = prog.Run(new [] { referenceRegion1 })?.SingleOrDefault();
            if (region == null)
            {
                // "?.SingleOrDefault()" yields null when Run returns null or an empty sequence;
                // dereferencing it below would throw, so stop here like the null-program case.
                return;
            }

            Console.WriteLine("Learn surname with respect to table row using negative example: ");
            Console.WriteLine(region.GetSpecificSelector());
            Console.WriteLine(region.Text());
            Console.WriteLine();
        }
Ejemplo n.º 30
0
        /// <summary>
        /// Learns a program to extract the first surname in the document from one example,
        /// runs it on the same document and prints the selector and text of the extracted region.
        /// Prints nothing when no program is learned or when the run yields no region.
        /// </summary>
        public static void LearnFirstSurnameInDocumentUsingOneExample()
        {
            string    s               = File.ReadAllText(Path.Combine(_sampleDocs, "sample-document-1.html"));
            HtmlDoc   doc             = HtmlDoc.Create(s);
            WebRegion referenceRegion = new WebRegion(doc);
            WebRegion exampleRegion   = doc.GetRegion("tr:nth-child(1) td:nth-child(2)"); //2nd cell in 1st table row
            CorrespondingMemberEquals <WebRegion, WebRegion> exampleSpec = new CorrespondingMemberEquals <WebRegion, WebRegion>(referenceRegion, exampleRegion);

            Web.RegionProgram prog = Web.RegionLearner.Instance.Learn(new[] { exampleSpec });
            if (prog == null)
            {
                return;
            }
            //run the program to extract first surname from the document
            WebRegion region = prog.Run(new [] { referenceRegion })?.SingleOrDefault();
            if (region == null)
            {
                // "?.SingleOrDefault()" yields null when Run returns null or an empty sequence;
                // dereferencing it below would throw, so stop here like the null-program case.
                return;
            }

            Console.WriteLine("Learn first surname in document from one example: ");
            Console.WriteLine(region.GetSpecificSelector());
            Console.WriteLine(region.Text());
            Console.WriteLine();
        }
Ejemplo n.º 31
0
        /// <summary>
        /// Downloads <c>Addr + path</c> and reports the outcome through <c>Status</c>:
        /// "Connecting" while the request runs, "Wrong Data" for an empty response,
        /// "Server Down" when the page title contains the configured server-down title,
        /// "Success" otherwise, "Cancled" on cancellation and "Failed" on any other error.
        /// <c>NotifySuccess</c> is invoked for both the "Server Down" and "Success" outcomes.
        /// </summary>
        public async void TestServer()
        {
            Status = "Connecting";
            try
            {
                client = new S1WebClient();
                var page = await client.DownloadStringTaskAsync(Addr + path);

                // Stays in effect when the response is empty.
                Status = "Wrong Data";
                if (page.Length <= 0)
                {
                    return;
                }

                var root      = new HtmlDoc(page).RootElement;
                var downTitle = ServerListViewModel.ServerDownTitle;
                bool serverDown = downTitle != null &&
                                  root.FindFirst("title").InnerHtml.Contains(downTitle);

                if (serverDown)
                {
                    Status = "Server Down";
                    if (NotifySuccess != null)
                    {
                        NotifySuccess();
                    }
                }
                else
                {
                    // Here the callback runs before the status changes.
                    if (NotifySuccess != null)
                    {
                        NotifySuccess();
                    }
                    Status = "Success";
                }
            }
            catch (TaskCanceledException)
            {
                Status = "Cancled";
            }
            catch (Exception)
            {
                Status = "Failed";
            }
        }
Ejemplo n.º 32
0
        /// <summary>
        /// Removes the given attributes from every node that matches the given tag.
        /// Does nothing when no document is loaded.
        /// </summary>
        /// <param name="tag">Tag (selector) passed to <c>QuerySelectorAll</c>.</param>
        /// <param name="attrs">Names of the attributes to remove; at least one is required.</param>
        /// <returns>This helper instance, so calls can be chained.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="tag"/> is empty, or <paramref name="attrs"/> is null or has no entries.
        /// </exception>
        public HtmlDocHelper RemoveAttr(string tag, params string[] attrs)
        {
            if (HtmlDoc == null)
            {
                return(this);
            }

            // A null attrs array (explicit null argument) is rejected the same way as an empty
            // one instead of failing with a NullReferenceException on attrs.Length.
            if (tag.IsEmpty() || attrs == null || attrs.Length == 0)
            {
                // ArgumentException instead of the bare Exception: still caught by existing
                // catch (Exception) handlers, but identifies the failure as a bad argument.
                throw new ArgumentException("请指定参数.");
            }
            var nodes = HtmlDoc.QuerySelectorAll(tag);

            foreach (var item in nodes)
            {
                foreach (var attr in attrs)
                {
                    item.Attributes.Remove(attr);
                }
            }
            return(this);
        }
Ejemplo n.º 33
0
        /// <summary>
        /// Downloads the page for every entry in <c>handles</c> (URL built with
        /// <c>string.Format(URL, handle)</c>), wraps each response body in a <c>UserStatus</c>,
        /// then adds all of them to a new <c>HtmlDoc</c> table and writes the document.
        /// </summary>
        static void ProcessHandles()
        {
            var users = new List<UserStatus>();
            foreach (string handle in handles)
            {
                var request = WebRequest.Create(string.Format(URL, handle));

                string line;
                // Dispose the response as well as the stream/reader so the underlying
                // connection is released after every request.
                using (var response = request.GetResponse())
                using (var stream = response.GetResponseStream())
                using (var reader = new StreamReader(stream))
                {
                    line = reader.ReadToEnd();
                }

                users.Add(new UserStatus(handle, line, time));
                // Pause half a second between consecutive requests.
                Thread.Sleep(500);
            }

            var htmlDoc = new HtmlDoc();
            htmlDoc.AddUsersToTable(users);
            htmlDoc.WriteHtmlDoc();
        }
Ejemplo n.º 34
0
        /// <summary>
        /// Parses <c>Data/simple.htm</c> and checks <c>FindElements</c>: exactly one table has
        /// width "98%" and cellpadding "7", and the document contains three tables in total.
        /// </summary>
        public void TestFindElement()
        {
            // Dispose the stream deterministically, and open it read-only so the test does
            // not demand write access to the fixture file. The queries below are evaluated
            // inside the using block so the stream is still open if parsing is deferred.
            using (var file = new FileStream("Data/simple.htm", FileMode.Open, FileAccess.Read))
            {
                var doc = new HtmlDoc(file).RootElement;
                var tables = from table in doc.FindElements("table")
                             where table.Attributes["width"] == "98%"
                             where table.Attributes["cellpadding"] == "7"
                             select table;

                Assert.AreEqual(1, tables.Count());

                tables = from table in doc.FindElements()
                         where table.Name == "table"
                         select table;
                Assert.AreEqual(3, tables.Count());
            }
        }
Ejemplo n.º 35
0
 /// <summary>
 /// Parses <c>Data/simple.htm</c> and compares the string form of its root element
 /// with an empty string.
 /// </summary>
 //[TestMethod]
 public void MyTestMethod()
 {
     string s;

     // Dispose the stream deterministically, and open it read-only so the test does
     // not demand write access to the fixture file.
     using (var file = new FileStream("Data/simple.htm", FileMode.Open, FileAccess.Read))
     {
         var e = new HtmlDoc(file).RootElement;
         s = e.ToString();
     }

     Assert.AreEqual("", s);
 }