Beispiel #1
0
        /// <summary>
        /// Publishes one JSON-serialized <c>JobResetDocInfo</c> message to the
        /// <c>ConfigDocMan.QueueDocInfo</c> queue for every row returned by
        /// <c>DocManAdapter.GetUrls()</c>.
        /// </summary>
        internal void PushQueueAsDocInfo()
        {
            // The producer is created before the table is fetched.
            var server    = RabbitMQManager.GetRabbitMQServer(ConfigDocMan.KeyRabbitMqWaitDl);
            var publisher = new ProducerBasic(server, ConfigDocMan.QueueDocInfo);

            DataTable urlTable = new DocManAdapter().GetUrls();

            foreach (DataRow urlRow in urlTable.Rows)
            {
                // Each message carries the "Url" and "Id" columns of the current row.
                var job = new JobResetDocInfo
                {
                    Url = Convert.ToString(urlRow["Url"]),
                    Id  = Convert.ToInt64(urlRow["Id"])
                };

                string payload = Newtonsoft.Json.JsonConvert.SerializeObject(job);
                publisher.PublishString(payload);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Console entry point: prints a numbered menu, reads the selection from standard
        /// input and runs the matching producer or worker.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        private static void Main(string[] args)
        {
            Console.WriteLine("1. PushLinkParse. 2. RunWorkerProcess. 3.PushDocInfo. 4.WorkerDocInfo");

            // TryParse instead of Convert.ToInt32: a typo or a closed input stream yields
            // a message instead of an unhandled FormatException/OverflowException.
            string input = Console.ReadLine();
            int    choice;

            if (!int.TryParse(input, out choice))
            {
                Console.Error.WriteLine("Invalid selection: expected a number from 1 to 4.");
                return;
            }

            switch (choice)
            {
                case 1:
                    new Test().PushQueueAs();
                    break;

                case 2:
                    new WorkerCrawler().StartConsume();
                    break;

                case 3:
                    new Test().PushQueueAsDocInfo();
                    break;

                case 4:
                    new WorkerDocInfo().StartConsume();
                    break;

                default:
                    Console.Error.WriteLine("Unknown option: " + choice);
                    break;
            }
        }

        /// <summary>
        /// Manual debugging aid that is NOT called by <c>Main</c>: obtains the HTML for one
        /// hard-coded URL, removes script and style nodes, runs
        /// <c>ParserData.ParseInfoDoc</c> on it and writes the result to the console.
        /// </summary>
        /// <remarks>
        /// This code previously sat after an unconditional <c>return</c> in <c>Main</c> and
        /// was therefore unreachable; it is kept here so it can be invoked deliberately.
        /// </remarks>
        private static void ParseSingleDocumentForDebugging()
        {
            DocManAdapter docManAdapter = new DocManAdapter();

            // Other URLs that were tried with this snippet:
            //   http://moj.gov.vn/vbpq/Lists/Vn%20bn%20php%20lut/View_Detail.aspx?ItemID=30517
            //   http://moj.gov.vn/vbpq/Lists/Vn%20bn%20php%20lut/View_Detail.aspx?ItemID=6527
            //   http://moj.gov.vn/vbpq/Lists/Vn%20bn%20php%20lut/View_Detail.aspx?ItemID=20516#Chuong_I
            string url = @"http://moj.gov.vn/vbpq/Pages/View_Propertes.aspx?ItemID=8070";

            string html = System.Web.HttpUtility.HtmlDecode(GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(url, 42, 2));
            html = Common.ChuanHoaTextOfHtml(html);

            HtmlDocument htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(html);

            // ToList() materializes the matches first so nodes can be removed safely
            // while the tree is no longer being enumerated.
            htmlDocument.DocumentNode.Descendants()
                .Where(n => n.Name == "script" || n.Name == "style")
                .ToList()
                .ForEach(n => n.Remove());

            ParserData parser = new ParserData();
            DocInfo    info   = parser.ParseInfoDoc(htmlDocument, url);

            Console.Write(info);

            // NOTE(review): the call that filled this object (p.Parse(ref document, ...))
            // was already commented out, so a freshly constructed instance is validated
            // here - confirm whether the insert is still intended.
            Documet document = new Documet();
            if (document.IsValidData())
            {
                docManAdapter.InsertData(document);
            }
        }