/// <summary>
/// Reads the rows of every <c>table</c> with class <c>tableproperties</c> in
/// <paramref name="doc"/> and feeds each header/content cell pair to
/// <see cref="ParseData"/> to fill a <see cref="DocInfo"/>.
/// </summary>
/// <param name="doc">Parsed HTML page to search for the property table.</param>
/// <param name="url">Not used by this method.</param>
/// <returns>
/// The <see cref="DocInfo"/> built from the matching rows, or <c>null</c> when the
/// page contains no <c>tableproperties</c> rows at all.
/// </returns>
public DocInfo ParseInfoDoc(HtmlDocument doc, string url)
{
    var rows = doc.DocumentNode.SelectNodes("//table[@class='tableproperties']//tr");
    if (rows == null)
    {
        return null;
    }

    DocInfo info = new DocInfo();
    foreach (var row in rows)
    {
        var cells = row.SelectNodes("./td");
        if (cells == null)
        {
            continue;
        }

        // Stop one cell short of the end: a header cell is only useful together with
        // the cell that follows it, and indexing cells[i + 1] on the last cell would
        // throw ArgumentOutOfRangeException.
        for (int i = 0; i < cells.Count - 1; i++)
        {
            // GetAttributeValue falls back to "" when there is no class attribute,
            // so a separate "has class" check is not needed.
            if (cells[i].GetAttributeValue("class", "") != "headerproperties")
            {
                continue;
            }

            if (cells[i + 1].GetAttributeValue("class", "") != "contentproperties")
            {
                continue;
            }

            // Lower-case with the invariant culture: the label is matched against
            // fixed literals in ParseData, so the result must not vary with the
            // machine's current culture.
            string label = cells[i].InnerText.Trim().ToLowerInvariant();
            string value = cells[i + 1].InnerText.Trim();
            ParseData(label, value, info);
        }
    }

    return info;
}
/// <summary>
/// Issues a single UPDATE against the <c>Documents</c> table for the row whose
/// <c>Id</c> equals <c>docInfo.Id</c>, writing the property fields carried by
/// <paramref name="docInfo"/> and setting <c>LastUpdate</c> to <c>Getdate()</c>.
/// </summary>
/// <param name="docInfo">Source of the row id and of every column value bound below.</param>
public void UpdateDocData(DocInfo docInfo)
{
    const string updateSql = @"Update Documents Set DateEnable = @DateEnable, DateNoEnable = @DateNoEnable, DatePost = @DatePost, DatePublish = @DatePublish, DateUsed = @DateUsed, DocNumber = @DocNumber, DocRef = @DocRef, Enable = @Enable, PartNoEnable = @PartNoEnable, ReasonNoEnable = @ReasonNoEnable, Source = @Source, TypeDoc = @TypeDoc, DocFrom = @DocFrom, Scope = @Scope, LastUpdate = Getdate() Where Id = @Id";

    // One parameter per placeholder in the statement; Id is bound as BigInt,
    // every other value as NVarChar.
    SqlParameter[] updateParams =
    {
        SqlDb.CreateParamteterSQL("Id", docInfo.Id, SqlDbType.BigInt),
        SqlDb.CreateParamteterSQL("DateEnable", docInfo.DateEnable, SqlDbType.NVarChar),
        SqlDb.CreateParamteterSQL("DateNoEnable", docInfo.DateNoEnable, SqlDbType.NVarChar),
        SqlDb.CreateParamteterSQL("DatePost", docInfo.DatePost, SqlDbType.NVarChar),
        SqlDb.CreateParamteterSQL("DatePublish", docInfo.DatePublish, SqlDbType.NVarChar),
        SqlDb.CreateParamteterSQL("DateUsed", docInfo.DateUsed, SqlDbType.NVarChar),
        SqlDb.CreateParamteterSQL("DocNumber", docInfo.DocNumber, SqlDbType.NVarChar),
        SqlDb.CreateParamteterSQL("DocRef", docInfo.DocRef, SqlDbType.NVarChar),
        SqlDb.CreateParamteterSQL("Enable", docInfo.Enable, SqlDbType.NVarChar),
        SqlDb.CreateParamteterSQL("PartNoEnable", docInfo.PartNoEnable, SqlDbType.NVarChar),
        SqlDb.CreateParamteterSQL("ReasonNoEnable", docInfo.ReasonNoEnable, SqlDbType.NVarChar),
        SqlDb.CreateParamteterSQL("Source", docInfo.Source, SqlDbType.NVarChar),
        SqlDb.CreateParamteterSQL("TypeDoc", docInfo.TypeDoc, SqlDbType.NVarChar),
        SqlDb.CreateParamteterSQL("DocFrom", docInfo.DocFrom, SqlDbType.NVarChar),
        SqlDb.CreateParamteterSQL("Scope", docInfo.Scope, SqlDbType.NVarChar),
    };

    this.sqlDb.RunQuery(updateSql, CommandType.Text, updateParams);
}
/// <summary>
/// Console entry point. Prints the list of available actions, reads the chosen
/// number from standard input and runs the matching action:
/// 1 = <c>Test.PushQueueAs</c>, 2 = <c>WorkerCrawler.StartConsume</c>,
/// 3 = <c>Test.PushQueueAsDocInfo</c>, 4 = <c>WorkerDocInfo.StartConsume</c>.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
private static void Main(string[] args)
{
    Console.WriteLine("1. PushLinkParse. 2. RunWorkerProcess. 3.PushDocInfo. 4.WorkerDocInfo");

    // TryParse rather than Convert.ToInt32: empty, non-numeric or out-of-range input
    // ends with a message instead of an unhandled exception.
    int choice;
    if (!int.TryParse(Console.ReadLine(), out choice))
    {
        Console.Error.WriteLine("Invalid option: expected a number from 1 to 4.");
        return;
    }

    switch (choice)
    {
        case 1:
            Test linkPusher = new Test();
            linkPusher.PushQueueAs();
            break;
        case 2:
            var crawlerWorker = new WorkerCrawler();
            crawlerWorker.StartConsume();
            break;
        case 3:
            Test docInfoPusher = new Test();
            docInfoPusher.PushQueueAsDocInfo();
            break;
        case 4:
            var docInfoWorker = new WorkerDocInfo();
            docInfoWorker.StartConsume();
            break;
        default:
            Console.Error.WriteLine("Unknown option: expected a number from 1 to 4.");
            break;
    }
}
/// <summary>
/// Stores <paramref name="valueProperties"/> in the <see cref="DocInfo"/> field that
/// corresponds to the label <paramref name="nameProperties"/>. The label is compared
/// exactly (case-sensitive) against a fixed set of names; a label that matches none
/// of them is written to <c>logInfo</c> and nothing is stored.
/// </summary>
/// <param name="nameProperties">Property label to look up.</param>
/// <param name="valueProperties">Text to store for that property.</param>
/// <param name="docinfo">Object that receives the value.</param>
public void ParseData(string nameProperties, string valueProperties, DocInfo docinfo)
{
    switch (nameProperties)
    {
        case "số, ký hiệu":
            docinfo.DocNumber = valueProperties;
            break;
        case "ngày ban hành":
            docinfo.DatePublish = valueProperties;
            break;
        case "loại văn bản":
            docinfo.TypeDoc = valueProperties;
            break;
        case "nguồn trích":
            docinfo.Source = valueProperties;
            break;
        case "phạm vi":
            docinfo.Scope = valueProperties;
            break;
        case "ngày đăng công báo":
            docinfo.DatePost = valueProperties;
            break;
        case "tình trạng hiệu lực":
            docinfo.Enable = valueProperties;
            break;
        case "ngày có hiệu lực":
            docinfo.DateEnable = valueProperties;
            break;
        case "ngày hết hiệu lực":
            docinfo.DateNoEnable = valueProperties;
            break;
        case "lí do hết hiệu lực":
            docinfo.ReasonNoEnable = valueProperties;
            break;
        case "phần hết hiệu lực":
            docinfo.PartNoEnable = valueProperties;
            break;
        case "ngày áp dụng":
            docinfo.DateUsed = valueProperties;
            break;
        case "văn bản dẫn chiếu":
            docinfo.DocRef = valueProperties;
            break;
        case "văn bản căn cứ":
            docinfo.DocFrom = valueProperties;
            break;
        case "văn bản bị thay thế":
            docinfo.DocIsReplate = valueProperties;
            break;
        case "văn bản bị sửa đổi bổ sung":
            docinfo.DocIsAlter = valueProperties;
            break;
        default:
            // Unrecognised label (including null): record it, store nothing.
            logInfo.Info(nameProperties);
            break;
    }
}