/// <summary>
/// Batch-mode entry point: processes one crawled file record that is still pending.
/// </summary>
/// <remarks>
/// Records whose <c>IsDeal</c> is not 0 are ignored. For a pending record:
/// if it has no file path, its row in crawler.dbo.fileinfo is updated to isDeal = 2;
/// if no fileinfo row matches the path, <c>updateData(2, web.Id)</c> is called
/// (presumably recording the same "cannot process" state - confirm against updateData);
/// if the resolved URL already exists in XueBa.dbo.WebPage or XueBa.dbo.c705questions
/// nothing is done; otherwise the file is run through the PDF converter or the
/// processor matching its URL, and <c>updateData(1, web.Id)</c> is called.
/// Side effects: assigns the shared <c>curUrl</c> field and increments
/// <c>dealNo</c> / <c>cannotDealNo</c>.
/// </remarks>
/// <param name="web">Record to process; <c>Filepath</c>, <c>IsDeal</c> and <c>Id</c> are read.</param>
public static void ProcessSingle(WebInDB web)
{
    const string shareMarker = "XueBaResources";
    const string shareRoot = @"\\10.2.28.78\XueBaResources";

    // Only pending records (IsDeal == 0) are handled; everything else is left untouched.
    if (web.IsDeal != 0)
    {
        return;
    }

    string serverIp = AppConfiguration.GetConfigValue("serverIp");
    string username = AppConfiguration.GetConfigValue("username");
    string password = AppConfiguration.GetConfigValue("password");

    if (web.Filepath == null)
    {
        // Nothing on disk to process: flag the record with isDeal = 2.
        cannotDealNo++;
        SqlConnection crawlerUpdateCon = Connection.instance(serverIp, "crawler", username, password);
        try
        {
            using (SqlCommand updateCmd = crawlerUpdateCon.CreateCommand())
            {
                updateCmd.CommandText = "update crawler.dbo.fileinfo set isDeal = @isDeal where id = @id";
                updateCmd.Parameters.AddWithValue("@isDeal", 2);
                updateCmd.Parameters.AddWithValue("@id", web.Id);
                updateCmd.ExecuteNonQuery();
            }
        }
        finally
        {
            crawlerUpdateCon.Close();
        }
        return;
    }

    string curPath = web.Filepath;

    // Step 1: resolve the URL the file was downloaded from.
    bool urlFound = false;
    SqlConnection crawlerCon = Connection.instance(serverIp, "crawler", username, password);
    try
    {
        using (SqlCommand urlCmd = crawlerCon.CreateCommand())
        {
            // Parameterized so that quotes in the path cannot break or alter the query.
            urlCmd.CommandText = "SELECT id, myUrl FROM crawler.dbo.fileinfo where filepath = @filepath";
            urlCmd.Parameters.AddWithValue("@filepath", curPath);
            using (SqlDataReader urlReader = urlCmd.ExecuteReader())
            {
                urlFound = urlReader.Read();
                if (urlFound)
                {
                    curUrl = urlReader[1].ToString();
                }
            }
        }
    }
    finally
    {
        crawlerCon.Close();
    }

    if (!urlFound)
    {
        updateData(2, web.Id);
        cannotDealNo++;
        return;
    }

    // Step 2: skip the file when its URL is already stored in either XueBa table.
    bool alreadyProcessed = false;
    SqlConnection xueBaCon = Connection.instance(serverIp, "XueBa", username, password);
    try
    {
        using (SqlCommand pageCmd = xueBaCon.CreateCommand())
        {
            pageCmd.CommandText = "SELECT * FROM XueBa.dbo.WebPage WHERE link = @link";
            pageCmd.Parameters.AddWithValue("@link", curUrl);
            using (SqlDataReader pageReader = pageCmd.ExecuteReader())
            {
                alreadyProcessed = pageReader.Read();
            }
        }

        if (!alreadyProcessed)
        {
            using (SqlCommand questionCmd = xueBaCon.CreateCommand())
            {
                // LIKE is kept from the original query; % and _ inside the URL still act as wildcards.
                questionCmd.CommandText = "SELECT * FROM XueBa.dbo.c705questions WHERE link like @link";
                questionCmd.Parameters.AddWithValue("@link", curUrl);
                using (SqlDataReader questionReader = questionCmd.ExecuteReader())
                {
                    alreadyProcessed = questionReader.Read();
                }
            }
        }
    }
    finally
    {
        // Closed on every path; previously the connections leaked when processing went ahead.
        xueBaCon.Close();
    }

    if (alreadyProcessed)
    {
        return;
    }

    dealNo++;

    // Step 3: re-root the stored path onto the network share.
    // Throws IndexOutOfRangeException when the path does not contain the share marker.
    curPath = Regex.Split(curPath, shareMarker)[1];
    curPath = shareRoot + curPath;
    FileInfo temp = new FileInfo(curPath);

    if (temp.Extension.Equals(".pdf"))
    {
        // Convert the PDF first, then process the generated "<name>.html" in the share root.
        PipeLine.OtherToHtml.IcDocument document = PipeLine.OtherToHtml.FactoryDocument.GetDocoment(shareRoot, curPath);
        document.TransformDocument();
        ProcessProcedure.Processpdf(shareRoot + @"\" + temp.Name + ".html", true);
    }
    else if (ProcessProcedure.baiduzhidao(curUrl))
    {
        ProcessProcedure.baiduzhidaoprocess(curPath, curUrl);
    }
    else if (ProcessProcedure.cnblogs(curUrl))
    {
        ProcessProcedure.cnblogsprocess(curPath, curUrl);
    }
    else if (ProcessProcedure.dewen(curUrl))
    {
        ProcessProcedure.dewenprocess(curPath, curUrl);
    }
    else if (ProcessProcedure.stackoverflow(curUrl))
    {
        // process stackoverflow question answer pair
        ProcessProcedure.stackoverflowprocess(curPath, curUrl);
    }
    else if (ProcessProcedure.sosowenwen(curUrl))
    {
        // process sosowenwen question answer pair
        ProcessProcedure.sosowenwenprocess(curPath, curUrl);
    }
    else
    {
        // No site-specific processor matched: fall back to the generic one.
        ProcessProcedure.Process(curPath, true);
    }

    updateData(1, web.Id);
}
/// <summary>
/// Click handler that processes the single file currently selected in <c>filelistbox</c>.
/// </summary>
/// <remarks>
/// Shows an error and returns when nothing is selected. Otherwise the file's URL is looked
/// up in crawler.dbo.fileinfo; when no row matches, <c>updateData(2, path)</c> is called and
/// the handler returns. When the URL already exists in XueBa.dbo.WebPage or
/// XueBa.dbo.c705questions the user is told so and the file is still run through its
/// processor, with <c>needtoInsert</c> = false passed to the processors that accept it.
/// On success of a not-yet-stored file, <c>dealNo</c> is incremented and
/// <c>updateData(1, path)</c> is called. A failed Word conversion is reported to the user
/// and the file is not flagged as processed.
/// Side effect: assigns the shared <c>curUrl</c> field.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Process_Sig_Click(object sender, EventArgs e)
{
    const string shareMarker = "XueBaResources";
    const string shareRoot = @"\\10.2.28.78\XueBaResources";

    if (filelistbox.SelectedItem == null)
    {
        MessageBox.Show("Please choose a file first!", "ERROR");
        return;
    }

    // Captured once so the same path is used for the lookup and for the final status update.
    string selectedPath = this.filelistbox.SelectedItem.ToString();

    string serverIp = AppConfiguration.GetConfigValue("serverIp");
    string username = AppConfiguration.GetConfigValue("username");
    string password = AppConfiguration.GetConfigValue("password");

    // Step 1: resolve the URL the file was downloaded from.
    bool urlFound = false;
    SqlConnection crawlerCon = Connection.instance(serverIp, "crawler", username, password);
    try
    {
        using (SqlCommand urlCmd = crawlerCon.CreateCommand())
        {
            // Parameterized so that quotes in the path cannot break or alter the query.
            urlCmd.CommandText = "SELECT id, myUrl FROM crawler.dbo.fileinfo where filepath = @filepath";
            urlCmd.Parameters.AddWithValue("@filepath", selectedPath);
            using (SqlDataReader urlReader = urlCmd.ExecuteReader())
            {
                urlFound = urlReader.Read();
                if (urlFound)
                {
                    curUrl = urlReader[1].ToString();
                }
            }
        }
    }
    finally
    {
        crawlerCon.Close();
    }

    if (!urlFound)
    {
        updateData(2, selectedPath);
        MessageBox.Show("related url can not be found. ");
        return;
    }

    // Step 2: find out whether the URL is already stored in either XueBa table.
    bool alreadyProcessed = false;
    SqlConnection xueBaCon = Connection.instance(serverIp, "XueBa", username, password);
    try
    {
        using (SqlCommand pageCmd = xueBaCon.CreateCommand())
        {
            pageCmd.CommandText = "SELECT * FROM XueBa.dbo.WebPage WHERE link = @link";
            pageCmd.Parameters.AddWithValue("@link", curUrl);
            using (SqlDataReader pageReader = pageCmd.ExecuteReader())
            {
                alreadyProcessed = pageReader.Read();
            }
        }

        if (!alreadyProcessed)
        {
            using (SqlCommand questionCmd = xueBaCon.CreateCommand())
            {
                // LIKE is kept from the original query; % and _ inside the URL still act as wildcards.
                questionCmd.CommandText = "SELECT * FROM XueBa.dbo.c705questions WHERE link like @link";
                questionCmd.Parameters.AddWithValue("@link", curUrl);
                using (SqlDataReader questionReader = questionCmd.ExecuteReader())
                {
                    alreadyProcessed = questionReader.Read();
                }
            }
        }
    }
    finally
    {
        xueBaCon.Close();
    }

    bool needtoInsert = true;
    if (alreadyProcessed)
    {
        MessageBox.Show("This page has been processed!");
        needtoInsert = false;
    }

    // Step 3: re-root the stored path onto the network share.
    // Throws IndexOutOfRangeException when the path does not contain the share marker.
    string curPath = Regex.Split(selectedPath, shareMarker)[1];
    curPath = shareRoot + curPath;
    FileInfo temp = new FileInfo(curPath);

    if (temp.Extension.Equals(".pdf"))
    {
        // Convert the PDF first, then process the generated "<name>.html" in the share root.
        PipeLine.OtherToHtml.IcDocument document = PipeLine.OtherToHtml.FactoryDocument.GetDocoment(shareRoot, curPath);
        document.TransformDocument();
        ProcessProcedure.Processpdf(shareRoot + @"\" + temp.Name + ".html", needtoInsert);
    }
    else if (temp.Extension.Equals(".doc"))
    {
        try
        {
            object missing = Type.Missing;
            string docText;
            Microsoft.Office.Interop.Word.Application app = new Microsoft.Office.Interop.Word.Application();
            try
            {
                object file = curPath;
                Microsoft.Office.Interop.Word.Document doc = app.Documents.Open(
                    ref file, ref missing, ref missing, ref missing,
                    ref missing, ref missing, ref missing, ref missing,
                    ref missing, ref missing, ref missing, ref missing,
                    ref missing, ref missing, ref missing, ref missing);
                docText = doc.Content.Text.Trim();
            }
            finally
            {
                // Always shut Word down, even when opening or reading the document fails,
                // so no orphaned Word process is left behind.
                app.Quit(ref missing, ref missing, ref missing);
            }

            ProcessProcedure.Processwrd(docText, false);
        }
        catch (Exception ex)
        {
            // Previously the failure was only written to the console and the file was
            // still reported and recorded as processed.
            Console.WriteLine(ex.Message);
            MessageBox.Show("Failed to process the Word document: " + ex.Message, "ERROR");
            return;
        }
    }
    else if (ProcessProcedure.baiduzhidao(curUrl))
    {
        ProcessProcedure.baiduzhidaoprocess(curPath, curUrl);
    }
    else if (ProcessProcedure.cnblogs(curUrl))
    {
        ProcessProcedure.cnblogsprocess(curPath, curUrl);
    }
    else if (ProcessProcedure.dewen(curUrl))
    {
        ProcessProcedure.dewenprocess(curPath, curUrl);
    }
    else if (ProcessProcedure.stackoverflow(curUrl))
    {
        // process stackoverflow question answer pair
        ProcessProcedure.stackoverflowprocess(curPath, curUrl);
    }
    else if (ProcessProcedure.sosowenwen(curUrl))
    {
        // process sosowenwen question answer pair
        ProcessProcedure.sosowenwenprocess(curPath, curUrl);
    }
    else
    {
        // No site-specific processor matched: fall back to the generic one.
        ProcessProcedure.Process(curPath, needtoInsert);
    }

    if (needtoInsert)
    {
        dealNo++;
        updateData(1, selectedPath);
    }

    MessageBox.Show("Processing OK!");
}