/// <summary>
        /// Form-load handler. Reads every row of crawler.dbo.fileinfo (ordered by filepath),
        /// stores each row as a <see cref="WebInDB"/> in _web, adds its file path to the
        /// file list box, and shows the resulting item count in the total-file box.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        private void MainWindow_Load(object sender, EventArgs e)
        {
            // For test and presentation:
            // add every .htm/.html file found in the crawler download folder.

            /*
             * Regex re = new Regex(@"(?:\.htm|\.html)$", RegexOptions.IgnoreCase);
             *
             * DirectoryInfo TheFolder=new DirectoryInfo(_filepath);
             * foreach (FileInfo NextFile in TheFolder.GetFiles())
             * {
             *  if (re.IsMatch(NextFile.Name))
             *  {
             *      this.filelistbox.Items.Add(NextFile.Name);
             *  }
             * }
             *
             * this.totalfilebox.Text = this.filelistbox.Items.Count.ToString();
             * */

            // DB OPERATION
            // Fetch the web pages that the crawler stored in the database and, as above,
            // put them into the list box.
            // The Process method has to change accordingly: there is no local file, so the
            // database value should be assigned directly to _rawdata of the Denoising instance.
            // Do not delete the original code above; keep it commented out.
            const string selectSQL = "SELECT id, myUrl, filepath, isDeal FROM crawler.dbo.fileinfo ORDER BY filepath";

            SqlConnection con = Connection.instance(
                AppConfiguration.GetConfigValue("serverIp"),
                "crawler",
                AppConfiguration.GetConfigValue("username"),
                AppConfiguration.GetConfigValue("password"));
            try
            {
                using (SqlCommand command = con.CreateCommand())
                {
                    command.CommandText = selectSQL;
                    using (SqlDataReader reader = command.ExecuteReader())
                    {
                        while (reader.Read())
                        {
                            // Columns: 0 = id, 1 = myUrl, 2 = filepath, 3 = isDeal.
                            // A NULL filepath is mapped to null (ProcessSingle has a branch
                            // for web.Filepath == null) instead of failing the string cast.
                            string filepath = reader.IsDBNull(2) ? null : (string)reader[2];

                            WebInDB widb = new WebInDB((int)reader[0], (string)reader[1], filepath, (int)reader[3]);
                            _web.Add(widb);

                            // The list box rejects null items; an empty entry keeps one
                            // list item per _web element.
                            this.filelistbox.Items.Add(filepath ?? string.Empty);
                        }
                    }
                }
            }
            finally
            {
                // Close even when the query or a cast throws.
                con.Close();
            }

            this.totalfilebox.Text = this.filelistbox.Items.Count.ToString();
        }
        /// <summary>
        /// Processes one selected crawled file.
        /// <para>
        /// Only entries with <c>IsDeal == 0</c> are touched. An entry without a file path, or
        /// whose file path has no row in crawler.dbo.fileinfo, is flagged with isDeal = 2 and
        /// counted in cannotDealNo. An entry whose URL is already present in XueBa.dbo.WebPage
        /// or XueBa.dbo.c705questions is skipped. Otherwise the file is dispatched to the
        /// matching ProcessProcedure routine and the entry is flagged with isDeal = 1.
        /// </para>
        /// </summary>
        /// <param name="web">The crawler record to process.</param>
        public static void ProcessSingle(WebInDB web)
        {
            // Entries whose isDeal flag is non-zero are left untouched.
            if (web.IsDeal != 0)
            {
                return;
            }

            if (web.Filepath == null)
            {
                // No file recorded for this entry: flag it with isDeal = 2.
                cannotDealNo++;
                SqlConnection con = GetPipelineConnection("crawler");
                try
                {
                    using (SqlCommand cmd = con.CreateCommand())
                    {
                        cmd.CommandText = "update crawler.dbo.fileinfo set isDeal = @isDeal where id = @id";
                        cmd.Parameters.AddWithValue("@isDeal", 2);
                        cmd.Parameters.AddWithValue("@id", web.Id);
                        cmd.ExecuteNonQuery();
                    }
                }
                finally
                {
                    con.Close();
                }
                return;
            }

            string curPath = web.Filepath;

            // Resolve the page URL from the stored file path.
            string url = FindUrlByFilepath(curPath);
            if (url == null)
            {
                updateData(2, web.Id);
                cannotDealNo++;
                return;
            }
            curUrl = url;

            // Skip pages that are already stored. The two checks run one after the other so
            // that only one reader is open at a time.
            if (LinkExistsIn("SELECT TOP 1 1 FROM XueBa.dbo.WebPage WHERE link = @link", url)
                || LinkExistsIn("SELECT TOP 1 1 FROM XueBa.dbo.c705questions WHERE link like @link", url))
            {
                return;
            }

            dealNo++;

            // Re-root the stored path onto the shared resource folder.
            // NOTE(review): assumes the stored path contains "XueBaResources"; if it does not,
            // the [1] index below throws.
            const string resourceRoot = @"\\10.2.28.78\XueBaResources";
            curPath = Regex.Split(curPath, "XueBaResources")[1];
            curPath = resourceRoot + curPath;

            FileInfo temp = new FileInfo(curPath);
            if (temp.Extension.Equals(".pdf"))
            {
                // Convert the document first, then process the generated
                // "<file name>.html" under the resource root.
                PipeLine.OtherToHtml.IcDocument document = PipeLine.OtherToHtml.FactoryDocument.GetDocoment(resourceRoot, curPath);
                document.TransformDocument();
                ProcessProcedure.Processpdf(resourceRoot + @"\" + temp.Name + ".html", true);
            }
            else if (ProcessProcedure.baiduzhidao(curUrl))
            {
                ProcessProcedure.baiduzhidaoprocess(curPath, curUrl);
            }
            else if (ProcessProcedure.cnblogs(curUrl))
            {
                ProcessProcedure.cnblogsprocess(curPath, curUrl);
            }
            else if (ProcessProcedure.dewen(curUrl))
            {
                ProcessProcedure.dewenprocess(curPath, curUrl);
            }
            else if (ProcessProcedure.stackoverflow(curUrl))
            {
                // Process a stackoverflow question/answer pair.
                ProcessProcedure.stackoverflowprocess(curPath, curUrl);
            }
            else if (ProcessProcedure.sosowenwen(curUrl))
            {
                // Process a sosowenwen question/answer pair.
                ProcessProcedure.sosowenwenprocess(curPath, curUrl);
            }
            else
            {
                ProcessProcedure.Process(curPath, true);
            }

            updateData(1, web.Id);
        }

        /// <summary>
        /// Obtains a connection to <paramref name="database"/> through Connection.instance,
        /// using the configured serverIp, username and password.
        /// </summary>
        private static SqlConnection GetPipelineConnection(string database)
        {
            return Connection.instance(
                AppConfiguration.GetConfigValue("serverIp"),
                database,
                AppConfiguration.GetConfigValue("username"),
                AppConfiguration.GetConfigValue("password"));
        }

        /// <summary>
        /// Returns the myUrl value of the first crawler.dbo.fileinfo row whose filepath equals
        /// <paramref name="filepath"/>, or null when there is no such row.
        /// </summary>
        private static string FindUrlByFilepath(string filepath)
        {
            SqlConnection con = GetPipelineConnection("crawler");
            try
            {
                using (SqlCommand cmd = con.CreateCommand())
                {
                    // The value is passed as a parameter so that quotes in a path cannot
                    // break or alter the statement.
                    cmd.CommandText = "SELECT id, myUrl FROM crawler.dbo.fileinfo where filepath = @filepath";
                    cmd.Parameters.AddWithValue("@filepath", filepath);
                    using (SqlDataReader reader = cmd.ExecuteReader())
                    {
                        return reader.Read() ? reader[1].ToString() : null;
                    }
                }
            }
            finally
            {
                con.Close();
            }
        }

        /// <summary>
        /// Runs <paramref name="sql"/> against the XueBa database with <paramref name="link"/>
        /// bound to @link and reports whether it returns at least one row.
        /// </summary>
        private static bool LinkExistsIn(string sql, string link)
        {
            SqlConnection con = GetPipelineConnection("XueBa");
            try
            {
                using (SqlCommand cmd = con.CreateCommand())
                {
                    cmd.CommandText = sql;
                    cmd.Parameters.AddWithValue("@link", link);
                    using (SqlDataReader reader = cmd.ExecuteReader())
                    {
                        return reader.Read();
                    }
                }
            }
            finally
            {
                con.Close();
            }
        }