Beispiel #1
0
 /// <summary>
 /// Parses a result-list page: finds every "/view/{id}.html" link in the markup and,
 /// for each id that is not already recorded according to <c>MSSQL.IsExistDoc</c>,
 /// hands a new <c>DocInfo</c> to <c>FInfoExtract.ExtractInfo</c>.
 /// </summary>
 /// <param name="html">Markup of the result-list page. Null or empty input is ignored.</param>
 /// <remarks>
 /// This method is best-effort and never throws: a failure while handling one link is
 /// written to the trace output and the remaining links are still processed.
 /// </remarks>
 static private void ExtractPage(string html)
 {
     // Nothing to scan. Previously a null page was only survived via the catch-all.
     if (string.IsNullOrEmpty(html))
     {
         return;
     }

     // The dot before "html" is escaped so that only a literal ".html" suffix matches,
     // and the id is captured directly instead of being cut out with string replaces.
     // The static Regex.Matches overload reuses the framework's pattern cache, which
     // avoids rebuilding a compiled regex on every call.
     const string viewLinkPattern = @"/view/(.*?)\.html";

     try
     {
         foreach (Match match in Regex.Matches(html, viewLinkPattern))
         {
             string docId = match.Groups[1].Value;
             try
             {
                 DocInfo fi = new DocInfo();
                 fi.DownAddress = docId;
                 if (!string.IsNullOrEmpty(fi.DownAddress) && !MSSQL.IsExistDoc(fi))
                 {
                     FInfoExtract.ExtractInfo(fi);
                 }
             }
             catch (Exception ex)
             {
                 // Keep going with the next link, but leave a trace of what went wrong.
                 System.Diagnostics.Trace.TraceError("ExtractPage: failed to process document '" + docId + "': " + ex);
             }
         }
     }
     catch (Exception ex)
     {
         // Preserve the original no-throw contract for callers.
         System.Diagnostics.Trace.TraceError("ExtractPage: failed to scan result page: " + ex);
     }
 }
Beispiel #2
0
        /// <summary>
        /// Checks whether a document is already recorded by running the
        /// <c>procSelectInfo</c> stored procedure with the document's download address.
        /// </summary>
        /// <param name="doc">Document whose <c>DownAddress</c> is passed as <c>@DownAddress</c>.</param>
        /// <returns>
        /// <c>true</c> when the procedure's scalar result is non-null; <c>false</c> when it is
        /// null or when any exception occurs while opening the connection or querying.
        /// </returns>
        public static bool IsExistDoc(DocInfo doc)
        {
            GetConnStr();
            // NOTE(review): this switches off WinForms cross-thread call checking for the whole
            // process and is unrelated to the query. It is kept because callers may rely on the
            // side effect; consider moving it to application start-up.
            Control.CheckForIllegalCrossThreadCalls = false;

            // using-blocks dispose both the connection and the command on every path.
            using (SqlConnection conn = new SqlConnection(strConn))
            {
                try
                {
                    conn.Open();
                    using (SqlCommand cmd = new SqlCommand("procSelectInfo", conn))
                    {
                        cmd.CommandType = CommandType.StoredProcedure;
                        cmd.Parameters.Add("@DownAddress", SqlDbType.NVarChar).Value = doc.DownAddress;
                        return cmd.ExecuteScalar() != null;
                    }
                }
                catch (Exception ex)
                {
                    // Same result as before (failure reports "does not exist"), but the cause
                    // is now written to the trace output instead of being dropped.
                    System.Diagnostics.Trace.TraceError("IsExistDoc: lookup failed: " + ex);
                    return false;
                }
            }
        }
Beispiel #3
0
        /// <summary>
        /// Parses a single document page: fetches the page for <paramref name="fi"/> through
        /// <c>GetWebHtml</c>, fills in the fields extracted from it and stores the record
        /// through <c>MSSQL.AddExtractInfo</c>.
        /// </summary>
        /// <param name="fi">
        /// Document being processed. Its <c>DownAddress</c> is used to build the page URL;
        /// the extracted values are written back onto this instance.
        /// </param>
        /// <returns>
        /// <c>true</c> only when <c>MSSQL.AddExtractInfo</c> reports success; <c>false</c> when
        /// the page, the info section or the document name is empty, when the category
        /// parameter is not present in <c>baidu.HT</c>, or when the insert fails.
        /// </returns>
        static public bool ExtractInfo(DocInfo fi)
        {
            string pageUrl  = baidu.wi.webUrl + "view/" + fi.DownAddress + ".html";
            string pageHtml = GetWebHtml(pageUrl);
            if (string.IsNullOrEmpty(pageHtml))
            {
                return false;
            }

            // Keywords come from the whole page. DocIntro is not populated here.
            fi.DocKeyWord = ExtractKWord(pageHtml);

            // The remaining fields are read from the section returned by Seprate.
            string infoSection = Seprate(pageHtml);
            if (string.IsNullOrEmpty(infoSection))
            {
                return false;
            }

            fi.DocName = ExtractFileName(infoSection);
            if (string.IsNullOrEmpty(fi.DocName))
            {
                return false;
            }

            fi.param = ExtractParam(infoSection);
            if (!baidu.HT.ContainsKey(fi.param))
            {
                // Unknown category parameter: alert the operator and give up on this document.
                string warning = "警告:标识为" + fi.DownAddress + "的文档类别参数" + fi.param + "无法查询到!请马上查看补充!   " + string.Format("{0:G}", DateTime.Now);
                MessageBox.Show(warning);
                return false;
            }

            fi.ClassID = (string)baidu.HT[fi.param];
            fi.Money   = ExtractMoney(infoSection);
            fi.DocType = ExtractType(infoSection);
            // DocSize is not populated here.

            if (!MSSQL.AddExtractInfo(fi))
            {
                return false;
            }

            // Count only documents that were actually stored.
            baidu.filecount++;
            return true;
        }
Beispiel #4
0
        /// <summary>
        /// Writes a document record by running the <c>procAddExtractInfo</c> stored procedure
        /// with the fields of <paramref name="doc"/>.
        /// </summary>
        /// <param name="doc">Document whose fields are passed as the procedure's parameters.</param>
        /// <returns>
        /// <c>true</c> when <c>ExecuteNonQuery</c> reports more than zero affected rows;
        /// <c>false</c> otherwise, including when any exception occurs while opening the
        /// connection or executing the procedure.
        /// </returns>
        public static bool AddExtractInfo(DocInfo doc)
        {
            GetConnStr();
            // NOTE(review): this switches off WinForms cross-thread call checking for the whole
            // process and is unrelated to the insert. It is kept because callers may rely on the
            // side effect; consider moving it to application start-up.
            Control.CheckForIllegalCrossThreadCalls = false;

            // using-blocks dispose both the connection and the command on every path.
            using (SqlConnection conn = new SqlConnection(strConn))
            {
                try
                {
                    conn.Open();
                    using (SqlCommand cmd = new SqlCommand("procAddExtractInfo", conn))
                    {
                        cmd.CommandType = CommandType.StoredProcedure;
                        // NOTE(review): a null property value is passed through unchanged, as before.
                        // Whether the procedure declares defaults for such parameters is not visible
                        // here - confirm before switching to DBNull.Value.
                        cmd.Parameters.Add("@ClassID", SqlDbType.NVarChar).Value     = doc.ClassID;
                        cmd.Parameters.Add("@DocName", SqlDbType.NVarChar).Value     = doc.DocName;
                        cmd.Parameters.Add("@DocType", SqlDbType.NVarChar).Value     = doc.DocType;
                        cmd.Parameters.Add("@DownAddress", SqlDbType.NVarChar).Value = doc.DownAddress;
                        cmd.Parameters.Add("@DocIntro", SqlDbType.Text).Value        = doc.DocIntro;
                        cmd.Parameters.Add("@DocKeyWord", SqlDbType.NVarChar).Value  = doc.DocKeyWord;
                        cmd.Parameters.Add("@DocSize", SqlDbType.BigInt).Value       = doc.DocSize;
                        cmd.Parameters.Add("@Money", SqlDbType.Int).Value            = doc.Money;

                        // NOTE(review): relies on the procedure reporting affected rows - verify
                        // that it does not suppress row counts.
                        return cmd.ExecuteNonQuery() > 0;
                    }
                }
                catch (Exception ex)
                {
                    // Same result as before (failure reports false), but the cause is now
                    // written to the trace output instead of being dropped.
                    System.Diagnostics.Trace.TraceError("AddExtractInfo: insert failed: " + ex);
                    return false;
                }
            }
        }