/// <summary>
/// Inserts a row for the given page into the UrlData table, unless a row with the
/// same url is already present.
/// </summary>
/// <param name="oUrlElem">Page element whose szUrl and szTitle values are written.</param>
/// <remarks>
/// Uses the oSqlCon connection, which must already be open. Both statements are
/// parameterized: the url and title originate from crawled pages, so they must never
/// be spliced into the SQL text (a single quote in a title would break the statement
/// or allow SQL injection).
/// </remarks>
public void Write2DB(UrlElem oUrlElem)
        {
            // Existence check: any returned row means the url has been stored before.
            using (SqlCommand oSqlChk = new SqlCommand("select * from UrlData where url = @url", oSqlCon))
            {
                // A null parameter value would not be sent to the server at all; fall back to
                // the empty string, which is what the former string concatenation produced.
                oSqlChk.Parameters.AddWithValue("@url", oUrlElem.szUrl ?? string.Empty);

                // The reader must be released before the next command can run on oSqlCon;
                // the using block guarantees that even when Read() throws.
                using (SqlDataReader oDataReader = oSqlChk.ExecuteReader())
                {
                    if (oDataReader.Read())
                    {
                        return;
                    }
                }
            }

            using (SqlCommand oSqlCmd = new SqlCommand("insert UrlData(url,title,html) values(@url,@title,@html)", oSqlCon))
            {
                oSqlCmd.Parameters.AddWithValue("@url", oUrlElem.szUrl ?? string.Empty);
                oSqlCmd.Parameters.AddWithValue("@title", oUrlElem.szTitle ?? string.Empty);

                // The html column is written as a single space; storing oUrlElem.szText
                // was already disabled in the previous version of this method.
                oSqlCmd.Parameters.AddWithValue("@html", " ");
                oSqlCmd.ExecuteNonQuery();
            }
        }
 /// <summary>
 /// Stores the given page in the UrlData table if no row with the same url exists yet.
 /// </summary>
 /// <param name="oUrlElem">Page element providing the szUrl and szTitle values to store.</param>
 /// <remarks>
 /// Runs on the oSqlCon connection, which is expected to be open. The url and title come
 /// from crawled pages, so they are passed as SQL parameters instead of being concatenated
 /// into the statement text (prevents SQL injection and failures on embedded quotes).
 /// </remarks>
 public void Write2DB(UrlElem oUrlElem)
 {
     // Null values are mapped to the empty string: AddWithValue would otherwise omit the
     // parameter, whereas the old concatenated SQL compared against / inserted ''.
     object oUrl = oUrlElem.szUrl ?? string.Empty;
     object oTitle = oUrlElem.szTitle ?? string.Empty;

     bool bExists;
     using (SqlCommand oSqlChk = new SqlCommand("select * from UrlData where url = @url", oSqlCon))
     {
         oSqlChk.Parameters.AddWithValue("@url", oUrl);

         // Dispose the reader before issuing the insert on the same connection.
         using (SqlDataReader oDataReader = oSqlChk.ExecuteReader())
         {
             bExists = oDataReader.Read();
         }
     }

     if (bExists)
     {
         // Already stored: nothing to do.
         return;
     }

     using (SqlCommand oSqlCmd = new SqlCommand("insert UrlData(url,title,html) values(@url,@title,@html)", oSqlCon))
     {
         oSqlCmd.Parameters.AddWithValue("@url", oUrl);
         oSqlCmd.Parameters.AddWithValue("@title", oTitle);

         // The html column receives a single space; writing oUrlElem.szText was
         // already switched off in the previous version of this method.
         oSqlCmd.Parameters.AddWithValue("@html", " ");
         oSqlCmd.ExecuteNonQuery();
     }
 }
        /// <summary>
        /// Extracts the title and the hyperlinks from a downloaded page. The page itself is
        /// queued on s_oUrlElemQue (and added to s_oUrlElemList); every link that is not yet
        /// in s_oURLSet is recorded there and queued on s_oURLQue.
        /// </summary>
        /// <param name="szPageUrl">Absolute URL of the page; used as the base for relative links.</param>
        /// <param name="szContent">HTML text of the page.</param>
        private void AnalyzeContent(string szPageUrl, string szContent)
        {
            const string szLinkPattern = "<a(\\s+.+?\\s+|\\s+)href\\s*=\\s*\"?(.*?)[\"|>]";
            const string szTitlePattern = "<title>((.|\\s)*?)</title>";

            Uri oUri = new Uri(szPageUrl);

            // The static Regex methods keep the compiled pattern in the Regex cache, so the
            // two expressions are not rebuilt and recompiled for every analyzed page.
            MatchCollection oMatchCol = Regex.Matches(szContent, szLinkPattern, RegexOptions.IgnoreCase | RegexOptions.Compiled);
            Match oMatchTitle = Regex.Match(szContent, szTitlePattern, RegexOptions.Compiled | RegexOptions.IgnoreCase);

            // Group 1 is empty when the page has no <title> element.
            string title = oMatchTitle.Groups[1].Value.Trim();

            // Hand the page over to the consumer of s_oUrlElemQue. Line breaks are flattened
            // and single quotes replaced by double quotes in the stored text.
            // NOTE(review): the two semaphores presumably bound the queue (free slots / filled slots) - confirm.
            UrlElem urlElem = new UrlElem(title, szPageUrl, szContent.Replace("\r\n", " ").Replace('\'', '\"'));
            s_oElemEnQueSema.WaitOne();
            s_oUrlElemQue.Enqueue(urlElem);
            s_oElemDeQueSema.Release();
            s_oUrlElemList.Add(urlElem);

            // "scheme://host:port" of the current page. The scheme is taken from the page
            // instead of a hard-coded "http://", so links found on https pages stay https.
            string szAuthority = oUri.Scheme + "://" + oUri.Host + ":" + oUri.Port;

            // Directory of the current page (up to and including the last '/'). The query
            // string and fragment are stripped first so that a '/' inside them cannot be
            // mistaken for a path separator.
            string szPagePath = oUri.GetLeftPart(UriPartial.Path);
            int lastSlash = szPagePath.LastIndexOf('/');
            string szBaseDir = lastSlash == -1
                ? szAuthority + "/"
                : szPagePath.Substring(0, lastSlash + 1);

            foreach (Match oMatch in oMatchCol)
            {
                string link = oMatch.Groups[2].Value.Trim();

                if (link.Length < 1)
                {
                    continue;
                }

                // Anchor inside the current page.
                if (link.StartsWith("#", StringComparison.Ordinal))
                {
                    continue;
                }

                // E-mail address (scheme names are case-insensitive).
                if (link.IndexOf("mailto:", StringComparison.OrdinalIgnoreCase) != -1)
                {
                    continue;
                }

                // Script pseudo-link; ordinal comparison avoids culture-dependent lower-casing.
                if (link.IndexOf("javascript", StringComparison.OrdinalIgnoreCase) != -1)
                {
                    continue;
                }

                if (link.IndexOf("://", StringComparison.Ordinal) == -1)
                {
                    if (link.StartsWith("//", StringComparison.Ordinal))
                    {
                        // Protocol-relative link: it already names its host, only the scheme is missing.
                        link = oUri.Scheme + ":" + link;
                    }
                    else if (link.StartsWith("/", StringComparison.Ordinal))
                    {
                        // Root-relative link: convert to an absolute URL on the page's host.
                        link = szAuthority + link;
                    }
                    else
                    {
                        // Relative link: resolve against the directory of the current page.
                        link = szBaseDir + link;
                    }
                }

                // Drop the fragment; it does not identify a different document.
                int index = link.IndexOf('#');
                if (index != -1)
                {
                    link = link.Substring(0, index);
                }

                // FormatURL returns null for links that must not be crawled.
                link = FormatURL(link);
                if (link == null)
                {
                    continue;
                }

                // Skip links that were seen before.
                if (s_oURLSet.Contains(link))
                {
                    continue;
                }
                s_oURLSet.Add(link);

                // Queue the link for crawling.
                s_oUrlEnQueSema.WaitOne();
                s_oURLQue.Enqueue(link);
                s_oUrlDeQueSema.Release();
            }
        }
        /// <summary>
        /// Parses a downloaded page: reads its title, queues the page on s_oUrlElemQue
        /// (also adding it to s_oUrlElemList), then turns every hyperlink into an absolute
        /// URL and queues the ones not yet contained in s_oURLSet on s_oURLQue.
        /// </summary>
        /// <param name="szPageUrl">Absolute URL of the page; relative links are resolved against it.</param>
        /// <param name="szContent">HTML text of the page.</param>
        private void AnalyzeContent(string szPageUrl, string szContent)
        {
            const string    szLinkPattern  = "<a(\\s+.+?\\s+|\\s+)href\\s*=\\s*\"?(.*?)[\"|>]";
            const string    szTitlePattern = "<title>((.|\\s)*?)</title>";

            Uri             oUri        = new Uri(szPageUrl);

            // Static Regex calls reuse the cached compiled expression instead of constructing
            // and compiling two new Regex objects for every page.
            MatchCollection oMatchCol   = Regex.Matches(szContent, szLinkPattern, RegexOptions.IgnoreCase | RegexOptions.Compiled);
            Match           oMatchTitle = Regex.Match(szContent, szTitlePattern, RegexOptions.Compiled | RegexOptions.IgnoreCase);

            // Empty when no <title> element was found.
            string title = oMatchTitle.Groups[1].Value.Trim();

            // Publish the page: "\r\n" becomes a space and ' becomes " in the stored text.
            // NOTE(review): WaitOne/Release look like a bounded producer/consumer hand-off - confirm.
            UrlElem urlElem = new UrlElem(title, szPageUrl, szContent.Replace("\r\n", " ").Replace('\'', '\"'));

            s_oElemEnQueSema.WaitOne();
            s_oUrlElemQue.Enqueue(urlElem);
            s_oElemDeQueSema.Release();
            s_oUrlElemList.Add(urlElem);

            // Prefix for root-relative links. Uses the page's own scheme rather than a fixed
            // "http://", which produced http://host:443/... for links on https pages.
            string szSiteRoot = oUri.Scheme + "://" + oUri.Host + ":" + oUri.Port;

            // Prefix for plain relative links: the page URL without query/fragment, cut after
            // its last '/'. Cutting the full AbsoluteUri would break on a '/' in the query string.
            string szPageNoQuery = oUri.GetLeftPart(UriPartial.Path);
            int    cut           = szPageNoQuery.LastIndexOf('/');
            string szPageDir;
            if (cut == -1)
            {
                szPageDir = szSiteRoot + "/";
            }
            else
            {
                szPageDir = szPageNoQuery.Substring(0, cut + 1);
            }

            foreach (Match omatch in oMatchCol)
            {
                string link = omatch.Groups[2].Value.Trim();

                // Ignore empty hrefs and in-page anchors.
                if (link.Length < 1 || link.StartsWith("#", StringComparison.Ordinal))
                {
                    continue;
                }

                // Ignore e-mail addresses and script pseudo-links. Both checks are ordinal and
                // case-insensitive, so "MAILTO:" is caught and the result is culture-independent.
                if (link.IndexOf("mailto:", StringComparison.OrdinalIgnoreCase) != -1 ||
                    link.IndexOf("javascript", StringComparison.OrdinalIgnoreCase) != -1)
                {
                    continue;
                }

                bool bIsAbsolute = link.IndexOf("://", StringComparison.Ordinal) != -1;
                if (!bIsAbsolute)
                {
                    if (link.StartsWith("//", StringComparison.Ordinal))
                    {
                        // Protocol-relative: the host is given, only the scheme has to be added.
                        link = oUri.Scheme + ":" + link;
                    }
                    else if (link.StartsWith("/", StringComparison.Ordinal))
                    {
                        // Convert a root-relative path to an absolute URL.
                        link = szSiteRoot + link;
                    }
                    else
                    {
                        // Handle a relative address.
                        link = szPageDir + link;
                    }
                }

                // Cut off the fragment part.
                int index = link.IndexOf('#');
                if (index != -1)
                {
                    link = link.Substring(0, index);
                }

                // A null result from FormatURL means the link is to be skipped.
                link = FormatURL(link);
                if (link == null)
                {
                    continue;
                }

                // Only links not seen before are queued.
                if (s_oURLSet.Contains(link))
                {
                    continue;
                }
                s_oURLSet.Add(link);

                // EnQueue
                s_oUrlEnQueSema.WaitOne();
                s_oURLQue.Enqueue(link);
                s_oUrlDeQueSema.Release();
            }
        }