Ejemplo n.º 1
0
        /// <summary>
        /// Downloads the attachment at <paramref name="infoUrl"/> into the current
        /// year/month sub-folder of the Corp_Attach directory and, when the server reports
        /// more than 1024 bytes, saves a <c>BaseAttach</c> record linked to <paramref name="info"/>.
        /// </summary>
        /// <remarks>
        /// Best-effort: any failure while downloading or saving the record is swallowed and
        /// the downloaded file is deleted. A failure to create the target directory still
        /// propagates to the caller.
        /// </remarks>
        /// <param name="infoUrl">URL of the attachment to download.</param>
        /// <param name="strFileName">File name the attachment is stored under.</param>
        /// <param name="info">Punishment record whose <c>Id</c> the attachment is linked to.</param>
        private void AddBaseFile(string infoUrl, string strFileName, CorpPunish info)
        {
            string strFileUrl = ToolDb.DbServerPath + "SiteManage\\Files\\Corp_Attach\\";

            // Year + month sub-folder. Read the clock once so both parts come from the same instant.
            DateTime now         = DateTime.Now;
            string   strFile     = now.Year.ToString() + now.Month.ToString() + "\\";
            string   strFullPath = strFileUrl + strFile + strFileName;

            Directory.CreateDirectory(strFileUrl + strFile);

            try
            {
                long length;
                System.Net.HttpWebRequest request = System.Net.HttpWebRequest.Create(infoUrl) as System.Net.HttpWebRequest;

                // One request, one response: the response, its stream and the file are all
                // disposed on every path, including when the download fails half-way.
                using (System.Net.WebResponse response = request.GetResponse())
                using (Stream ns = response.GetResponseStream())
                // FileMode.Create truncates an earlier file of the same name, so a shorter
                // re-download cannot leave stale bytes at the end.
                using (FileStream fs = new FileStream(strFullPath, FileMode.Create, FileAccess.Write))
                {
                    length = response.ContentLength;

                    byte[] nbytes = new byte[4096];
                    int    nReadSize;
                    while ((nReadSize = ns.Read(nbytes, 0, nbytes.Length)) > 0)
                    {
                        fs.Write(nbytes, 0, nReadSize);
                    }
                }

                // ContentLength is -1 when the server sends no Content-Length header, so such
                // downloads are treated like undersized ones and discarded.
                if (length > 1024)
                {
                    BaseAttach baseInfo = ToolDb.GenBaseAttach(ToolDb.NewGuid, strFileName, info.Id, strFile + strFileName, length.ToString(), "");
                    ToolDb.SaveEntity(baseInfo, string.Empty);
                }
                else
                {
                    File.Delete(strFullPath);
                }
            }
            catch
            {
                // Deliberate best-effort: drop whatever was written and carry on.
                File.Delete(strFullPath);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads company punishment rows from the <c>div</c> with id <c>xzcf</c> inside
        /// <paramref name="html"/> and saves one <c>CorpPunish</c> record per data row.
        /// </summary>
        /// <param name="info">Company whose <c>Id</c> and <c>Url</c> are stored on every saved record.</param>
        /// <param name="html">Page markup that is searched for the punishment table.</param>
        protected void AddCorpPunish(CorpInfo info, string html)
        {
            NodeList punishDivs = new Parser(new Lexer(html)).ExtractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "xzcf")));
            if (punishDivs == null || punishDivs.Count <= 0)
            {
                return;
            }

            // Plain text substitution of "th" with "td" (presumably so header cells parse as
            // ordinary columns); it rewrites every occurrence of those letters, not only tags.
            string   rewrittenHtml = punishDivs.ToHtml().Replace("th", "td");
            NodeList tables        = new Parser(new Lexer(rewrittenHtml)).ExtractAllNodesThatMatch(new TagNameFilter("table"));
            if (tables == null || tables.Count <= 0)
            {
                return;
            }

            TableTag punishTable = tables[0] as TableTag;
            int      rowIndex    = 1; // row 0 is never read
            while (rowIndex < punishTable.RowCount)
            {
                TableRow row = punishTable.Rows[rowIndex];

                // "没有显示结果" means "no results to display": stop at that placeholder row.
                if (row.Columns[0].ToPlainTextString().Contains("没有显示结果"))
                {
                    return;
                }

                string docNo      = row.Columns[1].ToNodePlainString();
                string punishType = row.Columns[2].ToNodePlainString();
                string grantUnit  = row.Columns[3].ToNodePlainString();
                string docDate    = row.Columns[4].ToNodePlainString();

                // The punishment text is not read from this table, so it is stored empty.
                CorpPunish record = ToolDb.GenCorpPunish(info.Id, docNo, punishType, grantUnit, docDate, string.Empty, info.Url, "0");
                ToolDb.SaveEntity(record, string.Empty);

                rowIndex++;
            }
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Saves administrative punishment records read from the rows of <paramref name="table"/>.
 /// </summary>
 /// <param name="table">Table whose rows after the first are read as punishment entries.</param>
 /// <param name="id">Identifier passed as the first argument of <c>ToolDb.GenCorpPunish</c>.</param>
 /// <param name="url">Source URL stored on every saved record.</param>
 private void AddCorpPunish(TableTag table, string id, string url)
 {
     for (int rowIndex = 1; rowIndex < table.RowCount; ++rowIndex)
     {
         TableRow row = table.Rows[rowIndex];

         string punishType = row.Columns[0].ToNodePlainString();
         string punishText = row.Columns[4].ToPlainTextString().Replace("&nbsp;", "");
         string docDate    = row.Columns[5].ToPlainTextString().GetDateRegex();
         string grantUnit  = row.Columns[7].ToNodePlainString();

         // A row is stored as soon as any one of the four extracted fields has content.
         bool hasContent = !string.IsNullOrEmpty(punishType)
                           || !string.IsNullOrEmpty(punishText)
                           || !string.IsNullOrEmpty(docDate)
                           || !string.IsNullOrEmpty(grantUnit);
         if (hasContent)
         {
             // No document number is read from this table, so it is stored empty.
             CorpPunish record = ToolDb.GenCorpPunish(id, string.Empty, punishType, grantUnit, docDate, punishText, url, "0");
             ToolDb.SaveEntity(record, string.Empty);
         }
     }
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Crawls the paged JSON punishment listing, saves every record through
        /// <c>ToolDb.SaveEntity</c> and, when that call returns <c>true</c>, downloads the
        /// record's document with <c>AddBaseFile</c>.
        /// </summary>
        /// <param name="crawlAll">Not consulted by this implementation.</param>
        /// <returns>The list created at the start; nothing is added to it because records are saved directly.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list     = new ArrayList();
            string htl;
            int    sqlCount = 0;
            int    pageInt  = 1;

            try
            {
                htl = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception)
            {
                return list;
            }

            if (htl.Contains("RowCount"))
            {
                try
                {
                    int    index   = htl.IndexOf("RowCount", StringComparison.Ordinal);
                    string pageStr = htl.Substring(index).Replace("RowCount", "").Replace("}", "").Replace(":", "").Replace("\"", "");

                    // 10 records per page. Round up explicitly: Convert.ToInt32 rounds to the
                    // nearest integer, which over-counts by a page for e.g. 27 rows.
                    pageInt = (int)Math.Ceiling(decimal.Parse(pageStr) / 10);
                }
                catch
                {
                    // Row count could not be read: fall back to crawling a single page.
                }
            }

            JavaScriptSerializer serializer = new JavaScriptSerializer();
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        htl = ToolWeb.GetHtmlByUrl("http://www.szjs.gov.cn/build/build.ashx?_=1353579439242&menu=%E8%A1%8C%E6%94%BF%E5%A4%84%E7%BD%9A&pageSize=10&pageIndex=" + i.ToString() + "&fileOrg=&fileDate=&fileId=&unitName=&timp=", Encoding.UTF8);
                    }
                    catch
                    {
                        // Skip this page; otherwise the previous page's content would be processed again.
                        continue;
                    }
                }

                Dictionary<string, object> smsTypeJson = serializer.DeserializeObject(htl) as Dictionary<string, object>;
                object dataList;
                if (smsTypeJson == null || !smsTypeJson.TryGetValue("DataList", out dataList))
                {
                    continue;
                }
                object[] array = dataList as object[];
                if (array == null)
                {
                    continue;
                }

                foreach (object obj2 in array)
                {
                    try
                    {
                        Dictionary<string, object> dicSmsType = (Dictionary<string, object>)obj2;

                        string DocNo      = Convert.ToString(dicSmsType["FileId"]);
                        string PunishType = Convert.ToString(dicSmsType["PunTypeText"]);
                        string GrantUnit  = Convert.ToString(dicSmsType["UnitName"]);
                        string DocDate    = Convert.ToString(dicSmsType["ServiceDate"]);
                        string file       = Convert.ToString(dicSmsType["PunDoc"]);
                        string InfoUrl    = "http://www.szjs.gov.cn/PUNhtml/" + file;

                        // Punishment text and grantee name are not part of the listing; stored empty.
                        CorpPunish info = ToolDb.GenCorpPunish(string.Empty, DocNo, PunishType, GrantUnit, DocDate, string.Empty, InfoUrl, string.Empty, "1");

                        // NOTE(review): "<=" lets MaxCount + 1 records through before stopping — confirm this is intended.
                        if (sqlCount <= this.MaxCount)
                        {
                            if (ToolDb.SaveEntity(info, this.ExistCompareFields))
                            {
                                AddBaseFile(InfoUrl, file, info);
                            }
                            sqlCount++;
                        }
                        else
                        {
                            return list;
                        }
                    }
                    catch
                    {
                        // A malformed record must not stop the remaining ones.
                        continue;
                    }
                }
            }
            return list;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Queries the punishment listing for one company page by page and saves a
        /// <c>CorpPunish</c> record for every entry returned.
        /// </summary>
        /// <remarks>
        /// Each page request is retried once after a 12-minute wait. If the first page cannot
        /// be fetched the method returns; if a later page cannot be fetched it is skipped.
        /// A page whose JSON has no <c>records</c> array is skipped as well.
        /// </remarks>
        /// <param name="info">Company whose <c>CorpCode</c> is posted as <c>orgCode</c>; its <c>Id</c> and <c>Url</c> are stored on every record.</param>
        /// <param name="param">Value posted as the <c>param</c> form field.</param>
        /// <param name="corpType">Value posted as the <c>corpType</c> form field.</param>
        protected void AddCorpPunish(CorpInfo info, string param, string corpType)
        {
            string   url        = "http://portal.szjs.gov.cn:8888/publicShow/queryPunish.html";
            string[] postParams = new string[] { "param", "corpType", "orgCode", "page" };
            string[] postValues = new string[] { param, corpType, info.CorpCode, "1" };

            NameValueCollection nvc = ToolWeb.GetNameValueCollection(postParams, postValues);
            string html;
            if (!TryPostPunishQuery(url, nvc, out html))
            {
                return;
            }

            JavaScriptSerializer       java        = new JavaScriptSerializer();
            Dictionary<string, object> jsonResults = java.DeserializeObject(html) as Dictionary<string, object>;

            // Default to a single page when "totalPage" is absent or is not an int.
            int    pageInt = 1;
            object totalPage;
            if (jsonResults != null && jsonResults.TryGetValue("totalPage", out totalPage) && totalPage is int)
            {
                pageInt = (int)totalPage;
            }

            for (int i = 1; i <= pageInt; i++)
            {
                // Page 1 was already fetched above in order to learn the page count.
                if (i > 1)
                {
                    postValues = new string[] { param, corpType, info.CorpCode, i.ToString() };
                    nvc        = ToolWeb.GetNameValueCollection(postParams, postValues);
                    if (!TryPostPunishQuery(url, nvc, out html))
                    {
                        continue;
                    }
                    jsonResults = java.DeserializeObject(html) as Dictionary<string, object>;
                }

                object records;
                if (jsonResults == null || !jsonResults.TryGetValue("records", out records))
                {
                    continue;
                }
                object[] dicRecords = records as object[];
                if (dicRecords == null)
                {
                    continue;
                }

                foreach (object dicRecord in dicRecords)
                {
                    Dictionary<string, object> dic = (Dictionary<string, object>)dicRecord;

                    string DocNo      = Convert.ToString(dic["file_id"]);
                    string PunishType = Convert.ToString(dic["pun_type_text"]);
                    string GrantUnit  = Convert.ToString(dic["file_org"]);
                    string DocDate    = Convert.ToString(dic["file_date"]);

                    // The listing carries no punishment text, so that field is stored empty.
                    CorpPunish punish = ToolDb.GenCorpPunish(info.Id, DocNo, PunishType, GrantUnit, DocDate, string.Empty, info.Url, "0");

                    ToolDb.SaveEntity(punish, string.Empty);
                }
            }
        }

        /// <summary>
        /// Posts the punishment query; if the request throws, waits 12 minutes and tries once more.
        /// </summary>
        /// <param name="url">Address the form is posted to.</param>
        /// <param name="nvc">Form fields to post.</param>
        /// <param name="html">Response text when the method returns <c>true</c>; empty otherwise.</param>
        /// <returns><c>true</c> when either attempt succeeded; <c>false</c> after both failed and a further 8-minute wait.</returns>
        private bool TryPostPunishQuery(string url, NameValueCollection nvc, out string html)
        {
            try
            {
                html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
                return true;
            }
            catch
            {
                Thread.Sleep(12 * 60 * 1000);
            }

            try
            {
                html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
                return true;
            }
            catch
            {
                Thread.Sleep(8 * 60 * 1000);
                html = string.Empty;
                return false;
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Walks every page of the punishment listing and collects one <c>CorpPunish</c>
        /// per data row of the table with id <c>bean</c>.
        /// </summary>
        /// <param name="crawlAll">When <c>false</c>, collection stops once the list holds <c>MaxCount</c> records.</param>
        /// <returns>The collected records; empty when the first page cannot be fetched.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  records         = new List<CorpPunish>();
            int    rowsSincePause  = 1;
            int    lastPage        = 1;
            string pageHtml;

            try
            {
                pageHtml = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch (Exception)
            {
                return records;
            }

            // The link with id "lx" carries a page number in its href (presumably the last page — verify against the site).
            NodeList pagerLinks = new Parser(new Lexer(pageHtml)).ExtractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("id", "lx")));
            if (pagerLinks != null && pagerLinks.Count > 0)
            {
                try
                {
                    lastPage = int.Parse(pagerLinks.GetATagHref().GetRegexBegEnd("page=", "&"));
                }
                catch
                {
                }
            }

            for (int pageNo = 1; pageNo <= lastPage; pageNo++)
            {
                // The first page is already loaded; later pages are fetched on demand.
                if (pageNo > 1)
                {
                    try
                    {
                        pageHtml = ToolWeb.GetHtmlByUrl(this.SiteUrl + "&page=" + pageNo.ToString(), Encoding.Default);
                    }
                    catch
                    {
                        continue;
                    }
                }

                NodeList tables = new Parser(new Lexer(pageHtml)).ExtractAllNodesThatMatch(
                    new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "bean")));
                if (tables == null || tables.Count <= 0)
                {
                    continue;
                }

                TableTag listing = tables[0] as TableTag;
                for (int rowNo = 1; rowNo < listing.RowCount; rowNo++)
                {
                    TableRow row = listing.Rows[rowNo];

                    string docNo      = row.Columns[1].ToNodePlainString();
                    string grantName  = row.Columns[2].ToNodePlainString();
                    string docDate    = row.Columns[3].ToPlainTextString().GetDateRegex();
                    string punishType = row.Columns[5].ToNodePlainString();
                    string infoUrl    = row.Columns[1].GetATagHref();

                    // Grant unit and punishment text are not read from the listing; both are stored empty.
                    records.Add(ToolDb.GenCorpPunish(string.Empty, docNo, punishType, string.Empty, docDate, string.Empty, infoUrl, grantName, "1"));

                    bool limitReached = !crawlAll && records.Count >= this.MaxCount;
                    if (limitReached)
                    {
                        return records;
                    }

                    // Pause for 480000 ms each time the counter reaches 50, then restart it at 1.
                    rowsSincePause++;
                    if (rowsSincePause >= 50)
                    {
                        rowsSincePause = 1;
                        Thread.Sleep(480000);
                    }
                }
            }
            return records;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Crawls the paged punishment list (table <c>dgConstBid</c>), fetches the detail page
        /// linked from each row and returns one <c>CorpPunish</c> per row.
        /// </summary>
        /// <remarks>
        /// Pages after the first are requested by posting the previous page's ASP.NET view state.
        /// Pages or detail documents that cannot be fetched, and rows without a detail link,
        /// are skipped.
        /// </remarks>
        /// <param name="crawlAll">Not consulted by this implementation.</param>
        /// <returns>The collected records; empty when the first page cannot be fetched.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new ArrayList();
            string htl;
            string cookiestr = string.Empty;
            int    page      = 1;

            try
            {
                htl = ToolWeb.GetHtmlByUrl(ToolWeb.UrlEncode(SiteUrl), Encoding.Default, ref cookiestr);
            }
            catch (Exception)
            {
                return list;
            }

            Parser   parser   = new Parser(new Lexer(htl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "list_page")));

            if (nodeList != null && nodeList.Count > 0)
            {
                Regex  regexPage = new Regex(@"\d+页");
                string pageText  = regexPage.Match(nodeList.AsString()).Value.Trim(new char[] { '共', '页' });

                // Keep the single-page default when the pager text holds no page number,
                // instead of letting a parse failure abort the whole crawl.
                int parsedPage;
                if (int.TryParse(pageText, out parsedPage))
                {
                    page = parsedPage;
                }
            }

            // Built once: the patterns do not change from row to row.
            Regex regGrantUnit = new Regex(@"(工程位置|被处罚单位)(:|:)[^\r\n]+\r\n");
            Regex regGrantName = new Regex(@"(工程位置|企业负责人)(:|:)[^\r\n]+\r\n");

            for (int i = 1; i <= page; i++)
            {
                if (i > 1)
                {
                    string viewState = ToolWeb.GetAspNetViewState(htl);
                    // NOTE(review): the event validation token is read but never posted — confirm the site does not need it.
                    string eventValidation = ToolWeb.GetAspNetEventValidation(htl);
                    // NOTE(review): APPYEAR is fixed to "2012" — confirm that is still the intended filter.
                    NameValueCollection nvc = ToolWeb.GetNameValueCollection(new string[] {
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__VIEWSTATE",
                        "DOC_ID",
                        "CORP_NAME",
                        "APPYEAR",
                        "ucPageNumControl:gotopage",
                        "ucPageNumControl:NEXTpage"
                    }, new string[] {
                        string.Empty,
                        string.Empty,
                        viewState,
                        string.Empty,
                        string.Empty,
                        "2012",
                        (i - 2).ToString(),
                        "下一页"
                    });
                    try
                    {
                        htl = ToolWeb.GetHtmlByUrl(SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch (Exception)
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(htl));
                NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "dgConstBid")));
                if (tableNodeList == null || tableNodeList.Count <= 0)
                {
                    continue;
                }

                TableTag table = (TableTag)tableNodeList[0];
                for (int j = 1; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];

                    string DocNo      = tr.Columns[1].ToPlainTextString().Trim();
                    string PunishType = tr.Columns[5].ToPlainTextString().Trim();
                    string GrantUnit  = tr.Columns[2].ToPlainTextString().Replace("&nbsp;", "").Trim();
                    string DocDate    = tr.Columns[3].ToPlainTextString().Trim();
                    string GrantName  = "";

                    // Values of at most 5 characters are stored as the grantee name rather than the unit.
                    if (GrantUnit.Length <= 5)
                    {
                        GrantName = GrantUnit;
                        GrantUnit = "";
                    }

                    // A row without a detail link cannot be followed; skip it rather than fail the crawl.
                    NodeList links = tr.Columns[1].SearchFor(typeof(ATag), true);
                    ATag     aTag  = (links != null && links.Count > 0) ? links[0] as ATag : null;
                    if (aTag == null)
                    {
                        continue;
                    }

                    // The link is a GoDetail('...') call; its argument is the document name.
                    string InfoUrl = "http://61.144.226.2/PUNHTML/" + aTag.Link.Replace("GoDetail('", "").Replace("');", "").Trim();

                    string htmldetail;
                    try
                    {
                        htmldetail = ToolWeb.GetHtmlByUrl(ToolWeb.UrlEncode(InfoUrl), Encoding.GetEncoding("GB2312")).Replace("= 602;", "罚");
                    }
                    catch (Exception)
                    {
                        continue;
                    }

                    Parser   parserdetail = new Parser(new Lexer(htmldetail));
                    NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new HasParentFilter(new TagNameFilter("div")));
                    string   PunishCtx    = dtnode.AsString().Replace("=\r\n", "").Replace("&nbsp;", "").Trim();
                    PunishCtx = System.Web.HttpUtility.HtmlDecode(PunishCtx).Replace("</p>", "").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Trim();

                    // Fill whichever of unit / name the listing did not provide from the document text.
                    if (GrantUnit == "")
                    {
                        GrantUnit = regGrantUnit.Match(PunishCtx).Value.Replace("被处罚单位", "").Replace(":", "").Replace(":", "").Trim();
                    }
                    if (GrantName == "")
                    {
                        GrantName = regGrantName.Match(PunishCtx).Value.Replace("企业负责人", "").Replace(":", "").Replace(":", "").Trim();
                    }

                    CorpPunish info = ToolDb.GenCorpPunish(string.Empty, DocNo, PunishType, GrantUnit, DocDate, PunishCtx, InfoUrl, GrantName, "1");
                    list.Add(info);
                }
            }
            return list;
        }