예제 #1
0
파일: Core.cs 프로젝트: ReinhardHsu/devfw
        /// <summary>
        /// Builds the full index document (<c>fullindex.xml</c>) for the given items and
        /// processes every item on the worker threads of a <c>MultiThreadProcess</c>.
        /// </summary>
        /// <remarks>
        /// Any existing <c>fullindex.xml</c> is deleted up front and the new file is written
        /// only after the worker pool reports that it is no longer alive. Each worker call
        /// takes the first remaining item, records its <c>outer_id</c> in the index and then
        /// calls <c>UploadItem</c> for the last remaining item or <c>Generate</c> for any other.
        /// </remarks>
        /// <param name="items">
        /// Items to index. The list is consumed: every entry is removed as it is claimed by a worker.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="items"/> is <c>null</c>.</exception>
        public static void GenerateFullIndex(IList<Item> items)
        {
            if (items == null)
            {
                throw new ArgumentNullException("items");
            }

            XmlDocument xd = new XmlDocument();
            FileInfo file = new FileInfo(String.Format("{0}{1}fullindex.xml", Config.PhypicPath, Config.SavePath));
            if (file.Exists)
            {
                file.Delete();
            }

            xd.AppendChild(xd.CreateXmlDeclaration("1.0", null, null));

            XmlNode root = xd.CreateElement("root");

            Core.AppendNode(xd, root, "version", Config.Version);
            // The index is machine-readable, so the timestamp must not depend on the
            // calendar or time separator of the current culture.
            Core.AppendNode(xd, root, "modified", String.Format(System.Globalization.CultureInfo.InvariantCulture, "{0:yyyy-MM-dd HH:mm:ss}", DateTime.Now));
            Core.AppendNode(xd, root, "seller_id", Config.Seller);
            Core.AppendNode(xd, root, "cat_url", String.Format("{0}{1}SellerCats.xml", Config.Domain, Config.SavePath));
            Core.AppendNode(xd, root, "dir", String.Format("{0}{1}items/", Config.Domain, Config.SavePath));

            XmlNode ids = xd.CreateElement("item_ids");

            // Guards the shared work list and the XmlDocument; neither is safe for
            // concurrent mutation from several worker threads.
            object gate = new object();

            // NOTE(review): assumes the callback is invoked once per task (items.Count times) — confirm.
            MultiThreadProcess mp = new MultiThreadProcess(Config.Threads, items.Count);
            mp.Start<IList<Item>>(item =>
            {
                Item itm;
                bool isLast;

                lock (gate)
                {
                    // Claim the item atomically so that no two workers pick the same entry.
                    itm = item[0];
                    isLast = item.Count == 1;
                    item.RemoveAt(0);

                    XmlNode xn = xd.CreateElement("outer_id");

                    XmlAttribute xat = xd.CreateAttribute("action");
                    xat.Value = "upload";
                    xn.Attributes.Append(xat);

                    xn.InnerText = itm.outer_id;
                    ids.AppendChild(xn);
                }

                // The per-item work runs outside the lock so the workers can overlap.
                if (isLast)
                {
                    UploadItem(itm);
                }
                else
                {
                    Generate(itm);
                }

            }, items);

            // Wait for the workers without burning a CPU core on a hot spin.
            while (mp.IsAlive)
            {
                System.Threading.Thread.Sleep(50);
            }

            // Taking the same lock makes the nodes written by the workers visible here.
            lock (gate)
            {
                root.AppendChild(ids);
                xd.AppendChild(root);
                xd.Save(file.FullName);
            }
        }
예제 #2
0
        /// <summary>
        /// Downloads a list page, extracts the page URLs found inside its list blocks and
        /// passes each URL to <c>GetPageData</c> together with <paramref name="func"/>.
        /// </summary>
        /// <remarks>
        /// Returns early when no list block matches <c>ListBlockRule</c>. Otherwise the call
        /// blocks until the counter handed to <c>GetPageData</c> by reference equals the
        /// number of URLs found.
        /// </remarks>
        /// <param name="pageUri">Address of the list page to download.</param>
        /// <param name="func">Callback forwarded unchanged to <c>GetPageData</c>.</param>
        private void AnalysisListPage(string pageUri, DataPackFunc func)
        {
            int taskCount = 0,   // number of page URLs found
                taskNumbers = 0; // completion counter passed by reference to GetPageData

            string html;                 // HTML of the downloaded list page
            MatchCollection listMatches; // matches of the list-block rule

            #if DEBUG
            Console.WriteLine("��ʼ��:{0}��������...", pageUri);
            #endif

            // Download the list page.
            HttpWebRequest request = (HttpWebRequest) WebRequest.Create(pageUri);
            request.Timeout = this.RequestTimeOut;

            // Dispose the response as well as the reader so the connection is released
            // even when reading fails.
            using (WebResponse response = request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            using (StreamReader sr = new StreamReader(stream, this.Encode))
            {
                html = sr.ReadToEnd();
            }

            #if DEBUG
            Console.WriteLine("���ص�����Ϊ:{0}", html);
            #endif

            // Locate the list blocks inside the page.
            listMatches = Regex.Matches(html, RuleFormat.Format(this.ListBlockRule));

            // Nothing matched: there is no work to do.
            if (listMatches.Count == 0)
            {
            #if DEBUG
                Console.WriteLine("û�ҵ�ƥ��!");
            #endif
                return;
            }

            // NOTE(review): "DEBUGS" differs from the DEBUG symbol used everywhere else in
            // this method — confirm whether this output is intentionally disabled.
            #if DEBUGS
            Console.WriteLine("\r\n------------------------------\r\n�õ�ƥ����б�����Ϊ:\r\n");
            #endif

            Regex pageUriRegex = new Regex(this.FormatedPageUriRule);

            // Collect every page URL found inside the matched list blocks.
            IList<string> pageUrls = new List<string>();

            foreach (Match m in listMatches)
            {
            #if DEBUG
                Console.WriteLine("\r\n------------------------------------------------\r\n{0}", m.Value);
            #endif
                foreach (Match pm in pageUriRegex.Matches(m.Value))
                {
            #if DEBUG
                    Console.WriteLine(pm.Value);
            #endif
                    pageUrls.Add(pm.Value);
                }
            }

            // Capture the count before the multi-threaded path starts removing entries.
            taskCount = pageUrls.Count;

            if (!this.UseMultiThread)
            {
                // Single-threaded: process the URLs in order on the calling thread.
                foreach (string pageUrl in pageUrls)
                {
                    GetPageData(pageUrl, ref taskNumbers, func);
                }
            }
            else
            {
                MultiThreadProcess mp = new MultiThreadProcess(5, taskCount);
                mp.Start<IList<string>>(urls =>
                {
                    // The lock covers both the list access and the GetPageData call.
                    // NOTE(review): this serializes the downloads; it is kept because it is
                    // not visible here whether GetPageData updates the counter atomically.
                    lock (urls)
                    {
                        GetPageData(urls[0], ref taskNumbers, func);
                        pageUrls.Remove(urls[0]);
                    }
                }, pageUrls);
            }

            // Publish the total number of tasks.
            state.TotalCount = taskCount;

            // Wait until every task has been counted. The interlocked read forces a fresh
            // value on each pass and the sleep avoids pinning a CPU core while waiting.
            while (System.Threading.Interlocked.CompareExchange(ref taskNumbers, 0, 0) != taskCount)
            {
                System.Threading.Thread.Sleep(10);
            }

            #if DEBUG
            Console.WriteLine("�������....!���ɼ���{0}��", taskCount);
            #endif
        }
예제 #3
0
        /// <summary>
        /// Analyzes a list page and runs the callback processing on the results: the page is
        /// downloaded, the page URLs inside its list blocks are extracted and each URL is
        /// passed to <c>GetPageData</c> together with <paramref name="func"/>.
        /// </summary>
        /// <remarks>
        /// Returns early when no list block matches <c>ListBlockRule</c>. Otherwise the call
        /// blocks until the counter handed to <c>GetPageData</c> by reference equals the
        /// number of URLs found.
        /// </remarks>
        /// <param name="pageUri">Address of the list page to download.</param>
        /// <param name="func">Callback forwarded unchanged to <c>GetPageData</c>.</param>
        private void AnalysisListPage(string pageUri, DataPackFunc func)
        {
            int taskCount = 0,   // number of tasks
                taskNumbers = 0; // counter used to decide whether all tasks have finished

            string html;                 // HTML of the downloaded list page
            MatchCollection listMatches; // list-block matches

#if DEBUG
            Console.WriteLine("开始从:{0}下载数据...", pageUri);
#endif

            // Download the list page content.
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(pageUri);
            request.Timeout = this.RequestTimeOut;

            // Dispose the response as well as the reader so the connection is released
            // even when reading fails.
            using (WebResponse response = request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            using (StreamReader sr = new StreamReader(stream, this.Encode))
            {
                html = sr.ReadToEnd();
            }

#if DEBUG
            Console.WriteLine("返回的数据为:{0}", html);
#endif

            // Analyze the list page markup.
            listMatches = Regex.Matches(html, RuleFormat.Format(this.ListBlockRule));

            // No match found: there is no work to do.
            if (listMatches.Count == 0)
            {
#if DEBUG
                Console.WriteLine("没找到匹配!");
#endif
                return;
            }

            // NOTE(review): "DEBUGS" differs from the DEBUG symbol used everywhere else in
            // this method — confirm whether this output is intentionally disabled.
#if DEBUGS
            Console.WriteLine("\r\n------------------------------\r\n得到匹配的列表数据为:\r\n");
#endif

            Regex pageUriRegex = new Regex(this.FormatedPageUriRule);

            // Collect every page URL found inside the matched list blocks.
            IList<string> pageUrls = new List<string>();

            foreach (Match m in listMatches)
            {
#if DEBUG
                Console.WriteLine("\r\n------------------------------------------------\r\n{0}", m.Value);
#endif
                foreach (Match pm in pageUriRegex.Matches(m.Value))
                {
#if DEBUG
                    Console.WriteLine(pm.Value);
#endif
                    pageUrls.Add(pm.Value);
                }
            }

            // Capture the count before the multi-threaded path starts removing entries.
            taskCount = pageUrls.Count;

            if (!this.UseMultiThread)
            {
                // Single-threaded: process the URLs in order on the calling thread.
                foreach (string pageUrl in pageUrls)
                {
                    GetPageData(pageUrl, ref taskNumbers, func);
                }
            }
            else
            {
                MultiThreadProcess mp = new MultiThreadProcess(5, taskCount);
                mp.Start<IList<string>>(urls =>
                {
                    // The lock covers both the list access and the GetPageData call.
                    // NOTE(review): this serializes the downloads; it is kept because it is
                    // not visible here whether GetPageData updates the counter atomically.
                    lock (urls)
                    {
                        GetPageData(urls[0], ref taskNumbers, func);
                        pageUrls.Remove(urls[0]);
                    }
                }, pageUrls);
            }

            // Publish the total number of tasks.
            state.TotalCount = taskCount;

            // Return only once every task has been counted. The interlocked read forces a
            // fresh value on each pass and the sleep avoids pinning a CPU core while waiting.
            while (System.Threading.Interlocked.CompareExchange(ref taskNumbers, 0, 0) != taskCount)
            {
                System.Threading.Thread.Sleep(10);
            }

#if DEBUG
            Console.WriteLine("任务完成....!共采集到{0}条", taskCount);
#endif
        }