コード例 #1
0
        // Collects the post title, publication date and author from the page HTML.
        //
        // HTML: the page source; it is only read here.
        // res:  the record being filled in. res.title, res.datetime and res.author are
        //       each assigned only when their surrounding markers are found (and, for
        //       the date, when the text can be parsed); otherwise the field is left
        //       untouched.
        static void collectInformation(ref string HTML, ref resources res)
        {
            // title: the text between "<title>" and the site-name suffix
            string title = extractBetween(HTML, "<title>", " | 琉璃神社 ★ HACG</title>");
            if (title != null)
            {
                res.title = title;
            }

            // datetime: the value that follows the "entry-date" datetime attribute
            string datetime = extractBetween(HTML, "\"entry-date\" datetime=\"", "\" pubdate>");
            DateTime published;
            if (datetime != null && tryParseEntryDate(datetime, out published))
            {
                res.datetime = published;
            }

            // author: the text that follows the rel="author" attribute, up to the closing tags
            string author = extractBetween(HTML, "发布的文章\" rel=\"author\">", "</a></span></span>");
            if (author != null)
            {
                res.author = author;
            }
        }

        // Returns the text between the first startMarker and the first endMarker that
        // follows it, or null when either marker is missing.
        static string extractBetween(string text, string startMarker, string endMarker)
        {
            if (string.IsNullOrEmpty(text))
            {
                return null;
            }
            // Ordinal search: the markers are literal markup, and the returned index
            // must count chars exactly.
            int start = text.IndexOf(startMarker, StringComparison.Ordinal);
            if (start < 0)
            {
                return null;
            }
            start += startMarker.Length;
            // Searching from 'start' guarantees end >= start, so the length below
            // can never be negative.
            int end = text.IndexOf(endMarker, start, StringComparison.Ordinal);
            if (end < 0)
            {
                return null;
            }
            return text.Substring(start, end - start);
        }

        // Reads year, month, day, hour, minute and second from fixed positions
        // (0, 5, 8, 11, 14, 17) of text; returns false instead of throwing when the
        // text is too short, not numeric, or not a valid date.
        static bool tryParseEntryDate(string text, out DateTime result)
        {
            result = new DateTime();
            // The last field read is Substring(17, 2), so 19 characters are required.
            if (text == null || text.Length < 19)
            {
                return false;
            }
            int year, month, day, hour, minute, second;
            if (!int.TryParse(text.Substring(0, 4), out year) ||
                !int.TryParse(text.Substring(5, 2), out month) ||
                !int.TryParse(text.Substring(8, 2), out day) ||
                !int.TryParse(text.Substring(11, 2), out hour) ||
                !int.TryParse(text.Substring(14, 2), out minute) ||
                !int.TryParse(text.Substring(17, 2), out second))
            {
                return false;
            }
            try
            {
                result = new DateTime(year, month, day, hour, minute, second);
                return true;
            }
            catch (ArgumentOutOfRangeException)
            {
                // Numeric but out of range (e.g. month 13): treat as unparsable.
                return false;
            }
        }
コード例 #2
0
        // Appends the collected information and links of one page to the output files.
        //
        // A record (index, url, title, author, datetime, then every magnet link and
        // every Baidu pan link, followed by its password when it has one) is appended
        // to fileLocation + "resourcesOfHACG.txt". The magnet links alone are also
        // appended to fileLocation + "magnets.txt".
        static void saveCollection(resources res)
        {
            // The magnet lines are written to both files, so build them once.
            System.Text.StringBuilder magnetLines = new System.Text.StringBuilder();
            for (int i = 0; i < res.numberOfMagnets; i++)
            {
                magnetLines.Append(res.magnets[i]).Append("\n");
            }

            System.Text.StringBuilder record = new System.Text.StringBuilder();
            // NOTE(review): "anthor" looks like a typo for "author"; it is kept because
            // it is part of the text already written to existing output files.
            record.Append("index:" + res.index + "\nurl:" + res.url + "\ntitle:" + res.title + "\nanthor:" + res.author + "\ndatetime:" + res.datetime + "\n");
            record.Append(magnetLines.ToString());
            for (int i = 0; i < res.numberOfBaidupanLinks; i++)
            {
                record.Append(res.baidupanLinks[i].link);
                if (res.baidupanLinks[i].havePassword)
                {
                    record.Append(" ").Append(res.baidupanLinks[i].password);
                }
                record.Append("\n");
            }
            // A blank line separates consecutive records.
            record.Append("\n");

            appendTextToFile(fileLocation + "resourcesOfHACG.txt", record.ToString());
            // magnets.txt is opened even when there are no magnets, as before.
            appendTextToFile(fileLocation + "magnets.txt", magnetLines.ToString());
        }

        // Appends text, encoded with System.Text.Encoding.Default, to the file at path.
        static void appendTextToFile(string path, string text)
        {
            byte[] data = System.Text.Encoding.Default.GetBytes(text);
            // 'using' releases the file handle even when Write throws.
            using (FileStream file = new FileStream(path, FileMode.Append))
            {
                file.Write(data, 0, data.Length);
            }
        }
コード例 #3
0
        // Collects magnet links from the page HTML into res.magnets / res.numberOfMagnets.
        //
        // Three kinds of candidates are searched for, each a run of digits plus letters
        // of a single case enclosed by one delimiter character on each side:
        //   - 40-character hashes,
        //   - 32-character hashes,
        //   - fragments of 10 to 30 characters; two consecutive fragments whose
        //     combined length is 40 or 32 are joined into one hash.
        // Every candidate is passed to isResourcesLink(..., "magnet") and stored with
        // the "magnet:?xt=urn:btih:" prefix when that check accepts it.
        static void collectMagnets(ref string HTML, ref resources res)
        {
            Regex magnet40RE = new Regex("[^a-zA-Z0-9/\"\'-.;?\\[_=]([a-z0-9]{40}|[A-Z0-9]{40})[^a-zA-Z0-9/\"\'-.:;?\\[\\]_=]");
            Regex magnet32RE = new Regex("[^a-zA-Z0-9/\"\'-.;?\\[_=]([a-z0-9]{32}|[A-Z0-9]{32})[^a-zA-Z0-9/\"\'-.:;?\\[\\]_=]");
            Regex partOfMagnetRE = new Regex("[^a-zA-Z0-9/\"\'-.;?\\[_=]([A-Z0-9]{10,30}|[a-z0-9]{10,30})[^a-zA-Z0-9/\"\'-.:;?\\[\\]_=]");

            MatchCollection matches1 = magnet40RE.Matches(HTML);
            MatchCollection matches2 = magnet32RE.Matches(HTML);
            MatchCollection matches3 = partOfMagnetRE.Matches(HTML);

            // NOTE(review): 32-character hashes are only examined when a 40-character
            // hash or at least two fragments were found - confirm this gate is intended.
            if (matches1.Count == 0 && matches3.Count <= 1)
            {
                return;
            }

            for (int i = 0; i < matches1.Count; i++)
            {
                // The check receives the whole match, delimiters included.
                if (isResourcesLink(matches1[i].ToString(), "magnet"))
                {
                    res.numberOfMagnets++;
                    // Group 1 is the hash without the two delimiter characters.
                    res.magnets[res.numberOfMagnets - 1] = "magnet:?xt=urn:btih:" + matches1[i].Groups[1].Value;
                }
            }
            for (int i = 0; i < matches2.Count; i++)
            {
                if (isResourcesLink(matches2[i].ToString(), "magnet"))
                {
                    res.numberOfMagnets++;
                    res.magnets[res.numberOfMagnets - 1] = "magnet:?xt=urn:btih:" + matches2[i].Groups[1].Value;
                }
            }
            for (int i = 0; i + 1 < matches3.Count;)
            {
                string firstPart = matches3[i].Groups[1].Value;
                string secondPart = matches3[i + 1].Groups[1].Value;
                // Measure the fragments themselves. The full match text is two
                // characters longer (one delimiter on each side), so summing match
                // lengths accepted joined hashes of 36 or 28 characters instead of
                // 40 or 32.
                int combinedLength = firstPart.Length + secondPart.Length;
                if (combinedLength == 40 || combinedLength == 32)
                {
                    string combinedMagnet = firstPart + secondPart;
                    if (isResourcesLink(combinedMagnet, "magnet"))
                    {
                        res.numberOfMagnets++;
                        res.magnets[res.numberOfMagnets - 1] = "magnet:?xt=urn:btih:" + combinedMagnet;
                    }
                    // Both fragments are consumed, whether or not the pair was accepted.
                    i = i + 2;
                }
                else
                {
                    i++;
                }
            }
        }
コード例 #4
0
        // Collects Baidu pan share links, and their passwords when one can be found,
        // from the page HTML into res.baidupanLinks / res.numberOfBaidupanLinks.
        //
        // A candidate is an 8-character alphanumeric id enclosed by one delimiter
        // character on each side that isResourcesLink(..., "baidupan") accepts. The
        // password is looked for right after the id: first as 4 lowercase
        // alphanumerics immediately following it, otherwise after the word "密码"
        // within the next 20 characters.
        static void collectBaidupan(ref string HTML, ref resources res)
        {
            Regex baidupanRE = new Regex("[^a-zA-Z0-9\"\'-.;:?=\\[\\]_][a-zA-Z0-9]{8}[^a-zA-Z0-9/\"\'-.;:?=\\[\\]_]");
            Regex passwordRE = new Regex("[^a-z0-9][a-z0-9]{4}[^a-z0-9]");
            MatchCollection matches = baidupanRE.Matches(HTML);

            for (int i = 0; i < matches.Count; i++)
            {
                string matched = matches[i].ToString();
                if (!isResourcesLink(matched, "baidupan"))
                {
                    continue;
                }
                // Characters 1..8 of the match are the id; 0 and 9 are the delimiters.
                string shareId = matched.Substring(1, 8);
                // Skip the id when the most recently stored link already contains it.
                if (res.numberOfBaidupanLinks >= 1 && res.baidupanLinks[res.numberOfBaidupanLinks - 1].link.IndexOf(shareId, StringComparison.Ordinal) >= 0)
                {
                    continue;
                }
                res.numberOfBaidupanLinks++;
                res.baidupanLinks[res.numberOfBaidupanLinks - 1].link = "http://pan.baidu.com/s/" + shareId;

                // The text after the id starts at the match's trailing delimiter.
                // The windows are clamped to the end of the page so that a link near
                // the end of HTML no longer throws ArgumentOutOfRangeException.
                int tailStart = matches[i].Index + 9;
                string password = safeSubstring(HTML, tailStart, 6);
                if (passwordRE.IsMatch(password))
                {
                    res.baidupanLinks[res.numberOfBaidupanLinks - 1].havePassword = true;
                    res.baidupanLinks[res.numberOfBaidupanLinks - 1].password     = password.Substring(1, 4);
                }
                else
                {
                    password = safeSubstring(HTML, tailStart, 20);
                    int indexOfPassword = password.IndexOf("密码", StringComparison.Ordinal);
                    if (indexOfPassword >= 0)
                    {
                        // Try the 6-character window starting 1 and 2 characters
                        // after the start of "密码".
                        for (int j = 1; j < 3; j++)
                        {
                            // Clamped: "密码" may sit too close to the end of the
                            // 20-character window for a full 6-character slice.
                            string candidate = safeSubstring(password, indexOfPassword + j, 6);
                            if (passwordRE.IsMatch(candidate))
                            {
                                res.baidupanLinks[res.numberOfBaidupanLinks - 1].havePassword = true;
                                res.baidupanLinks[res.numberOfBaidupanLinks - 1].password     = candidate.Substring(1, 4);
                            }
                        }
                    }
                }
            }
        }

        // Returns up to 'length' characters of text starting at 'start', or "" when
        // start lies outside the text; never throws for an out-of-range window.
        static string safeSubstring(string text, int start, int length)
        {
            if (start < 0 || start >= text.Length)
            {
                return "";
            }
            return text.Substring(start, Math.Min(length, text.Length - start));
        }
コード例 #5
0
 // Prints the collected information and links of one page to the console:
 // title, datetime and author first, then one line per magnet link, then one
 // line per Baidu pan link (followed by a space and its password when it has
 // one), and finally an empty line.
 static void display(ref resources res)
 {
     string[] headerLines =
     {
         "title:" + res.title,
         "datetime:" + res.datetime,
         "author:" + res.author
     };
     foreach (string headerLine in headerLines)
     {
         Console.WriteLine(headerLine);
     }

     int magnetIndex = 0;
     while (magnetIndex < res.numberOfMagnets)
     {
         Console.WriteLine(res.magnets[magnetIndex]);
         magnetIndex++;
     }

     int linkIndex = 0;
     while (linkIndex < res.numberOfBaidupanLinks)
     {
         Console.Write(res.baidupanLinks[linkIndex].link);
         // An empty suffix still ends the line, exactly like a bare WriteLine().
         string passwordSuffix = res.baidupanLinks[linkIndex].havePassword
             ? " " + res.baidupanLinks[linkIndex].password
             : "";
         Console.WriteLine(passwordSuffix);
         linkIndex++;
     }

     Console.WriteLine();
 }
コード例 #6
0
        // Entry point: loads each page, collects its information and links, prints
        // and saves them, then reports the accumulated timings.
        //
        // When MANUAL_INPUT is set, input() is called first. Pages are addressed
        // either by consecutive indexes (START_INDEX..END_INDEX) or, when
        // INDEX_CUSTOMED is set, by the entries INDEXS[INDEXS_START..INDEXS_LENGTH - 1].
        // When URL_CUSTOMED is set, only the single address URL is loaded.
        // A WebException makes the same page be retried up to MAX_RETRY_TIMES times.
        static void Main(string[] args)
        {
            string HTML = "", url = "";

            if (MANUAL_INPUT)
            {
                input();
            }
            Stopwatch HTTPRequestTimer  = new Stopwatch();
            Stopwatch HTTPAnalysisTimer = new Stopwatch();
            int       retryTimes        = 0;
            int       iStart            = (INDEX_CUSTOMED ? INDEXS_START : START_INDEX);
            int       iEnd              = (INDEX_CUSTOMED ? INDEXS_LENGTH - 1 : END_INDEX);

            for (int i = iStart; i <= iEnd; i++)
            {
                int index = (INDEX_CUSTOMED ? INDEXS[i] : i);
                if (URL_CUSTOMED)
                {
                    url = URL;
                    // A custom URL is loaded only once, so jump to the last iteration.
                    // Use the loop's own bound: END_INDEX is not the bound when
                    // INDEX_CUSTOMED is set, and the loop could then keep running.
                    i = iEnd;
                }
                else
                {
                    url = "http://www.hacg.li/wp/" + index + ".html";
                }
                resources res = new resources();
                res.index = index;
                res.url   = url;
                if (retryTimes == 0)
                {
                    Console.WriteLine("正在加载 " + url);
                }
                try
                {
                    HTTPRequestTimer.Start();
                    try
                    {
                        HTML = GetWebClient(url);
                    }
                    finally
                    {
                        // Stop even when the request throws, so a failed attempt
                        // does not leave the timer running.
                        HTTPRequestTimer.Stop();
                    }

                    HTTPAnalysisTimer.Start();
                    try
                    {
                        if (retryTimes > 0)
                        {
                            // Ends the "retrying..." line written with Console.Write.
                            Console.WriteLine();
                        }
                        retryTimes = 0;
                        Console.WriteLine("加载成功");

                        collectInformation(ref HTML, ref res);
                        collectMagnets(ref HTML, ref res);
                        collectBaidupan(ref HTML, ref res);
                        display(ref res);
                        if (HTML.IndexOf("<pre>") >= 0)
                        {
                            exportCatalog(ref HTML, res.index, res.title);
                        }
                        saveCollection(res);
                    }
                    finally
                    {
                        HTTPAnalysisTimer.Stop();
                    }
                }
                catch (WebException error)
                {
                    res.errorMessage = error.Message;
                    Console.WriteLine("加载失败," + error.Message);
                    if (retryTimes < MAX_RETRY_TIMES)
                    {
                        retryTimes++;
                        // Undo the loop increment so the same page is loaded again.
                        i--;
                        Console.Write("第" + retryTimes + "次重试...");
                    }
                    else
                    {
                        retryTimes = 0;
                        Console.WriteLine("已达最大重试次数,加载仍然失败\n");
                    }
                    // If more detail is ever needed, error.Status and error.Response
                    // describe the failure (e.g. the HTTP status of a protocol error).
                }
            }
            Console.WriteLine("获取HTTP所花时间:" + HTTPRequestTimer.Elapsed.ToString() + " 即" + HTTPRequestTimer.ElapsedMilliseconds + "毫秒");
            Console.WriteLine("分析HTTP所花时间:" + HTTPAnalysisTimer.Elapsed.ToString() + " 即" + HTTPAnalysisTimer.ElapsedMilliseconds + "毫秒");
            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }