/// <summary>
/// Scrapes the book's reader page and fills in <c>did</c>, <c>PdgPath</c>,
/// <c>DownloadUrlTemp</c> and <c>DownloadUrl</c> on <paramref name="book"/>.
/// </summary>
/// <param name="book">Book to update. If its <c>ReadBookUrl</c> is null or empty,
/// <c>GetReadBookUrl</c> is called first.</param>
/// <param name="dispatcher">Forwarded to <c>GetHtmlByWebBrowser</c>; only used when
/// <c>book.IsReadAll</c> is true.</param>
/// <returns>
/// <c>true</c> when <c>DownloadUrl</c> was set. <c>false</c> (only possible when
/// <c>IsReadAll</c> is false) if the PID lookup throws, yields null, or the extracted
/// string does not contain an <c>img…/…/</c> segment.
/// </returns>
/// <exception cref="Exception">The reader page does not match the expected pattern.</exception>
public static bool GetDownloadUrl(this BookInfo book, DependencyObject dispatcher = null)
{
    if (String.IsNullOrEmpty(book.ReadBookUrl))
    {
        GetReadBookUrl(book);
    }

    // Fully readable books are fetched through the browser-based loader, others via a plain request.
    string pageHtml = book.IsReadAll
        ? HttpWebResponseUtility.GetHtmlByWebBrowser(book.ReadBookUrl, dispatcher)
        : HttpWebResponseUtility.GetHtmlByHttpWebRequest(book.ReadBookUrl);

    Match pageMatch = Regex.Match(pageHtml, "did = \"(.*?)\"[\\s\\S]*?PdgPath = \"(.*?)\"[\\s\\S]*?var str = \"(.*?)\"");
    if (!pageMatch.Success)
    {
        throw new Exception("获取STR时出现错误!");
    }

    book.did = pageMatch.Groups[1].Value;
    book.PdgPath = pageMatch.Groups[2].Value;
    String strValue = pageMatch.Groups[3].Value;

    if (book.IsReadAll)
    {
        // The literal "{0}" argument leaves a "{0}" placeholder in the resulting template.
        book.DownloadUrlTemp = String.Format(readableDownloadUrl, strValue, "{0}");
        book.DownloadUrl = book.DownloadUrlTemp;
        return(true);
    }

    // The template is stored before the PID lookup, so it is set even when false is returned.
    book.DownloadUrlTemp = String.Format(unreadableDownloadUrl, strValue, "{0}");

    string bookPid;
    try
    {
        bookPid = BookDataBase.GetInstance().GetBookPID(book.DXID);
    }
    catch
    {
        // Any failure of the lookup is reported as "no download URL".
        return(false);
    }

    Match segment = Regex.Match(strValue, @"img\d*/(.*?)/");
    if (bookPid == null || !segment.Success)
    {
        return(false);
    }

    // Swap the path segment following "img<digits>/" for the PID.
    book.DownloadUrl = book.DownloadUrlTemp.Replace(segment.Groups[1].Value, bookPid);
    return(true);
}
/// <summary>
/// Downloads "BookContents.dat" for a book and parses it into a tree of
/// <c>BookContentNode</c> objects.
/// </summary>
/// <param name="did">Identifier passed to <c>DataExtraction.GetPdgHost</c> to obtain the host name.</param>
/// <param name="codepdgpath">Encoded path; decoded with <c>DecodePdgPath</c> before being used in the URL.</param>
/// <returns>A synthetic root node (title "root", empty <c>Lever</c>) that holds the parsed entries.</returns>
/// <remarks>
/// Each non-blank line must contain at least five '|'-separated fields:
/// title, lever, page (integer), reserved, type (integer).
/// </remarks>
public static BookContentNode GetBookContent(String did, String codepdgpath)
{
    const int HeaderLength = 0x28;

    String host = DataExtraction.GetPdgHost(did);
    String pdgpath = DecodePdgPath(codepdgpath);
    String path = "http://" + host + "/" + pdgpath + "BookContents.dat";

    using (Stream stream = HttpWebResponseUtility.CreateGetHttpResponse(path))
    {
        // Skip the fixed-size header. Stream.Read may return fewer bytes than
        // requested, so keep reading until the whole header is consumed or the
        // stream ends.
        byte[] header = new byte[HeaderLength];
        int skipped = 0;
        while (skipped < HeaderLength)
        {
            int read = stream.Read(header, skipped, HeaderLength - skipped);
            if (read <= 0)
            {
                break;
            }
            skipped += read;
        }

        using (StreamReader reader = new StreamReader(ZipUnCode(stream), Encoding.Default))
        {
            BookContentNode root = new BookContentNode() { Parent = null };
            root.Bookcontent = new BookContentInfo() { Title = "root", Lever = "", Page = 0, Reserved = null, Type = 0 };
            BookContentNode curnode = root;

            while (!reader.EndOfStream)
            {
                String str = reader.ReadLine().Trim();
                if (str.Length == 0)
                {
                    // A blank line carries no entry; indexing its fields would throw.
                    continue;
                }

                String[] strlist = str.Split('|');
                BookContentNode newnode = new BookContentNode();
                newnode.Bookcontent = new BookContentInfo()
                {
                    Title = strlist[0].Trim(),
                    Lever = strlist[1].Trim(),
                    Page = Int32.Parse(strlist[2].Trim()),
                    Reserved = strlist[3].Trim(),
                    Type = Int32.Parse(strlist[4].Trim())
                };

                // Nesting depth is derived from the length of the Lever string.
                // NOTE(review): the walk below assumes Lever lengths differ in steps
                // of 2 and that real entries have a non-empty Lever — confirm against
                // the file format.
                int newIndentation = newnode.Bookcontent.Lever.Length;
                int curIndentation = curnode.Bookcontent.Lever.Length;
                if (newIndentation <= curIndentation)
                {
                    // Same depth: attach to the current node's parent; shallower:
                    // climb one more ancestor per two characters of difference.
                    curnode = curnode.Parent;
                    for (int i = newIndentation; i != curIndentation; i += 2)
                    {
                        curnode = curnode.Parent;
                    }
                }

                newnode.Parent = curnode;
                curnode.Children.Add(newnode);
                curnode = newnode;
            }

            return(root);
        }
    }
}
/// <summary>
/// Looks up the host associated with <paramref name="did"/> in the HTML table
/// fetched from <c>PdgPathSever</c>.
/// </summary>
/// <param name="did">Value expected in the first of two adjacent <c>&lt;td&gt;</c> cells.</param>
/// <returns>The content of the cell that follows the matching one, or <c>null</c> when no row matches.</returns>
public static String GetPdgHost(String did)
{
    String source = HttpWebResponseUtility.GetHtmlByHttpWebRequest(PdgPathSever);

    // Escape the id so regex metacharacters in it are matched literally.
    // A null id is treated as empty, which is what String.Format did before.
    string regexStr = String.Format("<td>{0}</td><td>(.*?)</td>", Regex.Escape(did ?? String.Empty));

    Match m = Regex.Match(source, regexStr);
    return(m.Success ? m.Groups[1].Value : null);
}
/// <summary>
/// Builds a <c>BookInfo</c> from the capture groups of one search-result match.
/// </summary>
/// <param name="gc">Groups 1–7 are read, in order: detail URL, cover image URL, DXID,
/// SSID, title, read-URL path, brief info text.</param>
/// <returns>A new <c>BookInfo</c> populated from the groups.</returns>
private static BookInfo GetBriefBookInfo(this GroupCollection gc)
{
    BookInfo info = new BookInfo();

    info.DetailInfoUrl = gc[1].Value;

    // The cover image is fetched immediately from the captured URL.
    string coverUrl = gc[2].Value;
    info.SmallCoverImage = HttpWebResponseUtility.GetImage(coverUrl);

    info.DXID = gc[3].Value;
    info.SSID = gc[4].Value;
    info.Title = RemoveHTMLTab(gc[5].Value);

    // The captured path is appended to the currently selected platform host.
    info.ReadBookUrlTemp = Setting.platformHost[Setting.platformIndex] + gc[6].Value;
    info.IsReadAll = info.ReadBookUrlTemp.Contains("gobaoku");

    // NOTE(review): the first Replace renders as space-to-space here, which would be
    // a no-op; it may originally target a non-breaking space or an HTML entity — confirm.
    string briefMarkup = gc[7].Value.Replace(" ", " ").Replace("<br>", "\n");
    info.BriefInfo = RemoveHTMLTab(briefMarkup);
    info.PagesNum = GetPageNum(info.BriefInfo);

    return(info);
}
/// <summary>
/// Fetches one page of search results for <paramref name="keyword"/> and adds every
/// matched book to <paramref name="booklist"/>.
/// </summary>
/// <param name="booklist">List that receives the books; its <c>GetMoreable</c> flag is
/// set to whether the page contains the text "下一页".</param>
/// <param name="keyword">Search keyword; URL-encoded with <c>UrlEncode</c> before use.</param>
/// <param name="page">Page number substituted into <c>Setting.searchUrl</c>.</param>
public static void AddNewBook(this BookList booklist, String keyword, int page)
{
    String url = String.Format(Setting.searchUrl, UrlEncode(keyword), page);
    String regexStr = "url\" value=\"(.*?)\"[\\s\\S]*?封面 src='(.*?)'[\\s\\S]*?dxid\" value=\"(\\d*)[\\s\\S]*?ssid\" value=\"(\\d*)[\\s\\S]*?《(.*?)》</a>[\\s\\S]*?(/(?:gobaoku.jsp|readDetail.jsp).*?)\"[\\s\\S]*?(作者.*)";
    String html = HttpWebResponseUtility.GetHtmlByHttpWebRequest(url);
    MatchCollection m = Regex.Matches(html, regexStr);

    booklist.GetMoreable = html.Contains("下一页");

    // Copy the matches into an array so Parallel.ForEach can iterate them.
    Match[] ms = new Match[m.Count];
    m.CopyTo(ms, 0);

    // Each book is built (including its cover download in GetBriefBookInfo) on a
    // worker thread, so books are added in no particular order.
    // NOTE(review): addBook is called concurrently — confirm it is thread-safe.
    Parallel.ForEach(ms, match => { booklist.addBook(GetBriefBookInfo(match.Groups)); });
}
/// <summary>
/// Downloads a page image, passes it through <c>removeMark</c> and saves it to disk.
/// </summary>
/// <param name="url">Address of the page image.</param>
/// <param name="path">File path the processed bitmap is saved to.</param>
/// <returns>
/// <c>false</c> when no image could be obtained or the image is 1x1 pixels;
/// <c>true</c> after the bitmap has been saved.
/// </returns>
private bool DownloadPage(string url, string path)
{
    // Image and Bitmap hold GDI+ resources, so release them deterministically.
    // NOTE(review): assumes GetImage returns a fresh image owned by the caller — confirm.
    using (System.Drawing.Image image = HttpWebResponseUtility.GetImage(url))
    {
        if (image == null)
        {
            return(false);
        }

        using (Bitmap bitmap = new Bitmap(image))
        {
            System.Drawing.Size size = bitmap.Size;
            if (size.Height == 1 && size.Width == 1)
            {
                // A 1x1 image is treated as "no page".
                return(false);
            }

            removeMark(bitmap);
            bitmap.Save(path);
            return(true);
        }
    }
}
/// <summary>
/// Sets <c>book.ReadBookUrl</c>. When <c>IsReadAll</c> is false the temporary URL is
/// copied as-is; otherwise the page at the temporary URL is fetched and the target of
/// its <c>window.location.href='…'</c> assignment is used.
/// </summary>
/// <param name="book">Book whose <c>ReadBookUrl</c> is assigned.</param>
/// <exception cref="Exception">The fetched page contains no
/// <c>window.location.href='…'</c> assignment.</exception>
public static void GetReadBookUrl(this BookInfo book)
{
    if (book.IsReadAll)
    {
        string page = HttpWebResponseUtility.GetHtmlByHttpWebRequest(book.ReadBookUrlTemp);
        Match redirect = Regex.Match(page, @"window\.location\.href='(.*?)'");
        if (!redirect.Success)
        {
            throw new Exception("获取在线读地址时出现错误!");
        }

        book.ReadBookUrl = redirect.Groups[1].Value;
    }
    else
    {
        book.ReadBookUrl = book.ReadBookUrlTemp;
    }
}
/// <summary>
/// Loads the detail page of <paramref name="book"/>: fetches the large cover image
/// when a cover URL is found and appends the text of every <c>&lt;p&gt;</c> element to
/// <c>DetailInfo</c>, one line per element.
/// </summary>
/// <param name="book">Book to update. Nothing happens when <c>isDetail</c> is already true.</param>
/// <remarks>
/// <c>isDetail</c> is only set when a cover URL was found; paragraph text is appended
/// regardless.
/// </remarks>
public static void GetDetailInfo(this BookInfo book)
{
    if (book.isDetail)
    {
        return;
    }

    string page = HttpWebResponseUtility.GetHtmlByHttpWebRequest(book.DetailInfoUrl);

    Match cover = Regex.Match(page, "(http://cover.*?)\"");
    if (cover.Success)
    {
        book.BigCoverImage = HttpWebResponseUtility.GetImage(cover.Groups[1].Value);
        book.isDetail = true;
    }

    for (Match paragraph = Regex.Match(page, "<p>([\\s\\S]*?)</p>"); paragraph.Success; paragraph = paragraph.NextMatch())
    {
        // Strip markup, drop line breaks, tabs and spaces, and render '>' as "->".
        string text = DataExtraction.RemoveHTMLTab(paragraph.Groups[1].Value);
        text = text.Replace("\n", "").Replace("\r", "").Replace("\t", "").Replace(" ", "").Replace(">", "->");
        book.DetailInfo += text + "\n";
    }
}