/// <summary>
/// Downloads the page at <paramref name="parmuri"/>, queues its not-yet-seen image URLs for
/// download, and recursively follows its not-yet-seen links.
/// </summary>
/// <param name="parmuri">Address of the page to fetch.</param>
/// <param name="parmlevel">
/// Remaining recursion depth. 0 processes only this page; a negative value or a value
/// above 20 is treated as 20.
/// </param>
/// <remarks>
/// Best-effort: any failure while fetching or parsing this page is logged to the console
/// and the page is skipped; the exception is not propagated to the caller.
/// </remarks>
public void downloadDocument(string parmuri, int parmlevel)
{
    try
    {
        if (allUris.Count > maxCount)
        {
            return;
        }

        string sHtml;
        // WebClient is IDisposable; release it as soon as the bytes are in memory instead of
        // keeping one instance alive per recursion level.
        using (System.Net.WebClient client = new System.Net.WebClient())
        {
            sHtml = decodeDownloadedHtml(client.DownloadData(parmuri));
        }

        HashSet<string> uriset = GetImageUrlList(sHtml);
        HashSet<string> linkset = Utils.GetHrefUrlList(sHtml);

        // Keep only the image URLs that have not been recorded yet, then record them.
        uriset.ExceptWith(allUris);
        allUris.UnionWith(uriset);
        Console.WriteLine(" ****************\n\nurisetCount:" + uriset.Count);
        if (uriset.Count > 0)
        {
            downloadImageThread dlthread = new downloadImageThread(uriset, parmuri, this);
            // DownloadImageManager dequeues from uriList under lock (uriList); take the same
            // lock here so the list is never mutated from two threads at once.
            lock (uriList)
            {
                uriList.AddLast(dlthread);
            }
        }

        // Same de-duplication for page links.
        linkset.ExceptWith(allLinks);
        allLinks.UnionWith(linkset);
        Console.WriteLine(" ****************\n\nlinksetCount:" + linkset.Count);

        if (parmlevel == 0 || allUris.Count >= maxCount)
        {
            return;
        }
        if (parmlevel < 0 || parmlevel > 20)
        {
            parmlevel = 20;
        }
        parmlevel--;

        foreach (string link in linkset)
        {
            // Absolute links (http or https) are used as-is; anything else is treated as
            // relative and prefixed with the host of the current page.
            bool isAbsolute = link.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                || link.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
            string uri = isAbsolute ? link : Utils.getHost(parmuri) + link;

            if (!isNotStartHost(uri))
            {
                downloadDocument(uri, parmlevel);
            }
        }
    }
    catch (Exception ex)
    {
        // Deliberately non-fatal: one bad page must not abort the crawl, but the failure
        // should at least be visible.
        Console.WriteLine("downloadDocument failed for " + parmuri + ": " + ex.Message);
    }
}

/// <summary>
/// Decodes a downloaded page as UTF-8 and, when the markup declares a different charset
/// that this platform supports, re-decodes the bytes with that charset.
/// </summary>
private static string decodeDownloadedHtml(byte[] bytes)
{
    const string marker = "charset=";
    string html = System.Text.Encoding.UTF8.GetString(bytes);

    int markerIndex = html.IndexOf(marker, StringComparison.OrdinalIgnoreCase);
    if (markerIndex < 0)
    {
        return html;
    }

    // Skip an opening quote so both content="text/html; charset=gbk" and
    // <meta charset="gbk"> yield the bare charset name.
    int start = markerIndex + marker.Length;
    while (start < html.Length && (html[start] == '"' || html[start] == '\''))
    {
        start++;
    }

    int end = html.IndexOfAny(new char[] { '"', '\'', ';', '>', '/', ' ', '\t', '\r', '\n' }, start);
    if (end < 0)
    {
        // No terminator: the declaration is malformed, keep the UTF-8 decoding.
        return html;
    }

    string coding = html.Substring(start, end - start).Trim();
    if (coding.Length == 0
        || coding.Equals("utf8", StringComparison.OrdinalIgnoreCase)
        || coding.Equals("utf-8", StringComparison.OrdinalIgnoreCase))
    {
        return html;
    }

    try
    {
        return System.Text.Encoding.GetEncoding(coding).GetString(bytes);
    }
    catch (ArgumentException)
    {
        // Unknown or unsupported charset name: fall back to the UTF-8 decoding rather than
        // dropping the whole page.
        return html;
    }
}
/// <summary>
/// Dispatches queued image-download jobs from <c>uriList</c> onto at most five worker slots,
/// then waits until every slot reports finished.
/// </summary>
/// <remarks>
/// Dispatching continues while <c>isfinishDocument</c> is false or jobs remain queued, and
/// stops early once <c>succCount</c> exceeds <c>maxCount</c>. This method blocks the calling
/// thread until all started workers have their <c>isfinish</c> flag set.
/// </remarks>
public void DownloadImageManager()
{
    const int thcount = 5;
    // Pause between polls so the waiting loops do not spin a CPU core at 100%.
    const int pollIntervalMs = 50;
    downloadImageThread[] dlImageths = new downloadImageThread[thcount];

    while (!isfinishDocument || uriList.Count > 0)
    {
        bool dispatched = false;

        if (uriList.Count > 0)
        {
            for (int i = 0; i < thcount; i++)
            {
                // Slot is still busy: try the next one.
                if (dlImageths[i] != null && !dlImageths[i].isfinish)
                {
                    continue;
                }

                downloadImageThread next = null;
                lock (uriList)
                {
                    // Re-check under the lock: the unlocked Count read above may be stale.
                    if (uriList.Count > 0)
                    {
                        next = uriList.First.Value;
                        uriList.RemoveFirst();
                    }
                }

                if (next != null)
                {
                    dlImageths[i] = next;
                    // Start outside the lock so the queue is not held while the worker spins up.
                    next.startThread();
                    dispatched = true;
                }

                // At most one job is dispatched per pass of the outer loop.
                break;
            }
        }

        if (succCount > maxCount)
        {
            break;
        }

        if (!dispatched)
        {
            System.Threading.Thread.Sleep(pollIntervalMs);
        }
    }

    // Wait for every slot to be either unused or finished.
    while (true)
    {
        int idle = 0;
        for (int i = 0; i < thcount; i++)
        {
            if (dlImageths[i] == null || dlImageths[i].isfinish)
            {
                idle++;
            }
        }

        if (idle == thcount)
        {
            break;
        }

        System.Threading.Thread.Sleep(pollIntervalMs);
    }
}