/// <summary>
/// Starts the workers and processes every original link in <c>data.org_links</c>.
/// </summary>
/// <remarks>
/// Each original link is wrapped in a <c>Page</c> and processed in order. A page whose
/// result carries <c>Result.Fail</c> is registered as <c>UrlStatus.Failed</c>; any other
/// page is registered as <c>UrlStatus.ToDo</c>. A link whose processing throws is logged,
/// handed to <c>data.AddFailed</c> and counted as a failure as well. When every original
/// link failed, the status is set to <c>State.Iddle</c> after a one second pause.
/// </remarks>
public void Initialize()
{
    data.Log("->Inicializando Vinculos Originales");
    InitializeWorkers();
    data.Status = State.Working;

    int fcount = 0;
    for (int c = 0; c < data.org_links.Count; c++)
    {
        try
        {
            data.Log("->VO: " + data.org_links[c]);
            Page page = new Page(ref data, new URL(data.org_links[c]), c, true);
            page.Proccess();

            if (page.result.HasFlag(Result.Fail))
            {
                data.UpdateStatus(page, UrlStatus.Failed);
                fcount++;
            }
            else
            {
                data.UpdateStatus(page, UrlStatus.ToDo);
            }
        }
        catch (Exception ex)
        {
            // Keep the reason visible instead of swallowing it silently.
            data.Log("->VO Failed: " + data.org_links[c] + " (" + ex.Message + ")");
            data.AddFailed(data.org_links[c]);

            // A link that threw is a failed link too; without counting it the
            // "everything failed" check below could never become true.
            fcount++;
        }
    }

    // Nothing usable was produced: leave the Working state.
    if (fcount == data.org_links.Count)
    {
        Thread.Sleep(1000);
        data.Status = State.Iddle;
    }
}
/// <summary>
/// Requests <c>org_url</c> with an HTTP GET and, when the target is a page nobody else
/// has claimed, reads its body into <c>str_resp</c>.
/// </summary>
/// <returns>
/// <c>Result.Exists</c> when the final URL matches one of <c>data.links_regex</c>;
/// <c>Result.IsAsset</c> when the content type is neither "text/html" nor "application/*";
/// <c>Result.Fail</c> when the declared length exceeds <c>data.max_size_html</c> or the
/// status is not handled; <c>Result.Ok | Result.Exists</c> when another page already owns
/// the URL (its <c>final_url</c>, <c>file</c> and <c>filename</c> are copied);
/// <c>Result.Fail | Result.Exists</c> when the URL is already marked failed;
/// <c>Result.Ok</c> when this page claimed the URL and read the body.
/// </returns>
/// <remarks>
/// Exceptions thrown by the request are not caught here. The response is disposed on
/// every exit path, including exceptions.
/// </remarks>
Result Download()
{
    //TODO Get request configs by preferences
    data.Log("--->PAGE Iniciando: " + org_url.str);

    WebHeaderCollection headers = new WebHeaderCollection
    {
        { HttpRequestHeader.AcceptLanguage, data.accept_lang }
    };
    HttpWebRequest request = WebRequest.CreateHttp(org_url.str);
    request.Headers = headers;
    request.Method = "GET";
    request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/*,*/*;q=0.8";
    request.UserAgent = data.user_agent;
    request.AllowAutoRedirect = true;
    request.MaximumAutomaticRedirections = 10;
    request.Timeout = data.timeout_html;

    data.Log("--->PAGE Descargando: " + org_url.str);
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        // Redirects may have moved us; everything below works on the final address.
        final_url = new URL(response.ResponseUri.OriginalString);

        foreach (string rgx in data.links_regex)
        {
            if (Regex.IsMatch(final_url.str, rgx))
            {
                return Result.Exists;
            }
        }

        //HTML?
        string contentType = response.ContentType;
        if (!(contentType.Contains("text/html") || contentType.Contains("application/")))
        {
            return Result.IsAsset;
        }

        //SIZE (a max_size_html of zero or less disables the limit)
        if (response.ContentLength > data.max_size_html && data.max_size_html > 0)
        {
            return Result.Fail;
        }

        //FREE?
        StatusReport status = data.CheckLink(final_url);
        if (status.url_status == UrlStatus.Free)
        {
            bool claimed;
            lock (data)
            {
                claimed = data.UpdateStatus(this, UrlStatus.Iprg);
            }

            if (claimed)
            {
                using (StreamReader reader = new StreamReader(response.GetResponseStream()))
                {
                    str_resp = reader.ReadToEnd();
                }
                MakeFullPath();
                return Result.Ok;
            }

            // Lost the race: the URL was taken between CheckLink and UpdateStatus.
            // The report obtained above still says Free (and presumably carries no
            // page), so ask again to find the current owner.
            status = data.CheckLink(final_url);
        }

        switch (status.url_status)
        {
            case UrlStatus.Iprg:
            case UrlStatus.ToDo:
            case UrlStatus.Saved:
                // Point this instance at the page that already owns the URL.
                final_url = status.page.final_url;
                file = status.page.file;
                filename = status.page.filename;
                return Result.Ok | Result.Exists;

            case UrlStatus.Failed:
                return Result.Fail | Result.Exists;
        }

        return Result.Fail;
    }
}
/// <summary>
/// Requests <c>org_url</c> with an HTTP GET and, when the asset is still free, stores
/// the response body in <c>file</c>.
/// </summary>
/// <returns>
/// <c>Result.Fail | Result.Exists</c> when <c>data.CheckAsset</c> reports the URL as failed;
/// <c>Result.Ok</c> when the asset was free and has been written to disk;
/// <c>Result.Exists</c> when <c>MakeFullPath</c> returns false or for any other status.
/// </returns>
/// <remarks>
/// When <c>in_string</c> is set the body is also kept in <c>str_resp</c> and written as
/// text; otherwise it is streamed to the file as raw bytes. Exceptions thrown by the
/// request or by file I/O are not caught here. The response and all streams are disposed
/// on every exit path.
/// </remarks>
Result Download()
{
    HttpWebRequest request = WebRequest.CreateHttp(org_url.str);
    request.Method = "GET";
    request.Accept = "*/*";
    request.UserAgent = data.user_agent;
    request.AllowAutoRedirect = true;
    request.MaximumAutomaticRedirections = 10;
    request.Timeout = 5000;

    data.Log("--->ASSET Descargando: " + org_url.str);
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        final_url = new URL(response.ResponseUri.OriginalString);

        StatusReport report = data.CheckAsset(final_url);
        switch (report.url_status)
        {
            case UrlStatus.Failed:
                return Result.Fail | Result.Exists;

            case UrlStatus.Free:
                data.UpdateStatus(this, UrlStatus.Iprg);
                if (!MakeFullPath())
                {
                    return Result.Exists;
                }

                using (Stream rs = response.GetResponseStream())
                {
                    if (in_string)
                    {
                        using (StreamReader reader = new StreamReader(rs))
                        {
                            str_resp = reader.ReadToEnd();
                        }
                        File.WriteAllText(file, str_resp);
                    }
                    else
                    {
                        // File.Create truncates an existing file, so a shorter download
                        // never leaves stale bytes behind. CopyTo reads until the stream
                        // ends, which also works when the server sends no Content-Length
                        // (ContentLength == -1) or closes the connection early.
                        using (FileStream fs = File.Create(file))
                        {
                            rs.CopyTo(fs);
                        }
                    }
                }
                return Result.Ok;

            default:
                // Iprg, Saved and every other status: somebody else has this asset.
                return Result.Exists;
        }
    }
}
/// <summary>
/// Processes the <c>href</c> of one HTML node: decides whether the link should be
/// followed and, when a local copy exists or gets created, rewrites the attribute to
/// point at it.
/// </summary>
/// <param name="node">Node whose <c>href</c> attribute is inspected and possibly rewritten.</param>
/// <remarks>
/// The node is left untouched when it has no <c>href</c>, when the link matches one of
/// <c>data.links_regex</c>, when it starts with "#", or when <c>data.nav_direction</c>
/// does not allow the kind of navigation the link represents.
/// </remarks>
void LinkProc(HtmlNode node)
{
    string link = node.GetAttributeValue("href", null);
    if (link == null)
    {
        return;
    }
    // Strings are immutable: the trimmed value has to be assigned back.
    link = link.Trim();

    //Links Regex
    foreach (string rgx in data.links_regex)
    {
        if (Regex.IsMatch(link, rgx))
        {
            return;
        }
    }

    URL url_link;
    if (URL.IsRelative(link))
    {
        url_link = new URL(page.final_url.str, link);
    }
    else
    {
        url_link = new URL(link);
    }

    //Is it the same page?
    if (link == page.final_url.url_main.file)
    {
        node.SetAttributeValue("href", page.filename);
        return;
    }

    //Is it internal (fragment only)?
    if (link.StartsWith("#"))
    {
        return;
    }

    //Nav Direction
    URL.NavType nt = url_link.Compare(data.org_links[page.org_link]);
    if (nt == URL.NavType.Same)
    {
        StatusReport sr = data.CheckLink(url_link);
        if (sr.page != null)
        {
            node.SetAttributeValue("href", sr.page.PathFromRelative(page.final_url));
        }
        return;
    }

    switch (data.nav_direction)
    {
        case NavDirection.In:
            if (nt != URL.NavType.In && !(data.hnav && nt == URL.NavType.Side))
            {
                return;
            }
            break;

        case NavDirection.Out:
            if (nt != URL.NavType.Out && !(data.hnav && nt == URL.NavType.Side))
            {
                return;
            }
            break;

        case NavDirection.Static:
            if (nt != URL.NavType.Side)
            {
                return;
            }
            break;

        case NavDirection.Both:
            // Both directions are allowed; only Side links are skipped, and only
            // while hnav is off (mirrors how In and Out treat Side links).
            if (!data.hnav && nt == URL.NavType.Side)
            {
                return;
            }
            break;
    }

    //Is it already downloaded?
    StatusReport report = data.CheckLink(url_link);

    if (report.url_status == UrlStatus.Free)
    {
        //FOR DOWNLOAD
        data.Log("-->PROC: Link Pending");
        Page link_page = new Page(ref data, url_link, page.org_link);
        link_page.Proccess();

        // NOTE(review): only the exact values below are handled; a combined result
        // such as Ok | Exists matches none of these cases - confirm that is intended.
        switch (link_page.result)
        {
            case Result.Ok:
                string final_link = link_page.PathFromRelative(page.final_url);
                node.SetAttributeValue("href", final_link);
                data.UpdateStatus(link_page, UrlStatus.ToDo);
                break;

            case Result.IsAsset:
                //TODO Do something
                break;

            case Result.Fail:
                data.UpdateStatus(link_page, UrlStatus.Failed);
                break;
        }
    }
    else if (report.url_status != UrlStatus.Failed)
    {
        //DOWNLOADED
        //TODO
        string final_link = report.page.PathFromRelative(page.final_url);
        node.SetAttributeValue("href", final_link);
    }
}