Example #1
0
        /// <summary>
        /// Starts the workers and processes every entry of <c>data.org_links</c>,
        /// recording for each one whether it failed or is ready for further work.
        /// </summary>
        /// <remarks>
        /// A link counts as failed when its page result carries <c>Result.Fail</c>
        /// or when processing it throws. If every original link failed, the status
        /// is switched to <c>State.Iddle</c> after a one second pause.
        /// </remarks>
        public void Initialize()
        {
            data.Log("->Inicializando Vinculos Originales");

            InitializeWorkers();
            data.Status = State.Working;

            int fcount = 0;

            for (int c = 0; c < data.org_links.Count; c++)
            {
                try
                {
                    data.Log("->VO: " + data.org_links[c]);
                    Page page = new Page(ref data, new URL(data.org_links[c]), c, true);
                    page.Proccess();

                    if (page.result.HasFlag(Result.Fail))
                    {
                        data.UpdateStatus(page, UrlStatus.Failed);
                        fcount++;
                    }
                    else
                    {
                        data.UpdateStatus(page, UrlStatus.ToDo);
                    }
                }
                catch (Exception ex)
                {
                    data.AddFailed(data.org_links[c]);

                    // An exception is a failure too: without counting it here, a run in
                    // which every link throws would never reach the "all failed" branch.
                    fcount++;

                    // Surface the cause instead of swallowing it silently.
                    data.Log("->VO ERROR: " + data.org_links[c] + " (" + ex.Message + ")");
                }
            }

            // Nothing usable was produced by any original link.
            if (fcount == data.org_links.Count)
            {
                // NOTE(review): the purpose of this pause is not visible here; presumably
                // it gives the workers time to start before the state changes - confirm.
                Thread.Sleep(1000);
                data.Status = State.Iddle;
            }
        }
Example #2
0
        /// <summary>
        /// Issues an HTTP GET for <c>org_url</c>, follows redirects and decides what to
        /// do with the response based on the final URL, content type, size and the
        /// status already registered for that URL.
        /// </summary>
        /// <returns>
        /// <c>Result.Exists</c> when the final URL matches one of <c>data.links_regex</c>;
        /// <c>Result.IsAsset</c> when the content type is neither <c>text/html</c> nor
        /// <c>application/*</c>;
        /// <c>Result.Fail</c> when the declared length exceeds <c>data.max_size_html</c>
        /// (if that limit is positive) or the status is not handled;
        /// <c>Result.Ok | Result.Exists</c> when the URL is already in progress, pending
        /// or saved (this page then copies the registered page's URL and file);
        /// <c>Result.Fail | Result.Exists</c> when the URL is already marked as failed;
        /// <c>Result.Ok</c> when the URL was free, has been claimed by this page and its
        /// body has been read into <c>str_resp</c>.
        /// </returns>
        Result Download()
        {
            //TODO Get request configs by preferences
            data.Log("--->PAGE Iniciando: " + org_url.str);

            WebHeaderCollection headers = new WebHeaderCollection {
                { HttpRequestHeader.AcceptLanguage, data.accept_lang }
            };

            HttpWebRequest request = WebRequest.CreateHttp(org_url.str);

            request.Headers                      = headers;
            request.Method                       = "GET";
            request.Accept                       = "text/html,application/xhtml+xml,application/xml;q=0.9,image/*,*/*;q=0.8";
            request.UserAgent                    = data.user_agent;
            request.AllowAutoRedirect            = true;
            request.MaximumAutomaticRedirections = 10;
            request.Timeout                      = data.timeout_html;

            data.Log("--->PAGE Descargando: " + org_url.str);

            // The using block releases the connection on every exit path, including
            // the early returns and any exception thrown while inspecting the response.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                // The URL after redirects is the one that identifies the page.
                final_url = new URL(response.ResponseUri.OriginalString);

                foreach (string rgx in data.links_regex)
                {
                    if (Regex.IsMatch(final_url.str, rgx))
                    {
                        return Result.Exists;
                    }
                }

                //HTML?
                if (!(response.ContentType.Contains("text/html") ||
                      response.ContentType.Contains("application/")))
                {
                    return Result.IsAsset;
                }

                //SIZE (a non-positive max_size_html disables the limit)
                if (response.ContentLength > data.max_size_html && data.max_size_html > 0)
                {
                    return Result.Fail;
                }

                //FREE?
                StatusReport status = data.CheckLink(final_url);

                switch (status.url_status)
                {
                case UrlStatus.Iprg:
                case UrlStatus.ToDo:
                case UrlStatus.Saved:
                    // Another page already owns this URL: reuse its location.
                    final_url = status.page.final_url;
                    file      = status.page.file;
                    // NOTE(review): filename is assigned from status.page.file, not from a
                    // filename member - confirm this is intended.
                    filename  = status.page.file;

                    return Result.Ok | Result.Exists;

                case UrlStatus.Failed:
                    return Result.Fail | Result.Exists;

                case UrlStatus.Free:
                    bool claimed;

                    lock (data)
                    {
                        claimed = data.UpdateStatus(this, UrlStatus.Iprg);
                    }

                    if (!claimed)
                    {
                        // The URL could not be claimed; treat it as already existing.
                        // NOTE(review): status was read before the failed claim, so
                        // status.page may be stale or null here - confirm.
                        goto case UrlStatus.Saved;
                    }

                    using (StreamReader reader = new StreamReader(response.GetResponseStream()))
                    {
                        str_resp = reader.ReadToEnd();
                    }

                    MakeFullPath();
                    return Result.Ok;
                }

                return Result.Fail;
            }
        }
Example #3
0
        /// <summary>
        /// Issues an HTTP GET for <c>org_url</c> and, when the final URL is not yet
        /// registered, stores the response body in <c>file</c>.
        /// </summary>
        /// <returns>
        /// <c>Result.Fail | Result.Exists</c> when the asset is already marked as failed;
        /// <c>Result.Ok</c> when the asset was free and has been written to disk;
        /// <c>Result.Exists</c> in every other case, including when
        /// <c>MakeFullPath()</c> returns false.
        /// </returns>
        Result Download()
        {
            HttpWebRequest request = WebRequest.CreateHttp(org_url.str);

            request.Method                       = "GET";
            request.Accept                       = "*/*";
            request.UserAgent                    = data.user_agent;
            request.AllowAutoRedirect            = true;
            request.MaximumAutomaticRedirections = 10;
            request.Timeout                      = 5000;

            data.Log("--->ASSET Descargando: " + org_url.str);

            // The using block releases the connection on every exit path; previously
            // only the successful download path closed the response.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                // The URL after redirects is the one that identifies the asset.
                final_url = new URL(response.ResponseUri.OriginalString);

                StatusReport report = data.CheckAsset(final_url);

                switch (report.url_status)
                {
                case UrlStatus.Failed:
                    return Result.Fail | Result.Exists;

                case UrlStatus.Free:
                    data.UpdateStatus(this, UrlStatus.Iprg);

                    if (!MakeFullPath())
                    {
                        return Result.Exists;
                    }

                    using (Stream rs = response.GetResponseStream())
                    {
                        if (in_string)
                        {
                            // Keep the text in memory as well as on disk.
                            using (StreamReader reader = new StreamReader(rs))
                            {
                                str_resp = reader.ReadToEnd();
                            }

                            File.WriteAllText(file, str_resp);
                        }
                        else
                        {
                            // File.Create truncates an existing file, so a shorter asset
                            // never leaves stale bytes behind. CopyTo streams in chunks
                            // and stops at end of stream, so it works when ContentLength
                            // is unknown (-1) or larger than the data actually sent.
                            using (FileStream fs = File.Create(file))
                            {
                                rs.CopyTo(fs);
                            }
                        }
                    }

                    return Result.Ok;

                case UrlStatus.Iprg:
                case UrlStatus.Saved:
                default:
                    return Result.Exists;
                }
            }
        }
Example #4
0
        /// <summary>
        /// Processes the <c>href</c> attribute of <paramref name="node"/>: decides
        /// whether the linked page should be followed and, when a local copy exists or
        /// can be downloaded, rewrites the attribute to point at it.
        /// </summary>
        /// <param name="node">Node whose <c>href</c> attribute is inspected and possibly rewritten.</param>
        /// <remarks>
        /// The node is left untouched when it has no <c>href</c>, when the link matches
        /// one of <c>data.links_regex</c>, when it starts with <c>#</c>, or when the
        /// configured <c>data.nav_direction</c> does not allow following it.
        /// </remarks>
        void LinkProc(HtmlNode node)
        {
            string link = node.GetAttributeValue("href", null);

            if (link == null)
            {
                return;
            }

            // Strings are immutable: Trim() returns the trimmed copy, so it has to be
            // assigned back for the comparisons below to see it.
            link = link.Trim();

            //Links Regex
            foreach (string rgx in data.links_regex)
            {
                if (Regex.IsMatch(link, rgx))
                {
                    return;
                }
            }

            URL url_link;

            if (URL.IsRelative(link))
            {
                url_link = new URL(page.final_url.str, link);
            }
            else
            {
                url_link = new URL(link);
            }

            //Is it the same page?
            if (link == page.final_url.url_main.file)
            {
                node.SetAttributeValue("href", page.filename);
                return;
            }

            //Is it an in-page anchor?
            if (link.StartsWith("#"))
            {
                return;
            }

            //Nav Direction, relative to the original link this page descends from
            URL.NavType nt = url_link.Compare(data.org_links[page.org_link]);

            if (nt == URL.NavType.Same)
            {
                StatusReport sr = data.CheckLink(url_link);
                if (sr.page != null)
                {
                    node.SetAttributeValue("href", sr.page.PathFromRelative(page.final_url));
                }
                return;
            }

            switch (data.nav_direction)
            {
            case NavDirection.In:
                if (nt != URL.NavType.In &&
                    !(data.hnav && nt == URL.NavType.Side))
                {
                    return;
                }
                break;

            case NavDirection.Out:
                if (nt != URL.NavType.Out &&
                    !(data.hnav && nt == URL.NavType.Side))
                {
                    return;
                }
                break;

            case NavDirection.Static:
                if (nt != URL.NavType.Side)
                {
                    return;
                }
                break;

            case NavDirection.Both:
                // Union of the In and Out rules. The previous code had break/return
                // swapped and only followed Side links when hnav was disabled.
                if (nt != URL.NavType.In &&
                    nt != URL.NavType.Out &&
                    !(data.hnav && nt == URL.NavType.Side))
                {
                    return;
                }
                break;
            }

            //Is it already downloaded?
            StatusReport report = data.CheckLink(url_link);

            //FOR DOWNLOAD
            if (report.url_status == UrlStatus.Free)
            {
                data.Log("-->PROC: Link Pending");

                Page link_page = new Page(ref data, url_link, page.org_link);
                link_page.Proccess();

                // NOTE(review): Result values are combined with | elsewhere (for example
                // Ok | Exists); such combined values match none of the cases below -
                // confirm that ignoring them is intended.
                switch (link_page.result)
                {
                case Result.Ok:
                    string final_link = link_page.PathFromRelative(page.final_url);
                    node.SetAttributeValue("href", final_link);

                    data.UpdateStatus(link_page, UrlStatus.ToDo);
                    break;

                case Result.IsAsset:
                    //TODO Do something
                    break;

                case Result.Fail:
                    data.UpdateStatus(link_page, UrlStatus.Failed);
                    break;
                }
            }
            //DOWNLOADED
            else if (report.url_status != UrlStatus.Failed && report.page != null)
            {
                // report.page is guarded the same way as sr.page above, so a report
                // without a page is skipped instead of throwing.
                //TODO
                string final_link = report.page.PathFromRelative(page.final_url);
                node.SetAttributeValue("href", final_link);
            }
        }