/**************************************************************************/

        /// <summary>
        /// Sends a HEAD request for <paramref name="TargetUri"/> through the job master's
        /// HTTP client and reports the MIME type carried by the response.
        /// </summary>
        /// <param name="JobMaster">Supplies the HTTP client used for the request.</param>
        /// <param name="TargetUri">The URL to probe.</param>
        /// <returns>
        /// The string form of the response's MIME type, or null when no response was
        /// obtained or an exception was raised while requesting it.
        /// </returns>
        public static async Task <string> GetMimeTypeOfUrl(MacroscopeJobMaster JobMaster, Uri TargetUri)
        {
            MacroscopeHttpTwoClient Requester = JobMaster.GetHttpClient();
            string ResolvedMimeType = null;

            try
            {
                MacroscopeHttpTwoClientResponse HeadResponse = await Requester.Head(
                    TargetUri,
                    ConfigureHeadRequestHeadersCallback,
                    PostProcessRequestHttpHeadersCallback
                    );

                if (HeadResponse != null)
                {
                    ResolvedMimeType = HeadResponse.GetMimeType().ToString();
                }
            }
            catch (Exception ex)
            {
                // Failures are only logged; the log prefix names the exception family.
                string MessageFormat = (ex is MacroscopeDocumentException)
                    ? "MacroscopeDocumentException: {0}"
                    : "Exception: {0}";

                DebugMsgStatic(string.Format(MessageFormat, ex.Message));
                DebugMsgStatic(string.Format(MessageFormat, TargetUri.ToString()));
            }

            return(ResolvedMimeType);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Fetches each URL in a fixed list with a fresh client and asserts that the
        /// response status is 200 and that the body, read as a string, is not empty.
        /// </summary>
        public async Task TestHttpTwoClientGet()
        {
            MacroscopeHttpTwoClient Requester = new MacroscopeHttpTwoClient();
            List <Uri> Targets = new List <Uri>
            {
                new Uri("https://nazuke.github.io/robots.txt")
            };

            foreach (Uri Target in Targets)
            {
                this.DebugMsg(string.Format("Url: {0}", Target));

                MacroscopeHttpTwoClientResponse Fetched = await Requester.Get(
                    Target,
                    this.PreProcessHeadRequestHeadersCallback,
                    this.PostProcessRequestHttpHeadersCallback
                    );

                HttpResponseMessage Message = Fetched.GetResponse();

                this.DebugMsg(string.Format("Response.Version: {0}", Message.Version));

                int NumericStatus = (int)Message.StatusCode;
                Assert.AreEqual(200, NumericStatus);

                int BodyLength = Fetched.GetContentAsString().Length;
                Assert.Greater(BodyLength, 0);
            }
        }
        /** Execute Head Request **************************************************/

        /// <summary>
        /// Adds a "Cookie" header to the outgoing request, built from the cookies that the
        /// shared cookie container holds for the request's URI.
        /// </summary>
        /// <param name="Request">The request message to decorate; its RequestUri selects the cookies.</param>
        private void ConfigureRequestHeadersAddCookieHeaders(HttpRequestMessage Request)
        {
            CookieContainer BiscuitTin = MacroscopeHttpTwoClient.GetCookieMonster();
            string          Biscuit    = BiscuitTin.GetCookieHeader(uri: Request.RequestUri);

            // GetCookieHeader yields an empty string when no cookies apply to the URI;
            // skip the header in that case rather than sending an empty "Cookie:" line.
            if (!string.IsNullOrEmpty(Biscuit))
            {
                Request.Headers.Add(name: "Cookie", value: Biscuit);
            }

            return;
        }
Ejemplo n.º 4
0
        /**************************************************************************/

        /// <summary>
        /// Creates the robots handler with debug output suppressed, two empty lookup
        /// tables (each with an initial capacity of 8) and its own HTTP client.
        /// </summary>
        public MacroscopeRobots()
        {
            SuppressDebugMsg = true;

            // Lookup tables start empty; 8 is only the initial capacity.
            RobotSquad = new Dictionary <string, Robots>(capacity: 8);
            BadRobots  = new Dictionary <Uri, bool>(capacity: 8);

            Client = new MacroscopeHttpTwoClient();
        }
        /**************************************************************************/

        /// <summary>
        /// Downloads <paramref name="TargetUri"/> with the job master's HTTP client, writes the
        /// payload to a temporary file and loads that file as an <see cref="Image"/>.
        /// </summary>
        /// <param name="JobMaster">Supplies the HTTP client used for the download.</param>
        /// <param name="TargetUri">The URL of the image to fetch.</param>
        /// <returns>
        /// The loaded image, or null when the download failed, the payload was empty,
        /// or the payload could not be written or loaded as an image.
        /// </returns>
        public async Task <Image> LoadImageFromUri(MacroscopeJobMaster JobMaster, Uri TargetUri)
        {
            MacroscopeHttpTwoClient         Client   = JobMaster.GetHttpClient();
            MacroscopeHttpTwoClientResponse Response = null;
            Image LoadedImage = null;

            try
            {
                Response = await Client.Get(
                    TargetUri,
                    this.ConfigureHeadRequestHeadersCallback,
                    this.PostProcessRequestHttpHeadersCallback
                    );
            }
            catch (MacroscopeDocumentException ex)
            {
                this.DebugMsg(string.Format("MacroscopeDocumentException: {0}", ex.Message));
                this.DebugMsg(string.Format("MacroscopeDocumentException: {0}", TargetUri.ToString()));
            }
            catch (Exception ex)
            {
                this.DebugMsg(string.Format("Exception: {0}", ex.Message));
                this.DebugMsg(string.Format("Exception: {0}", TargetUri.ToString()));
            }

            if (Response != null)
            {
                try
                {
                    // Fetch the payload BEFORE creating the temporary file, so that a
                    // missing or empty payload does not leave an orphaned file behind.
                    byte[] ByteData = Response.GetContentAsBytes();

                    if ((ByteData == null) || (ByteData.Length == 0))
                    {
                        this.DebugMsg(string.Format("No image data received: {0}", TargetUri.ToString()));
                    }
                    else
                    {
                        // Path.GetTempFileName() creates the file on disk; register it
                        // immediately so it is tracked even if the write or load fails.
                        string ImageFilename = Path.GetTempFileName();
                        TemporaryFiles.Add(ImageFilename);

                        File.WriteAllBytes(ImageFilename, ByteData);

                        // Image.FromFile throws for data that is not a valid image;
                        // that is handled by the catch below and results in a null return.
                        LoadedImage = Image.FromFile(ImageFilename);
                    }
                }
                catch (Exception ex)
                {
                    this.DebugMsg(string.Format("Exception: {0}", ex.Message));
                }
            }

            return(LoadedImage);
        }
Ejemplo n.º 6
0
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Issues a HEAD request for this document's URL and, when a response message is
        /// available, feeds the response into <c>ProcessResponseHttpHeaders</c>.
        /// </summary>
        /// <returns>
        /// True only when a response was received and its status code is 200 (OK);
        /// false otherwise, including when the request raised an exception.
        /// </returns>
        private async Task <bool> _ExecuteHeadCheck()
        {
            bool IsAvailableCheck = false;
            MacroscopeHttpTwoClient         Client         = this.MsJobMaster.GetHttpClient();
            MacroscopeHttpTwoClientResponse ClientResponse = null;
            Uri DocUri = null;

            try
            {
                DocUri = new Uri(this.Url);

                ClientResponse = await Client.Head(
                    DocUri,
                    this.ConfigureHeadRequestHeadersCallback,
                    this.PostProcessRequestHttpHeadersCallback
                    );
            }
            catch (MacroscopeDocumentException ex)
            {
                this.DebugMsg(string.Format("_ExecuteHeadCheck :: MacroscopeDocumentException: {0}", ex.Message));
            }
            catch (Exception ex)
            {
                this.DebugMsg(string.Format("_ExecuteHeadCheck :: Exception: {0}", ex.Message));
            }

            if (ClientResponse != null)
            {
                bool HasResponseMessage = false;

                try
                {
                    var ResponseMessage = ClientResponse.GetResponse();

                    // Check for a missing response message BEFORE touching its members.
                    if (ResponseMessage != null)
                    {
                        HasResponseMessage = true;

                        this.DebugMsg(string.Format("StatusCode: {0}", ResponseMessage.StatusCode));

                        if (ResponseMessage.StatusCode == HttpStatusCode.OK)
                        {
                            IsAvailableCheck = true;
                        }
                    }
                    else
                    {
                        this.DebugMsg(string.Format("_ExecuteHeadCheck :: Exception: {0}", "Bad Response in _ExecuteHeadCheck"));
                    }
                }
                catch (Exception ex)
                {
                    this.DebugMsg(string.Format("_ExecuteHeadCheck :: Exception: {0}", ex.Message));
                }

                // Header processing dereferences the response message, so it is only
                // attempted when one was actually obtained.
                if (HasResponseMessage)
                {
                    this.ProcessResponseHttpHeaders(Response: ClientResponse);
                }
            }

            return(IsAvailableCheck);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Analyzes an httpbin.org redirect chain of MaxHops redirects and asserts that
        /// the analyzed chain contains MaxHops + 1 entries.
        /// </summary>
        public async Task TestAnalyzeRedirectChains()
        {
            MacroscopeHttpTwoClient         Requester     = new MacroscopeHttpTwoClient();
            MacroscopeRedirectChainAnalysis ChainAnalysis = new MacroscopeRedirectChainAnalysis(Client: Requester);

            // Raise the hop limit so that it does not interfere with the chain under test.
            MacroscopePreferencesManager.SetRedirectChainsMaxHops(Max: 100);

            string FirstHopUrl  = string.Format("https://httpbin.org/redirect/{0}", MaxHops);
            string SecondHopUrl = string.Format("https://httpbin.org/redirect/{0}", MaxHops - 1);

            List <MacroscopeRedirectChainDocStruct> Chain = await ChainAnalysis.AnalyzeRedirectChains(
                StatusCode : HttpStatusCode.Redirect,
                StartUrl : FirstHopUrl,
                RedirectUrl : SecondHopUrl
                );

            this.DebugMsg(string.Format("AnalyzedRedirectChain: {0}", Chain.GetHashCode()));

            int ExpectedCount = MaxHops + 1;
            Assert.AreEqual(ExpectedCount, Chain.Count);
        }
Ejemplo n.º 8
0
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Performs a GET for <paramref name="TargetUri"/> with the job master's HTTP client
        /// and returns the response body as raw bytes.
        /// </summary>
        /// <param name="JobMaster">Supplies the HTTP client used for the request.</param>
        /// <param name="TargetUri">The URL to download.</param>
        /// <returns>
        /// The response content as a byte array, or null when no response was obtained
        /// or reading the content raised an exception.
        /// </returns>
        private async Task <byte[]> _LoadMemoryStreamFromUrl(MacroscopeJobMaster JobMaster, Uri TargetUri)
        {
            MacroscopeHttpTwoClient         Requester = JobMaster.GetHttpClient();
            MacroscopeHttpTwoClientResponse Fetched   = null;

            try
            {
                Fetched = await Requester.Get(
                    TargetUri,
                    this.ConfigureHeadRequestHeadersCallback,
                    this.PostProcessRequestHttpHeadersCallback
                    );
            }
            catch (Exception ex)
            {
                // Failures are only logged; the log prefix names the exception family.
                string MessageFormat = (ex is MacroscopeDocumentException)
                    ? "MacroscopeDocumentException: {0}"
                    : "Exception: {0}";

                this.DebugMsg(string.Format(MessageFormat, ex.Message));
                this.DebugMsg(string.Format(MessageFormat, TargetUri.ToString()));
            }

            if (Fetched == null)
            {
                this.DebugMsg("NULL");
                return(null);
            }

            byte[] Payload = null;

            try
            {
                Payload = Fetched.GetContentAsBytes();
            }
            catch (Exception ex)
            {
                this.DebugMsg(string.Format("Exception: {0}", ex.Message));
            }

            return(Payload);
        }
        /**************************************************************************/

        /// <summary>
        /// Downloads the published version string from
        /// <c>MacroscopeConstants.CheckForUpdateUrl</c> and compares it with the running version.
        /// </summary>
        /// <returns>
        /// The result of <c>IsVersionNewer</c> for the current and published versions;
        /// false when no version data could be downloaded.
        /// </returns>
        public async Task <bool> PhoneHome()
        {
            MacroscopeHttpUrlLoader UrlLoader = new MacroscopeHttpUrlLoader();
            MacroscopeHttpTwoClient Client    = new MacroscopeHttpTwoClient();
            Uri TargetUri = new Uri(MacroscopeConstants.CheckForUpdateUrl);

            byte[] Data = await UrlLoader.LoadImmediateDataFromUrl(Client : Client, TargetUri : TargetUri);

            // Encoding.GetString throws on a null array, and an empty payload cannot name
            // a newer version, so treat both as "no update available".
            // NOTE(review): assumes the loader reports failure by returning null - confirm.
            if ((Data == null) || (Data.Length == 0))
            {
                return(false);
            }

            string PublishedVersion = System.Text.Encoding.UTF8.GetString(Data);
            string CurrentVersion   = Macroscope.GetVersion();

            return(this.IsVersionNewer(CurrentVersion: CurrentVersion, CompareVersion: PublishedVersion));
        }
        /**************************************************************************/

        /// <summary>
        /// Copies the interesting parts of an HTTP response onto this document: status code
        /// and redirect flag, raw status line and headers, server name, content type,
        /// length and encoding, the date headers, HSTS flag, redirect location, Link headers,
        /// ETag, authentication challenge, derived document type, and cookies.
        /// </summary>
        /// <remarks>
        /// Most header sections first read the typed header property; if that throws, the
        /// section falls back to looking the header up by name in the response headers and
        /// then in the content headers.
        /// </remarks>
        /// <param name="Response">Client response wrapper; its response message must not be null.</param>
        private void ProcessResponseHttpHeaders(MacroscopeHttpTwoClientResponse Response)
        {
            HttpResponseMessage ResponseMessage = Response.GetResponse();
            HttpResponseHeaders ResponseHeaders = ResponseMessage.Headers;
            HttpContentHeaders  ContentHeaders  = ResponseMessage.Content.Headers;

            /** Status Code ------------------------------------------------------ **/

            this.SetStatusCode(ResponseMessage.StatusCode);

            this.SetErrorCondition(ResponseMessage.ReasonPhrase);

            try
            {
                switch (this.GetStatusCode())
                {
                // 200 Range

                case HttpStatusCode.OK:
                    this.SetIsNotRedirect();
                    break;

                // 300 Range

                case HttpStatusCode.Moved:
                    this.SetErrorCondition(HttpStatusCode.Moved.ToString());
                    this.SetIsRedirect();
                    break;

                case HttpStatusCode.SeeOther:
                    this.SetErrorCondition(HttpStatusCode.SeeOther.ToString());
                    this.SetIsRedirect();
                    break;

                case HttpStatusCode.Found:
                    this.SetErrorCondition(HttpStatusCode.Redirect.ToString());
                    this.SetIsRedirect();
                    break;

                // 400 Range

                case HttpStatusCode.BadRequest:
                    this.SetErrorCondition(HttpStatusCode.BadRequest.ToString());
                    this.SetIsNotRedirect();
                    break;

                case HttpStatusCode.Unauthorized:
                    this.SetErrorCondition(HttpStatusCode.Unauthorized.ToString());
                    this.SetIsNotRedirect();
                    break;

                case HttpStatusCode.PaymentRequired:
                    this.SetErrorCondition(HttpStatusCode.PaymentRequired.ToString());
                    this.SetIsNotRedirect();
                    break;

                case HttpStatusCode.Forbidden:
                    this.SetErrorCondition(HttpStatusCode.Forbidden.ToString());
                    this.SetIsNotRedirect();
                    break;

                case HttpStatusCode.NotFound:
                    this.SetErrorCondition(HttpStatusCode.NotFound.ToString());
                    this.SetIsNotRedirect();
                    break;

                case HttpStatusCode.MethodNotAllowed:
                    this.SetErrorCondition(HttpStatusCode.MethodNotAllowed.ToString());
                    this.SetIsNotRedirect();
                    break;

                case HttpStatusCode.Gone:
                    this.SetErrorCondition(HttpStatusCode.Gone.ToString());
                    this.SetIsNotRedirect();
                    break;

                case HttpStatusCode.RequestUriTooLong:
                    this.SetErrorCondition(HttpStatusCode.RequestUriTooLong.ToString());
                    this.SetIsNotRedirect();
                    break;

                // Unhandled: any other status code is only logged (via the catch below);
                // the redirect flag is left as it was.

                default:
                    throw new MacroscopeDocumentException("Unhandled HttpStatusCode Type");
                }
            }
            catch (MacroscopeDocumentException ex)
            {
                this.DebugMsg(string.Format("MacroscopeDocumentException: {0}", ex.Message));
            }

            /** Raw HTTP Headers ------------------------------------------------- **/

            this.SetHttpResponseStatusLine(Response: Response);

            this.SetHttpResponseHeaders(Response: Response);

            /** PROBE HTTP HEADERS ----------------------------------------------- **/

            /** Server HTTP Header ----------------------------------------------- **/
            try
            {
                HttpHeaderValueCollection <ProductInfoHeaderValue> HeaderValue = ResponseHeaders.Server;
                if (HeaderValue != null)
                {
                    if (HeaderValue.FirstOrDefault() != null)
                    {
                        this.SetServerName(HeaderValue.FirstOrDefault().ToString());
                    }
                }
            }
            catch (Exception ex)
            {
                this.DebugMsg(ex.Message);
                FindHttpResponseHeaderCallback Callback = delegate(IEnumerable <string> HeaderValues)
                {
                    this.SetServerName(HeaderValues.First().ToString());
                    return(true);
                };
                if (!this.FindHttpResponseHeader(ResponseHeaders: ResponseHeaders, HeaderName: "server", Callback: Callback))
                {
                    this.FindHttpContentHeader(ContentHeaders: ContentHeaders, HeaderName: "server", Callback: Callback);
                }
            }

            this.DebugMsg(string.Format("this.ServerName: {0}", this.ServerName));

            /** Content-Type HTTP Header ----------------------------------------- **/
            try
            {
                MediaTypeHeaderValue HeaderValue = ContentHeaders.ContentType;
                if (HeaderValue != null)
                {
                    this.DebugMsg(string.Format("HeaderValue: {0}", HeaderValue));
                    this.MimeType = HeaderValue.MediaType;
                    if (HeaderValue.CharSet != null)
                    {
                        this.SetCharacterSet(HeaderValue.CharSet);
                        // TODO: Implement character set probing
                        this.SetCharacterEncoding(NewEncoding: new UTF8Encoding());
                    }
                }
            }
            catch (Exception ex)
            {
                this.DebugMsg(string.Format("MediaType Exception: {0}", ex.Message));
                this.MimeType = MacroscopeConstants.DefaultMimeType;
            }

            this.DebugMsg(string.Format("this.MimeType: {0}", this.MimeType));

            /** Content-Length HTTP Header --------------------------------------- **/
            try
            {
                long? HeaderValue = null;
                if (ContentHeaders.Contains("Content-Length"))
                {
                    HeaderValue = ContentHeaders.ContentLength;
                }
                if (HeaderValue != null)
                {
                    this.ContentLength = HeaderValue;
                }
                else
                {
                    this.ContentLength = 0;
                }
            }
            catch (Exception ex)
            {
                this.DebugMsg(ex.Message);
                this.SetContentLength(Length: 0);
                FindHttpResponseHeaderCallback Callback = delegate(IEnumerable <string> HeaderValues)
                {
                    this.SetContentLength(Length: long.Parse(HeaderValues.FirstOrDefault()));
                    return(true);
                };
                if (!this.FindHttpResponseHeader(ResponseHeaders: ResponseHeaders, HeaderName: "content-length", Callback: Callback))
                {
                    this.FindHttpContentHeader(ContentHeaders: ContentHeaders, HeaderName: "content-length", Callback: Callback);
                }
            }

            this.DebugMsg(string.Format("this.GetContentLength(): {0}", this.GetContentLength()));

            /** Content-Encoding HTTP Header ------------------------------------- **/
            try
            {
                ICollection <string> HeaderValue = ContentHeaders.ContentEncoding;
                if (HeaderValue != null)
                {
                    this.ContentEncoding = HeaderValue.FirstOrDefault();
                }
            }
            catch (Exception ex)
            {
                this.DebugMsg(ex.Message);
                FindHttpResponseHeaderCallback Callback = delegate(IEnumerable <string> HeaderValues)
                {
                    this.ContentEncoding = HeaderValues.FirstOrDefault();
                    return(true);
                };
                if (!this.FindHttpResponseHeader(ResponseHeaders: ResponseHeaders, HeaderName: "content-encoding", Callback: Callback))
                {
                    this.FindHttpContentHeader(ContentHeaders: ContentHeaders, HeaderName: "content-encoding", Callback: Callback);
                }
            }

            // A non-empty Content-Encoding marks the document as compressed, unless a
            // compression method has already been recorded.
            if (string.IsNullOrEmpty(this.CompressionMethod) && (!string.IsNullOrEmpty(this.ContentEncoding)))
            {
                this.IsCompressed      = true;
                this.CompressionMethod = this.ContentEncoding;
            }

            this.DebugMsg(string.Format("this.ContentEncoding: {0}", this.ContentEncoding));
            this.DebugMsg(string.Format("this.CompressionMethod: {0}", this.CompressionMethod));

            /** Date HTTP Header ------------------------------------------------- **/
            try
            {
                DateTimeOffset? HeaderValue = ResponseHeaders.Date;
                if (HeaderValue != null)
                {
                    this.DateServer = MacroscopeDateTools.ParseHttpDate(DateString: HeaderValue.ToString());
                }
            }
            catch (Exception ex)
            {
                this.DebugMsg(ex.Message);
                this.DateServer = new DateTime();
                FindHttpResponseHeaderCallback Callback = delegate(IEnumerable <string> HeaderValues)
                {
                    this.DateServer = MacroscopeDateTools.ParseHttpDate(DateString: HeaderValues.First().ToString());
                    return(true);
                };
                if (!this.FindHttpResponseHeader(ResponseHeaders: ResponseHeaders, HeaderName: "date", Callback: Callback))
                {
                    this.FindHttpContentHeader(ContentHeaders: ContentHeaders, HeaderName: "date", Callback: Callback);
                }
            }

            this.DebugMsg(string.Format("this.DateServer: {0}", this.DateServer));

            /** Last-Modified HTTP Header ---------------------------------------- **/
            try
            {
                DateTimeOffset? HeaderValue = ContentHeaders.LastModified;
                if (HeaderValue != null)
                {
                    this.DateModified = MacroscopeDateTools.ParseHttpDate(DateString: HeaderValue.ToString());
                }
            }
            catch (Exception ex)
            {
                this.DebugMsg(ex.Message);
                this.DateModified = new DateTime();
                FindHttpResponseHeaderCallback Callback = delegate(IEnumerable <string> HeaderValues)
                {
                    this.DateModified = MacroscopeDateTools.ParseHttpDate(DateString: HeaderValues.First().ToString());
                    return(true);
                };
                if (!this.FindHttpResponseHeader(ResponseHeaders: ResponseHeaders, HeaderName: "last-modified", Callback: Callback))
                {
                    this.FindHttpContentHeader(ContentHeaders: ContentHeaders, HeaderName: "last-modified", Callback: Callback);
                }
            }

            this.DebugMsg(string.Format("this.DateModified: {0}", this.DateModified));

            /** Expires HTTP Header ---------------------------------------------- **/
            try
            {
                DateTimeOffset? HeaderValue = ContentHeaders.Expires;
                if (HeaderValue != null)
                {
                    this.DateExpires = MacroscopeDateTools.ParseHttpDate(DateString: HeaderValue.ToString());
                }
            }
            catch (Exception ex)
            {
                this.DebugMsg(ex.Message);
                this.DateExpires = new DateTime();
                FindHttpResponseHeaderCallback Callback = delegate(IEnumerable <string> HeaderValues)
                {
                    this.DateExpires = MacroscopeDateTools.ParseHttpDate(DateString: HeaderValues.First().ToString());
                    return(true);
                };
                if (!this.FindHttpResponseHeader(ResponseHeaders: ResponseHeaders, HeaderName: "expires", Callback: Callback))
                {
                    this.FindHttpContentHeader(ContentHeaders: ContentHeaders, HeaderName: "expires", Callback: Callback);
                }
            }

            this.DebugMsg(string.Format("this.DateExpires: {0}", this.DateExpires));

            /** HSTS Policy HTTP Header ------------------------------------------ **/
            // https://www.owasp.org/index.php/HTTP_Strict_Transport_Security_Cheat_Sheet
            // Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
            {
                // Only the presence of the header is recorded; its directives are not parsed.
                FindHttpResponseHeaderCallback Callback = delegate(IEnumerable <string> HeaderValues)
                {
                    this.HypertextStrictTransportPolicy = true;
                    return(true);
                };
                if (!this.FindHttpResponseHeader(ResponseHeaders: ResponseHeaders, HeaderName: "strict-transport-security", Callback: Callback))
                {
                    this.FindHttpContentHeader(ContentHeaders: ContentHeaders, HeaderName: "strict-transport-security", Callback: Callback);
                }
            }

            this.DebugMsg(string.Format("this.HypertextStrictTransportPolicy: {0}", this.HypertextStrictTransportPolicy));

            /** Location (Redirect) HTTP Header ---------------------------------- **/
            try
            {
                Uri HeaderValue = ResponseHeaders.Location;
                if (HeaderValue != null)
                {
                    this.SetUrlRedirectTo(Url: HeaderValue.ToString());
                }
            }
            catch (Exception ex)
            {
                this.DebugMsg(ex.Message);
                FindHttpResponseHeaderCallback Callback = delegate(IEnumerable <string> HeaderValues)
                {
                    this.SetUrlRedirectTo(Url: HeaderValues.FirstOrDefault().ToString());
                    return(true);
                };
                if (!this.FindHttpResponseHeader(ResponseHeaders: ResponseHeaders, HeaderName: "location", Callback: Callback))
                {
                    this.FindHttpContentHeader(ContentHeaders: ContentHeaders, HeaderName: "location", Callback: Callback);
                }
            }

            this.DebugMsg(string.Format("this.GetIsRedirect(): {0}", this.GetIsRedirect()));
            this.DebugMsg(string.Format("this.GetUrlRedirectTo(): {0}", this.GetUrlRedirectTo()));

            /** Link HTTP Headers ------------------------------------------------ **/
            {
                FindHttpResponseHeaderCallback Callback = delegate(IEnumerable <string> HeaderValues)
                {
                    foreach (string HeaderValue in HeaderValues)
                    {
                        this.DebugMsg(string.Format("HeaderValue: {0}", HeaderValue));
                        this.ProcessHttpLinkHeader(HttpLinkHeader: HeaderValue);
                    }
                    return(true);
                };
                if (!this.FindHttpResponseHeader(ResponseHeaders: ResponseHeaders, HeaderName: "link", Callback: Callback))
                {
                    this.FindHttpContentHeader(ContentHeaders: ContentHeaders, HeaderName: "link", Callback: Callback);
                }
            }

            /** ETag HTTP Header ------------------------------------------------- **/
            try
            {
                EntityTagHeaderValue HeaderValue = ResponseHeaders.ETag;
                if (HeaderValue != null)
                {
                    string ETagValue = HeaderValue.Tag;
                    if (!string.IsNullOrEmpty(ETagValue))
                    {
                        this.SetEtag(ETagValue);
                    }
                }
            }
            catch (Exception ex)
            {
                this.DebugMsg(ex.Message);
                FindHttpResponseHeaderCallback Callback = delegate(IEnumerable <string> HeaderValues)
                {
                    string HeaderValue = HeaderValues.FirstOrDefault();
                    if (!string.IsNullOrEmpty(HeaderValue))
                    {
                        this.SetEtag(HeaderValue);
                    }
                    return(true);
                };
                if (!this.FindHttpResponseHeader(ResponseHeaders: ResponseHeaders, HeaderName: "etag", Callback: Callback))
                {
                    this.FindHttpContentHeader(ContentHeaders: ContentHeaders, HeaderName: "etag", Callback: Callback);
                }
            }

            this.DebugMsg(string.Format("this.Etag: {0}", this.Etag));

            /** WWW-AUTHENTICATE HTTP Header ------------------------------------- **/
            // Reference: http://httpbin.org/basic-auth/user/passwd
            try
            {
                HttpHeaderValueCollection <AuthenticationHeaderValue> HeaderValue = ResponseHeaders.WwwAuthenticate;
                if (HeaderValue != null)
                {
                    string Scheme = null;
                    string Realm  = null;
                    // The scheme of the LAST challenge wins; the realm is the last quoted
                    // parameter value that matched.
                    // NOTE(review): a challenge without a parameter makes Regex.Match throw
                    // on null, which aborts this whole section via the catch below - confirm
                    // whether that is intended.
                    foreach (AuthenticationHeaderValue AuthenticationValue in HeaderValue)
                    {
                        Scheme = AuthenticationValue.Scheme;
                        string Parameter = AuthenticationValue.Parameter;
                        Match  Matched   = Regex.Match(Parameter, "^[^\"]+\"([^\"]+)\"");
                        if (Matched.Success)
                        {
                            Realm = Matched.Groups[1].Value;
                        }
                    }
                    if (!string.IsNullOrEmpty(Scheme) && !string.IsNullOrEmpty(Realm))
                    {
                        // Ordinal, case-insensitive comparison: ToLower() is culture-sensitive
                        // and fails to produce "basic" from "BASIC" under e.g. Turkish casing rules.
                        if (string.Equals(Scheme, "basic", StringComparison.OrdinalIgnoreCase))
                        {
                            this.SetAuthenticationType(MacroscopeConstants.AuthenticationType.BASIC);
                            this.SetAuthenticationRealm(Realm);
                        }
                        else
                        {
                            this.SetAuthenticationType(MacroscopeConstants.AuthenticationType.UNSUPPORTED);
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                this.DebugMsg(ex.Message);
            }
            this.DebugMsg(string.Format("WwwAuthenticate: \"{0}\", Realm: \"{1}\"", this.GetAuthenticationType(), this.GetAuthenticationRealm()));

            /** Process Dates ---------------------------------------------------- **/
            {
                // A date still equal to the default DateTime counts as "not set": the
                // server date falls back to now (UTC), the modified date to the server date.
                if (this.DateServer.Date == new DateTime().Date)
                {
                    this.DateServer = DateTime.UtcNow;
                }
                if (this.DateModified.Date == new DateTime().Date)
                {
                    this.DateModified = this.DateServer;
                }
            }

            /** Process MIME Type ------------------------------------------------ **/
            {
                // The "+" in "xhtml+xml" must be escaped; unescaped it is a quantifier and
                // the pattern would never match the literal MIME type "application/xhtml+xml".
                Regex reIsHtml       = new Regex(@"^(text/html|application/xhtml\+xml)", RegexOptions.IgnoreCase);
                Regex reIsCss        = new Regex(@"^text/css", RegexOptions.IgnoreCase);
                Regex reIsJavascript = new Regex(@"^(application/javascript|text/javascript)", RegexOptions.IgnoreCase);
                Regex reIsImage      = new Regex(@"^image/(gif|png|jpeg|bmp|webp|vnd.microsoft.icon|x-icon)", RegexOptions.IgnoreCase);
                Regex reIsPdf        = new Regex(@"^application/pdf", RegexOptions.IgnoreCase);
                Regex reIsAudio      = new Regex(@"^audio/[a-z0-9]+", RegexOptions.IgnoreCase);
                Regex reIsVideo      = new Regex(@"^video/[a-z0-9]+", RegexOptions.IgnoreCase);
                Regex reIsXml        = new Regex(@"^(application|text)/(atom\+xml|xml)", RegexOptions.IgnoreCase);
                Regex reIsText       = new Regex(@"^(text)/(plain)", RegexOptions.IgnoreCase);

                // Regex.IsMatch throws on null input; a missing MIME type is matched as an
                // empty string and therefore ends up classified as BINARY.
                string EffectiveMimeType = this.MimeType ?? string.Empty;

                if (reIsHtml.IsMatch(EffectiveMimeType))
                {
                    this.SetDocumentType(Type: MacroscopeConstants.DocumentType.HTML);
                }
                else
                if (reIsCss.IsMatch(EffectiveMimeType))
                {
                    this.SetDocumentType(Type: MacroscopeConstants.DocumentType.CSS);
                }
                else
                if (reIsJavascript.IsMatch(EffectiveMimeType))
                {
                    this.SetDocumentType(Type: MacroscopeConstants.DocumentType.JAVASCRIPT);
                }
                else
                if (reIsImage.IsMatch(EffectiveMimeType))
                {
                    this.SetDocumentType(Type: MacroscopeConstants.DocumentType.IMAGE);
                }
                else
                if (reIsPdf.IsMatch(EffectiveMimeType))
                {
                    this.SetDocumentType(Type: MacroscopeConstants.DocumentType.PDF);
                }
                else
                if (reIsAudio.IsMatch(EffectiveMimeType))
                {
                    this.SetDocumentType(Type: MacroscopeConstants.DocumentType.AUDIO);
                }
                else
                if (reIsVideo.IsMatch(EffectiveMimeType))
                {
                    this.SetDocumentType(Type: MacroscopeConstants.DocumentType.VIDEO);
                }
                else
                if (reIsXml.IsMatch(EffectiveMimeType))
                {
                    this.SetDocumentType(Type: MacroscopeConstants.DocumentType.XML);
                }
                else
                if (reIsText.IsMatch(EffectiveMimeType))
                {
                    this.SetDocumentType(Type: MacroscopeConstants.DocumentType.TEXT);
                }
                else
                {
                    this.SetDocumentType(Type: MacroscopeConstants.DocumentType.BINARY);
                }
            }

            /** Process Cookies -------------------------------------------------- **/
            // https://stackoverflow.com/questions/29224734/how-to-read-cookies-from-httpresponsemessage
            {
                try
                {
                    // Cookies are read from the shared cookie container for this document's
                    // URI, not parsed from the response headers directly.
                    CookieContainer  CookieMonster = MacroscopeHttpTwoClient.GetCookieMonster();
                    CookieCollection Biscuits      = CookieMonster.GetCookies(uri: this.GetUri());
                    this.AddCookies(Cookies: Biscuits);
                    this.DebugMsg("cookies");
                }
                catch (Exception ex)
                {
                    this.DebugMsg(ex.Message);
                }
            }

            return;
        }
        /**************************************************************************/

        /// <summary>
        /// Builds a redirect chain analysis object around the supplied HTTP client.
        /// Debug output is suppressed and the redirect chain document cache starts empty.
        /// </summary>
        /// <param name="Client">HTTP client assigned to this instance's HttpClient member.</param>
        public MacroscopeRedirectChainAnalysis(MacroscopeHttpTwoClient Client)
        {
            // The parameterless base constructor is invoked implicitly.
            this.SuppressDebugMsg      = true;
            this.RedirectChainDocCache = new Dictionary <string, MacroscopeRedirectChainDocStruct>();
            this.HttpClient            = Client;
        }
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Downloads this document with a GET request and handles the body as XML.
        /// Records the response headers, content length, checksum and document text,
        /// optionally runs the custom filters and data extractors over the raw body,
        /// and, when the parsed document is detected as a sitemap, marks the document
        /// as SITEMAPXML and processes its outlinks.
        /// </summary>
        /// <remarks>
        /// Exceptions raised by the request are not rethrown: the status code is set to
        /// BadRequest, a remark is added, and the exception message is passed to
        /// ProcessErrorCondition at the end of the method.
        /// </remarks>
        private async Task _ProcessXmlPage()
        {
            MacroscopeHttpTwoClient         HttpTwoClient  = this.DocCollection.GetJobMaster().GetHttpClient();
            MacroscopeHttpTwoClientResponse ClientResponse = null;
            XmlDocument ParsedXml  = null;
            string      FetchError = null;

            try
            {
                ClientResponse = await HttpTwoClient.Get(
                    this.GetUri(),
                    this.ConfigureXmlPageRequestHeadersCallback,
                    this.PostProcessRequestHttpHeadersCallback
                    );
            }
            catch (Exception ex)
            {
                // Both failure kinds are handled identically; only the debug text differs.
                string DebugFormat = (ex is MacroscopeDocumentException)
                    ? "_ProcessXmlPage :: MacroscopeDocumentException: {0}"
                    : "_ProcessXmlPage :: Exception: {0}";

                this.DebugMsg(string.Format(DebugFormat, ex.Message));
                FetchError = ex.Message;
                this.SetStatusCode(HttpStatusCode.BadRequest);
                this.AddRemark("_ProcessXmlPage", ex.Message);
            }

            if (ClientResponse != null)
            {
                string Body = "";

                this.ProcessResponseHttpHeaders(Response: ClientResponse);

                /** Get Response Body ---------------------------------------------- **/

                try
                {
                    this.DebugMsg(string.Format("MIME TYPE: {0}", this.MimeType));

                    Body = ClientResponse.GetContentAsString();

                    // Length is measured in characters, not bytes.
                    this.SetContentLength(Length: Body.Length);
                    this.SetWasDownloaded(true);
                    this.SetChecksum(Body);
                }
                catch (Exception ex)
                {
                    // Any failure while reading the body leaves the document with an empty body.
                    this.DebugMsg(string.Format("Exception: {0}", ex.Message));
                    this.SetStatusCode(HttpStatusCode.BadRequest);
                    Body = "";
                    this.SetContentLength(Length: 0);
                }

                bool HasBody = !string.IsNullOrEmpty(Body);

                /** Parse XML ------------------------------------------------------ **/

                if (HasBody)
                {
                    ParsedXml = new XmlDocument();

                    try
                    {
                        ParsedXml.LoadXml(Body);
                    }
                    catch (Exception ex)
                    {
                        // A parse failure is only logged; ParsedXml stays allocated but without content.
                        string DebugFormat = (ex is XmlException) ? "XmlException: {0}" : "Exception: {0}";
                        this.DebugMsg(string.Format(DebugFormat, ex.Message));
                    }

                    this.DebugMsg(string.Format("XmlDoc: {0}", ParsedXml));
                }
                else
                {
                    this.DebugMsg(string.Format("RawData: {0}", "EMPTY"));
                }

                /** Custom Filters ------------------------------------------------- **/

                if (
                    HasBody &&
                    MacroscopePreferencesManager.GetCustomFiltersEnable() &&
                    MacroscopePreferencesManager.GetCustomFiltersApplyToXml())
                {
                    MacroscopeCustomFilters Filters = this.DocCollection.GetJobMaster().GetCustomFilter();

                    if ((Filters != null) && Filters.IsEnabled())
                    {
                        this.ProcessGenericCustomFiltered(CustomFilter: Filters, GenericText: Body);
                    }
                }

                /** Data Extractors ------------------------------------------------ **/

                if (
                    HasBody &&
                    MacroscopePreferencesManager.GetDataExtractorsEnable() &&
                    MacroscopePreferencesManager.GetDataExtractorsApplyToXml())
                {
                    this.ProcessGenericDataExtractors(GenericText: Body);
                }

                /** Sitemap Detection ---------------------------------------------- **/

                if (
                    (ParsedXml != null) &&
                    (ParsedXml.DocumentElement != null) &&
                    this.DetectSitemapXmlDocument(ParsedXml))
                {
                    this.DebugMsg(string.Format("ProcessXmlPage: {0} :: {1}", "SITEMAP DETECTED", this.GetUrl()));
                    this.SetDocumentType(Type: MacroscopeConstants.DocumentType.SITEMAPXML);
                    this.ProcessSitemapXmlOutlinks(ParsedXml);
                }

                /** Document Text -------------------------------------------------- **/

                if (Body != null)
                {
                    this.SetDocumentText(Text: Body);
                }
            }

            if (FetchError != null)
            {
                this.ProcessErrorCondition(FetchError);
            }
        }
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Probes this image document with a HEAD request, records the response headers,
        /// and sets the title from the text after the last slash in DocUrl.
        /// When QR code detection is enabled in the preferences, the image is downloaded
        /// to a file and decoded; decoded text that parses as a URI is added as a
        /// QRCODE outlink.
        /// </summary>
        /// <remarks>
        /// Exceptions raised by the request are not rethrown: the status code is set to
        /// BadRequest, a remark is added, and the exception message is stored in
        /// ErrorCondition at the end of the method.
        /// </remarks>
        private async Task _ProcessImagePage()
        {
            MacroscopeHttpTwoClient         HttpTwoClient  = this.DocCollection.GetJobMaster().GetHttpClient();
            MacroscopeHttpTwoClientResponse ClientResponse = null;
            string FetchError = null;

            try
            {
                ClientResponse = await HttpTwoClient.Head(
                    this.GetUri(),
                    this.ConfigureImagePageRequestHeadersCallback,
                    this.PostProcessRequestHttpHeadersCallback
                    );
            }
            catch (Exception ex)
            {
                // Both failure kinds are handled identically; only the debug text differs.
                string DebugFormat = (ex is MacroscopeDocumentException)
                    ? "_ProcessImagePage :: MacroscopeDocumentException: {0}"
                    : "_ProcessImagePage :: Exception: {0}";

                this.DebugMsg(string.Format(DebugFormat, ex.Message));
                FetchError = ex.Message;
                this.SetStatusCode(HttpStatusCode.BadRequest);
                this.AddRemark("_ProcessImagePage", ex.Message);
            }

            if (ClientResponse != null)
            {
                this.ProcessResponseHttpHeaders(Response: ClientResponse);

                /** Title ---------------------------------------------------------- **/
                {
                    // The pattern is anchored at the end, so the first match is the only candidate.
                    Match  TailMatch     = Regex.Match(this.DocUrl, "/([^/]+)$");
                    string DocumentTitle = null;

                    if (TailMatch.Success && (TailMatch.Groups[1].Value.Length > 0))
                    {
                        DocumentTitle = TailMatch.Groups[1].Value;
                    }

                    if (DocumentTitle == null)
                    {
                        this.DebugMsg(string.Format("TITLE: {0}", "MISSING"));
                    }
                    else
                    {
                        this.SetTitle(DocumentTitle, MacroscopeConstants.TextProcessingMode.NO_PROCESSING);
                        this.DebugMsg(string.Format("TITLE: {0}", this.GetTitle()));
                    }
                }

                /** QR Codes ------------------------------------------------------- **/
                if (MacroscopePreferencesManager.GetDetectQrCodeInImage())
                {
                    MacroscopeHttpImageLoader Loader = new MacroscopeHttpImageLoader();
                    string LocalImagePath = await Loader.DownloadImageFromUriToFile(
                        JobMaster: this.DocCollection.GetJobMaster(),
                        TargetUri: this.GetUri()
                        );
                    string DecodedText     = null;
                    Uri    QrCodeTargetUri = null;

                    // Only attempt decoding when the download produced a file on disk.
                    if ((!string.IsNullOrEmpty(LocalImagePath)) && File.Exists(LocalImagePath))
                    {
                        DecodedText = new MacroscopeQrCodeAnalysis().Decode(ImageFilename: LocalImagePath);
                    }

                    if (!string.IsNullOrEmpty(DecodedText))
                    {
                        try
                        {
                            QrCodeTargetUri = new Uri(DecodedText);
                        }
                        catch (UriFormatException ex)
                        {
                            // Decoded text that is not a URI is logged and otherwise ignored.
                            this.DebugMsg(string.Format("UriFormatException: {0}", DecodedText));
                            this.DebugMsg(string.Format("UriFormatException: {0}", ex.Message));
                        }
                    }

                    if (QrCodeTargetUri != null)
                    {
                        MacroscopeLink QrOutlink = this.AddDocumentOutlink(
                            AbsoluteUrl: QrCodeTargetUri.AbsoluteUri,
                            LinkType: MacroscopeConstants.InOutLinkType.QRCODE,
                            Follow: true
                            );

                        if (QrOutlink != null)
                        {
                            QrOutlink.SetRawTargetUrl(TargetUrl: QrCodeTargetUri.AbsoluteUri);
                            this.AddRemark("QRCODEIMAGE", "This image appears to be a QR Code.");
                        }
                    }
                }
                /** ---------------------------------------------------------------- **/
            }

            if (FetchError != null)
            {
                this.ErrorCondition = FetchError;
            }
        }
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Issues a HEAD request for this document, stamps CrawledDate on success,
        /// records the response's reason phrase and HTTP headers, and, when the
        /// response is a redirect with a non-empty target, adds the target as a
        /// REDIRECT outlink.
        /// </summary>
        /// <remarks>
        /// Exceptions are not rethrown. A failed request sets the status code to
        /// BadRequest and adds a remark; any failure (request or response handling)
        /// has its message passed to ProcessErrorCondition at the end of the method.
        /// </remarks>
        private async Task _ExecuteHeadRequest()
        {
            MacroscopeHttpTwoClient         Client         = this.DocCollection.GetJobMaster().GetHttpClient();
            MacroscopeHttpTwoClientResponse ClientResponse = null;
            string ResponseErrorCondition = null;

            this.SetProcessInlinks();
            this.SetProcessHyperlinksIn();

            try
            {
                ClientResponse = await Client.Head(
                    this.GetUri(),
                    this.ConfigureHeadRequestHeadersCallback,
                    this.PostProcessRequestHttpHeadersCallback
                    );

                this.CrawledDate = DateTime.UtcNow;
            }
            catch (MacroscopeDocumentException ex)
            {
                this.DebugMsg(string.Format("_ExecuteHeadRequest :: MacroscopeDocumentException: {0}", ex.Message));
                ResponseErrorCondition = ex.Message;
                this.SetStatusCode(HttpStatusCode.BadRequest);
                this.AddRemark("_ExecuteHeadRequest", ex.Message);
            }
            catch (Exception ex)
            {
                this.DebugMsg(string.Format("_ExecuteHeadRequest :: Exception: {0}", ex.Message));
                ResponseErrorCondition = ex.Message;
                this.SetStatusCode(HttpStatusCode.BadRequest);
                this.AddRemark("_ExecuteHeadRequest", ex.Message);
            }

            if (ClientResponse != null)
            {
                try
                {
                    // Validate the wrapped response BEFORE touching any of its members;
                    // otherwise a missing response surfaces as a NullReferenceException
                    // rather than the intended "Bad Response" error.
                    var HttpResponse = ClientResponse.GetResponse();

                    if (HttpResponse == null)
                    {
                        throw new MacroscopeDocumentException("Bad Response in ExecuteHeadRequest");
                    }

                    this.DebugMsg(string.Format("StatusCode: {0}", HttpResponse.StatusCode));
                    this.SetErrorCondition(HttpResponse.ReasonPhrase);

                    this.ProcessResponseHttpHeaders(Response: ClientResponse);

                    if (this.GetIsRedirect())
                    {
                        string Location = this.GetUrlRedirectTo();

                        if (!string.IsNullOrEmpty(Location))
                        {
                            this.SetUrlRedirectTo(Url: Location);

                            MacroscopeLink OutLink = this.AddDocumentOutlink(
                                AbsoluteUrl: Location,
                                LinkType: MacroscopeConstants.InOutLinkType.REDIRECT,
                                Follow: true
                                );

                            // AddDocumentOutlink is treated as nullable elsewhere in this
                            // file (see the QR code handling), so guard the dereference.
                            if (OutLink != null)
                            {
                                OutLink.SetRawTargetUrl(TargetUrl: this.GetUrlRedirectToRaw());
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    this.DebugMsg(string.Format("_ExecuteHeadRequest :: Exception: {0}", ex.Message));
                    ResponseErrorCondition = ex.Message;
                }
            }

            if (ResponseErrorCondition != null)
            {
                this.ProcessErrorCondition(ResponseErrorCondition);
            }
        }
Ejemplo n.º 15
0
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Requests this video document with GET, records the response headers,
        /// and sets the title from the text after the last slash in DocUrl.
        /// </summary>
        /// <remarks>
        /// Exceptions raised by the request are not rethrown: the status code is set to
        /// BadRequest, a remark is added, and the exception message is stored in
        /// ErrorCondition at the end of the method.
        /// </remarks>
        private async Task _ProcessVideoPage()
        {
            MacroscopeHttpTwoClient         HttpTwoClient  = this.DocCollection.GetJobMaster().GetHttpClient();
            MacroscopeHttpTwoClientResponse ClientResponse = null;
            string FetchError = null;

            try
            {
                ClientResponse = await HttpTwoClient.Get(
                    this.GetUri(),
                    this.ConfigureVideoPageRequestHeadersCallback,
                    this.PostProcessRequestHttpHeadersCallback
                    );
            }
            catch (Exception ex)
            {
                // Both failure kinds are handled identically; only the debug text differs.
                string DebugFormat = (ex is MacroscopeDocumentException)
                    ? "_ProcessVideoPage :: MacroscopeDocumentException: {0}"
                    : "_ProcessVideoPage :: Exception: {0}";

                this.DebugMsg(string.Format(DebugFormat, ex.Message));
                FetchError = ex.Message;
                this.SetStatusCode(HttpStatusCode.BadRequest);
                this.AddRemark("_ProcessVideoPage", ex.Message);
            }

            if (ClientResponse != null)
            {
                this.ProcessResponseHttpHeaders(Response: ClientResponse);

                /** Title ---------------------------------------------------------- **/
                {
                    // The pattern is anchored at the end, so the first match is the only candidate.
                    Match  TailMatch     = Regex.Match(this.DocUrl, "/([^/]+)$");
                    string DocumentTitle = null;

                    if (TailMatch.Success && (TailMatch.Groups[1].Value.Length > 0))
                    {
                        DocumentTitle = TailMatch.Groups[1].Value;
                    }

                    if (DocumentTitle == null)
                    {
                        this.DebugMsg(string.Format("TITLE: {0}", "MISSING"));
                    }
                    else
                    {
                        this.SetTitle(DocumentTitle, MacroscopeConstants.TextProcessingMode.NO_PROCESSING);
                        this.DebugMsg(string.Format("TITLE: {0}", this.GetTitle()));
                    }
                }
            }

            if (FetchError != null)
            {
                this.ErrorCondition = FetchError;
            }
        }
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Downloads this Javascript document with a GET request, records the response
        /// headers, content length and checksum, optionally runs the custom filters and
        /// data extractors over the raw body, and sets the title from the text after
        /// the last slash in DocUrl.
        /// </summary>
        /// <remarks>
        /// Exceptions raised by the request are not rethrown: the status code is set to
        /// BadRequest, a remark is added, and the exception message is passed to
        /// ProcessErrorCondition at the end of the method.
        /// </remarks>
        private async Task _ProcessJavascriptPage()
        {
            MacroscopeHttpTwoClient         Client   = this.DocCollection.GetJobMaster().GetHttpClient();
            MacroscopeHttpTwoClientResponse Response = null;
            string ResponseErrorCondition            = null;

            try
            {
                Response = await Client.Get(
                    this.GetUri(),
                    this.ConfigureJavascriptPageRequestHeadersCallback,
                    this.PostProcessRequestHttpHeadersCallback
                    );
            }
            catch (MacroscopeDocumentException ex)
            {
                this.DebugMsg(string.Format("_ProcessJavascriptPage :: MacroscopeDocumentException: {0}", ex.Message));
                ResponseErrorCondition = ex.Message;
                this.SetStatusCode(HttpStatusCode.BadRequest);
                this.AddRemark("_ProcessJavascriptPage", ex.Message);
            }
            catch (Exception ex)
            {
                this.DebugMsg(string.Format("_ProcessJavascriptPage :: Exception: {0}", ex.Message));
                ResponseErrorCondition = ex.Message;
                this.SetStatusCode(HttpStatusCode.BadRequest);
                this.AddRemark("_ProcessJavascriptPage", ex.Message);
            }

            if (Response != null)
            {
                string RawData = "";

                this.ProcessResponseHttpHeaders(Response: Response);

                /** Get Response Body ---------------------------------------------- **/

                try
                {
                    this.DebugMsg(string.Format("MIME TYPE: {0}", this.MimeType));

                    // NOTE(review): an earlier, disabled variant chose the text encoding from
                    // GetCharacterEncoding() / JavascriptSniffCharset(); the body is now taken
                    // as whatever GetContentAsString() returns.
                    RawData = Response.GetContentAsString();

                    // Length is measured in characters, not bytes.
                    this.SetContentLength(Length: RawData.Length);

                    // Mark the body as fetched, as the XML and text handlers do.
                    this.SetWasDownloaded(true);

                    this.SetChecksum(RawData);
                }
                catch (Exception ex)
                {
                    this.DebugMsg(string.Format("Exception: {0}", ex.Message));

                    // Report a body-read failure as BadRequest, matching the XML and text
                    // handlers; HttpStatusCode.Ambiguous (300) is a 3xx code and does not
                    // describe a failed download.
                    this.SetStatusCode(HttpStatusCode.BadRequest);
                    RawData = "";
                    this.SetContentLength(Length: 0);
                }

                /** Custom Filters ------------------------------------------------- **/

                if (!string.IsNullOrEmpty(RawData))
                {
                    if (
                        MacroscopePreferencesManager.GetCustomFiltersEnable() &&
                        MacroscopePreferencesManager.GetCustomFiltersApplyToJavascripts())
                    {
                        MacroscopeCustomFilters CustomFilter = this.DocCollection.GetJobMaster().GetCustomFilter();

                        if ((CustomFilter != null) && (CustomFilter.IsEnabled()))
                        {
                            this.ProcessGenericCustomFiltered(
                                CustomFilter: CustomFilter,
                                GenericText: RawData
                                );
                        }
                    }
                }

                /** Data Extractors ------------------------------------------------ **/

                if (!string.IsNullOrEmpty(RawData))
                {
                    if (
                        MacroscopePreferencesManager.GetDataExtractorsEnable() &&
                        MacroscopePreferencesManager.GetDataExtractorsApplyToJavascripts())
                    {
                        this.ProcessGenericDataExtractors(GenericText: RawData);
                    }
                }

                /** Title ---------------------------------------------------------- **/

                {
                    MatchCollection reMatches     = Regex.Matches(this.DocUrl, "/([^/]+)$");
                    string          DocumentTitle = null;

                    // Take the first non-empty capture of the text after the last slash.
                    foreach (Match match in reMatches)
                    {
                        if (match.Groups[1].Value.Length > 0)
                        {
                            DocumentTitle = match.Groups[1].Value;
                            break;
                        }
                    }

                    if (DocumentTitle != null)
                    {
                        this.SetTitle(DocumentTitle, MacroscopeConstants.TextProcessingMode.NO_PROCESSING);
                        this.DebugMsg(string.Format("TITLE: {0}", this.GetTitle()));
                    }
                    else
                    {
                        this.DebugMsg(string.Format("TITLE: {0}", "MISSING"));
                    }
                }

                /** ---------------------------------------------------------------- **/
            }

            if (ResponseErrorCondition != null)
            {
                this.ProcessErrorCondition(ResponseErrorCondition);
            }
        }
Ejemplo n.º 17
0
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Downloads this document with a GET request and handles the body as plain text.
        /// Records the response headers, content length and checksum, splits the body
        /// into lines, optionally runs the custom filters and data extractors over the
        /// raw body, and stores the re-joined lines as the document text.
        /// A path ending in "robots.txt" gets robots and sitemap outlink processing plus
        /// a size remark; any other internal document gets pure-text outlink processing.
        /// </summary>
        /// <remarks>
        /// Exceptions raised by the request are not rethrown: the status code is set to
        /// BadRequest, a remark is added, and the exception message is passed to
        /// ProcessErrorCondition at the end of the method.
        /// </remarks>
        private async Task _ProcessTextPage()
        {
            MacroscopeHttpTwoClient         HttpTwoClient  = this.DocCollection.GetJobMaster().GetHttpClient();
            MacroscopeHttpTwoClientResponse ClientResponse = null;
            List <string> Lines = new List <string>();
            string FetchError   = null;

            try
            {
                ClientResponse = await HttpTwoClient.Get(
                    this.GetUri(),
                    this.ConfigureTextPageRequestHeadersCallback,
                    this.PostProcessRequestHttpHeadersCallback
                    );
            }
            catch (Exception ex)
            {
                // Both failure kinds are handled identically; only the debug text differs.
                string DebugFormat = (ex is MacroscopeDocumentException)
                    ? "_ProcessTextPage :: MacroscopeDocumentException: {0}"
                    : "_ProcessTextPage :: Exception: {0}";

                this.DebugMsg(string.Format(DebugFormat, ex.Message));
                FetchError = ex.Message;
                this.SetStatusCode(HttpStatusCode.BadRequest);
                this.AddRemark("_ProcessTextPage", ex.Message);
            }

            if (ClientResponse != null)
            {
                string Body = "";

                this.ProcessResponseHttpHeaders(Response: ClientResponse);

                /** Get Response Body ---------------------------------------------- **/

                try
                {
                    this.DebugMsg(string.Format("MIME TYPE: {0}", this.MimeType));

                    Body = ClientResponse.GetContentAsString();

                    // Length is measured in characters, not bytes.
                    this.SetContentLength(Length: Body.Length);
                    this.SetWasDownloaded(true);
                    this.SetChecksum(Body);
                }
                catch (Exception ex)
                {
                    // Any failure while reading the body leaves the document with an empty body.
                    this.DebugMsg(string.Format("Exception: {0}", ex.Message));
                    this.SetStatusCode(HttpStatusCode.BadRequest);
                    Body = "";
                    this.SetContentLength(Length: 0);
                }

                bool HasBody = !string.IsNullOrEmpty(Body);

                /** Split Into Lines ----------------------------------------------- **/

                if (HasBody)
                {
                    // Runs of CR/LF act as a single separator, so blank lines are dropped.
                    Lines = Regex.Split(Body, @"[\r\n]+").ToList();

                    this.DebugMsg(string.Format("TextDoc: {0}", Lines.Count));
                }
                else
                {
                    this.DebugMsg(string.Format("RawData: {0}", "EMPTY"));
                }

                /** Custom Filters ------------------------------------------------- **/

                if (
                    HasBody &&
                    MacroscopePreferencesManager.GetCustomFiltersEnable() &&
                    MacroscopePreferencesManager.GetCustomFiltersApplyToText())
                {
                    MacroscopeCustomFilters Filters = this.DocCollection.GetJobMaster().GetCustomFilter();

                    if ((Filters != null) && Filters.IsEnabled())
                    {
                        this.ProcessGenericCustomFiltered(CustomFilter: Filters, GenericText: Body);
                    }
                }

                /** Data Extractors ------------------------------------------------ **/

                if (
                    HasBody &&
                    MacroscopePreferencesManager.GetDataExtractorsEnable() &&
                    MacroscopePreferencesManager.GetDataExtractorsApplyToText())
                {
                    this.ProcessGenericDataExtractors(GenericText: Body);
                }

                /** Process Text Document ------------------------------------------ **/

                if ((Lines == null) || (Lines.Count == 0))
                {
                    this.SetDocumentText(Text: "");
                }
                else
                {
                    this.SetDocumentText(Text: string.Join(Environment.NewLine, Lines));

                    if (this.GetPath().EndsWith("robots.txt", StringComparison.InvariantCultureIgnoreCase))
                    {
                        // Capture the size before the outlink processing below runs.
                        long? TextSize          = this.GetContentLength();
                        long? RobotsMaxTextSize = 1024 * 512;

                        this.ProcessRobotsTextOutlinks(TextDoc: Lines);

                        if (this.DetectSitemapTextDocument(TextDoc: Lines))
                        {
                            this.DebugMsg(string.Format("ProcessTextPage: {0} :: {1}", "SITEMAP DETECTED", this.GetUrl()));
                            this.SetDocumentType(Type: MacroscopeConstants.DocumentType.SITEMAPTEXT);
                            this.ProcessSitemapTextOutlinks(TextDoc: Lines);
                        }

                        if (TextSize > RobotsMaxTextSize)
                        {
                            this.AddRemark("ROBOTS_TOO_BIG", "Robots.txt is larger than 512KB");
                        }
                    }
                    else if (this.GetIsInternal())
                    {
                        this.ProcessPureTextOutlinks(TextDoc: Lines, LinkType: MacroscopeConstants.InOutLinkType.PURETEXT);
                    }
                }

                /** ---------------------------------------------------------------- **/
            }

            if (FetchError != null)
            {
                this.ProcessErrorCondition(FetchError);
            }
        }
        /** -------------------------------------------------------------------- **/

        private async Task _ProcessPdfPage()
        {
            MacroscopeHttpTwoClient         Client         = this.DocCollection.GetJobMaster().GetHttpClient();
            MacroscopeHttpTwoClientResponse ClientResponse = null;
            string ResponseErrorCondition = null;

            try
            {
                ClientResponse = await Client.Get(
                    this.GetUri(),
                    this.ConfigurePdfPageRequestHeadersCallback,
                    this.PostProcessRequestHttpHeadersCallback
                    );
            }
            catch (MacroscopeDocumentException ex)
            {
                this.DebugMsg(string.Format("_ProcessPdfPage :: MacroscopeDocumentException: {0}", ex.Message));
                ResponseErrorCondition = ex.Message;
                this.AddRemark("_ProcessPdfPage", ex.Message);
            }
            catch (Exception ex)
            {
                this.DebugMsg(string.Format("_ProcessPdfPage :: Exception: {0}", ex.Message));
                ResponseErrorCondition = ex.Message;
                this.AddRemark("_ProcessPdfPage", ex.Message);
            }

            if (ClientResponse != null)
            {
                MacroscopePdfTools PdfTools;

                this.ProcessResponseHttpHeaders(Response: ClientResponse);

                {                              // Probe Locale
                  //this.Locale = "en"; // Implement locale probing
                    this.Locale = "x-default"; // Implement locale probing
                    this.SetHreflang(HrefLangLocale: this.Locale, Url: this.DocUrl);
                }

                { // Canonical
                    this.Canonical = this.DocUrl;
                    this.DebugMsg(string.Format("CANONICAL: {0}", this.Canonical));
                }

                /** Get Response Body ---------------------------------------------- **/

                try
                {
                    byte[] RawData = ClientResponse.GetContentAsBytes();
                    this.SetContentLength(Length: RawData.Length);

                    PdfTools = new MacroscopePdfTools(PdfData: RawData);

                    if (PdfTools.GetHasError())
                    {
                        this.AddRemark("CORRUPT_PDF", Observation: PdfTools.GetErrorMessage());
                    }

                    this.SetWasDownloaded(true);
                }
                catch (Exception ex)
                {
                    this.DebugMsg(string.Format("Exception: {0}", ex.Message));
                    this.SetStatusCode(HttpStatusCode.BadRequest);
                    PdfTools = null;
                    this.SetContentLength(Length: 0);
                }

                /** Title ---------------------------------------------------------- **/

                if (PdfTools != null)
                {
                    string Text = PdfTools.GetTitle();

                    if (!string.IsNullOrEmpty(Text))
                    {
                        this.SetTitle(Text, MacroscopeConstants.TextProcessingMode.NO_PROCESSING);
                        this.DebugMsg(string.Format("TITLE: {0}", this.GetTitle()));
                    }
                    else
                    {
                        this.DebugMsg(string.Format("TITLE: {0}", "MISSING"));
                    }
                }

                /** Author --------------------------------------------------------- **/

                if (PdfTools != null)
                {
                    string Text = PdfTools.GetAuthor();

                    if (!string.IsNullOrEmpty(Text))
                    {
                        this.SetAuthor(AuthorText: Text, ProcessingMode: MacroscopeConstants.TextProcessingMode.NO_PROCESSING);
                        this.DebugMsg(string.Format("AUTHOR: {0}", this.GetAuthor()));
                    }
                    else
                    {
                        this.DebugMsg(string.Format("AUTHOR: {0}", "MISSING"));
                    }
                }

                /** Description ---------------------------------------------------- **/

                if (PdfTools != null)
                {
                    string Text = PdfTools.GetDescription();

                    if (!string.IsNullOrEmpty(Text))
                    {
                        this.SetDescription(Text, MacroscopeConstants.TextProcessingMode.NO_PROCESSING);
                        this.DebugMsg(string.Format("TITLE: {0}", this.GetDescription()));
                    }
                    else
                    {
                        this.DebugMsg(string.Format("TITLE: {0}", "MISSING"));
                    }
                }

                /** Metadata Keywords ---------------------------------------------- **/

                if (PdfTools != null)
                {
                    string Text = PdfTools.GetKeywords();

                    if (!string.IsNullOrEmpty(Text))
                    {
                        this.SetKeywords(KeywordsText: Text);
                        this.DebugMsg(string.Format("KEYWORDS: {0}", this.GetKeywords()));
                    }
                    else
                    {
                        this.DebugMsg(string.Format("KEYWORDS: {0}", "MISSING"));
                    }
                }

                /** Body Text ------------------------------------------------------ **/

                if (PdfTools != null)
                {
                    this.SetBodyText(Text: "");

                    if (PdfTools.GetHasError())
                    {
                        this.AddRemark("PDF_ERROR", Observation: PdfTools.GetErrorMessage());
                    }
                    else
                    {
                        string Text = PdfTools.GetTextAsString();
                        if (!string.IsNullOrEmpty(Text))
                        {
                            this.SetDocumentText(Text: Text);
                            this.SetBodyText(Text: Text);
                        }
                    }

                    this.DebugMsg(string.Format("BODY TEXT: {0}", this.GetBodyTextRaw()));
                }

                /** Data Extractors ------------------------------------------------ **/

                if (!string.IsNullOrEmpty(this.GetBodyTextRaw()))
                {
                    if (MacroscopePreferencesManager.GetDataExtractorsEnable())
                    {
                        if (MacroscopePreferencesManager.GetDataExtractorsApplyToPdf())
                        {
                            string Text = this.GetBodyTextRaw();
                            this.ProcessGenericDataExtractors(GenericText: Text);
                        }
                    }
                }

                /** Out Links Text ------------------------------------------------- **/

                if (this.GetDocumentTextRawLength() > 0)
                {
                    if (this.GetIsInternal())
                    {
                        string Text = this.GetDocumentTextRaw();
                        this.ProcessPureTextOutlinks(TextDoc: Text, LinkType: MacroscopeConstants.InOutLinkType.PDF);
                    }
                }

                /** Out Links in Annotations --------------------------------------- **/

                if (this.GetIsInternal() && (this.GetDocumentTextRawLength() > 0))
                {
                    List <KeyValuePair <string, string> > AnnotationOutLinks = PdfTools.GetOutLinks();

                    // TODO: Implement extraction of text that underlies the link annotation

                    foreach (KeyValuePair <string, string> AnnotationOutLinkPair in AnnotationOutLinks)
                    {
                        MacroscopeHyperlinkOut HyperlinkOut = null;
                        string AnnotationOutLinkUrlAbs;

                        AnnotationOutLinkUrlAbs = MacroscopeHttpUrlUtils.MakeUrlAbsolute(
                            BaseHref: this.BaseHref,
                            BaseUrl: this.DocUrl,
                            Url: AnnotationOutLinkPair.Key
                            );

                        HyperlinkOut = this.HyperlinksOut.Add(LinkType: MacroscopeConstants.HyperlinkType.PDF, UrlTarget: AnnotationOutLinkUrlAbs);
                        HyperlinkOut.SetRawTargetUrl(TargetUrl: AnnotationOutLinkUrlAbs);
                        HyperlinkOut.SetAltText(AnnotationOutLinkPair.Value);
                        HyperlinkOut.SetAnchorText(AnnotationOutLinkPair.Value);
                        HyperlinkOut.SetTitle(AnnotationOutLinkPair.Value);
                        HyperlinkOut.SetDoFollow();
                        HyperlinkOut.SetMethod(Method: "GET");

                        this.AddDocumentOutlink(AbsoluteUrl: AnnotationOutLinkUrlAbs, LinkType: MacroscopeConstants.InOutLinkType.PDF, Follow: true);
                    }
                }

                /** ---------------------------------------------------------------- **/
            }

            if (ResponseErrorCondition != null)
            {
                this.ProcessErrorCondition(ResponseErrorCondition);
            }
        }
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Requests this document's URI with the job master's HTTP client and, when a
        /// response is obtained, processes it as a CSS stylesheet: response headers,
        /// body text, CSS outlinks, custom filters, data extractors, and a title
        /// derived from the text that follows the last '/' in <c>DocUrl</c>.
        /// </summary>
        /// <remarks>
        /// An exception thrown by the GET request is logged, recorded as a remark,
        /// marked with <c>HttpStatusCode.BadRequest</c>, and its message is handed to
        /// <c>ProcessErrorCondition</c> at the end of the method.
        /// </remarks>
        private async Task _ProcessCssPage()
        {
            MacroscopeHttpTwoClient CssClient = this.DocCollection.GetJobMaster().GetHttpClient();
            MacroscopeHttpTwoClientResponse CssResponse = null;
            string FetchError = null;

            this.DebugMsg(string.Format("ProcessCssPage: {0}", ""));

            /** Fetch ---------------------------------------------------------------- **/

            try
            {
                CssResponse = await CssClient.Get(
                    this.GetUri(),
                    this.ConfigureCssPageRequestHeadersCallback,
                    this.PostProcessRequestHttpHeadersCallback
                    );
            }
            catch (Exception ex)
            {
                // Both exception kinds are handled the same way; only the log prefix differs.
                string LogFormat = (ex is MacroscopeDocumentException)
                    ? "_ProcessCssPage :: MacroscopeDocumentException: {0}"
                    : "_ProcessCssPage :: Exception: {0}";

                this.DebugMsg(string.Format(LogFormat, ex.Message));
                FetchError = ex.Message;
                this.SetStatusCode(HttpStatusCode.BadRequest);
                this.AddRemark("_ProcessCssPage", ex.Message);
            }

            if (CssResponse != null)
            {
                string CssText = "";

                this.ProcessResponseHttpHeaders(Response: CssResponse);

                /** Get Response Body ---------------------------------------------- **/

                try
                {
                    this.DebugMsg(string.Format("MIME TYPE: {0}", this.MimeType));

                    CssText = CssResponse.GetContentAsString();

                    // Length is in characters here; the byte length may be needed instead.
                    this.SetContentLength(Length: CssText.Length);

                    this.SetWasDownloaded(true);
                }
                catch (Exception ex)
                {
                    this.DebugMsg(string.Format("Exception: {0}", ex.Message));
                    this.SetStatusCode(HttpStatusCode.Ambiguous);
                    this.SetContentLength(Length: 0);
                }

                // The body text is not reassigned past this point, so test it once.
                bool HasCssText = !string.IsNullOrEmpty(CssText);

                /** CSS Out Links -------------------------------------------------- **/

                if (!HasCssText)
                {
                    this.DebugMsg(string.Format("ProcessCssPage: ERROR: {0}", this.GetUrl()));
                }
                else
                {
                    try
                    {
                        StylesheetParser Parser = new StylesheetParser();
                        Stylesheet ParsedSheet = Parser.Parse(CssText);
                        this.ProcessCssOutlinks(CssStylesheet: ParsedSheet);
                    }
                    catch (Exception ex)
                    {
                        this.DebugMsg(string.Format("ProcessHtmlAttributeCssLinks: {0}", ex.Message));
                        this.AddRemark("ProcessHtmlAttributeCssLinks", ex.Message);
                    }
                }

                /** Custom Filters ------------------------------------------------- **/

                if (
                    HasCssText &&
                    MacroscopePreferencesManager.GetCustomFiltersEnable() &&
                    MacroscopePreferencesManager.GetCustomFiltersApplyToCss())
                {
                    MacroscopeCustomFilters ActiveFilter = this.DocCollection.GetJobMaster().GetCustomFilter();

                    if ((ActiveFilter != null) && ActiveFilter.IsEnabled())
                    {
                        this.ProcessGenericCustomFiltered(
                            CustomFilter: ActiveFilter,
                            GenericText: CssText
                            );
                    }
                }

                /** Data Extractors ------------------------------------------------ **/

                if (
                    HasCssText &&
                    MacroscopePreferencesManager.GetDataExtractorsEnable() &&
                    MacroscopePreferencesManager.GetDataExtractorsApplyToCss())
                {
                    this.ProcessGenericDataExtractors(GenericText: CssText);
                }

                /** Title ---------------------------------------------------------- **/

                {
                    // Use the first non-empty capture of the text after the final '/'.
                    string TrailingSegment = null;

                    foreach (Match Candidate in Regex.Matches(this.DocUrl, "/([^/]+)$"))
                    {
                        string Captured = Candidate.Groups[1].Value;

                        if (Captured.Length > 0)
                        {
                            TrailingSegment = Captured;
                            break;
                        }
                    }

                    if (TrailingSegment == null)
                    {
                        this.DebugMsg(string.Format("TITLE: {0}", "MISSING"));
                    }
                    else
                    {
                        this.SetTitle(TrailingSegment, MacroscopeConstants.TextProcessingMode.NO_PROCESSING);
                        this.DebugMsg(string.Format("TITLE: {0}", this.GetTitle()));
                    }
                }
            }

            /** Deferred request error ----------------------------------------------- **/

            if (FetchError != null)
            {
                this.ProcessErrorCondition(FetchError);
            }
        }