예제 #1
0
        /// <summary>
        /// Issues a GET through a fresh MacroscopeHttpTwoClient for every URL in a
        /// fixed list, then asserts that each response has status code 200 and a
        /// non-empty body when read as a string.
        /// </summary>
        public async Task TestHttpTwoClientGet()
        {
            MacroscopeHttpTwoClient HttpTwoClient = new MacroscopeHttpTwoClient();
            List<Uri> TargetUris = new List<Uri>
            {
                new Uri("https://nazuke.github.io/robots.txt")
            };

            foreach (Uri TargetUri in TargetUris)
            {
                this.DebugMsg(string.Format("Url: {0}", TargetUri));

                MacroscopeHttpTwoClientResponse Wrapper = await HttpTwoClient.Get(
                    TargetUri,
                    this.PreProcessHeadRequestHeadersCallback,
                    this.PostProcessRequestHttpHeadersCallback
                    );

                // Unwrap the underlying framework response to inspect version and status.
                HttpResponseMessage Message = Wrapper.GetResponse();

                this.DebugMsg(string.Format("Response.Version: {0}", Message.Version));

                int NumericStatus = (int)Message.StatusCode;
                Assert.AreEqual(200, NumericStatus);

                int BodyLength = Wrapper.GetContentAsString().Length;
                Assert.Greater(BodyLength, 0);
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Downloads <paramref name="TargetUri"/> with the job master's HTTP client,
        /// writes the response bytes to a temporary file, and loads that file as an
        /// <see cref="Image"/>.
        /// </summary>
        /// <param name="JobMaster">Supplies the HTTP client used for the request.</param>
        /// <param name="TargetUri">Address of the image to fetch.</param>
        /// <returns>
        /// The loaded image, or null when the request fails, the body is empty,
        /// or the bytes cannot be loaded as an image. Failures are logged via DebugMsg
        /// and never propagate to the caller.
        /// </returns>
        public async Task <Image> LoadImageFromUri(MacroscopeJobMaster JobMaster, Uri TargetUri)
        {
            MacroscopeHttpTwoClient         Client   = JobMaster.GetHttpClient();
            MacroscopeHttpTwoClientResponse Response = null;
            Image LoadedImage = null;

            try
            {
                Response = await Client.Get(
                    TargetUri,
                    this.ConfigureHeadRequestHeadersCallback,
                    this.PostProcessRequestHttpHeadersCallback
                    );
            }
            catch (MacroscopeDocumentException ex)
            {
                this.DebugMsg(string.Format("MacroscopeDocumentException: {0}", ex.Message));
                this.DebugMsg(string.Format("MacroscopeDocumentException: {0}", TargetUri.ToString()));
            }
            catch (Exception ex)
            {
                this.DebugMsg(string.Format("Exception: {0}", ex.Message));
                this.DebugMsg(string.Format("Exception: {0}", TargetUri.ToString()));
            }

            if (Response != null)
            {
                try
                {
                    // Read the body before creating the temp file so that a failed
                    // read does not leave an orphaned file behind.
                    byte[] ByteData = Response.GetContentAsBytes();

                    if ((ByteData == null) || (ByteData.Length == 0))
                    {
                        this.DebugMsg(string.Format("LoadImageFromUri: empty response body: {0}", TargetUri));
                    }
                    else
                    {
                        // GetTempFileName creates the file on disk, so register it
                        // straight away: it is then tracked even if the write or
                        // the image load below throws.
                        string ImageFilename = Path.GetTempFileName();
                        TemporaryFiles.Add(ImageFilename);

                        // Single bulk write instead of one WriteByte call per byte.
                        File.WriteAllBytes(ImageFilename, ByteData);

                        LoadedImage = Image.FromFile(ImageFilename);
                    }
                }
                catch (Exception ex)
                {
                    this.DebugMsg(string.Format("Exception: {0}", ex.Message));
                }
            }

            return(LoadedImage);
        }
예제 #3
0
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Fetches <paramref name="TargetUri"/> with the job master's HTTP client and
        /// returns the response body as a byte array.
        /// </summary>
        /// <param name="JobMaster">Supplies the HTTP client used for the request.</param>
        /// <param name="TargetUri">Address to download.</param>
        /// <returns>
        /// The body bytes, or null when the request or the body read fails.
        /// Failures are logged via DebugMsg rather than thrown.
        /// </returns>
        private async Task <byte[]> _LoadMemoryStreamFromUrl(MacroscopeJobMaster JobMaster, Uri TargetUri)
        {
            MacroscopeHttpTwoClient         Fetcher = JobMaster.GetHttpClient();
            MacroscopeHttpTwoClientResponse Fetched = null;

            try
            {
                Fetched = await Fetcher.Get(
                    TargetUri,
                    this.ConfigureHeadRequestHeadersCallback,
                    this.PostProcessRequestHttpHeadersCallback
                    );
            }
            catch (Exception ex)
            {
                // Both failure kinds are logged the same way; only the label differs.
                string LogFormat = (ex is MacroscopeDocumentException)
                    ? "MacroscopeDocumentException: {0}"
                    : "Exception: {0}";

                this.DebugMsg(string.Format(LogFormat, ex.Message));
                this.DebugMsg(string.Format(LogFormat, TargetUri.ToString()));
            }

            if (Fetched == null)
            {
                this.DebugMsg("NULL");
                return(null);
            }

            try
            {
                return(Fetched.GetContentAsBytes());
            }
            catch (Exception ex)
            {
                this.DebugMsg(string.Format("Exception: {0}", ex.Message));
                return(null);
            }
        }
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Fetches this document's URI as an XML page, records headers, length and
        /// checksum, parses the body, runs the optional custom filters and data
        /// extractors over the raw text, and processes sitemap outlinks when the
        /// parsed document is detected as a sitemap.
        /// </summary>
        /// <remarks>
        /// Request failures are not rethrown: the status code is set to BadRequest,
        /// a remark is added, and the exception message is handed to
        /// ProcessErrorCondition at the end of the method.
        /// </remarks>
        private async Task _ProcessXmlPage()
        {
            XmlDocument                     XmlDoc   = null;
            MacroscopeHttpTwoClient         Client   = this.DocCollection.GetJobMaster().GetHttpClient();
            MacroscopeHttpTwoClientResponse Response = null;
            string ResponseErrorCondition            = null;

            try
            {
                Response = await Client.Get(
                    this.GetUri(),
                    this.ConfigureXmlPageRequestHeadersCallback,
                    this.PostProcessRequestHttpHeadersCallback
                    );
            }
            catch (MacroscopeDocumentException ex)
            {
                this.DebugMsg(string.Format("_ProcessXmlPage :: MacroscopeDocumentException: {0}", ex.Message));
                ResponseErrorCondition = ex.Message;
                this.SetStatusCode(HttpStatusCode.BadRequest);
                this.AddRemark("_ProcessXmlPage", ex.Message);
            }
            catch (Exception ex)
            {
                this.DebugMsg(string.Format("_ProcessXmlPage :: Exception: {0}", ex.Message));
                ResponseErrorCondition = ex.Message;
                this.SetStatusCode(HttpStatusCode.BadRequest);
                this.AddRemark("_ProcessXmlPage", ex.Message);
            }

            if (Response != null)
            {
                string RawData = "";

                this.ProcessResponseHttpHeaders(Response: Response);

                /** Get Response Body ---------------------------------------------- **/

                try
                {
                    DebugMsg(string.Format("MIME TYPE: {0}", this.MimeType));

                    RawData = Response.GetContentAsString();

                    // Length is in characters, not bytes.
                    this.SetContentLength(Length: RawData.Length); // May need to find bytes length

                    this.SetWasDownloaded(true);

                    this.SetChecksum(RawData);
                }
                catch (Exception ex)
                {
                    // Also reached when the body is null (RawData.Length throws),
                    // so RawData is never null after this point.
                    DebugMsg(string.Format("Exception: {0}", ex.Message));
                    this.SetStatusCode(HttpStatusCode.BadRequest);
                    RawData = "";
                    this.SetContentLength(Length: 0);
                }

                if (!string.IsNullOrEmpty(RawData))
                {
                    /** Parse XML -------------------------------------------------- **/

                    XmlDoc = new XmlDocument();

                    // The body comes from a remote server and is untrusted: never
                    // resolve external DTDs or entities while parsing it (XXE).
                    XmlDoc.XmlResolver = null;

                    try
                    {
                        XmlDoc.LoadXml(RawData);
                    }
                    catch (XmlException ex)
                    {
                        DebugMsg(string.Format("XmlException: {0}", ex.Message));
                    }
                    catch (Exception ex)
                    {
                        DebugMsg(string.Format("Exception: {0}", ex.Message));
                    }

                    DebugMsg(string.Format("XmlDoc: {0}", XmlDoc));

                    /** Custom Filters --------------------------------------------- **/

                    if (
                        MacroscopePreferencesManager.GetCustomFiltersEnable() &&
                        MacroscopePreferencesManager.GetCustomFiltersApplyToXml())
                    {
                        MacroscopeCustomFilters CustomFilter = this.DocCollection.GetJobMaster().GetCustomFilter();

                        if ((CustomFilter != null) && (CustomFilter.IsEnabled()))
                        {
                            this.ProcessGenericCustomFiltered(
                                CustomFilter: CustomFilter,
                                GenericText: RawData
                                );
                        }
                    }

                    /** Data Extractors -------------------------------------------- **/

                    if (
                        MacroscopePreferencesManager.GetDataExtractorsEnable() &&
                        MacroscopePreferencesManager.GetDataExtractorsApplyToXml())
                    {
                        this.ProcessGenericDataExtractors(GenericText: RawData);
                    }
                }
                else
                {
                    DebugMsg(string.Format("RawData: {0}", "EMPTY"));
                }

                /** Sitemap Detection ---------------------------------------------- **/

                // A failed LoadXml leaves XmlDoc without a DocumentElement, which
                // the second half of this check filters out.
                if ((XmlDoc != null) && (XmlDoc.DocumentElement != null))
                {
                    if (this.DetectSitemapXmlDocument(XmlDoc))
                    {
                        DebugMsg(string.Format("ProcessXmlPage: {0} :: {1}", "SITEMAP DETECTED", this.GetUrl()));
                        this.SetDocumentType(Type: MacroscopeConstants.DocumentType.SITEMAPXML);
                        this.ProcessSitemapXmlOutlinks(XmlDoc);
                    }
                }

                /** Document Text -------------------------------------------------- **/

                // RawData is guaranteed non-null here (see the catch block above).
                this.SetDocumentText(Text: RawData);

                /** ---------------------------------------------------------------- **/
            }

            if (ResponseErrorCondition != null)
            {
                this.ProcessErrorCondition(ResponseErrorCondition);
            }
        }
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Fetches this document's URI as a JavaScript resource, records headers,
        /// length and checksum, runs the optional custom filters and data extractors
        /// over the script text, and derives the document title from the last path
        /// segment of the document URL.
        /// </summary>
        /// <remarks>
        /// Request failures are not rethrown: the status code is set to BadRequest,
        /// a remark is added, and the exception message is handed to
        /// ProcessErrorCondition at the end of the method.
        /// </remarks>
        private async Task _ProcessJavascriptPage()
        {
            MacroscopeHttpTwoClient         Fetcher      = this.DocCollection.GetJobMaster().GetHttpClient();
            MacroscopeHttpTwoClientResponse Fetched      = null;
            string                          FetchFailure = null;

            try
            {
                Fetched = await Fetcher.Get(
                    this.GetUri(),
                    this.ConfigureJavascriptPageRequestHeadersCallback,
                    this.PostProcessRequestHttpHeadersCallback
                    );
            }
            catch (Exception ex)
            {
                // Both failure kinds are handled identically; only the log label differs.
                string LogFormat = (ex is MacroscopeDocumentException)
                    ? "_ProcessJavascriptPage :: MacroscopeDocumentException: {0}"
                    : "_ProcessJavascriptPage :: Exception: {0}";

                this.DebugMsg(string.Format(LogFormat, ex.Message));
                FetchFailure = ex.Message;
                this.SetStatusCode(HttpStatusCode.BadRequest);
                this.AddRemark("_ProcessJavascriptPage", ex.Message);
            }

            if (Fetched != null)
            {
                string ScriptText = "";

                this.ProcessResponseHttpHeaders(Response: Fetched);

                /** Get Response Body ---------------------------------------------- **/

                try
                {
                    DebugMsg(string.Format("MIME TYPE: {0}", this.MimeType));

                    ScriptText = Fetched.GetContentAsString();

                    // Length is in characters, not bytes.
                    this.SetContentLength(Length: ScriptText.Length); // May need to find bytes length
                    this.SetChecksum(ScriptText);
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("Exception: {0}", ex.Message));

                    // NOTE(review): the other page processors in this file use
                    // BadRequest here; confirm that Ambiguous is intentional.
                    this.SetStatusCode(HttpStatusCode.Ambiguous);
                    ScriptText = "";
                    this.SetContentLength(Length: 0);
                }

                if (!string.IsNullOrEmpty(ScriptText))
                {
                    /** Custom Filters --------------------------------------------- **/

                    bool ApplyCustomFilters =
                        MacroscopePreferencesManager.GetCustomFiltersEnable() &&
                        MacroscopePreferencesManager.GetCustomFiltersApplyToJavascripts();

                    if (ApplyCustomFilters)
                    {
                        MacroscopeCustomFilters ActiveFilter = this.DocCollection.GetJobMaster().GetCustomFilter();

                        if ((ActiveFilter != null) && ActiveFilter.IsEnabled())
                        {
                            this.ProcessGenericCustomFiltered(
                                CustomFilter: ActiveFilter,
                                GenericText: ScriptText
                                );
                        }
                    }

                    /** Data Extractors -------------------------------------------- **/

                    bool ApplyDataExtractors =
                        MacroscopePreferencesManager.GetDataExtractorsEnable() &&
                        MacroscopePreferencesManager.GetDataExtractorsApplyToJavascripts();

                    if (ApplyDataExtractors)
                    {
                        this.ProcessGenericDataExtractors(GenericText: ScriptText);
                    }
                }

                /** Title ---------------------------------------------------------- **/

                // The title is the text after the final '/' of the document URL.
                string TitleFromUrl = null;

                foreach (Match Hit in Regex.Matches(this.DocUrl, "/([^/]+)$"))
                {
                    string LastSegment = Hit.Groups[1].Value;

                    if (LastSegment.Length > 0)
                    {
                        TitleFromUrl = LastSegment;
                        break;
                    }
                }

                if (TitleFromUrl == null)
                {
                    DebugMsg(string.Format("TITLE: {0}", "MISSING"));
                }
                else
                {
                    this.SetTitle(TitleFromUrl, MacroscopeConstants.TextProcessingMode.NO_PROCESSING);
                    DebugMsg(string.Format("TITLE: {0}", this.GetTitle()));
                }

                /** ---------------------------------------------------------------- **/
            }

            if (FetchFailure != null)
            {
                this.ProcessErrorCondition(FetchFailure);
            }
        }
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Fetches this document's URI as a PDF, records headers and length, and
        /// copies title, author, description, keywords and body text from the parsed
        /// PDF onto this document. Then runs the optional data extractors and, for
        /// internal documents, collects outlinks from the text and from the PDF's
        /// link annotations.
        /// </summary>
        /// <remarks>
        /// Request failures are not rethrown: a remark is added and the exception
        /// message is handed to ProcessErrorCondition at the end of the method.
        /// NOTE(review): unlike sibling processors such as _ProcessXmlPage, a failed
        /// request here does not set the status code to BadRequest; confirm that
        /// this is intentional.
        /// </remarks>
        private async Task _ProcessPdfPage()
        {
            MacroscopeHttpTwoClient         Client         = this.DocCollection.GetJobMaster().GetHttpClient();
            MacroscopeHttpTwoClientResponse ClientResponse = null;
            string ResponseErrorCondition = null;

            try
            {
                ClientResponse = await Client.Get(
                    this.GetUri(),
                    this.ConfigurePdfPageRequestHeadersCallback,
                    this.PostProcessRequestHttpHeadersCallback
                    );
            }
            catch (MacroscopeDocumentException ex)
            {
                this.DebugMsg(string.Format("_ProcessPdfPage :: MacroscopeDocumentException: {0}", ex.Message));
                ResponseErrorCondition = ex.Message;
                this.AddRemark("_ProcessPdfPage", ex.Message);
            }
            catch (Exception ex)
            {
                this.DebugMsg(string.Format("_ProcessPdfPage :: Exception: {0}", ex.Message));
                ResponseErrorCondition = ex.Message;
                this.AddRemark("_ProcessPdfPage", ex.Message);
            }

            if (ClientResponse != null)
            {
                // Stays null when the body cannot be read or parsed.
                MacroscopePdfTools PdfTools = null;

                this.ProcessResponseHttpHeaders(Response: ClientResponse);

                {                              // Probe Locale
                    this.Locale = "x-default"; // Implement locale probing
                    this.SetHreflang(HrefLangLocale: this.Locale, Url: this.DocUrl);
                }

                { // Canonical
                    this.Canonical = this.DocUrl;
                    this.DebugMsg(string.Format("CANONICAL: {0}", this.Canonical));
                }

                /** Get Response Body ---------------------------------------------- **/

                try
                {
                    byte[] RawData = ClientResponse.GetContentAsBytes();
                    this.SetContentLength(Length: RawData.Length);

                    PdfTools = new MacroscopePdfTools(PdfData: RawData);

                    if (PdfTools.GetHasError())
                    {
                        this.AddRemark("CORRUPT_PDF", Observation: PdfTools.GetErrorMessage());
                    }

                    this.SetWasDownloaded(true);
                }
                catch (Exception ex)
                {
                    this.DebugMsg(string.Format("Exception: {0}", ex.Message));
                    this.SetStatusCode(HttpStatusCode.BadRequest);
                    PdfTools = null;
                    this.SetContentLength(Length: 0);
                }

                if (PdfTools != null)
                {
                    /** Title ------------------------------------------------------ **/

                    string TitleText = PdfTools.GetTitle();

                    if (!string.IsNullOrEmpty(TitleText))
                    {
                        this.SetTitle(TitleText, MacroscopeConstants.TextProcessingMode.NO_PROCESSING);
                        this.DebugMsg(string.Format("TITLE: {0}", this.GetTitle()));
                    }
                    else
                    {
                        this.DebugMsg(string.Format("TITLE: {0}", "MISSING"));
                    }

                    /** Author ----------------------------------------------------- **/

                    string AuthorText = PdfTools.GetAuthor();

                    if (!string.IsNullOrEmpty(AuthorText))
                    {
                        this.SetAuthor(AuthorText: AuthorText, ProcessingMode: MacroscopeConstants.TextProcessingMode.NO_PROCESSING);
                        this.DebugMsg(string.Format("AUTHOR: {0}", this.GetAuthor()));
                    }
                    else
                    {
                        this.DebugMsg(string.Format("AUTHOR: {0}", "MISSING"));
                    }

                    /** Description ------------------------------------------------ **/

                    string DescriptionText = PdfTools.GetDescription();

                    if (!string.IsNullOrEmpty(DescriptionText))
                    {
                        this.SetDescription(DescriptionText, MacroscopeConstants.TextProcessingMode.NO_PROCESSING);
                        // Label corrected: this section previously logged as "TITLE".
                        this.DebugMsg(string.Format("DESCRIPTION: {0}", this.GetDescription()));
                    }
                    else
                    {
                        this.DebugMsg(string.Format("DESCRIPTION: {0}", "MISSING"));
                    }

                    /** Metadata Keywords ------------------------------------------ **/

                    string KeywordsText = PdfTools.GetKeywords();

                    if (!string.IsNullOrEmpty(KeywordsText))
                    {
                        this.SetKeywords(KeywordsText: KeywordsText);
                        this.DebugMsg(string.Format("KEYWORDS: {0}", this.GetKeywords()));
                    }
                    else
                    {
                        this.DebugMsg(string.Format("KEYWORDS: {0}", "MISSING"));
                    }

                    /** Body Text -------------------------------------------------- **/

                    // Reset first so a PDF with errors or no text ends up with an empty body.
                    this.SetBodyText(Text: "");

                    if (PdfTools.GetHasError())
                    {
                        this.AddRemark("PDF_ERROR", Observation: PdfTools.GetErrorMessage());
                    }
                    else
                    {
                        string PdfText = PdfTools.GetTextAsString();
                        if (!string.IsNullOrEmpty(PdfText))
                        {
                            this.SetDocumentText(Text: PdfText);
                            this.SetBodyText(Text: PdfText);
                        }
                    }

                    this.DebugMsg(string.Format("BODY TEXT: {0}", this.GetBodyTextRaw()));
                }

                /** Data Extractors ------------------------------------------------ **/

                if (!string.IsNullOrEmpty(this.GetBodyTextRaw()))
                {
                    if (MacroscopePreferencesManager.GetDataExtractorsEnable())
                    {
                        if (MacroscopePreferencesManager.GetDataExtractorsApplyToPdf())
                        {
                            string Text = this.GetBodyTextRaw();
                            this.ProcessGenericDataExtractors(GenericText: Text);
                        }
                    }
                }

                /** Out Links Text ------------------------------------------------- **/

                if (this.GetDocumentTextRawLength() > 0)
                {
                    if (this.GetIsInternal())
                    {
                        string Text = this.GetDocumentTextRaw();
                        this.ProcessPureTextOutlinks(TextDoc: Text, LinkType: MacroscopeConstants.InOutLinkType.PDF);
                    }
                }

                /** Out Links in Annotations --------------------------------------- **/

                // PdfTools is null when the body could not be read or parsed, so it
                // must be checked before asking it for annotation links.
                if ((PdfTools != null) && this.GetIsInternal() && (this.GetDocumentTextRawLength() > 0))
                {
                    List <KeyValuePair <string, string> > AnnotationOutLinks = PdfTools.GetOutLinks();

                    // TODO: Implement extraction of text that underlies the link annotation

                    if (AnnotationOutLinks != null)
                    {
                        foreach (KeyValuePair <string, string> AnnotationOutLinkPair in AnnotationOutLinks)
                        {
                            // Key is the link URL; Value is used for alt, anchor and title text.
                            string AnnotationOutLinkUrlAbs = MacroscopeHttpUrlUtils.MakeUrlAbsolute(
                                BaseHref: this.BaseHref,
                                BaseUrl: this.DocUrl,
                                Url: AnnotationOutLinkPair.Key
                                );

                            MacroscopeHyperlinkOut HyperlinkOut = this.HyperlinksOut.Add(LinkType: MacroscopeConstants.HyperlinkType.PDF, UrlTarget: AnnotationOutLinkUrlAbs);
                            HyperlinkOut.SetRawTargetUrl(TargetUrl: AnnotationOutLinkUrlAbs);
                            HyperlinkOut.SetAltText(AnnotationOutLinkPair.Value);
                            HyperlinkOut.SetAnchorText(AnnotationOutLinkPair.Value);
                            HyperlinkOut.SetTitle(AnnotationOutLinkPair.Value);
                            HyperlinkOut.SetDoFollow();
                            HyperlinkOut.SetMethod(Method: "GET");

                            this.AddDocumentOutlink(AbsoluteUrl: AnnotationOutLinkUrlAbs, LinkType: MacroscopeConstants.InOutLinkType.PDF, Follow: true);
                        }
                    }
                }

                /** ---------------------------------------------------------------- **/
            }

            if (ResponseErrorCondition != null)
            {
                this.ProcessErrorCondition(ResponseErrorCondition);
            }
        }
예제 #7
0
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Fetches this document's URI as plain text, records headers, length and
        /// checksum, splits the body into lines, and runs the optional custom filters
        /// and data extractors. A path ending in "robots.txt" is processed for robots
        /// outlinks, sitemap detection and a size remark; any other internal document
        /// is processed for plain-text outlinks.
        /// </summary>
        /// <remarks>
        /// Request failures are not rethrown: the status code is set to BadRequest,
        /// a remark is added, and the exception message is handed to
        /// ProcessErrorCondition at the end of the method.
        /// </remarks>
        private async Task _ProcessTextPage()
        {
            List<string>                    BodyLines    = new List<string>();
            MacroscopeHttpTwoClient         Fetcher      = this.DocCollection.GetJobMaster().GetHttpClient();
            MacroscopeHttpTwoClientResponse Fetched      = null;
            string                          FetchFailure = null;

            try
            {
                Fetched = await Fetcher.Get(
                    this.GetUri(),
                    this.ConfigureTextPageRequestHeadersCallback,
                    this.PostProcessRequestHttpHeadersCallback
                    );
            }
            catch (Exception ex)
            {
                // Both failure kinds are handled identically; only the log label differs.
                string LogFormat = (ex is MacroscopeDocumentException)
                    ? "_ProcessTextPage :: MacroscopeDocumentException: {0}"
                    : "_ProcessTextPage :: Exception: {0}";

                this.DebugMsg(string.Format(LogFormat, ex.Message));
                FetchFailure = ex.Message;
                this.SetStatusCode(HttpStatusCode.BadRequest);
                this.AddRemark("_ProcessTextPage", ex.Message);
            }

            if (Fetched != null)
            {
                string BodyText = "";

                this.ProcessResponseHttpHeaders(Response: Fetched);

                /** Get Response Body ---------------------------------------------- **/

                try
                {
                    DebugMsg(string.Format("MIME TYPE: {0}", this.MimeType));

                    BodyText = Fetched.GetContentAsString();

                    // Length is in characters, not bytes.
                    this.SetContentLength(Length: BodyText.Length); // May need to find bytes length
                    this.SetWasDownloaded(true);
                    this.SetChecksum(BodyText);
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("Exception: {0}", ex.Message));
                    this.SetStatusCode(HttpStatusCode.BadRequest);
                    BodyText = "";
                    this.SetContentLength(Length: 0);
                }

                if (string.IsNullOrEmpty(BodyText))
                {
                    DebugMsg(string.Format("RawData: {0}", "EMPTY"));
                }
                else
                {
                    /** Split Into Lines ------------------------------------------- **/

                    // Runs of CR/LF characters act as a single separator.
                    BodyLines = new List<string>(Regex.Split(BodyText, @"[\r\n]+"));

                    DebugMsg(string.Format("TextDoc: {0}", BodyLines.Count));

                    /** Custom Filters --------------------------------------------- **/

                    bool ApplyCustomFilters =
                        MacroscopePreferencesManager.GetCustomFiltersEnable() &&
                        MacroscopePreferencesManager.GetCustomFiltersApplyToText();

                    if (ApplyCustomFilters)
                    {
                        MacroscopeCustomFilters ActiveFilter = this.DocCollection.GetJobMaster().GetCustomFilter();

                        if ((ActiveFilter != null) && ActiveFilter.IsEnabled())
                        {
                            this.ProcessGenericCustomFiltered(
                                CustomFilter: ActiveFilter,
                                GenericText: BodyText
                                );
                        }
                    }

                    /** Data Extractors -------------------------------------------- **/

                    bool ApplyDataExtractors =
                        MacroscopePreferencesManager.GetDataExtractorsEnable() &&
                        MacroscopePreferencesManager.GetDataExtractorsApplyToText();

                    if (ApplyDataExtractors)
                    {
                        this.ProcessGenericDataExtractors(GenericText: BodyText);
                    }
                }

                /** Process Text Document ------------------------------------------ **/

                if (BodyLines.Count == 0)
                {
                    this.SetDocumentText(Text: "");
                }
                else
                {
                    this.SetDocumentText(Text: string.Join(Environment.NewLine, BodyLines));

                    bool IsRobotsFile = this.GetPath().EndsWith("robots.txt", StringComparison.InvariantCultureIgnoreCase);

                    if (IsRobotsFile)
                    {
                        // Read the recorded length before any further processing.
                        long? RecordedLength = this.GetContentLength();
                        long? RobotsSizeCap  = 1024 * 512;

                        this.ProcessRobotsTextOutlinks(TextDoc: BodyLines);

                        if (this.DetectSitemapTextDocument(TextDoc: BodyLines))
                        {
                            DebugMsg(string.Format("ProcessTextPage: {0} :: {1}", "SITEMAP DETECTED", this.GetUrl()));
                            this.SetDocumentType(Type: MacroscopeConstants.DocumentType.SITEMAPTEXT);
                            this.ProcessSitemapTextOutlinks(TextDoc: BodyLines);
                        }

                        // A null length compares as false, so no remark is added.
                        if (RecordedLength > RobotsSizeCap)
                        {
                            this.AddRemark("ROBOTS_TOO_BIG", "Robots.txt is larger than 512KB");
                        }
                    }
                    else if (this.GetIsInternal())
                    {
                        this.ProcessPureTextOutlinks(TextDoc: BodyLines, LinkType: MacroscopeConstants.InOutLinkType.PURETEXT);
                    }
                }

                /** ---------------------------------------------------------------- **/
            }

            if (FetchFailure != null)
            {
                this.ProcessErrorCondition(FetchFailure);
            }
        }
예제 #8
0
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Fetches this document's URI as a video resource, records the response
        /// headers, and derives the document title from the last path segment of the
        /// document URL. The response body is not read.
        /// </summary>
        /// <remarks>
        /// Request failures are not rethrown: the status code is set to BadRequest,
        /// a remark is added, and the exception message is stored in ErrorCondition.
        /// NOTE(review): the other page processors in this file pass the message to
        /// ProcessErrorCondition instead of assigning ErrorCondition directly;
        /// confirm that the difference is intentional.
        /// </remarks>
        private async Task _ProcessVideoPage()
        {
            MacroscopeHttpTwoClient         Fetcher      = this.DocCollection.GetJobMaster().GetHttpClient();
            MacroscopeHttpTwoClientResponse Fetched      = null;
            string                          FetchFailure = null;

            try
            {
                Fetched = await Fetcher.Get(
                    this.GetUri(),
                    this.ConfigureVideoPageRequestHeadersCallback,
                    this.PostProcessRequestHttpHeadersCallback
                    );
            }
            catch (Exception ex)
            {
                // Both failure kinds are handled identically; only the log label differs.
                string LogFormat = (ex is MacroscopeDocumentException)
                    ? "_ProcessVideoPage :: MacroscopeDocumentException: {0}"
                    : "_ProcessVideoPage :: Exception: {0}";

                this.DebugMsg(string.Format(LogFormat, ex.Message));
                FetchFailure = ex.Message;
                this.SetStatusCode(HttpStatusCode.BadRequest);
                this.AddRemark("_ProcessVideoPage", ex.Message);
            }

            if (Fetched != null)
            {
                this.ProcessResponseHttpHeaders(Response: Fetched);

                /** Title ---------------------------------------------------------- **/

                // The title is the text after the final '/' of the document URL.
                string TitleFromUrl = null;

                foreach (Match Hit in Regex.Matches(this.DocUrl, "/([^/]+)$"))
                {
                    string LastSegment = Hit.Groups[1].Value;

                    if (LastSegment.Length > 0)
                    {
                        TitleFromUrl = LastSegment;
                        break;
                    }
                }

                if (TitleFromUrl == null)
                {
                    DebugMsg(string.Format("TITLE: {0}", "MISSING"));
                }
                else
                {
                    this.SetTitle(TitleFromUrl, MacroscopeConstants.TextProcessingMode.NO_PROCESSING);
                    DebugMsg(string.Format("TITLE: {0}", this.GetTitle()));
                }
            }

            if (FetchFailure != null)
            {
                this.ErrorCondition = FetchFailure;
            }
        }
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Requests this document's URI through the HTTP client obtained from the
        /// job master, using the CSS-page request header callback, and processes
        /// the result as a stylesheet: response headers are recorded, the body is
        /// read as a string, parsed for outlinks, optionally run through the custom
        /// filters and data extractors, and a title is derived from the text that
        /// follows the final slash of <c>DocUrl</c>.
        /// </summary>
        /// <remarks>
        /// Any exception thrown by the request is logged, added as a remark under
        /// the key "_ProcessCssPage", and causes the status code to be set to
        /// <c>HttpStatusCode.BadRequest</c>. Its message is passed to
        /// <c>ProcessErrorCondition</c> at the end of the method. A failure while
        /// reading the body sets <c>HttpStatusCode.Ambiguous</c> and a content
        /// length of zero.
        /// </remarks>
        /// <returns>A task that completes when processing has finished.</returns>
        private async Task _ProcessCssPage()
        {
            MacroscopeHttpTwoClient         Fetcher        = this.DocCollection.GetJobMaster().GetHttpClient();
            MacroscopeHttpTwoClientResponse Fetched        = null;
            string                          FailureMessage = null;

            DebugMsg(string.Format("ProcessCssPage: {0}", ""));

            try
            {
                Fetched = await Fetcher.Get(
                    this.GetUri(),
                    this.ConfigureCssPageRequestHeadersCallback,
                    this.PostProcessRequestHttpHeadersCallback
                    );
            }
            catch (Exception ex)
            {
                // Both exception kinds are handled identically; only the log label differs.
                string LogTemplate = (ex is MacroscopeDocumentException)
                    ? "_ProcessCssPage :: MacroscopeDocumentException: {0}"
                    : "_ProcessCssPage :: Exception: {0}";

                this.DebugMsg(string.Format(LogTemplate, ex.Message));
                FailureMessage = ex.Message;
                this.SetStatusCode(HttpStatusCode.BadRequest);
                this.AddRemark("_ProcessCssPage", ex.Message);
            }

            if (Fetched != null)
            {
                string StylesheetText = "";

                this.ProcessResponseHttpHeaders(Response: Fetched);

                /** Response body -------------------------------------------------- **/

                try
                {
                    DebugMsg(string.Format("MIME TYPE: {0}", this.MimeType));

                    StylesheetText = Fetched.GetContentAsString();

                    // Length is a character count, not a byte count.
                    this.SetContentLength(Length: StylesheetText.Length);

                    this.SetWasDownloaded(true);
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("Exception: {0}", ex.Message));
                    this.SetStatusCode(HttpStatusCode.Ambiguous);
                    this.SetContentLength(Length: 0);
                }

                if (string.IsNullOrEmpty(StylesheetText))
                {
                    DebugMsg(string.Format("ProcessCssPage: ERROR: {0}", this.GetUrl()));
                }
                else
                {
                    /** Outlinks --------------------------------------------------- **/

                    try
                    {
                        StylesheetParser Parser = new StylesheetParser();
                        Stylesheet       Parsed = Parser.Parse(StylesheetText);
                        this.ProcessCssOutlinks(CssStylesheet: Parsed);
                    }
                    catch (Exception ex)
                    {
                        // NOTE(review): the label names ProcessHtmlAttributeCssLinks although this is
                        // the CSS page processor; kept unchanged because the remark key may be
                        // relied on elsewhere - confirm.
                        this.DebugMsg(string.Format("ProcessHtmlAttributeCssLinks: {0}", ex.Message));
                        this.AddRemark("ProcessHtmlAttributeCssLinks", ex.Message);
                    }

                    /** Custom filters --------------------------------------------- **/

                    bool FiltersWanted =
                        MacroscopePreferencesManager.GetCustomFiltersEnable() &&
                        MacroscopePreferencesManager.GetCustomFiltersApplyToCss();

                    if (FiltersWanted)
                    {
                        MacroscopeCustomFilters Filter = this.DocCollection.GetJobMaster().GetCustomFilter();

                        if ((Filter != null) && Filter.IsEnabled())
                        {
                            this.ProcessGenericCustomFiltered(
                                CustomFilter: Filter,
                                GenericText: StylesheetText
                                );
                        }
                    }

                    /** Data extractors -------------------------------------------- **/

                    bool ExtractorsWanted =
                        MacroscopePreferencesManager.GetDataExtractorsEnable() &&
                        MacroscopePreferencesManager.GetDataExtractorsApplyToCss();

                    if (ExtractorsWanted)
                    {
                        this.ProcessGenericDataExtractors(GenericText: StylesheetText);
                    }
                }

                /** Title ---------------------------------------------------------- **/

                // Use the first non-empty capture of the text after the last "/" in the URL.
                string TitleFromUrl = null;

                for (
                    Match Candidate = Regex.Match(this.DocUrl, "/([^/]+)$");
                    Candidate.Success;
                    Candidate = Candidate.NextMatch())
                {
                    string Segment = Candidate.Groups[1].Value;

                    if (Segment.Length > 0)
                    {
                        TitleFromUrl = Segment;
                        break;
                    }
                }

                if (TitleFromUrl == null)
                {
                    DebugMsg(string.Format("TITLE: {0}", "MISSING"));
                }
                else
                {
                    this.SetTitle(TitleFromUrl, MacroscopeConstants.TextProcessingMode.NO_PROCESSING);
                    DebugMsg(string.Format("TITLE: {0}", this.GetTitle()));
                }
            }

            if (FailureMessage != null)
            {
                this.ProcessErrorCondition(FailureMessage);
            }
        }