/// <summary>
        /// Processes a single page: obtains its HTML through <c>DownloadHtmlAndUpdateResources</c>,
        /// runs <c>ProcessHtml</c> on the parsed document and then notifies the callbacks
        /// registered on <c>KrawlContext</c>.
        /// </summary>
        /// <remarks>
        /// Failures inside either callback are logged and swallowed so that one bad callback does
        /// not abort the page. Any other failure is logged and rethrown wrapped in an
        /// <see cref="Exception"/> whose <c>InnerException</c> is the original error.
        /// </remarks>
        /// <param name="uri">Address of the page to process; must not be null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="uri"/> is null.</exception>
        /// <exception cref="Exception">Downloading or processing the page failed.</exception>
        public void ProcessUri(Uri uri)
        {
            if (uri == null)
            {
                // ArgumentNullException still derives from Exception, so existing catch blocks keep working.
                throw new ArgumentNullException(nameof(uri), "Error : Uri cannot be null.");
            }
            try
            {
                // The interpolated string is already final text; running it through String.Format
                // would re-parse it as a composite format string and throw on any stray brace.
                KrawlContext.ErrorLogMethod(LOGTYPE.USERINFO, $"Processing Url:{uri.AbsoluteUri}", null);
                string htmlContent = DownloadHtmlAndUpdateResources(uri);

                // A null result means there is no HTML to process; the resources callback below still runs.
                if (htmlContent != null)
                {
                    #region Initialise Html Document

                    KHtmlDocument htmlDocument = new KHtmlDocument();
                    htmlDocument.LoadHtml(htmlContent);
                    htmlDocument.IdentifyBaseTagAndSetValue();

                    #endregion

                    ProcessHtml(uri, htmlDocument);

                    #region Generate relative Url and call html callback function

                    if (KrawlContext.ProcessedHtmlCallBackMethod != null)
                    {
                        try
                        {
                            string path = KrawlerUtility.GenerateHtmlLocalPath(uri);

                            // TryGetValue always assigns its out argument, so no placeholder instance is needed.
                            AssetDetails linkMap;
                            if (KrawlContext.Resources.UniqueWebpagesDictionary.TryGetValue(uri.AbsoluteUri, out linkMap))
                            {
                                linkMap.NewUrl = path;
                                // Written back explicitly: required if AssetDetails is a value type, harmless otherwise.
                                KrawlContext.Resources.UniqueWebpagesDictionary[uri.AbsoluteUri] = linkMap;
                            }
                            KrawlContext.ProcessedHtmlCallBackMethod(path, htmlDocument.DocumentNode.InnerHtml);
                        }
                        catch (Exception ex)
                        {
                            KrawlContext.ErrorLogMethod(LOGTYPE.ERROR, "Error Message : Error Generating relative Url or in html callback function", ex);
                        }
                    }

                    #endregion
                }

                #region Resouces updated Method call

                try
                {
                    // NOTE(review): unlike ProcessedHtmlCallBackMethod this callback is not null-checked;
                    // an unset callback ends up in the catch below and is logged as an error.
                    KrawlContext.UpdatedResoucesCallBackMethod(KrawlContext.Resources);
                }
                catch (Exception ex)
                {
                    KrawlContext.ErrorLogMethod(LOGTYPE.ERROR, $"Error Message : Error while updating the DB value of Url : {uri.AbsoluteUri}", ex);
                }

                #endregion

                KrawlContext.ErrorLogMethod(LOGTYPE.USERINFO, $"Processing Url : {uri.AbsoluteUri} completed.", null);
            }
            catch (Exception ex)
            {
                // The log entry carries only the message; the exception itself travels to the
                // caller as the InnerException of the wrapper thrown below.
                KrawlContext.ErrorLogMethod(LOGTYPE.USERINFO, $"Something went wrong while processing : {uri.AbsoluteUri}.", null);
                throw new Exception($"Error while processing the Url : {uri.AbsoluteUri}", ex);
            }
        }
 /// <summary>
 /// Calls <c>KrawlerUtility.GenerateFileLocalPath</c> for a stylesheet URL that carries a
 /// query string. The result is not asserted; the call only has to complete.
 /// </summary>
 public void KrawlerUtilityTest()
 {
     Uri stylesheetUri = new Uri("http://13.127.86.77/abc.css?sccss=1&ver=4.9.5");
     var localPath = KrawlerUtility.GenerateFileLocalPath(stylesheetUri, ".css");
 }
        // Example no. 3
        // 0
        /// <summary>
        /// Requests the file at <paramref name="uri"/>, optionally rewrites it through
        /// <c>CrawlCss</c> when it is a stylesheet, works out a local path for it and passes
        /// the bytes to <c>Context.DownloadedFileCallBackMethod</c>.
        /// </summary>
        /// <remarks>
        /// Only argument validation throws. Every failure after that point (including a
        /// non-OK HTTP status) is logged through <c>Context.ErrorLogMethod</c> and the method
        /// returns normally. On a non-OK status the download callback is still invoked, with
        /// null content, after the status code has been recorded on <paramref name="asset"/>.
        /// </remarks>
        /// <param name="asset">Asset record to update; its <c>LinkUrl</c> must be non-empty.</param>
        /// <param name="type">Kind of file; selects CSS rewriting and the fallback extension.</param>
        /// <param name="uri">Address the file is requested from and the local path is derived from.</param>
        /// <exception cref="ArgumentNullException"><paramref name="asset"/> is null.</exception>
        /// <exception cref="ArgumentException"><c>asset.LinkUrl</c> is null or empty.</exception>
        public void ProcessTheResource(AssetDetails asset, FileType type, Uri uri)
        {
            if (asset == null)
            {
                throw new ArgumentNullException(nameof(asset));
            }
            if (String.IsNullOrEmpty(asset.LinkUrl))
            {
                // The single-string ArgumentException constructor takes the message, not the
                // parameter name, so the name goes in the second argument.
                throw new ArgumentException("Asset LinkUrl cannot be null or empty.", nameof(asset));
            }

            try
            {
                #region Download File

                //Download the File
                IRestResponse result = HttpRequest.HttpRequestWithReadAndWriteTimeOut(uri, Context.Configuration.ReadAndWriteTimeOut, Context.Configuration.UserAgentString);

                //Record the status code on the asset, whatever it is
                asset.ResponseStatusCode = result.StatusCode;
                if (result.StatusCode != HttpStatusCode.OK)
                {
                    // Report the failed download to the callback (no content), then abort via
                    // the catch below, which logs the failure.
                    Context.DownloadedFileCallBackMethod(asset, type, null, null);
                    throw new Exception(String.Format("Error downloading the File : {0}, Status Code : {1}",
                                                      asset.LinkUrl, result.StatusCode));
                }

                #endregion

                #region Process File

                var byteArray = result.RawBytes;

                if (type.Equals(FileType.STYLE) && Context.Configuration.CrawlCssEnabled)
                {
                    //CrawlCss
                    //and also update the new files found and push it to queue
                    String cssText = CrawlCss(result.Content, uri);

                    // NOTE(review): Encoding.Default is platform-dependent; confirm it matches the
                    // charset the stylesheet is later served with.
                    byteArray = Encoding.Default.GetBytes(cssText);
                }

                var extension = KrawlerUtility.GetExtensionFromContentType(uri.LocalPath, result.ContentType);
                if (String.IsNullOrEmpty(extension))
                {
                    // Fall back to the extension implied by the file type; other types keep the empty value.
                    if (type.Equals(FileType.SCRIPT))
                    {
                        extension = ".js";
                    }
                    else if (type.Equals(FileType.STYLE))
                    {
                        extension = ".css";
                    }
                }

                string filePath = KrawlerUtility.GenerateFileLocalPath(uri, extension);
                if (String.IsNullOrEmpty(filePath))
                {
                    filePath = uri.LocalPath;
                    Context.ErrorLogMethod(LOGTYPE.INFORMATION, String.Format("file path generate was nulll or empty for url : {0}", uri.LocalPath), null);
                }

                #endregion

                #region Save File

                //Callback method to save the file
                asset.NewUrl = filePath;
                Context.DownloadedFileCallBackMethod(asset, type, byteArray, result.ContentType);

                #endregion
            }
            catch (Exception ex)
            {
                // Best effort: a failed asset is logged and must not stop the caller.
                Context.ErrorLogMethod(LOGTYPE.INFORMATION, String.Format("Error downloading Url: {0}", asset.LinkUrl), ex);
            }
        }