Esempio n. 1
0
        /// <summary>
        /// Persists one crawl result in <c>tblMainUrls</c>: a row is added when no row with the same
        /// URL hash exists, otherwise the stored row is updated from the new result.
        /// The whole operation runs under <c>_lockDatabaseAdd</c>.
        /// </summary>
        /// <param name="crawledResult">
        /// Crawl outcome to store. It is mutated in place: <c>UrlHash</c>, <c>HostUrl</c> and
        /// <c>ParentUrlHash</c> are always rewritten, and on a successful crawl <c>IsCrawled</c>,
        /// <c>SourceCode</c>, <c>CompressionPercent</c> and <c>CrawlTryCounter</c> are set as well.
        /// </param>
        private static void saveCrawlInDatabase(crawlingResult crawledResult)
        {
            lock (_lockDatabaseAdd)
            {
                using (var context = new DBCrawling())
                {
                    crawledResult.UrlHash = crawledResult.Url.ComputeHashOfOurSystem();
                    crawledResult.HostUrl = crawledResult.Url.returnRootUrl();
                    var vrResult = context.tblMainUrls.SingleOrDefault(b => b.UrlHash == crawledResult.UrlHash);

                    // NOTE(review): the field is hashed in place, so it presumably carries the raw
                    // parent URL on entry - confirm against the callers.
                    crawledResult.ParentUrlHash = crawledResult.ParentUrlHash.ComputeHashOfOurSystem();

                    if (crawledResult.blcrawlSuccess == true)
                    {
                        crawledResult.IsCrawled = true;
                        if (!string.IsNullOrEmpty(crawledResult.SourceCode))
                        {
                            double dblOriginalSourceCodeLenght = crawledResult.SourceCode.Length;
                            crawledResult.SourceCode = crawledResult.SourceCode.CompressString();

                            // Compressed size as a percentage of the original size. Very short inputs
                            // can grow past 255% once compression overhead is added, and
                            // Convert.ToByte throws OverflowException for such values, so the
                            // percentage is clamped to what a byte can hold.
                            double dblCompressionPercent = Math.Floor(
                                (crawledResult.SourceCode.Length / dblOriginalSourceCodeLenght) * 100);
                            crawledResult.CompressionPercent = Convert.ToByte(
                                Math.Min(dblCompressionPercent, byte.MaxValue));
                        }
                        crawledResult.CrawlTryCounter = 0;
                    }

                    tblMainUrl finalObject = crawledResult.converToBaseMainUrlClass();

                    // Deleting the stored row and re-inserting it would cost an extra round trip to
                    // the server, so an existing row is updated by copying properties onto it instead.
                    if (vrResult != null)
                    {
                        // Values that belong to the first discovery of the URL are kept from the stored row.
                        finalObject.DiscoverDate    = vrResult.DiscoverDate;
                        finalObject.LinkDepthLevel  = vrResult.LinkDepthLevel;

                        // NOTE(review): this also replaces the 0 assigned above after a successful
                        // crawl, so an existing row keeps its previous counter - confirm this is intended.
                        finalObject.CrawlTryCounter = vrResult.CrawlTryCounter;
                        if (crawledResult.blcrawlSuccess == false)
                        {
                            finalObject.CrawlTryCounter++;
                        }

                        // Presumably copies finalObject's property values onto the row loaded by this
                        // context (CopyProperties is defined elsewhere - verify the copy direction).
                        finalObject.CopyProperties(vrResult);
                    }
                    else
                    {
                        context.tblMainUrls.Add(finalObject);
                    }

                    context.SaveChanges();
                }
            }
        }
        /// <summary>
        /// Stores a successful crawl in <c>tblMainUrls</c>. A row is added when no row with the same
        /// URL hash exists; otherwise the existing row is overwritten with the new crawl data.
        /// Results whose <c>blcrawlSuccess</c> is false are ignored.
        /// </summary>
        /// <param name="crawledResult">Crawl outcome whose fields are copied into the row.</param>
        private static void saveCrawlInDatabase(crawlingResult crawledResult)
        {
            if (crawledResult.blcrawlSuccess == false)
            {
                return;
            }

            using (var context = new DBCrawling())
            {
                tblMainUrl crawledUrl = new tblMainUrl();

                crawledUrl.UrlHash = crawledResult.srCrawledUrl.normalizeUrl().ComputeSha256Hash();

                var vrResult = context.tblMainUrls.SingleOrDefault(b => b.UrlHash == crawledUrl.UrlHash);

                if (vrResult == null)
                {
                    context.tblMainUrls.Add(crawledUrl);
                }
                else
                {
                    // The row was just loaded through this context, so it is already tracked and
                    // needs no Attach call; it is only flagged as modified so it is written back.
                    crawledUrl = vrResult;
                    context.Entry(crawledUrl).State = EntityState.Modified;
                }

                crawledUrl.DiscoverDate     = crawledResult.dtDiscoverDate;
                crawledUrl.FetchTimeMS      = crawledResult.irCrawlingTimeMS;
                crawledUrl.LastCrawlingDate = crawledResult.dtLastCrawlingDate;
                crawledUrl.LinkDepthLevel   = crawledResult.irLinkDepthLevel;
                crawledUrl.PageTile         = crawledResult.srTitleofPage;
                crawledUrl.ParentUrlHash    = crawledResult.srParentUrlHash.normalizeUrl().ComputeSha256Hash();

                // NOTE(review): srCrawledSourceCode is assumed to be non-null for a successful crawl - confirm.
                double dblOriginalSourceCodeLength = crawledResult.srCrawledSourceCode.Length;
                crawledUrl.SourceCode = crawledResult.srCrawledSourceCode.CompressString();

                if (dblOriginalSourceCodeLength > 0)
                {
                    // Compressed size as a percentage of the original size, clamped to the byte range:
                    // tiny inputs can exceed 255% and Convert.ToByte throws OverflowException for those.
                    double dblCompressionPercent = Math.Floor(
                        (crawledUrl.SourceCode.Length / dblOriginalSourceCodeLength) * 100);
                    crawledUrl.CompressionPercent = Convert.ToByte(
                        Math.Min(dblCompressionPercent, byte.MaxValue));
                }
                else
                {
                    // An empty page would otherwise divide by zero and make Convert.ToByte throw
                    // on the resulting NaN/Infinity.
                    crawledUrl.CompressionPercent = 0;
                }

                crawledUrl.Url = crawledResult.srCrawledUrl;

                context.SaveChanges();
            }
        }
Esempio n. 3
0
 /// <summary>
 /// Produces a new <c>tblMainUrl</c> instance from <paramref name="finalObject"/> by serializing
 /// it to JSON and deserializing that JSON as <c>tblMainUrl</c>.
 /// </summary>
 /// <param name="finalObject">Object to copy through the JSON round trip.</param>
 /// <returns>The <c>tblMainUrl</c> deserialized from the JSON form of <paramref name="finalObject"/>.</returns>
 private static tblMainUrl converToBaseMainUrlClass(this tblMainUrl finalObject)
 {
     string serializedObject = JsonConvert.SerializeObject(finalObject);
     tblMainUrl baseCopy = JsonConvert.DeserializeObject<tblMainUrl>(serializedObject);
     return baseCopy;
 }