/// <summary>
/// Stores the outcome of one crawl attempt in <c>tblMainUrls</c>: inserts a new row, or
/// overwrites the existing row that has the same URL hash.
/// </summary>
/// <param name="crawledResult">
/// The crawl outcome. It is mutated in place: its URL hash, host URL and parent URL hash are
/// (re)computed, and after a successful crawl its source code is replaced by the compressed
/// form and its compression percentage and try counter are updated.
/// </param>
/// <remarks>
/// The whole body runs while holding <c>_lockDatabaseAdd</c>, so concurrent calls to this
/// method execute one at a time.
/// </remarks>
private static void saveCrawlInDatabase(crawlingResult crawledResult)
{
    lock (_lockDatabaseAdd)
    {
        using (var context = new DBCrawling())
        {
            crawledResult.UrlHash = crawledResult.Url.ComputeHashOfOurSystem();
            crawledResult.HostUrl = crawledResult.Url.returnRootUrl();

            var existingRow = context.tblMainUrls.SingleOrDefault(b => b.UrlHash == crawledResult.UrlHash);

            // NOTE(review): ParentUrlHash is hashed in place, so it presumably arrives holding
            // the raw parent URL rather than an already-computed hash - confirm with callers.
            crawledResult.ParentUrlHash = crawledResult.ParentUrlHash.ComputeHashOfOurSystem();

            // Compared against true/false explicitly so the code keeps compiling (and keeps
            // its meaning) whether blcrawlSuccess is declared as bool or bool?.
            bool crawlSucceeded = crawledResult.blcrawlSuccess == true;
            bool crawlFailed = crawledResult.blcrawlSuccess == false;

            if (crawlSucceeded)
            {
                crawledResult.IsCrawled = true;

                if (!string.IsNullOrEmpty(crawledResult.SourceCode))
                {
                    double originalLength = crawledResult.SourceCode.Length;
                    crawledResult.SourceCode = crawledResult.SourceCode.CompressString();

                    double percent = Math.Floor(
                        (crawledResult.SourceCode.Length.ToDouble() / originalLength) * 100);

                    // Very short inputs can "compress" to something far longer than the
                    // original; clamp so Convert.ToByte cannot throw OverflowException.
                    crawledResult.CompressionPercent = Convert.ToByte(Math.Min(percent, byte.MaxValue));
                }

                crawledResult.CrawlTryCounter = 0;
            }

            tblMainUrl finalObject = crawledResult.converToBaseMainUrlClass();

            if (existingRow != null)
            {
                // Values that belong to the first discovery of the URL are kept from the stored row.
                finalObject.DiscoverDate = existingRow.DiscoverDate;
                finalObject.LinkDepthLevel = existingRow.LinkDepthLevel;

                // Carry the stored counter over, then apply this attempt's outcome. Previously the
                // stored value always overwrote the reset-to-zero done above, so a successful
                // crawl never cleared the counter of an existing row.
                finalObject.CrawlTryCounter = existingRow.CrawlTryCounter;
                if (crawlSucceeded)
                {
                    finalObject.CrawlTryCounter = 0;
                }
                else if (crawlFailed)
                {
                    finalObject.CrawlTryCounter++;
                }

                // Update the tracked row in place instead of deleting and re-inserting it, which
                // would cost an extra round trip to the server.
                // NOTE(review): CopyProperties is assumed to copy finalObject's values onto
                // existingRow without replacing the tracked reference - confirm in the helper.
                finalObject.CopyProperties(existingRow);
            }
            else
            {
                context.tblMainUrls.Add(finalObject);
            }

            context.SaveChanges();
        }
    }
}
/// <summary>
/// Stores a successfully crawled page in <c>tblMainUrls</c>, inserting a new row or updating
/// the existing row whose URL hash matches. Does nothing when the crawl was not successful.
/// </summary>
/// <param name="crawledResult">
/// The crawl outcome; its URL, timing, title, parent reference and source code are copied
/// onto the stored row. The source code is stored compressed.
/// </param>
private static void saveCrawlInDatabase(crawlingResult crawledResult)
{
    if (crawledResult.blcrawlSuccess == false)
    {
        return;
    }

    using (var context = new DBCrawling())
    {
        tblMainUrl crawledUrl = new tblMainUrl();
        crawledUrl.UrlHash = crawledResult.srCrawledUrl.normalizeUrl().ComputeSha256Hash();

        var existingRow = context.tblMainUrls.SingleOrDefault(b => b.UrlHash == crawledUrl.UrlHash);
        if (existingRow == null)
        {
            context.tblMainUrls.Add(crawledUrl);
        }
        else
        {
            // The row came from this context's own query, so it is already tracked and needs
            // no Attach call; marking it Modified is kept so every column is written back.
            crawledUrl = existingRow;
            context.Entry(crawledUrl).State = EntityState.Modified;
        }

        crawledUrl.DiscoverDate = crawledResult.dtDiscoverDate;
        crawledUrl.FetchTimeMS = crawledResult.irCrawlingTimeMS;
        crawledUrl.LastCrawlingDate = crawledResult.dtLastCrawlingDate;
        crawledUrl.LinkDepthLevel = crawledResult.irLinkDepthLevel;
        crawledUrl.PageTile = crawledResult.srTitleofPage;
        // NOTE(review): srParentUrlHash is normalized and hashed here, so it presumably holds
        // the raw parent URL rather than a hash - confirm with the producer of crawlingResult.
        crawledUrl.ParentUrlHash = crawledResult.srParentUrlHash.normalizeUrl().ComputeSha256Hash();
        crawledUrl.Url = crawledResult.srCrawledUrl;

        string rawSource = crawledResult.srCrawledSourceCode;
        if (string.IsNullOrEmpty(rawSource))
        {
            // Nothing to compress. Previously a null source threw NullReferenceException and an
            // empty one divided by zero, which made Convert.ToByte throw OverflowException.
            crawledUrl.SourceCode = rawSource;
            crawledUrl.CompressionPercent = 0;
        }
        else
        {
            crawledUrl.SourceCode = rawSource.CompressString();

            double percent = Math.Floor(
                (crawledUrl.SourceCode.Length.ToDouble() / rawSource.Length.ToDouble()) * 100);

            // Very short inputs can "compress" to something far longer than the original;
            // clamp so Convert.ToByte cannot throw OverflowException.
            crawledUrl.CompressionPercent = Convert.ToByte(Math.Min(percent, byte.MaxValue));
        }

        context.SaveChanges();
    }
}
/// <summary>
/// Produces a <c>tblMainUrl</c> from the given object by serializing it to JSON and
/// deserializing that JSON as <c>tblMainUrl</c>.
/// </summary>
/// <param name="finalObject">The object to round-trip through JSON.</param>
/// <returns>The <c>tblMainUrl</c> materialized from the serialized JSON.</returns>
/// <remarks>
/// NOTE(review): presumably used to strip a derived type down to the plain base entity
/// before it is handed to the database context - confirm against the call sites.
/// </remarks>
private static tblMainUrl converToBaseMainUrlClass(this tblMainUrl finalObject)
{
    string serialized = JsonConvert.SerializeObject(finalObject);
    tblMainUrl baseCopy = JsonConvert.DeserializeObject<tblMainUrl>(serialized);
    return baseCopy;
}