/// <summary>
/// Click handler for the test button: deletes every row of <c>tblMainUrls</c>, commits,
/// then inserts one hard-coded placeholder row and commits again.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btnTest_Click(object sender, RoutedEventArgs e)
{
    using (var context = new DBCrawling())
    {
        // Step 1: wipe the table and persist the deletion on its own.
        var urlTable = context.tblMainUrls;
        urlTable.RemoveRange(urlTable);
        context.SaveChanges();

        // Step 2: insert the single placeholder record.
        var placeholderRow = new tblMainUrl();
        placeholderRow.Url = "www.toros.edu.tr";
        placeholderRow.ParentUrlHash = "www.toros.edu.tr";
        placeholderRow.SourceCode = "gg";
        placeholderRow.UrlHash = "ww";

        context.tblMainUrls.Add(placeholderRow);
        context.SaveChanges();
    }
}
/// <summary>
/// Stores the outcome of a crawl attempt in <c>tblMainUrls</c>. A row with the same URL hash is
/// updated in place; otherwise a new row is added.
/// </summary>
/// <remarks>
/// Runs entirely under <c>_lockDatabaseAdd</c>. The method mutates <paramref name="crawledResult"/>:
/// it fills in <c>UrlHash</c> and <c>HostUrl</c>, replaces <c>ParentUrlHash</c> with its hash and,
/// for a successful crawl, marks it crawled and replaces <c>SourceCode</c> with its compressed form.
/// For an existing row, <c>DiscoverDate</c> and <c>LinkDepthLevel</c> are preserved, and
/// <c>CrawlTryCounter</c> is reset to zero on success or incremented on failure.
/// </remarks>
/// <param name="crawledResult">Crawl outcome to persist; its <c>Url</c> is read to derive the hash and host.</param>
private static void saveCrawlInDatabase(crawlingResult crawledResult)
{
    lock (_lockDatabaseAdd)
    {
        using (var context = new DBCrawling())
        {
            crawledResult.UrlHash = crawledResult.Url.ComputeHashOfOurSystem();
            crawledResult.HostUrl = crawledResult.Url.returnRootUrl();

            var existingRow = context.tblMainUrls.SingleOrDefault(b => b.UrlHash == crawledResult.UrlHash);

            crawledResult.ParentUrlHash = crawledResult.ParentUrlHash.ComputeHashOfOurSystem();

            if (crawledResult.blcrawlSuccess == true)
            {
                crawledResult.IsCrawled = true;

                if (!string.IsNullOrEmpty(crawledResult.SourceCode))
                {
                    double originalLength = crawledResult.SourceCode.Length;
                    crawledResult.SourceCode = crawledResult.SourceCode.CompressString();

                    // The compressed text may be longer than a very short input; a ratio above
                    // 255% would make Convert.ToByte throw, so clamp it to the byte range.
                    double percent = Math.Floor(crawledResult.SourceCode.Length / originalLength * 100);
                    crawledResult.CompressionPercent = Convert.ToByte(Math.Min(percent, byte.MaxValue));
                }

                crawledResult.CrawlTryCounter = 0;
            }

            tblMainUrl finalObject = crawledResult.converToBaseMainUrlClass();

            if (existingRow != null)
            {
                // Deleting and re-inserting the row would cost an extra round trip, so the new
                // values are copied onto the already-loaded entity instead.
                finalObject.DiscoverDate = existingRow.DiscoverDate;
                finalObject.LinkDepthLevel = existingRow.LinkDepthLevel;
                finalObject.CrawlTryCounter = existingRow.CrawlTryCounter;

                if (crawledResult.blcrawlSuccess == true)
                {
                    // Previously the stored counter overwrote the success reset above,
                    // so an existing row never went back to zero.
                    finalObject.CrawlTryCounter = 0;
                }
                else if (crawledResult.blcrawlSuccess == false)
                {
                    finalObject.CrawlTryCounter++;
                }

                // NOTE(review): presumably copies finalObject's property values onto the
                // tracked row - confirm against CopyProperties.
                finalObject.CopyProperties(existingRow);
            }
            else
            {
                context.tblMainUrls.Add(finalObject);
            }

            context.SaveChanges();
        }
    }
}
/// <summary>
/// Stores a successful crawl in <c>tblMainUrls</c>, adding a row for the URL hash when none
/// exists and otherwise updating the existing row. Unsuccessful crawls are ignored.
/// </summary>
/// <remarks>
/// The source code is stored in compressed form together with the compressed/original length
/// ratio as a percentage (capped at 255). When the crawled source is null or empty it is stored
/// as-is and the percentage is left untouched, because the ratio is undefined for a zero length.
/// </remarks>
/// <param name="crawledResult">Crawl outcome to persist.</param>
private static void saveCrawlInDatabase(crawlingResult crawledResult)
{
    if (crawledResult.blcrawlSuccess == false)
    {
        return;
    }

    using (var context = new DBCrawling())
    {
        tblMainUrl crawledUrl = new tblMainUrl();
        crawledUrl.UrlHash = crawledResult.srCrawledUrl.normalizeUrl().ComputeSha256Hash();

        var existingRow = context.tblMainUrls.SingleOrDefault(b => b.UrlHash == crawledUrl.UrlHash);
        if (existingRow == null)
        {
            context.tblMainUrls.Add(crawledUrl);
        }
        else
        {
            // Continue with the row loaded from the database and flag it as modified.
            crawledUrl = existingRow;
            context.tblMainUrls.Attach(crawledUrl);
            context.Entry(crawledUrl).State = EntityState.Modified;
        }

        crawledUrl.DiscoverDate = crawledResult.dtDiscoverDate;
        crawledUrl.FetchTimeMS = crawledResult.irCrawlingTimeMS;
        crawledUrl.LastCrawlingDate = crawledResult.dtLastCrawlingDate;
        crawledUrl.LinkDepthLevel = crawledResult.irLinkDepthLevel;
        crawledUrl.PageTile = crawledResult.srTitleofPage;
        crawledUrl.ParentUrlHash = crawledResult.srParentUrlHash.normalizeUrl().ComputeSha256Hash();

        var rawSource = crawledResult.srCrawledSourceCode;
        if (string.IsNullOrEmpty(rawSource))
        {
            // Dividing by a zero length yields Infinity/NaN, which Convert.ToByte rejects;
            // skip compression and the ratio entirely for an empty page.
            crawledUrl.SourceCode = rawSource;
        }
        else
        {
            var compressedSource = rawSource.CompressString();
            crawledUrl.SourceCode = compressedSource;

            // Clamp: the compressed text may be longer than a very short input, and a ratio
            // above 255% would make Convert.ToByte throw.
            double percent = Math.Floor((double)compressedSource.Length / rawSource.Length * 100);
            crawledUrl.CompressionPercent = Convert.ToByte(Math.Min(percent, byte.MaxValue));
        }

        crawledUrl.Url = crawledResult.srCrawledUrl;

        context.SaveChanges();
    }
}
/// <summary>
/// Adds the links discovered on a crawled page to <c>tblMainUrls</c> so they can be crawled
/// later. Links whose hash is already stored, or that repeat within the same list, are skipped.
/// </summary>
/// <remarks>
/// Runs under <c>_lockDatabaseAdd</c> and commits all new rows with a single
/// <c>SaveChanges</c> call. <c>irDiscoveredUrlCount</c> is incremented once per added link.
/// A null link list is treated as "nothing to add".
/// </remarks>
/// <param name="crawlResult">
/// Crawl outcome whose <c>lstDiscoveredLinks</c> are stored; its <c>UrlHash</c> becomes the
/// parent hash of each new row and its <c>LinkDepthLevel</c> + 1 their depth.
/// </param>
private static void saveDiscoveredLinksInDatabaseForFutureCrawling(crawlingResult crawlResult)
{
    if (crawlResult.lstDiscoveredLinks == null)
    {
        return;
    }

    lock (_lockDatabaseAdd)
    {
        using (var context = new DBCrawling())
        {
            HashSet<string> seenHashes = new HashSet<string>();

            foreach (var link in crawlResult.lstDiscoveredLinks)
            {
                var linkHash = link.ComputeHashOfOurSystem();

                // Add returns false for a hash already handled in this batch. Recording every
                // hash (not only inserted ones) avoids re-querying the database for repeated
                // links that are already stored.
                if (!seenHashes.Add(linkHash))
                {
                    continue;
                }

                bool alreadyStored = context.tblMainUrls.Any(databaseRecord => databaseRecord.UrlHash == linkHash);
                if (alreadyStored)
                {
                    continue;
                }

                crawlingResult newLink = new crawlingResult();
                newLink.Url = link.normalizeUrl();
                newLink.HostUrl = newLink.Url.returnRootUrl();
                newLink.UrlHash = linkHash;
                newLink.ParentUrlHash = crawlResult.UrlHash;
                newLink.LinkDepthLevel = (short)(crawlResult.LinkDepthLevel + 1);

                context.tblMainUrls.Add(newLink.converToBaseMainUrlClass());
                Interlocked.Increment(ref irDiscoveredUrlCount);
            }

            context.SaveChanges();
        }
    }
}