partial void DeleteSessionScan(SessionScan instance);
partial void UpdateSessionScan(SessionScan instance);
partial void InsertSessionScan(SessionScan instance);
// Association fix-up methods used by LINQ to SQL when a SessionScan is
// removed from or added to the parent session's child collection.
private void detach_SessionScans(SessionScan entity)
{
    this.SendPropertyChanging();
    entity.Session = null;
}

private void attach_SessionScans(SessionScan entity)
{
    this.SendPropertyChanging();
    entity.Session = this;
}
private static void bot_UriProcessingFinished(object sender, UriProcessingFinishedEventArgs e)
{
    // Report crawl progress each time the robot finishes processing a URI.
    Robot robot = sender as Robot;
    if (robot != null)
        Console.WriteLine("Done: {0} Threads: {1} Processing: {2} To Go: {3}",
            robot.ProcessedCount, robot.ActiveThreadCount, robot.ProcessingCount, robot.NotProcessedCount);

    Console.ForegroundColor = ConsoleColor.Cyan;
    Console.WriteLine(e.Element.RequestedUri.ToString());
    Console.ResetColor();

    UriFoundCount++;

    // Save the response body of server errors (5xx) to disk for later inspection.
    // The folder is keyed on the host (the full base URI contains characters that
    // are not valid in a Windows path), and File.CreateText does not create
    // missing directories, so make sure the folder exists first.
    if (e.Status >= 500)
    {
        string path = String.Format(@"c:\crawler\{0}\{1}.html", e.Element.BaseUri.Host, e.ContentHash);
        Directory.CreateDirectory(Path.GetDirectoryName(path));
        if (!File.Exists(path))
        {
            using (StreamWriter writer = File.CreateText(path))
            {
                writer.Write(e.Content);
            }
        }
    }

    // Pull the <title> and the description/keywords/robots <meta> tags out of the page.
    string title = null;
    string description = null;
    string keywords = null;
    string robots = null;
    string matchData = e.Content;

    Match titleMatch = TitleExpression.Match(matchData);
    if (titleMatch.Success)
        title = titleMatch.Groups["title"].Value.Trim();

    MatchCollection metaMatches = MetaExpression.Matches(matchData);
    foreach (Match match in metaMatches)
    {
        if (match.Success)
        {
            if (String.Compare(match.Groups["name"].Value, "description", StringComparison.OrdinalIgnoreCase) == 0)
                description = match.Groups["content"].Value.Trim();
            else if (String.Compare(match.Groups["name"].Value, "keywords", StringComparison.OrdinalIgnoreCase) == 0)
                keywords = match.Groups["content"].Value.Trim();
            else if (String.Compare(match.Groups["name"].Value, "robots", StringComparison.OrdinalIgnoreCase) == 0)
                robots = match.Groups["content"].Value.Trim();
        }
    }

    try
    {
        using (CrawlerDataContext dc = new CrawlerDataContext())
        {
            // Record one scan row per processed URI.
            SessionScan scan = new SessionScan
            {
                SessionKey = SessionKey,
                UrlHash = e.Element.RequestedUri.ToString().ToHashString("SHA1"),
                ContentHash = e.ContentHash,
                ScanDate = DateTime.UtcNow,
                Host = e.Element.RequestedUri.Host,
                Base = e.Element.BaseUri.OriginalString,
                Found = e.Element.FoundUri.OriginalString,
                Url = e.Element.RequestedUri.OriginalString,
                Redirect = e.ResponseHeaders[HttpResponseHeader.Location],
                Method = e.Method,
                Status = e.Status,
                Title = title,
                Description = description,
                Keywords = keywords,
                Robots = ProcessRobots(robots, e).ToString(),
                ContentType = e.ResponseHeaders[HttpResponseHeader.ContentType],
                ContentEncoding = e.ResponseHeaders[HttpResponseHeader.ContentEncoding],
                ContentLength = TryConvertInt64(e.ResponseHeaders[HttpResponseHeader.ContentLength]),
                CacheControl = e.ResponseHeaders[HttpResponseHeader.CacheControl],
                Expires = e.ResponseHeaders[HttpResponseHeader.Expires]
            };

            // De-duplicate the related URIs, keeping a count of how often each one appears.
            Dictionary<string, SessionScanRelation> relatedUrls =
                new Dictionary<string, SessionScanRelation>(e.Related.Length);
            foreach (UriElement related in e.Related)
            {
                string relatedHash = related.RequestedUri.ToString().ToHashString("SHA1");
                if (relatedUrls.ContainsKey(relatedHash))
                    relatedUrls[relatedHash].Count++;
                else
                    relatedUrls.Add(relatedHash, new SessionScanRelation
                    {
                        SessionKey = SessionKey,
                        UrlHash = e.Element.RequestedUri.ToString().ToHashString("SHA1"),
                        RelatedHash = relatedHash,
                        Related = related.RequestedUri.ToString(),
                        Count = 1
                    });
            }

            // Add all the related urls to the scan and persist everything in one submit.
            scan.SessionScanRelations.AddRange(relatedUrls.Values);

            dc.SessionScans.InsertOnSubmit(scan);
            dc.SubmitChanges();
        }
    }
    catch (Exception exc)
    {
        // Track each distinct error message once and flag it in the console output.
        if (!Errors.Contains(exc.Message))
            Errors.Add(exc.Message);

        Console.BackgroundColor = ConsoleColor.Red;
        Console.WriteLine(exc.Message);
        Console.ResetColor();
    }
}
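// --------------------------------------------------------------------------
// The handler above relies on helpers defined elsewhere in the project
// (ToHashString, TryConvertInt64, ProcessRobots, the TitleExpression and
// MetaExpression regexes, and the SessionKey / UriFoundCount / Errors fields).
// The two sketched below are assumptions based on how their call sites use
// them, not the original implementations; they need System.Security.Cryptography
// and System.Text in scope.

// Parses an optional header value such as Content-Length, returning null when
// the header is missing or not a valid number.
private static long? TryConvertInt64(string value)
{
    long result;
    return Int64.TryParse(value, out result) ? (long?)result : (long?)null;
}

// String extension that hashes a value with the named algorithm (e.g. "SHA1")
// and returns a lowercase hex string, used above to key scans and relations.
// The class name is illustrative only.
public static class HashExtensions
{
    public static string ToHashString(this string value, string algorithmName)
    {
        using (HashAlgorithm algorithm = HashAlgorithm.Create(algorithmName))
        {
            byte[] hash = algorithm.ComputeHash(Encoding.UTF8.GetBytes(value));

            StringBuilder builder = new StringBuilder(hash.Length * 2);
            foreach (byte b in hash)
                builder.Append(b.ToString("x2"));

            return builder.ToString();
        }
    }
}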