/// <summary>
/// Sets up the crawl with all the parameters it needs to continue until finished.
/// </summary>
/// <remarks>
/// The seed URL is validated before any crawler state is touched, so a rejected
/// seed leaves the previously configured state intact.
/// </remarks>
/// <param name="urlToStart">The absolute, well-formed URL to start crawling from</param>
/// <param name="maxAttempts">Number of times to try failed pages (not implemented)</param>
/// <param name="secondsDelay">Number of seconds to wait between page loads</param>
/// <param name="steps">Number of steps away from the urlToStart to traverse before stopping</param>
/// <param name="databaseFileName">Name of the file to store data in</param>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="urlToStart"/> is null, cannot be parsed as an absolute URI,
/// or is not a well-formed URI string.
/// </exception>
public void Seed(string urlToStart, int maxAttempts, int secondsDelay, int steps, string databaseFileName)
{
    // Parse once into a local; TryCreate with UriKind.Absolute already rejects
    // null, unparsable and relative input without throwing.
    Uri seedUri;
    if (!Uri.TryCreate(urlToStart, UriKind.Absolute, out seedUri) ||
        !seedUri.IsWellFormedOriginalString())
    {
        throw new ArgumentException("Invalid URI supplied as seed");
    }

    // Only commit state once the seed is known to be valid.
    _seedUri = seedUri;

    // Uri instances are immutable, so the seed node can share the parsed instance.
    var seedNode = new WebNode { NodeUri = seedUri };
    WebNodes.Add(seedNode);

    _maxCrawlAttempts = maxAttempts;
    _secondsDelay = secondsDelay;
    _maxSteps = steps;

    // Connect to the database file before creating the snapshot that uses it.
    var dbSetup = new Database();
    dbSetup.ConnectToDatabase(databaseFileName);

    // Record this crawl's parameters and remember the id returned for the snapshot.
    _snapShot = new SnapShot(databaseFileName);
    _snapShotId = _snapShot.InsertSnapShot(urlToStart, secondsDelay, steps);
}
/// <summary>
/// Loads the HTML document located at the absolute URI of the given node.
/// </summary>
/// <param name="node">The node whose <c>NodeUri</c> identifies the page to load</param>
/// <returns>The document returned by <c>HtmlWeb.Load</c> for that address</returns>
private static HtmlDocument GetDocument(WebNode node)
{
    HtmlWeb loader = new HtmlWeb();
    string address = node.NodeUri.AbsoluteUri;
    HtmlDocument loaded = loader.Load(address);
    return loaded;
}
/// <summary>
/// Get the text from the document. Every non-empty text node is trimmed and the
/// pieces are joined with single spaces before being passed through <c>CleanText</c>.
/// </summary>
/// <remarks>
/// A document without any text nodes yields the result of cleaning an empty string
/// instead of failing. The node is marked as parsed in either case.
/// </remarks>
/// <param name="doc">The document to pull words from</param>
/// <param name="node">The WebNode that this document is in reference to; its <c>IsParsed</c> flag is set to true</param>
/// <returns>The collected text after it has been processed by <c>CleanText</c></returns>
private string GetText(HtmlDocument doc, WebNode node)
{
    var text = new StringBuilder();

    // HtmlAgilityPack returns null (not an empty collection) when the XPath
    // query matches nothing, so guard before enumerating.
    var textNodes = doc.DocumentNode.SelectNodes("//text()");
    if (textNodes != null)
    {
        foreach (HtmlNode textNode in textNodes)
        {
            var textBlock = textNode.InnerText.Trim();
            if (textBlock.Length == 0)
            {
                continue; // skip empty text blocks
            }

            // Chained appends avoid allocating a temporary concatenated string.
            text.Append(textBlock).Append(' ');
        }
    }

    node.IsParsed = true;

    return CleanText(text.ToString());
}
/// <summary>
/// Creates the tree node for a SharePoint web, attaches it under
/// <paramref name="parentNode"/>, and populates it with placeholder nodes for its
/// sub-webs, a content type container, a field container and its lists.
/// </summary>
/// <remarks>
/// Sub-web nodes are only created, not expanded. Any failure is logged and the
/// node built so far is returned.
/// </remarks>
/// <param name="web">The web to represent; its <c>Webs</c> and <c>Lists</c> collections are loaded here</param>
/// <param name="parentNode">The node that receives the new web node as a child</param>
/// <param name="rootNode">The root node assigned to every node created by this call</param>
/// <returns>
/// The node created for <paramref name="web"/>; it may be only partially populated
/// if an error occurred, or null if the node itself could not be created.
/// </returns>
private BaseNode DoSPWeb(Web web, BaseNode parentNode, BaseNode rootNode)
{
    BaseNode myNode = null;
    try
    {
        myNode = new WebNode(web);
        AttachChildNode(parentNode, myNode, rootNode);
        myNode.LoadedData = true;

        // Queue both collections and fetch them in a single ExecuteQuery call.
        web.Context.Load(web.Webs);
        web.Context.Load(web.Lists);
        web.Context.ExecuteQuery();

        try
        {
            foreach (Web childWeb in web.Webs)
            {
                // Draw the nodes - user will expand them later if necessary.
                // The child belongs to myNode, so myNode (not parentNode) is its parent.
                AttachChildNode(myNode, new WebNode(childWeb), rootNode);
            }
        }
        catch (Exception ex)
        {
            SPCoderLogging.Logger.Error($"Error expanding Web: {ex.Message}");
            return myNode;
        }

        // Add Content Type Container node
        AttachChildNode(myNode, new ContentTypeContainerNode(web.ContentTypes), rootNode);

        // Add Field Container node
        AttachChildNode(myNode, new FieldContainerNode(web.Fields), rootNode);

        foreach (Microsoft.SharePoint.Client.List list in web.Lists)
        {
            AttachChildNode(myNode, new ListNode(list), rootNode);
        }

        return myNode;
    }
    catch (Exception ex)
    {
        SPCoderLogging.Logger.Error($"Error expanding Web: {ex.Message}");
        return myNode;
    }
}

/// <summary>
/// Adds <paramref name="child"/> to <paramref name="parent"/>'s children and sets the
/// child's parent, root and connector references.
/// </summary>
private void AttachChildNode(BaseNode parent, BaseNode child, BaseNode rootNode)
{
    parent.Children.Add(child);
    child.ParentNode = parent;
    child.RootNode = rootNode;
    child.NodeConnector = this;
}
/// <summary>
/// Creates the main pane for the given application object and registers the tree
/// view's context menu, drag, drop and key-down handlers.
/// </summary>
/// <param name="app">The application object stored in <c>Data</c></param>
internal MainPane(App app)
{
    Data = app;

    TreeView.AdviseContextMenu(TreeView_ContextMenu);

    // Drag: a folder node is dragged as a single-item list, a web node as itself;
    // anything else (or a miss) produces no drag source.
    TreeView.AdviseDrag(
        delegate(HtmlMouseEvent e)
        {
            TreeViewHit hit = TreeView.Hit(e);
            if (hit == null)
            {
                return(null);
            }

            object hitNode = hit.Nodes.List[hit.Index];

            FolderNode folderNode = hitNode as FolderNode;
            if (folderNode != null)
            {
                return(new ListItemsDragSource(new ListItem[] { folderNode.Item }));
            }

            WebNode webNode = hitNode as WebNode;
            if (webNode != null)
            {
                return(new WebNodeDragSource(webNode));
            }

            return(null);
        });

    // Drop: accept either a list of items or a web node; reject everything else.
    TreeView.AdviseDrop(
        delegate(object dataObject)
        {
            IList <ListItem> droppedItems = dataObject as IList <ListItem>;
            if (droppedItems != null)
            {
                return(new ListItemsNodeDropTarget(TreeView, droppedItems));
            }

            WebNode droppedWeb = dataObject as WebNode;
            if (droppedWeb != null)
            {
                return(new WebNodeDragTarget(TreeView, droppedWeb));
            }

            return(null);
        });

    if (SupportedHtml5Features.File)
    {
        PostAdvise(
            delegate()
            {
                return(Element.AdviseDrop(null, null));
            });

        // NOTE(review): the instance is never used afterwards; presumably the
        // constructor hooks itself up to the tree view - confirm.
        TreeViewFileDropTarget treeFileDrop = new TreeViewFileDropTarget(TreeView);
    }

    // Delete key removes the currently selected folder, if the selection is a folder.
    TreeView.AdviseKeyDown(
        delegate(HtmlKeyboardEvent e)
        {
            if (e.KeyCode != KeyCode.Delete)
            {
                return;
            }

            FolderNode selectedFolder = TreeView.Selection.Data as FolderNode;
            if (selectedFolder != null)
            {
                NodeUtil.DeleteFolder(selectedFolder);
            }
        });
}