/// <summary>
/// Builds a catalog by spidering outward from a single start page:
/// the start Uri is handed to <c>ProcessUri</c>, and the resulting
/// catalog is saved to disk before being returned.
/// </summary>
/// <remarks>
/// This is the MAIN entry point of the indexing system.
/// Progress is reported through <c>ProgressEvent</c> at levels 1, 2 and 3.
/// </remarks>
/// <param name="startPageUri">Page at which spidering starts.</param>
/// <returns>The catalog that was populated and saved.</returns>
public Catalog BuildCatalog(Uri startPageUri)
{
    // Begin with a brand new catalog and explicitly clear it.
    _Catalog = new Catalog();
    _Catalog.clear();

    // Stop words, Go words and Stemming are configured here.
    SetPreferences();

    // robots.txt handling for the site that hosts the start page.
    _Robot = new RobotsTxt(startPageUri, Preferences.RobotUserAgent);

    // The spider starts here; per the original notes ProcessUri recurses
    // through the links it discovers.
    ProcessUri(startPageUri, 0);

    // Spidering has finished - announce it.
    ProgressEventArgs spideringDone = new ProgressEventArgs(1, "Spider.Catalog() complete.");
    ProgressEvent(this, spideringDone);

    string savingMessage = "Serializing to disk location " + Preferences.CatalogFileName;
    ProgressEvent(this, new ProgressEventArgs(2, savingMessage));

    // Persist the catalog so it can be loaded again if the server
    // application is restarted.
    _Catalog.Save();

    string savedMessage = "Save to disk " + Preferences.CatalogFileName + " successful";
    ProgressEvent(this, new ProgressEventArgs(3, savedMessage));

    // Hand the finished catalog back to the calling code.
    return _Catalog;
}
/// <summary>
/// [v6] Builds a single catalog by spidering from each of the supplied
/// start pages in turn, then saves the catalog to disk and returns it.
/// </summary>
/// <remarks>
/// For every start Uri the method records it in <c>_CurrentStartUri</c> /
/// <c>_CurrentStartUriString</c>, creates a fresh <c>RobotsTxt</c> for it and
/// calls <c>ProcessUri</c>. Progress is reported through <c>ProgressEvent</c>.
/// (A [v7] cache was stubbed out in the original code and remains disabled.)
/// </remarks>
/// <param name="startPageUris">array of start pages</param>
/// <returns>Catalog of words/documents</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="startPageUris"/> is null.
/// </exception>
public Catalog BuildCatalog(Uri[] startPageUris)
{
    // Fail fast, before the existing catalog is replaced, instead of
    // surfacing a NullReferenceException part-way through.
    if (startPageUris == null)
    {
        throw new ArgumentNullException("startPageUris");
    }

    _Catalog = new Catalog();

    ProgressEvent(this, new ProgressEventArgs(1, "Spider.Catalog (Uri Array) count: " + startPageUris.Length.ToString()));

    // Setup Stop, Go, Stemming
    SetPreferences();

    foreach (Uri startPageUri in startPageUris)
    {
        // Remembered so fully qualified links can be compared against it.
        _CurrentStartUri = startPageUri;
        // NOTE(review): ToLower() is culture-sensitive; kept as-is because the
        // code that compares against this string is not visible here.
        _CurrentStartUriString = _CurrentStartUri.AbsoluteUri.ToLower();

        ProgressEvent(this, new ProgressEventArgs(1, "Spider.Catalog (start Uri) " + startPageUri.AbsoluteUri));

        // Each start page gets its own robots.txt handler.
        _Robot = new RobotsTxt(startPageUri, Preferences.RobotUserAgent);

        // The spider starts here; per the original notes ProcessUri recurses
        // through the links it discovers.
        ProcessUri(startPageUri, 0);

        ProgressEvent(this, new ProgressEventArgs(1, "Spider.Catalog (end Uri) " + startPageUri.AbsoluteUri));
    }

    // Now we've FINISHED Spidering
    ProgressEvent(this, new ProgressEventArgs(1, "Spider.Catalog() complete."));
    ProgressEvent(this, new ProgressEventArgs(2, "Serializing to disk location " + Preferences.CatalogFileName));

    // Serialization of the Catalog, so we can load it again if the server
    // Application is restarted.
    _Catalog.Save();

    ProgressEvent(this, new ProgressEventArgs(3, "Save to disk " + Preferences.CatalogFileName + " successful"));

    return _Catalog; // finished, return to the calling code to 'use'
}
/// <summary>
/// [v6] Spiders every start page in the supplied array into one catalog,
/// saves that catalog to disk and returns it.
/// </summary>
/// <remarks>
/// Each start Uri is stored in <c>_CurrentStartUri</c> (and, lower-cased, in
/// <c>_CurrentStartUriString</c>), gets its own <c>RobotsTxt</c> instance and
/// is then passed to <c>ProcessUri</c>. Progress is reported through
/// <c>ProgressEvent</c>. (A [v7] cache was stubbed out in the original code
/// and remains disabled.)
/// </remarks>
/// <param name="startPageUris">array of start pages</param>
/// <returns>Catalog of words/documents</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="startPageUris"/> is null.
/// </exception>
public Catalog BuildCatalog(Uri[] startPageUris)
{
    // Validate up front so a null argument neither discards the current
    // catalog nor fails later with a bare NullReferenceException.
    if (startPageUris == null)
    {
        throw new ArgumentNullException("startPageUris");
    }

    _Catalog = new Catalog();

    ProgressEvent(this, new ProgressEventArgs(1, "Spider.Catalog (Uri Array) count: " + startPageUris.Length.ToString()));

    // Setup Stop, Go, Stemming
    SetPreferences();

    foreach (Uri startPageUri in startPageUris)
    {
        // Kept so fully qualified links can be compared against the start page.
        _CurrentStartUri = startPageUri;
        // NOTE(review): ToLower() depends on the current culture; left unchanged
        // because the comparison code that consumes this value is not visible.
        _CurrentStartUriString = _CurrentStartUri.AbsoluteUri.ToLower();

        ProgressEvent(this, new ProgressEventArgs(1, "Spider.Catalog (start Uri) " + startPageUri.AbsoluteUri));

        // robots.txt handling is recreated for every start page.
        _Robot = new RobotsTxt(startPageUri, Preferences.RobotUserAgent);

        // Spidering of this start page begins here; the original notes describe
        // ProcessUri as the recursive entry point.
        ProcessUri(startPageUri, 0);

        ProgressEvent(this, new ProgressEventArgs(1, "Spider.Catalog (end Uri) " + startPageUri.AbsoluteUri));
    }

    // Now we've FINISHED Spidering
    ProgressEvent(this, new ProgressEventArgs(1, "Spider.Catalog() complete."));
    ProgressEvent(this, new ProgressEventArgs(2, "Serializing to disk location " + Preferences.CatalogFileName));

    // Serialization of the Catalog, so we can load it again if the server
    // Application is restarted.
    _Catalog.Save();

    ProgressEvent(this, new ProgressEventArgs(3, "Save to disk " + Preferences.CatalogFileName + " successful"));

    return _Catalog; // finished, return to the calling code to 'use'
}