コード例 #1
ファイル: Spider.cs プロジェクト: chandru9279/StarBase
 /// <summary>
 /// Creates a new <see cref="Catalog"/>, instantiates a <see cref="ResourceServer"/> and
 /// iterates over the paths it exposes, reporting progress through ProgressEvent.
 /// </summary>
 /// <remarks>
 /// This is the MAIN method of the indexing system.
 /// NOTE(review): this listing appears to have lost lines (braces, the body of the
 /// path loop, the serialization call and an else branch). Confirm the control flow
 /// against the original Spider.cs before relying on what is shown here.
 /// </remarks>
 /// <returns>The catalog held in the _Catalog field (it may be empty).</returns>
 public Catalog BuildCatalog()
     // Start from an empty catalog; any previous instance held in the field is replaced.
     _Catalog = new Catalog();
     ProgressEvent(this, new ProgressEventArgs(1, "Spider.BuildCatalog() starting."));
     // Setup Stop, Go, Stemming
     // NOTE(review): no stop-word / go-word / stemmer setup is visible in this listing.
     ResourceServer server = new ResourceServer();
     // NOTE(review): the loop body (what is done with each path) is not visible here.
     foreach (string path in server.PathCollection)
     // Now we've FINISHED Spidering
     ProgressEvent(this, new ProgressEventArgs(1, "Spider.BuildCatalog() complete."));
     // Only a non-empty catalog is reported as being saved to Preferences.CatalogFileName.
     if (_Catalog.Length > 0)
         ProgressEvent(this, new ProgressEventArgs(2, "Serializing to disk location " + Preferences.CatalogFileName));
         // Serialization of the Catalog, so we can load it again if the server Application is restarted
         // NOTE(review): the actual save call is not visible between these two progress events.
         ProgressEvent(this, new ProgressEventArgs(3, "Save to disk " + Preferences.CatalogFileName + " successful"));
         // NOTE(review): presumably the else branch (empty catalog) - the else keyword is missing from the listing.
         ProgressEvent(this, new ProgressEventArgs(3, "Not serializing/saving: Empty catalog!"));
     return _Catalog;// finished, return to the calling code to 'use'
コード例 #2
ファイル: Crawling.aspx.cs プロジェクト: chandru9279/StarBase
        /// <summary>
        /// Page load handler that uses the Spider class to read and catalog a website,
        /// writing progress and the final outcome straight into the response.
        /// </summary>
        /// <remarks>
        /// Visible flow: emit the page header HTML, create a Spider, subscribe OnProgressEvent
        /// to its SpiderProgressEvent, call BuildCatalog, store the result in Cache under
        /// Preferences.CatalogCacheKey, then write either a link to Search.aspx (catalog has
        /// entries) or an apology message (nothing was cataloged).
        /// NOTE(review): the catalog is placed in Cache before it is checked for emptiness,
        /// so an empty catalog is cached too.
        /// NOTE(review): the apology paragraph uses a font attribute holding color:red, which is
        /// not a valid HTML attribute (a style attribute was probably intended); the success
        /// link also closes its anchor before the h3 it opened inside it.
        /// NOTE(review): this listing has lost lines (braces, the opening of the first
        /// Response.Write call and an else); confirm against the original Crawling.aspx.cs.
        /// </remarks>
        /// <param name="sender">Event source supplied by ASP.NET; not used in the visible code.</param>
        /// <param name="e">Event data supplied by ASP.NET; not used in the visible code.</param>
        protected void Page_Load(object sender, EventArgs e)
                                <style type='text/css'>
                                    BODY { color: #000000; background-color: white; font-family: trebuchet ms, verdana, arial, sans-serif; font-size:x-small; margin-left: 0px; margin-top: 0px; }
                                <title>Crawling the library...</title>
                            <h3><p><font color='red'>S</font><font color='blue'>h</font><font color='green'>e</font><font color='orange'>l</font><font color='navy'>f</font>.<font color='maroon'>Search</font> <font color='#990000'><sup>Beta</sup></font></p></h3>
                            <h2>Catalog file not found! Building Catalog now ..</h2><p>"
            // Build the catalog!
            Spider spidey = new Spider();
            spidey.SpiderProgressEvent += new SpiderProgressEventHandler(OnProgressEvent);
            _Catalog = spidey.BuildCatalog();
            Cache[Preferences.CatalogCacheKey] = _Catalog;

            // Check if anything was found
            if (_Catalog.Length > 0)
                Response.Write("<br />Finished - now you can search!</p>");
                Logger.PerformanceLog(this, "Built new Catalog successfully!");
                Response.Write("<center><a href='Search.aspx'><h3>Start Searching Now !</a></h3></center>");

                Response.Write("</p><br /><p font='color:red'>Sorry, nothing was cataloged." +
                    " Administrator will check if there are resources to catalog," +
                    " and the logs to see if any error has occured. Sorry for the " +
                    "inconvenience, please check back here later.</p>");
コード例 #3
ファイル: SearchEngine.cs プロジェクト: chandru9279/StarBase
        /// <summary>
        /// Method called from UI: searches <paramref name="catalog"/> for every
        /// whitespace-separated word of <paramref name="searchterm"/> and returns the files
        /// that matched ALL of the words, ordered so the highest-ranked file comes first.
        /// </summary>
        /// <remarks>
        /// Side effects: appends one HTML fragment per search word to <c>_Matches</c> and
        /// sets <c>_DisplayTime</c> to a human-readable duration. Neither field is touched
        /// when the method returns early for a null or blank query.
        /// </remarks>
        /// <param name="searchterm">search query; null or blank yields an empty list</param>
        /// <param name="catalog">catalog to search; null yields an empty list</param>
        /// <returns>ResultFile SortedList for display; never null</returns>
        public SortedList GetResults(string searchterm, Catalog catalog)
        {
            SortedList output = new SortedList();

            if ((null == searchterm) || (null == catalog))
            {
                return output;
            }

            // Collapse runs of whitespace AND trim the ends, otherwise Split(' ') produces
            // empty words for queries such as " foo " and those can never be found.
            Regex r = new Regex(@"\s+");
            searchterm = r.Replace(searchterm, " ").Trim();
            if (searchterm.Length == 0)
            {
                // After trimming the search term, it was found to be empty!
                return output;
            }

            string[] searchTermArray = searchterm.Split(' ');
            string[] searchTermDisplay = (string[])searchTermArray.Clone(); // untouched copy for display
            for (int i = 0; i < searchTermArray.Length; i++)
            {
                if (_GoChecker.IsGoWord(searchTermArray[i]))
                {
                    // was a Go word, just Lower it
                    searchTermArray[i] = searchTermArray[i].ToLower();
                }
                else
                {
                    // Not a Go word, strip punctuation and apply stemming
                    searchTermArray[i] = searchTermArray[i].Trim(' ', '?', '\"', ',', '\'', ';', ':', '.', '(', ')').ToLower();
                    searchTermArray[i] = _Stemmer.StemWord(searchTermArray[i]);
                }
            }

            // to show 'time taken' to perform search
            System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

            // One result table per search word (file -> weight), null when the word is unknown
            Hashtable[] searchResultsArrayArray = new Hashtable[searchTermArray.Length];
            // finalResultsArray maps each file that matches ALL the words to its summed weight
            HybridDictionary finalResultsArray = new HybridDictionary();

            bool botherToFindMatches = true;
            int indexOfShortestResultSet = -1, lengthOfShortestResultSet = -1;

            for (int i = 0; i < searchTermArray.Length; i++)
            {
                // ##### THE SEARCH #####
                searchResultsArrayArray[i] = catalog.Search(searchTermArray[i]);

                // The words come from the user, so encode them before putting them into markup.
                string displayHtml = System.Web.HttpUtility.HtmlEncode(searchTermDisplay[i]);

                if (null == searchResultsArrayArray[i])
                {
                    _Matches += displayHtml + " <font color='gray' style='font-size:xx-small'>(not found)</font> ";
                    botherToFindMatches = false; // if *any one* of the terms isn't found, there won't be a 'set' of Matches
                }
                else
                {
                    int resultsInThisSet = searchResultsArrayArray[i].Count;
                    _Matches += "<a href=\"?" + Preferences.QuerystringParameterName + "=" + System.Web.HttpUtility.UrlEncode(searchTermDisplay[i])
                            + "\" title=\"" + System.Web.HttpUtility.HtmlAttributeEncode(searchTermArray[i]) + "\">"
                            + displayHtml
                            + "</a> <font color=gray style='font-size:xx-small'>(" + resultsInThisSet + ")</font> ";
                    if ((lengthOfShortestResultSet == -1) || (lengthOfShortestResultSet > resultsInThisSet))
                    {
                        indexOfShortestResultSet = i;
                        lengthOfShortestResultSet = resultsInThisSet;
                    }
                }
            }

            // Find the files common to every result set. Only the *shortest* set needs to be
            // walked: a file missing from it cannot be in the intersection.
            if (botherToFindMatches) // all words have *some* matches, so every table is non-null
            {
                int c = indexOfShortestResultSet;

                foreach (DictionaryEntry fo in searchResultsArrayArray[c])
                {
                    int weight = (int)fo.Value; // weight within the shortest set
                    bool inEverySet = true;

                    for (int cx = 0; cx < searchResultsArrayArray.Length; cx++)
                    {
                        if (cx == c)
                        {
                            continue; // implicitly matches in the current resultset
                        }

                        // Keyed lookup instead of scanning the whole table.
                        // NOTE(review): assumes the same File instance is used as the key in every
                        // result table (or that File's Equals/GetHashCode agree) - confirm in Catalog.
                        object otherWeight = searchResultsArrayArray[cx][fo.Key];
                        if (null == otherWeight)
                        {
                            inEverySet = false;
                            break;
                        }
                        weight += (int)otherWeight; // sum the weighting
                    }

                    if (inEverySet && !finalResultsArray.Contains(fo.Key))
                    {
                        // the 'weight' in the combined results is the sum of the individual matches
                        finalResultsArray.Add(fo.Key, weight);
                    }
                }
            }

            // Time taken calculation (total values, so durations over a minute do not wrap)
            timer.Stop();
            TimeSpan taken = timer.Elapsed;
            if (taken.TotalSeconds >= 1)
            {
                _DisplayTime = (int)taken.TotalSeconds + " seconds";
            }
            else if (taken.TotalMilliseconds >= 1)
            {
                _DisplayTime = Convert.ToInt32(taken.TotalMilliseconds) + " milliseconds";
            }
            else
            {
                _DisplayTime = "less than 1 millisecond";
            }

            // Format the results
            if (finalResultsArray.Count > 0)
            {
                // intermediate data-structure for 'ranked' result HTML
                output = new SortedList(finalResultsArray.Count);

                // build each result row
                foreach (DictionaryEntry match in finalResultsArray)
                {
                    // Create a ResultFile with it's own Rank
                    ResultFile infile = new ResultFile((File)match.Key);
                    infile.Rank = (int)match.Value;

                    // SortedList orders keys ascending, so negate the rank to get best-first.
                    // The x1000 spacing leaves room to separate files with equal rank.
                    int sortrank = infile.Rank * -1000;
                    while (output.Contains(sortrank))
                    {
                        // rank exists - move the key along until a free slot is found,
                        // so a tied result is never silently dropped
                        sortrank++;
                    }
                    output.Add(sortrank, infile);
                }
                // Jim Harkins [paged results]
                // http://aspnet.4guysfromrolla.com/articles/081804-1.aspx
            } // else Count == 0, so output SortedList will be empty

            return output;
        }
コード例 #4
ファイル: Search.aspx.cs プロジェクト: chandru9279/StarBase
        /// <summary>
        /// Page load handler for the search page: obtains a Catalog (from Cache, then from
        /// Catalog.Load(), otherwise redirects to Crawling.aspx), pushes the word count into
        /// the header/footer panels and, when a search query is present, runs the search and
        /// sets up the paged results.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing has lost lines (braces, the try keyword, several else
        /// keywords) and may be cut off at the end; confirm against the original Search.aspx.cs.
        /// </remarks>
        protected void Page_Load()
            bool getCatalog = false;
            // NOTE(review): presumably the body of a try block - the try keyword is missing from the listing.
            {   // see if there is a catalog object in the cache
                _Catalog = (Catalog)Cache[Preferences.CatalogCacheKey];
                // On a cache miss _Catalog is null here, so this access throws and control
                // reaches the catch below (exception used as the 'not cached' signal).
                _WordCount = _Catalog.Length; // if so, get the _WordCount
            catch (Exception ex)
                // If not, we'll need to load_from_file or build the catalog again.
                Logger.PerformanceLog(this, "Catalog object unavailable : Loadind from file!" + ex.ToString());
                _Catalog = null; // in case

            // A missing catalog and an empty catalog are treated the same: go and fetch one.
            if (null == _Catalog)
                getCatalog = true;
            else if (_Catalog.Length == 0)
                getCatalog = true;

            if (getCatalog)
                // No catalog 'in memory', so let's look for one
                // First, for a serialized version on disk
                _Catalog = Catalog.Load();	// returns null if not found

                // Still no Catalog, so we have to start building a new one
                if (null == _Catalog)
                    Logger.PerformanceLog(this, "Catalog object unavailable & serialized file missing : Building new Catalog!");
                    // Second argument true asks ASP.NET to end the current response after redirecting.
                    Response.Redirect("Crawling.aspx", true);
                // NOTE(review): presumably the else branch - the else keyword is missing from the listing.
                {	// Yep, there was a serialized catalog file
                    // Don't forget to add to cache for next time (the Spider does this too)
                    Cache[Preferences.CatalogCacheKey] = _Catalog;
                    _WordCount = _Catalog.Length; // if so, get the _WordCount
                    Logger.PerformanceLog(this, "Deserialized catalog and put in Cache");

            ucSearchPanelHeader.WordCount = _WordCount;
            ucSearchPanelFooter.WordCount = _WordCount;

            // Empty query: show only the header search panel, no results.
            if (this.SearchQuery == "")
                ucSearchPanelFooter.Visible = false;
                ucSearchPanelFooter.IsFooter = true;
                ucSearchPanelHeader.IsSearchResultsPage = false;
                // NOTE(review): presumably the else branch (a query was supplied) starts here.
                SearchEngine se = new SearchEngine();
                SortedList output = GetSearchResults(se); // which'll do se.GetResults(this.SearchQuery, _Catalog);

                _NumberOfMatches = output.Count.ToString();
                if (output.Count > 0)
                    _PagedResults.DataSource = output.GetValueList();
                    _PagedResults.AllowPaging = true;
                    _PagedResults.PageSize = MaxResultsPerPage; //;Preferences.ResultsPerPage; //10;
                    // The "page" query-string value is 1-based; the paging index is 0-based.
                    // NOTE(review): Convert.ToInt32 throws on a non-numeric "page" value - nothing here guards against it.
                    _PagedResults.CurrentPageIndex = Request.QueryString["page"] == null ? 0 : Convert.ToInt32(Request.QueryString["page"]) - 1;

                    _Matches = se.SearchQueryMatchHtml;
                    _DisplayTime = se.DisplayTime;

                    SearchResults.DataSource = _PagedResults;
                    // NOTE(review): presumably the else branch (no results) - the else keyword is missing from the listing.
                    lblNoSearchResults.Visible = true;
                // Set the display info in the top & bottom user controls
                ucSearchPanelHeader.Word = ucSearchPanelFooter.Word = SearchQuery;
                ucSearchPanelFooter.Visible = true;
                ucSearchPanelFooter.IsFooter = true;
                ucSearchPanelHeader.IsSearchResultsPage = true;

            // Optional semantics panel, enabled by ?semantics=true (case-insensitive).
            if (!string.IsNullOrEmpty(Request.QueryString["semantics"]) && (Request.QueryString["semantics"].Equals("true",StringComparison.InvariantCultureIgnoreCase)))
                SemanticsPanel.Visible = true;
                SInformation.Text = SInfo.SemanticsHtml;