Beispiel #1
0
 /// <summary>
 /// Main entry point of the indexing system: asks a <see cref="ResourceServer"/> for its
 /// paths, processes every path into a fresh catalog and saves that catalog when it is not empty.
 /// </summary>
 /// <remarks>
 /// Each stage is reported through ProgressEvent.
 /// </remarks>
 /// <returns>The catalog that was built; it may be empty.</returns>
 public Catalog BuildCatalog()
 {
     _Catalog = new Catalog();
     ProgressEvent(this, new ProgressEventArgs(1, "Spider.BuildCatalog() starting."));

     // Stop, Go and Stemming set-up has to happen before any path is processed
     SetPreferences();

     ResourceServer resources = new ResourceServer();
     foreach (string resourcePath in resources.PathCollection)
     {
         ProcessPath(resourcePath);
     }

     // Spidering is finished at this point
     ProgressEvent(this, new ProgressEventArgs(1, "Spider.BuildCatalog() complete."));

     bool nothingCataloged = !(_Catalog.Length > 0);
     if (nothingCataloged)
     {
         ProgressEvent(this, new ProgressEventArgs(3, "Not serializing/saving: Empty catalog!"));
         return _Catalog;
     }

     // Serialize the catalog, so it can be loaded again if the server application is restarted
     string savingMessage = "Serializing to disk location " + Preferences.CatalogFileName;
     ProgressEvent(this, new ProgressEventArgs(2, savingMessage));

     _Catalog.Save();

     string savedMessage = "Save to disk " + Preferences.CatalogFileName + " successful";
     ProgressEvent(this, new ProgressEventArgs(3, savedMessage));

     // Hand the finished catalog back to the calling code
     return _Catalog;
 }
Beispiel #2
0
        /// <summary>
        /// Uses the Spider class to read and catalog a website, writing the page HTML
        /// straight to the response, and stores the resulting catalog in the cache.
        /// </summary>
        /// <remarks>
        /// Spider progress notifications are routed to OnProgressEvent while the catalog is built.
        /// The catalog is put in the cache even when it turns out to be empty.
        /// </remarks>
        /// <param name="sender">event source (not used)</param>
        /// <param name="e">event data (not used)</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // The opening <p> is left open on purpose: each branch below closes it
            Response.Write(
                          @"<html>
                            <head>
                                <style type='text/css'>
                                    BODY { color: #000000; background-color: white; font-family: trebuchet ms, verdana, arial, sans-serif; font-size:x-small; margin-left: 0px; margin-top: 0px; }
                                </style>
                                <title>Crawling the library...</title>
                            </head>
                            <body>
                            <h3><p><font color='red'>S</font><font color='blue'>h</font><font color='green'>e</font><font color='orange'>l</font><font color='navy'>f</font>.<font color='maroon'>Search</font> <font color='#990000'><sup>Beta</sup></font></p></h3>
                            <h2>Catalog file not found! Building Catalog now ..</h2><p>"
                          );

            // Build the catalog!
            Spider spidey = new Spider();
            spidey.SpiderProgressEvent += new SpiderProgressEventHandler(OnProgressEvent);
            _Catalog = spidey.BuildCatalog();
            Cache[Preferences.CatalogCacheKey] = _Catalog;

            // Check if anything was found
            if (_Catalog.Length > 0)
            {
                Response.Write("<br />Finished - now you can search!</p>");
                Logger.PerformanceLog(this, "Built new Catalog successfully!");
                // The anchor goes inside the heading so the tags nest correctly
                Response.Write("<center><h3><a href='Search.aspx'>Start Searching Now !</a></h3></center>");
                Response.Write("</body></html>");
            }
            else
            {
                // 'style' (not 'font') is the attribute that actually colours the paragraph
                Response.Write("</p><br /><p style='color:red'>Sorry, nothing was cataloged." +
                    " Administrator will check if there are resources to catalog," +
                    " and the logs to see if any error has occurred. Sorry for the " +
                    "inconvenience, please check back here later.</p>");
                Response.Write("</body></html>");
                Response.End();
            }
        }
Beispiel #3
0
        /// <summary>
        /// Method called from UI: finds the files that contain ALL of the words in the
        /// query and ranks them by the sum of their per-word weights.
        /// </summary>
        /// <remarks>
        /// Side effects: appends one HTML fragment per query word to _Matches and, when a
        /// search is actually run, sets _DisplayTime. The query words are echoed into that
        /// HTML, so they are HTML/URL-encoded here.
        /// </remarks>
        /// <param name="searchterm">search query; words are separated by whitespace</param>
        /// <param name="catalog">catalog to search</param>
        /// <returns>
        /// ResultFile SortedList for display, keyed so that higher-ranked files come first;
        /// empty when an argument is null, the query is blank, or no file matches every word
        /// </returns>
        public SortedList GetResults(string searchterm, Catalog catalog)
        {
            SortedList output = new SortedList();

            if ((null == searchterm) || (null == catalog))
            {
                return output;
            }

            SetPreferences();

            // Collapse runs of whitespace AND drop it from both ends: otherwise Split
            // produces empty 'words' that can never be found, which kills the whole search
            searchterm = Regex.Replace(searchterm, @"\s+", " ").Trim();
            if (searchterm.Length == 0)
            {
                // After trimming the search term, it was found to be empty!
                return output;
            }

            string[] searchTermDisplay = searchterm.Split(' ');                 // words as typed (for display)
            string[] searchTermArray = (string[])searchTermDisplay.Clone();     // normalised words (for lookup)
            for (int i = 0; i < searchTermArray.Length; i++)
            {
                if (_GoChecker.IsGoWord(searchTermArray[i]))
                {   // was a Go word, just Lower it
                    searchTermArray[i] = searchTermArray[i].ToLower();
                }
                else
                {   // Not a Go word, strip punctuation and apply stemming
                    string word = searchTermArray[i].Trim(' ', '?', '\"', ',', '\'', ';', ':', '.', '(', ')').ToLower();
                    searchTermArray[i] = _Stemmer.StemWord(word);
                }
            }

            DateTime start = DateTime.UtcNow;  // to show 'time taken' to perform search

            // One result set (file -> weight) per search word
            Hashtable[] searchResultsArrayArray = new Hashtable[searchTermArray.Length];
            // finalResultsArray is populated with files that match ALL the search words (file -> summed weight)
            HybridDictionary finalResultsArray = new HybridDictionary();

            bool botherToFindMatches = true;
            int indexOfShortestResultSet = -1, lengthOfShortestResultSet = -1;

            for (int i = 0; i < searchTermArray.Length; i++)
            {   // ##### THE SEARCH #####
                // The typed word is untrusted input: encode it before it goes into HTML
                string displayHtml = System.Web.HttpUtility.HtmlEncode(searchTermDisplay[i]);

                searchResultsArrayArray[i] = catalog.Search(searchTermArray[i]);
                if (null == searchResultsArrayArray[i])
                {
                    _Matches += displayHtml + " <font color='gray' style='font-size:xx-small'>(not found)</font> ";
                    botherToFindMatches = false; // if *any one* of the terms isn't found, there won't be a 'set' of Matches
                }
                else
                {
                    int resultsInThisSet = searchResultsArrayArray[i].Count;
                    _Matches += "<a href=\"?" + Preferences.QuerystringParameterName + "=" + System.Web.HttpUtility.UrlEncode(searchTermDisplay[i])
                            + "\" title=\"" + System.Web.HttpUtility.HtmlEncode(searchTermArray[i]) + "\">"
                            + displayHtml
                            + "</a> <font color=gray style='font-size:xx-small'>(" + resultsInThisSet + ")</font> ";
                    if ((lengthOfShortestResultSet == -1) || (lengthOfShortestResultSet > resultsInThisSet))
                    {
                        indexOfShortestResultSet = i;
                        lengthOfShortestResultSet = resultsInThisSet;
                    }
                }
            }

            // Find the files common to every result set. Only the *shortest* set needs to
            // be walked: a file missing from it cannot be in all of them.
            if (botherToFindMatches)    // all words have *some* matches, so no result set is null
            {
                Hashtable shortestResultSet = searchResultsArrayArray[indexOfShortestResultSet];

                foreach (DictionaryEntry candidate in shortestResultSet)
                {
                    int weight = 0;
                    bool inEverySet = true;

                    for (int cx = 0; cx < searchResultsArrayArray.Length; cx++)
                    {
                        // Keyed lookup instead of scanning the set and comparing keys by reference
                        object weightInThisSet = searchResultsArrayArray[cx][candidate.Key];
                        if (null == weightInThisSet)
                        {
                            inEverySet = false;
                            break;      // missing from one set: cannot match ALL words
                        }
                        weight += (int)weightInThisSet;     // sum the weighting
                    }

                    if (inEverySet && !finalResultsArray.Contains(candidate.Key))
                    {
                        finalResultsArray.Add(candidate.Key, weight);
                    }
                }
            }

            // Time taken calculation (totals, not the 0-59 'Seconds' component)
            TimeSpan taken = DateTime.UtcNow - start;
            if (taken.TotalSeconds >= 1)
            {
                _DisplayTime = (int)taken.TotalSeconds + " seconds";
            }
            else if (taken.TotalMilliseconds >= 1)
            {
                _DisplayTime = Convert.ToInt32(taken.TotalMilliseconds) + " milliseconds";
            }
            else
            {
                _DisplayTime = "less than 1 millisecond";
            }

            // Format the results
            if (finalResultsArray.Count > 0)
            {
                output = new SortedList(finalResultsArray.Count);

                foreach (DictionaryEntry ranked in finalResultsArray)
                {
                    // Create a ResultFile with its own Rank
                    ResultFile infile = new ResultFile((File)ranked.Key);
                    infile.Rank = (int)ranked.Value;

                    // Negative key so the highest rank sorts first; the factor of 1000 leaves
                    // room for files sharing a rank to take the following keys
                    int sortrank = infile.Rank * -1000;
                    while (output.Contains(sortrank))
                    {   // key taken - move on one until it fits, so no result is ever dropped
                        sortrank++;
                    }
                    output.Add(sortrank, infile);
                }
            } // else Count == 0, so output SortedList stays empty

            return output;
        }
Beispiel #4
0
        /// <summary>
        /// Makes sure a catalog is available (from the cache, else from disk, else by
        /// redirecting to Crawling.aspx), then runs the current search query, if any,
        /// and binds the requested page of results.
        /// </summary>
        protected void Page_Load()
        {
            // See if there is a catalog object in the cache; a missing (or wrongly typed)
            // entry simply yields null, no exception needed to detect it
            _Catalog = Cache[Preferences.CatalogCacheKey] as Catalog;
            if (null == _Catalog)
            {
                // We'll need to load_from_file or build the catalog again.
                Logger.PerformanceLog(this, "Catalog object unavailable : Loading from file!");
            }
            else
            {
                _WordCount = _Catalog.Length;
            }

            if ((null == _Catalog) || (_Catalog.Length == 0))
            {
                // No usable catalog 'in memory', so let's look for one
                // First, for a serialized version on disk
                _Catalog = Catalog.Load();	// returns null if not found

                // Still no Catalog, so we have to start building a new one
                if (null == _Catalog)
                {
                    Logger.PerformanceLog(this, "Catalog object unavailable & serialized file missing : Building new Catalog!");
                    Response.Redirect("Crawling.aspx", true);
                    return; // nothing below can work without a catalog
                }

                // Yep, there was a serialized catalog file
                // Don't forget to add to cache for next time (the Spider does this too)
                Cache[Preferences.CatalogCacheKey] = _Catalog;
                _WordCount = _Catalog.Length;
                Logger.PerformanceLog(this, "Deserialized catalog and put in Cache");
            }

            ucSearchPanelHeader.WordCount = _WordCount;
            ucSearchPanelFooter.WordCount = _WordCount;

            if (this.SearchQuery == "")
            {
                ucSearchPanelFooter.Visible = false;
                ucSearchPanelFooter.IsFooter = true;
                ucSearchPanelHeader.IsSearchResultsPage = false;
            }
            else
            {
                SearchEngine se = new SearchEngine();
                SortedList output = GetSearchResults(se); // which'll do se.GetResults(this.SearchQuery, _Catalog);

                _NumberOfMatches = output.Count.ToString();
                if (output.Count > 0)
                {
                    // 'page' comes from the URL (1-based): anything missing, non-numeric or
                    // below 1 means the first page, and it can never point past the last page
                    int pageIndex = 0;
                    int requestedPage;
                    if (int.TryParse(Request.QueryString["page"], out requestedPage) && (requestedPage > 1))
                    {
                        pageIndex = requestedPage - 1;
                    }
                    if (MaxResultsPerPage > 0)
                    {
                        int lastPageIndex = (output.Count - 1) / MaxResultsPerPage;
                        if (pageIndex > lastPageIndex)
                        {
                            pageIndex = lastPageIndex;
                        }
                    }

                    _PagedResults.DataSource = output.GetValueList();
                    _PagedResults.AllowPaging = true;
                    _PagedResults.PageSize = MaxResultsPerPage;
                    _PagedResults.CurrentPageIndex = pageIndex;

                    _Matches = se.SearchQueryMatchHtml;
                    _DisplayTime = se.DisplayTime;

                    SearchResults.DataSource = _PagedResults;
                    SearchResults.DataBind();
                }
                else
                {
                    lblNoSearchResults.Visible = true;
                }
                // Set the display info in the top & bottom user controls
                ucSearchPanelHeader.Word = ucSearchPanelFooter.Word = SearchQuery;
                ucSearchPanelFooter.Visible = true;
                ucSearchPanelFooter.IsFooter = true;
                ucSearchPanelHeader.IsSearchResultsPage = true;
            }

            // Static Equals copes with a missing parameter (null) by itself
            if (string.Equals(Request.QueryString["semantics"], "true", StringComparison.InvariantCultureIgnoreCase))
            {
                SemanticsPanel.Visible = true;
                SInformation.Text = SInfo.SemanticsHtml;
            }
        }