Example #1
0
        /// <summary>
        /// Dispatches the current request according to the path of <c>uri</c>.
        /// </summary>
        /// <remarks>
        /// <c>/robots.txt</c> and <c>/stats</c> are handled directly. Every other
        /// path must have the form <c>/&lt;what&gt;/&lt;dbname&gt;/&lt;searchterm&gt;</c>;
        /// anything else is answered with a 404. For the <c>search</c> type the
        /// optional <c>offset</c> and <c>limit</c> query parameters are read; a value
        /// that is not a valid integer is answered with a 400 instead of letting a
        /// parse exception escape the handler.
        /// </remarks>
        protected override void DoStuff()
        {
            if (uri.AbsolutePath == "/robots.txt")
            {
                RobotsTxt();
                return;
            }
            if (uri.AbsolutePath == "/stats")
            {
                ShowStats();
                return;
            }

            // The leading '/' produces an empty first element, so a well-formed
            // path splits into exactly four pieces. The count limit of 4 keeps
            // any further slashes inside the search term.
            string[] paths = uri.AbsolutePath.Split(new char[] { '/' }, 4);
            if (paths.Length != 4)
            {
                SendError(404, "Not found", "Not a recognized search path.");
                log.Warn("Unknown request " + uri.AbsolutePath);
                return;
            }
            what       = paths[1];
            dbname     = paths[2];
            searchterm = HttpUtility.UrlDecode(paths[3], Encoding.UTF8);

            log.InfoFormat("query:{0} what:{1} dbname:{2} term:{3}",
                           rawUri, what, dbname, searchterm);

            IDictionary query = new QueryStringMap(uri);

            state = SearchPool.ForWiki(dbname);

            contentType = "text/plain";
            switch (what)
            {
                case "titlematch":
                    DoTitleMatches();
                    break;

                case "titleprefix":
                    DoTitlePrefix();
                    break;

                case "search":
                {
                    int startAt = 0, endAt = 100;
                    if (query.Contains("offset"))
                    {
                        if (!TryParseIntParam(query["offset"], out startAt))
                        {
                            SendError(400, "Bad Request", "The offset parameter must be an integer.");
                            log.Warn("Malformed offset parameter in request " + uri.AbsolutePath);
                            return;
                        }
                        startAt = Math.Max(startAt, 0);
                    }
                    if (query.Contains("limit"))
                    {
                        if (!TryParseIntParam(query["limit"], out endAt))
                        {
                            SendError(400, "Bad Request", "The limit parameter must be an integer.");
                            log.Warn("Malformed limit parameter in request " + uri.AbsolutePath);
                            return;
                        }
                        // Cap at maxlines, and never pass a negative limit on.
                        endAt = Math.Max(Math.Min(endAt, maxlines), 0);
                    }
                    NamespaceFilter namespaces = new NamespaceFilter((string)query["namespaces"]);
                    DoNormalSearch(startAt, endAt, namespaces);
                    break;
                }

                case "quit":
                    // TEMP HACK for profiling.
                    // NOTE(review): this lets any client that can reach the
                    // daemon terminate the whole process; it should be removed
                    // or gated before this is exposed to untrusted callers.
                    System.Environment.Exit(0);
                    break;

                case "raw":
                    DoRawSearch();
                    break;

                default:
                    SendError(404, "Not Found",
                              "Unrecognized search type. Try one of: " +
                              "titlematch, titleprefix, search, quit, raw.");
                    log.Warn("Unknown request type [" + what + "]; ignoring.");
                    break;
            }
        }

        /// <summary>
        /// Parses a query-string value as a 32-bit integer using the invariant culture.
        /// </summary>
        /// <param name="raw">The raw value from the query map; anything that is not a string is rejected.</param>
        /// <param name="value">Receives the parsed number, or 0 when parsing fails.</param>
        /// <returns><c>true</c> if <paramref name="raw"/> was a valid integer string.</returns>
        private static bool TryParseIntParam(object raw, out int value)
        {
            return Int32.TryParse(raw as string,
                                  System.Globalization.NumberStyles.Integer,
                                  System.Globalization.CultureInfo.InvariantCulture,
                                  out value);
        }
Example #2
0
        /// <summary>
        /// Runs a full-text search for <c>searchterm</c> (title matches boosted by 4)
        /// and writes the response.
        /// </summary>
        /// <remarks>
        /// The response starts with the total hit count. When there are no hits the
        /// next line is a URL-encoded suggestion from <c>MakeSpelFix</c>; otherwise
        /// one result line is sent per hit that passes the namespace filter, after
        /// skipping <paramref name="offset"/> of them and stopping once
        /// <paramref name="limit"/> have been sent. If the query cannot be parsed
        /// or the search itself fails, the problem is logged and a 500 is sent.
        /// </remarks>
        /// <param name="offset">Number of matching results to skip; expected to be non-negative.</param>
        /// <param name="limit">Maximum number of result lines to send; zero or less sends none.</param>
        /// <param name="namespaces">Filter applied to each hit's <c>namespace</c> field.</param>
        private void DoNormalSearch(int offset, int limit, NamespaceFilter namespaces)
        {
            string encsearchterm = String.Format("title:({0})^4 OR ({0})", searchterm);

            DateTime now = DateTime.UtcNow;
            Query    query;

            /* If we fail to parse the query, it's probably due to illegal
             * use of metacharacters, so we escape them all and try again.
             */
            try
            {
                query = state.Parse(encsearchterm);
            }
            catch (Exception)
            {
                // Prefix every character with a backslash.
                StringBuilder escapedBuilder = new StringBuilder(searchterm.Length * 2);
                for (int i = 0; i < searchterm.Length; ++i)
                {
                    escapedBuilder.Append('\\').Append(searchterm[i]);
                }
                string escaped = escapedBuilder.ToString();
                encsearchterm = "title:(" + escaped + ")^4 OR (" + escaped + ")";
                try
                {
                    query = state.Parse(encsearchterm);
                }
                catch (Exception e2)
                {
                    log.Error("Problem parsing search term: " + e2.Message + "\n" + e2.StackTrace);
                    // No headers have been sent yet, so the client can still
                    // be told that the request failed.
                    SendError(500, "Internal Server Error", "The search term could not be parsed.");
                    return;
                }
            }

            Hits hits = null;
            try
            {
                hits = state.Searcher.Search(query);
            }
            catch (Exception e)
            {
                log.Error("Error searching: " + e.Message + "\n" + e.StackTrace);
                SendError(500, "Internal Server Error", "The search could not be completed.");
                return;
            }

            SendHeaders(200, "OK");

            int numhits = hits.Length();

            LogRequest(searchterm, query, numhits, now);

            SendOutputLine(numhits.ToString());

            if (numhits == 0)
            {
                string spelfix = MakeSpelFix(searchterm);
                SendOutputLine(HttpUtility.UrlEncode(spelfix, Encoding.UTF8));
                return;
            }

            // Lucene's filters seem to want to run over the entire
            // document set, which is really slow. We'll do namespace
            // checks as we go along, and stop once we've seen enough.
            //
            // The good side is that we can return the first N documents
            // pretty quickly. The bad side is that the total hits
            // number we return is bogus: it's for all namespaces combined.
            //
            // Consecutive duplicate results are not filtered out here.
            //
            // Skipped and sent results are counted separately so that the
            // limit is honoured even when it is zero, and so that no
            // limit + offset sum can overflow.
            int skipped = 0;
            int sent    = 0;
            for (int i = 0; i < numhits && i < maxoffset && sent < limit; i++)
            {
                Document doc           = hits.Doc(i);
                string   pageNamespace = doc.Get("namespace");
                if (!namespaces.filter(pageNamespace))
                {
                    continue;
                }
                if (skipped < offset)
                {
                    skipped++;
                    continue;
                }

                string title = doc.Get("title");
                float  score = hits.Score(i);
                SendResultLine(score, pageNamespace, title);
                sent++;
            }
        }