/// <summary>
/// Routes an incoming HTTP request by path. Handles /robots.txt and /stats
/// specially; otherwise expects a path of the form
/// /&lt;what&gt;/&lt;dbname&gt;/&lt;searchterm&gt; and dispatches to the matching
/// search handler. Sends a 404 for unrecognized paths or search types.
/// </summary>
protected override void DoStuff() {
	if (uri.AbsolutePath == "/robots.txt") {
		RobotsTxt();
		return;
	}
	if (uri.AbsolutePath == "/stats") {
		ShowStats();
		return;
	}
	// Split into at most 4 pieces so any '/' characters inside the
	// search term stay in paths[3] rather than breaking the route.
	string[] paths = uri.AbsolutePath.Split(new char[] { '/' }, 4);
	if (paths.Length != 4) {
		SendError(404, "Not found", "Not a recognized search path.");
		log.Warn("Unknown request " + uri.AbsolutePath);
		return;
	}
	what = paths[1];
	dbname = paths[2];
	searchterm = HttpUtility.UrlDecode(paths[3], Encoding.UTF8);
	log.InfoFormat("query:{0} what:{1} dbname:{2} term:{3}",
		rawUri, what, dbname, searchterm);
	IDictionary query = new QueryStringMap(uri);
	state = SearchPool.ForWiki(dbname);
	contentType = "text/plain";
	if (what.Equals("titlematch")) {
		DoTitleMatches();
	} else if (what.Equals("titleprefix")) {
		DoTitlePrefix();
	} else if (what.Equals("search")) {
		int startAt = 0, endAt = 100;
		// FIX: these values come straight from the client's query string.
		// Int32.Parse used to throw FormatException/OverflowException on
		// malformed input and abort the request; TryParse silently keeps
		// the defaults instead. offset is clamped to >= 0, limit capped
		// at maxlines as before.
		if (query.Contains("offset")) {
			int offset;
			if (Int32.TryParse((string)query["offset"], out offset)) {
				startAt = Math.Max(offset, 0);
			}
		}
		if (query.Contains("limit")) {
			int limit;
			if (Int32.TryParse((string)query["limit"], out limit)) {
				endAt = Math.Min(limit, maxlines);
			}
		}
		NamespaceFilter namespaces = new NamespaceFilter((string)query["namespaces"]);
		DoNormalSearch(startAt, endAt, namespaces);
	} else if (what.Equals("quit")) {
		// TEMP HACK for profiling
		System.Environment.Exit(0);
	} else if (what.Equals("raw")) {
		DoRawSearch();
	} else {
		SendError(404, "Not Found",
			"Unrecognized search type. Try one of: " +
			"titlematch, titleprefix, search, quit, raw.");
		log.Warn("Unknown request type [" + what + "]; ignoring.");
	}
}
/// <summary>
/// Runs a full-text search for the current searchterm and streams results
/// as plain text: first the total hit count (or a spelling suggestion when
/// there are no hits), then one result line per match, honoring the
/// offset/limit window and the namespace filter.
/// </summary>
/// <param name="offset">Number of namespace-matching hits to skip.</param>
/// <param name="limit">Maximum number of result lines to send after the offset.</param>
/// <param name="namespaces">Filter deciding which page namespaces count as matches.</param>
private void DoNormalSearch(int offset, int limit, NamespaceFilter namespaces) {
	// Boost title matches (^4) over plain body matches.
	string encsearchterm = String.Format("title:({0})^4 OR ({1})", searchterm, searchterm);
	DateTime now = DateTime.UtcNow;
	Query query;
	/* If we fail to parse the query, it's probably due to illegal
	 * use of metacharacters, so we escape them all and try again. */
	try {
		query = state.Parse(encsearchterm);
	} catch (Exception) {
		// FIX: build the escaped term with a StringBuilder; the previous
		// per-character string concatenation was O(n^2) in allocations.
		// (Also dropped the unused exception variable from this catch.)
		StringBuilder sb = new StringBuilder(searchterm.Length * 2);
		for (int i = 0; i < searchterm.Length; ++i) {
			sb.Append('\\').Append(searchterm[i]);
		}
		string escaped = sb.ToString();
		encsearchterm = "title:(" + escaped + ")^4 OR (" + escaped + ")";
		try {
			query = state.Parse(encsearchterm);
		} catch (Exception e2) {
			// Even the fully-escaped form failed; give up on this request.
			log.Error("Problem parsing search term: " + e2.Message + "\n" + e2.StackTrace);
			return;
		}
	}
	Hits hits = null;
	try {
		hits = state.Searcher.Search(query);
	} catch (Exception e) {
		log.Error("Error searching: " + e.Message + "\n" + e.StackTrace);
		return;
	}
	SendHeaders(200, "OK");
	int numhits = hits.Length();
	LogRequest(searchterm, query, numhits, now);
	SendOutputLine(numhits.ToString());
	if (numhits == 0) {
		// No hits at all: send a spelling suggestion instead of results.
		string spelfix = MakeSpelFix(searchterm);
		SendOutputLine(HttpUtility.UrlEncode(spelfix, Encoding.UTF8));
	} else {
		// Lucene's filters seem to want to run over the entire
		// document set, which is really slow. We'll do namespace
		// checks as we go along, and stop once we've seen enough.
		//
		// The good side is that we can return the first N documents
		// pretty quickly. The bad side is that the total hits
		// number we return is bogus: it's for all namespaces combined.
		int matches = 0;
		// maxoffset caps how deep into the raw hit list we will ever scan.
		for (int i = 0; i < numhits && i < maxoffset; i++) {
			Document doc = hits.Doc(i);
			string pageNamespace = doc.Get("namespace");
			if (namespaces.filter(pageNamespace)) {
				// Count the hit, but only emit it once past the offset.
				if (matches++ < offset) {
					continue;
				}
				string title = doc.Get("title");
				float score = hits.Score(i);
				SendResultLine(score, pageNamespace, title);
				if (matches >= (limit + offset)) {
					break;
				}
			}
		}
	}
}