Example #1
0
        //public List<int> SearchIuses(string searchTerm)
        //{
        //    List<int> results = new List<int>();

        //    IndexSearcher searcher = new IndexSearcher(FSDirectory.GetDirectory(indexPath));
        //    QueryParser parser = new QueryParser("Rubro", analyzer);
        //    parser.SetEnablePositionIncrements(false);

        //    PhraseQuery q = new PhraseQuery();
        //    String[] words = searchTerm.Split(' ');

        //    foreach (string word in words)
        //    {
        //        q.Add(new Term("Rubro", word));
        //    }
        //    Console.WriteLine(q.ToString());
        //    //Query query = parser.Parse(searchTerm);
        //    Hits hitsFound = searcher.Search(q);

        //    TesisIndx sampleDataFileRow = null;

        //    for (int i = 0; i < hitsFound.Length(); i++)
        //    {
        //        sampleDataFileRow = new TesisIndx();
        //        Document doc = hitsFound.Doc(i);
        //        sampleDataFileRow.Ius = int.Parse(doc.Get("Ius"));
        //        sampleDataFileRow.Rubro = doc.Get("Rubro");
        //        sampleDataFileRow.Texto = doc.Get("Texto");
        //        float score = hitsFound.Score(i);
        //        sampleDataFileRow.Score = score;

        //        results.Add(sampleDataFileRow.Ius);
        //    }



        //    parser = new QueryParser("Texto", analyzer);
        //    parser.SetEnablePositionIncrements(false);

        //    q = new PhraseQuery();
        //    words = searchTerm.Split(' ');

        //    foreach (string word in words)
        //    {
        //        q.Add(new Term("Texto", word));
        //    }

        //    // query = parser.Parse(searchTerm);
        //    hitsFound = searcher.Search(q);

        //    for (int i = 0; i < hitsFound.Length(); i++)
        //    {
        //        sampleDataFileRow = new TesisIndx();
        //        Document doc = hitsFound.Doc(i);
        //        sampleDataFileRow.Ius = int.Parse(doc.Get("Ius"));
        //        sampleDataFileRow.Rubro = doc.Get("Rubro");
        //        sampleDataFileRow.Texto = doc.Get("Texto");
        //        float score = hitsFound.Score(i);
        //        sampleDataFileRow.Score = score;

        //        results.Add(sampleDataFileRow.Ius);
        //    }

        //    results.Distinct();

        //    return results;
        //}


        /// <summary>
        /// Searches the previously built index for theses whose heading ("RubroIndx") or
        /// body text ("TextoIndx") matches the search term as an exact phrase.
        /// </summary>
        /// <param name="searchTerm">Phrase to look for; it is wrapped in double quotes before parsing.</param>
        /// <returns>The distinct "Ius" identifiers of the matching documents, heading matches first.</returns>
        public List <int> SearchIuses(string searchTerm)
        {
            List <int> results = new List <int>();
            string     phrase  = String.Format("\"{0}\"", searchTerm);

            IndexSearcher searcher = new IndexSearcher(FSDirectory.GetDirectory(indexPath));

            try
            {
                // The same phrase query is run against each indexed field in turn.
                foreach (string field in new[] { "RubroIndx", "TextoIndx" })
                {
                    QueryParser parser = new QueryParser(field, analyzer);
                    parser.SetEnablePositionIncrements(false);

                    Query query = parser.Parse(phrase);
                    Console.WriteLine(query.ToString());
                    Hits hitsFound = searcher.Search(query);

                    for (int i = 0; i < hitsFound.Length(); i++)
                    {
                        results.Add(int.Parse(hitsFound.Doc(i).Get("Ius")));
                    }
                }
            }
            finally
            {
                // Release the underlying index reader even when parsing or searching fails.
                searcher.Close();
            }

            // Distinct() is lazy and returns a new sequence; its result must be materialised,
            // otherwise documents matching in both fields are reported twice.
            return(results.Distinct().ToList());
        }
        /// <summary>
        /// Waits for the index build to finish, then searches the "Guideline" field and
        /// returns the code snippets of every hit scoring above 0.6.
        /// </summary>
        /// <param name="searchTerm">Free text; every non-alphanumeric character is replaced by a space and the text is lower-cased.</param>
        /// <returns>Map from "CodeSnippetName" to "CodeSnippet" for the accepted hits.</returns>
        public static IDictionary <string, string> Query(string searchTerm)
        {
            BuildIndexTask.Wait();

            System.Text.RegularExpressions.Regex rgx = new System.Text.RegularExpressions.Regex("[^a-zA-Z0-9]");
            searchTerm = rgx.Replace(searchTerm, " ");

            IDictionary <string, string> results = new Dictionary <string, string>();

            IndexSearcher searcher = new IndexSearcher(luceneIndexDirectory);

            try
            {
                QueryParser parser = new QueryParser("Guideline", analyzer);

                Query query     = parser.Parse(searchTerm.ToLower());
                Hits  hitsFound = searcher.Search(query);

                for (int i = 0; i < hitsFound.Length(); i++)
                {
                    if (!(hitsFound.Score(i) > 0.6))
                    {
                        continue;
                    }

                    Document doc             = hitsFound.Doc(i);
                    string   codeSnippetName = doc.Get("CodeSnippetName");

                    // Dictionary.Add throws on a null or repeated key. The first occurrence of a
                    // name is kept and later hits carrying the same name are ignored.
                    if (codeSnippetName != null && !results.ContainsKey(codeSnippetName))
                    {
                        results.Add(codeSnippetName, doc.Get("CodeSnippet"));
                    }
                }
            }
            finally
            {
                // Close the searcher even when parsing or searching throws.
                searcher.Close();
            }

            return(results);
        }
Example #3
0
    /// <summary>
    /// Runs a query against the "LineText" field and returns the matching rows, best score first.
    /// </summary>
    /// <param name="searchTerm">Query text handed to the Lucene query parser.</param>
    /// <returns>One row per hit, ordered by descending score.</returns>
    public IEnumerable <DataFileRow> Search(string searchTerm)
    {
        List <DataFileRow> results  = new List <DataFileRow>();
        IndexSearcher      searcher = new IndexSearcher(luceneIndexDirectory);

        try
        {
            QueryParser parser = new QueryParser("LineText", analyzer);

            Query query     = parser.Parse(searchTerm);
            Hits  hitsFound = searcher.Search(query);

            for (int i = 0; i < hitsFound.Length(); i++)
            {
                Document    doc = hitsFound.Doc(i);
                DataFileRow row = new DataFileRow();

                row.LineNumber = int.Parse(doc.Get("LineNumber"));
                row.LineText   = doc.Get("LineText");
                row.Score      = hitsFound.Score(i);

                results.Add(row);
            }
        }
        finally
        {
            // Close the searcher before the directory it reads from, and do both even when
            // parsing, searching or row conversion throws.
            try
            {
                searcher.Close();
            }
            finally
            {
                luceneIndexDirectory.Close();
            }
        }

        return(results.OrderByDescending(x => x.Score).ToList());
    }
Example #4
0
    /// <summary>
    /// Command-line search: args[0] is the index path, args[1] the query, which is parsed
    /// against the "summary" field. Prints the score and "excerpt" of the leading hits.
    /// </summary>
    /// <param name="args">Index path followed by the query text.</param>
    public static void Main(string[] args)
    {
        if (args == null || args.Length < 2)
        {
            Console.Error.WriteLine("Usage: <indexpath> <query>");
            return;
        }

        // Hits up to and including this index are printed (same cut-off as before).
        const int lastDisplayedIndex = 50;

        string indexpath = args[0];
        string query     = args[1];

        IndexSearcher searcher = new IndexSearcher(indexpath);

        try
        {
            Query parsedquery = QueryParser.Parse(query,
                                                  "summary", new StandardAnalyzer());

            Hits hits = searcher.Search(parsedquery);

            Console.WriteLine("Found " + hits.Length() + " document(s) that matched query '" + query + "':\n");

            for (int i = 0; i < hits.Length() && i <= lastDisplayedIndex; i++)
            {
                Document doc = hits.Doc(i);
                Console.WriteLine(hits.Score(i) + ": " + doc.Get("excerpt") + "\n");
            }
        }
        finally
        {
            // Release the index even when the query cannot be parsed.
            searcher.Close();
        }
    }
Example #5
0
        /// <summary>
        /// Rewrites the current search term into a fuzzy title query ("title:(word~ word~ )"),
        /// runs it, logs the request and sends at most ten result lines.
        /// Any failure is logged and no response is sent.
        /// </summary>
        void DoTitleMatches()
        {
            try {
                // Append '~' to every word. Leading, trailing or repeated spaces make the split
                // yield empty tokens; those are skipped because a bare "~" is not a valid term.
                System.Text.StringBuilder fuzzyTerms = new System.Text.StringBuilder();
                foreach (string word in Regex.Split(searchterm, " +"))
                {
                    if (word.Length == 0)
                    {
                        continue;
                    }
                    fuzzyTerms.Append(word).Append("~ ");
                }
                searchterm = "title:(" + fuzzyTerms + ")";

                DateTime now   = DateTime.UtcNow;
                Query    query = state.Parse(searchterm);
                Hits     hits  = state.Searcher.Search(query);

                int numhits = hits.Length();
                LogRequest(searchterm, query, numhits, now);

                SendHeaders(200, "OK");
                for (int i = 0; i < numhits && i < 10; i++)
                {
                    Document doc           = hits.Doc(i);
                    float    score         = hits.Score(i);
                    string   pageNamespace = doc.Get("namespace");
                    string   title         = doc.Get("title");
                    SendResultLine(score, pageNamespace, title);
                }
            } catch (Exception e) {
                log.Error(e.Message + e.StackTrace);
            }
        }
Example #6
0
        /// <summary>
        /// Searches the index for the given text and returns one "Name,Id" string per hit.
        /// This is a best-effort lookup: on any failure the error is traced and the results
        /// collected so far (possibly none) are returned.
        /// </summary>
        /// <param name="text">Text passed to GetParsedQuerywc to build the query.</param>
        /// <returns>List of "Name,Id" strings; never null.</returns>
        public List <string> Search(string text)
        {
            List <string> lstFilteredValue = new List <string>();

            try
            {
                IndexSearcher indexSearcher = new IndexSearcher(LuceneDirectory);

                try
                {
                    Query mainQuery = this.GetParsedQuerywc(text);
                    Hits  hits      = indexSearcher.Search(mainQuery);

                    int hitCount = hits.Length();

                    for (int i = 0; i < hitCount; i++)
                    {
                        Document doc = hits.Doc(i);
                        lstFilteredValue.Add(doc.Get("Name") + "," + doc.Get("Id"));
                    }
                }
                finally
                {
                    // Always release the index reader opened for this call.
                    indexSearcher.Close();
                }
            }
            catch (Exception generalException)
            {
                // Callers rely on this method not throwing, so the failure is recorded
                // instead of being silently discarded.
                System.Diagnostics.Trace.TraceError("Search failed for '{0}': {1}", text, generalException);
            }
            return(lstFilteredValue);
        }
Example #7
0
        /// <summary>
        /// Searches the "Data" field and returns the "FileName" of every hit scoring above 0.6.
        /// </summary>
        /// <param name="searchTerm">Free text; every non-alphanumeric character is replaced by a space and the text is lower-cased.</param>
        /// <returns>File names of the accepted hits, in hit order.</returns>
        public static IList <string> Search(string searchTerm)
        {
            System.Text.RegularExpressions.Regex rgx = new System.Text.RegularExpressions.Regex("[^a-zA-Z0-9]");
            searchTerm = rgx.Replace(searchTerm, " ");

            IList <string> results = new List <string>();

            IndexSearcher searcher = new IndexSearcher(luceneIndexDirectory);

            try
            {
                QueryParser parser = new QueryParser("Data", analyzer);

                Query query     = parser.Parse(searchTerm.ToLower());
                Hits  hitsFound = searcher.Search(query);

                for (int i = 0; i < hitsFound.Length(); i++)
                {
                    if (hitsFound.Score(i) > 0.6)
                    {
                        results.Add(hitsFound.Doc(i).Get("FileName"));
                    }
                }
            }
            finally
            {
                // Close the searcher even when parsing or searching throws.
                searcher.Close();
            }

            return(results);
        }
Example #8
0
        /// <summary>
        /// Runs a fuzzy search (similarity 0.7) for the cleaned search string and loads the
        /// matching items from the database by their stored "id".
        /// </summary>
        /// <param name="searchStr">Raw user input; it is passed through the cleaner, lower-cased and trimmed.</param>
        /// <returns>One result per hit with its score; empty when nothing remains after cleaning.</returns>
        public override List <SearchResult> Search(string searchStr)
        {
            List <SearchResult> results = new List <SearchResult>();

            string cleanSearchStr = cleaner.Replace(searchStr, "").ToLower().Trim();

            // An empty term would produce the query "~0.7", which the parser rejects.
            if (cleanSearchStr.Length == 0)
            {
                return(results);
            }

            IndexSearcher searcher = new IndexSearcher(directory);

            try
            {
                Query query = parser.Parse(cleanSearchStr + "~0.7");
                Hits  hits  = searcher.Search(query);

                int resultCount = hits.Length();

                for (int i = 0; i < resultCount; i++)
                {
                    SearchResult result = new SearchResult();
                    result.Item  = DatabaseManager.Get <T>(int.Parse(hits.Doc(i).Get("id")));
                    result.Score = hits.Score(i);

                    results.Add(result);
                }
            }
            finally
            {
                // The searcher was previously never closed.
                searcher.Close();
            }

            return(results);
        }
Example #9
0
        /// <summary>
        /// Searches the index and returns one page of results.
        /// </summary>
        /// <param name="queryText">Text matched against "title", "summary" and "contents"; skipped when null or empty.</param>
        /// <param name="categoryNames">Optional category restriction; ignored when null.</param>
        /// <param name="pageIndex">Zero-based page number.</param>
        /// <param name="pageSize">Number of results per page.</param>
        /// <param name="roleIds">Role ids used to build the mandatory security clause.</param>
        /// <returns>The requested page, with TotalCount, PageIndex and ExecutionTime (in ticks) filled in.</returns>
        public SearchResultCollection Find(string queryText, IList <string> categoryNames, int pageIndex, int pageSize, IEnumerable <int> roleIds)
        {
            // Stopwatch is monotonic; DateTime.Now can jump on clock or DST changes.
            System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

            // the overall-query
            BooleanQuery query = new BooleanQuery();

            // add our parsed query
            if (!String.IsNullOrEmpty(queryText))
            {
                Query multiQuery = MultiFieldQueryParser.Parse(new[] { queryText, queryText, queryText }, new[] { "title", "summary", "contents" }, new StandardAnalyzer());
                query.Add(multiQuery, BooleanClause.Occur.MUST);
            }
            // add the security constraint - must be satisfied
            query.Add(this.BuildSecurityQuery(roleIds), BooleanClause.Occur.MUST);

            // Add the category query (if available)
            if (categoryNames != null)
            {
                query.Add(this.BuildCategoryQuery(categoryNames), BooleanClause.Occur.MUST);
            }

            SearchResultCollection results;
            IndexSearcher          searcher = new IndexSearcher(this._indexDirectory);

            try
            {
                Hits hits  = searcher.Search(query);
                int  total = hits.Length();
                int  start = pageIndex * pageSize;
                // The page ends at the last hit when fewer hits than a full page remain.
                int  end   = Math.Min((pageIndex + 1) * pageSize, total);

                results            = new SearchResultCollection(end);
                results.TotalCount = total;
                results.PageIndex  = pageIndex;

                for (int i = start; i < end; i++)
                {
                    // Fetch the stored document once per hit instead of once per field.
                    var doc = hits.Doc(i);

                    SearchResult result = new SearchResult();
                    result.Title      = doc.Get("title");
                    result.Summary    = doc.Get("summary");
                    result.Author     = doc.Get("author");
                    result.ModuleType = doc.Get("moduletype");
                    result.Path       = doc.Get("path");
                    string[] categories = doc.GetValues("category");
                    result.Category = categories != null?String.Join(", ", categories) : String.Empty;

                    result.DateCreated = DateTime.Parse(doc.Get("datecreated"));
                    result.Score       = hits.Score(i);
                    result.Boost       = doc.GetBoost();
                    result.SectionId   = Int32.Parse(doc.Get("sectionid"));
                    results.Add(result);
                }
            }
            finally
            {
                // Close the searcher even when a stored field fails to parse.
                searcher.Close();
            }

            // TimeSpan ticks use the same 100 ns unit as DateTime ticks.
            results.ExecutionTime = timer.Elapsed.Ticks;

            return(results);
        }
Example #10
0
        /// <summary>
        /// Demo entry point: indexes a few sample strings into an in-memory directory, runs a
        /// phrase query with a slop of 4 against the "contents" field and prints up to ten hits.
        /// Any exception is reported on standard output.
        /// </summary>
        /// <param name="args">Not used.</param>
        public static void  Main(System.String[] args)
        {
            try
            {
                Directory   ramIndex       = new RAMDirectory();
                Analyzer    simpleAnalyzer = new SimpleAnalyzer();
                IndexWriter indexWriter    = new IndexWriter(ramIndex, simpleAnalyzer, true);

                // Build the sample index, one document per string.
                System.String[] sampleTexts = new System.String[] { "a b c d e", "a b c d e a b c d e", "a b c d e f g h i j", "a c e", "e c a", "a c e a c e", "a c e a b c" };
                foreach (System.String sampleText in sampleTexts)
                {
                    Document sampleDoc = new Document();
                    sampleDoc.Add(Field.Text("contents", sampleText));
                    indexWriter.AddDocument(sampleDoc);
                }
                indexWriter.Close();

                Searcher indexSearcher = new IndexSearcher(ramIndex);

                QueryParsers.QueryParser phraseParser = new QueryParsers.QueryParser("contents", simpleAnalyzer);
                phraseParser.SetPhraseSlop(4);

                System.String[] queryTexts = new System.String[] { "\"a c e\"" };
                foreach (System.String queryText in queryTexts)
                {
                    Query parsed = phraseParser.Parse(queryText);
                    System.Console.Out.WriteLine("Query: " + parsed.ToString("contents"));

                    Hits found = indexSearcher.Search(parsed);
                    int  total = found.Length();

                    System.Console.Out.WriteLine(total + " total results");

                    // Print at most the first ten hits.
                    int shown = total < 10 ? total : 10;
                    for (int rank = 0; rank < shown; rank++)
                    {
                        Document hit = found.Doc(rank);
                        System.Console.Out.WriteLine(rank + " " + found.Score(rank) + " " + hit.Get("contents"));
                    }
                }
                indexSearcher.Close();
            }
            catch (System.Exception e)
            {
                System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
            }
        }
        /// <summary>
        /// Builds the EntityInfo for the hit at the given position and, when the entity has a
        /// non-empty projection array, fills its slots according to the configured projection.
        /// </summary>
        /// <param name="hits">Search hits to read from.</param>
        /// <param name="index">Position of the hit within <paramref name="hits"/>.</param>
        /// <returns>The extracted entity info.</returns>
        public EntityInfo Extract(Hits hits, int index)
        {
            Document document = hits.Doc(index);
            // TODO: if only the score is being projected (unlikely), avoid loading the document (lazy load)
            EntityInfo entityInfo = Extract(document);

            object[] slots = entityInfo.Projection;

            // Nothing to fill in when the entity carries no projection slots.
            if (slots == null || slots.Length == 0)
            {
                return(entityInfo);
            }

            for (int slot = 0; slot < projection.Length; slot++)
            {
                switch (projection[slot])
                {
                case ProjectionConstants.SCORE:
                    slots[slot] = hits.Score(index);
                    break;

                case ProjectionConstants.ID:
                    slots[slot] = entityInfo.Id;
                    break;

                case ProjectionConstants.DOCUMENT:
                    slots[slot] = document;
                    break;

                case ProjectionConstants.DOCUMENT_ID:
                    slots[slot] = hits.Id(index);
                    break;

                case ProjectionConstants.BOOST:
                    slots[slot] = document.GetBoost();
                    break;

                case ProjectionConstants.THIS:
                    // THIS may be projected more than once; only the slot positions are
                    // recorded here, and loading is deferred to the Loader phase.
                    if (entityInfo.IndexesOfThis == null)
                    {
                        entityInfo.IndexesOfThis = new List <int>(1);
                    }
                    entityInfo.IndexesOfThis.Add(slot);
                    break;
                }
            }

            return(entityInfo);
        }
Example #12
0
        /// <summary>
        /// Indexes a fixed set of sample strings into an in-memory directory, runs a fixed list
        /// of queries (phrase slop 4) against the "contents" field and writes the query, the hit
        /// count and up to ten hits per query to the supplied writer.
        /// </summary>
        /// <param name="out_Renamed">Writer receiving the textual report.</param>
        /// <param name="useCompoundFile">Passed to the index writer's SetUseCompoundFile.</param>
        private void  DoTestSearch(System.IO.StringWriter out_Renamed, bool useCompoundFile)
        {
            Directory   ramIndex       = new RAMDirectory();
            Analyzer    simpleAnalyzer = new SimpleAnalyzer();
            IndexWriter indexWriter    = new IndexWriter(ramIndex, simpleAnalyzer, true);

            indexWriter.SetUseCompoundFile(useCompoundFile);

            // Build the sample index, one document per string.
            System.String[] sampleTexts = new System.String[] { "a b c d e", "a b c d e a b c d e", "a b c d e f g h i j", "a c e", "e c a", "a c e a c e", "a c e a b c" };
            foreach (System.String sampleText in sampleTexts)
            {
                Document sampleDoc = new Document();
                sampleDoc.Add(Field.Text("contents", sampleText));
                indexWriter.AddDocument(sampleDoc);
            }
            indexWriter.Close();

            Searcher indexSearcher = new IndexSearcher(ramIndex);

            QueryParsers.QueryParser phraseParser = new QueryParsers.QueryParser("contents", simpleAnalyzer);
            phraseParser.SetPhraseSlop(4);

            System.String[] queryTexts = new System.String[] { "a b", "\"a b\"", "\"a b c\"", "a c", "\"a c\"", "\"a c e\"" };
            foreach (System.String queryText in queryTexts)
            {
                Query parsed = phraseParser.Parse(queryText);
                out_Renamed.WriteLine("Query: " + parsed.ToString("contents"));

                Hits found = indexSearcher.Search(parsed);
                int  total = found.Length();

                out_Renamed.WriteLine(total + " total results");

                // Report at most the first ten hits.
                int shown = total < 10 ? total : 10;
                for (int rank = 0; rank < shown; rank++)
                {
                    Document hit = found.Doc(rank);
                    out_Renamed.WriteLine(rank + " " + found.Score(rank) + " " + hit.Get("contents"));
                }
            }
            indexSearcher.Close();
        }
Example #13
0
        /// <summary>
        /// Searches the "text" field of the index at pathIndex and returns one line per hit in
        /// the form "title     path     score".
        /// </summary>
        /// <param name="strSearchContent">Query text.</param>
        /// <returns>
        /// One formatted string per hit, or null when the index cannot be opened, the query is
        /// blank, or the query is the unsupported single "*".
        /// </returns>
        public string[] Fncsearch(string strSearchContent)
        {
            try
            {
                searcher = new IndexSearcher(this.pathIndex);
            }
            catch (IOException ex)
            {
                System.Windows.Forms.MessageBox.Show("The index doesn't exist or is damaged. Please rebuild the index.\r\n\r\nDetails:\r\n" + ex.Message);
                return(null);
            }

            try
            {
                if (strSearchContent.Trim(new char[] { ' ' }) == String.Empty)
                {
                    return(null);
                }
                if (strSearchContent == "*")
                {
                    MessageBox.Show("Sorry cannot search files with *");
                    // Stop here: previously execution fell through and tried to parse "*" anyway.
                    return(null);
                }

                Query query = QueryParser.Parse(strSearchContent, "text", new StandardAnalyzer());
                Hits  hits  = searcher.Search(query);

                int      hitCount   = hits.Length();
                string[] strTempArr = new string[hitCount];

                for (int i = 0; i < hitCount; i++)
                {
                    Document doc = hits.Doc(i);
                    strTempArr[i] = doc.Get("title") + "     " + doc.Get("path") + "     " + hits.Score(i).ToString();
                }

                return(strTempArr);
            }
            finally
            {
                // Covers the early returns above as well as parse/search failures.
                searcher.Close();
            }
        }
        /// <summary>
        /// Searches the "versiculo" field for the phrase and returns the matching items loaded
        /// from the default container, limited to maxOpenSearchResults.
        /// </summary>
        /// <param name="phrase">Query text handed to the Lucene query parser.</param>
        /// <param name="total">Receives the total number of hits, before the limit is applied.</param>
        /// <returns>The loaded items, each tagged with its score as text.</returns>
        internal IList <ISbItem> OpenSearchRequest(string phrase, out int total)
        {
            IList <ISbItem> versiculos = new List <ISbItem>();

            // NOTE(review): the searcher comes from DefaultIndex and is presumably shared,
            // so it is not closed here - confirm ownership.
            IndexSearcher searcher    = this.DefaultIndex.GetIndex();
            QueryParser   queryParser = new QueryParser("versiculo", new StandardAnalyzer());
            Hits          hits        = searcher.Search(queryParser.Parse(phrase));

            total = hits.Length();
            for (int i = 0; i < total && i < maxOpenSearchResults; i++)
            {
                ISbItem item = this.DefaultContainer.Ext().GetByID(Convert.ToInt64(hits.Doc(i).Get("id"))) as ISbItem;

                // The index may reference an object that is missing from the container or is
                // not an ISbItem; skip it instead of failing with a NullReferenceException.
                if (item == null)
                {
                    continue;
                }

                item.Tag = hits.Score(i).ToString();
                this.DefaultContainer.Activate(item, 1);
                versiculos.Add(item);
            }

            return(versiculos);
        }
Example #15
0
        /// <summary>
        /// Searches the "text" field of the index at pathIndex and returns the hits as a table
        /// with the string columns "title", "path" and "hits" (the score as text).
        /// </summary>
        /// <param name="searchText">Query text; null or blank yields an empty table without columns.</param>
        /// <returns>One row per hit.</returns>
        /// <exception cref="IndexDamagedException">The index cannot be opened.</exception>
        public DataTable search(string searchText)
        {
            try
            {
                searcher = new IndexSearcher(this.pathIndex);
            }
            catch (IOException ex)
            {
                throw new IndexDamagedException("The index doesn't exist or is damaged. Please rebuild the index.\r\n\r\nDetails:\r\n" + ex.Message);
            }

            try
            {
                if (searchText == null || searchText.Trim().Length == 0)
                {
                    return(new DataTable());
                }

                Query query = QueryParser.Parse(searchText, "text", new StandardAnalyzer());

                Hits hits = searcher.Search(query);

                DataTable dt = new DataTable();

                dt.Columns.Add("title", typeof(string));
                dt.Columns.Add("path", typeof(string));
                dt.Columns.Add("hits", typeof(string));

                for (int i = 0; i < hits.Length(); i++)
                {
                    DataRow  dr  = dt.NewRow();
                    Document doc = hits.Doc(i);

                    dr["title"] = doc.Get("title");
                    dr["path"]  = doc.Get("path");
                    dr["hits"]  = hits.Score(i).ToString();
                    dt.Rows.Add(dr);
                }

                return(dt);
            }
            finally
            {
                // Covers the blank-query return as well as parse/search failures.
                searcher.Close();
            }
        }
Example #16
0
        /// <summary>
        /// Parses the current search term as-is, runs it, logs the request under the label
        /// "(raw)" and sends at most ten result lines after a 200 header.
        /// </summary>
        void DoRawSearch()
        {
            DateTime startedAt = DateTime.UtcNow;
            Query    parsed    = state.Parse(searchterm);
            Hits     found     = state.Searcher.Search(parsed);
            int      total     = found.Length();

            LogRequest("(raw)", parsed, total, startedAt);
            SendHeaders(200, "OK");

            // Only the first ten hits are reported.
            int shown = total < 10 ? total : 10;
            for (int rank = 0; rank < shown; rank++)
            {
                Document hit = found.Doc(rank);
                SendResultLine(found.Score(rank), hit.Get("namespace"), hit.Get("title"));
            }
        }
Example #17
0
        /// <summary>
        /// Searches the index referenced by this object for the given term. When the direct
        /// search finds nothing and expansion is requested, the term is expanded through the
        /// supplied delegate and searched again across the main field and the hyponym field.
        /// </summary>
        /// <param name="searchTerm">The term to search for within the index.</param>
        /// <param name="ids">Receives the matching documents themselves, one per hit.</param>
        /// <param name="results">Receives the main-field text of each hit.</param>
        /// <param name="scores">Receives the score of each hit.</param>
        /// <param name="expandWithWordNet">Delegate producing the expanded query text; invoked only for the fallback search.</param>
        /// <param name="expandOnNoHits">Whether to run the fallback search when the direct search has no hits.</param>
        /// <returns>The number of hits placed in the out arrays; 0 when the database is not present.</returns>
        internal int Search(string searchTerm, out Object[] ids, out string[] results, out float[] scores, WordExpander expandWithWordNet, bool expandOnNoHits)
        {
            checkDbLock();
            if (!IsDbPresent)
            {
                // No database: hand back empty arrays.
                ids     = new Document[0];
                results = new string[0];
                scores  = new float[0];
                return(0);
            }

            IndexSearcher activeSearcher = new IndexSearcher(_directory);

            try
            {
                // First attempt: the term exactly as given, against the main field.
                QueryParser directParser = new QueryParser(_fieldName, _analyzer);
                Hits        directHits   = activeSearcher.Search(directParser.Parse(searchTerm));
                int         directCount  = directHits.Length();

                ids     = new Document[directCount];
                results = new string[directCount];
                scores  = new float[directCount];

                for (int pos = 0; pos < directCount; ++pos)
                {
                    float hitScore = directHits.Score(pos);
                    var   hitDoc   = directHits.Doc(pos);

                    ids[pos]     = hitDoc;
                    results[pos] = hitDoc.Get(_fieldName);
                    scores[pos]  = hitScore;
                }

                if (directCount == 0 && expandOnNoHits)
                {
                    // Fallback: expanded term, searched over the main and hyponym fields.
                    MultiFieldQueryParser expandedParser = new MultiFieldQueryParser(
                        new string[] { _fieldName, MyLuceneIndexer.HYPO_FIELD_NAME },
                        _analyzer);
                    string expandedTerm  = expandWithWordNet(searchTerm, false);
                    Hits   expandedHits  = activeSearcher.Search(expandedParser.Parse(expandedTerm));
                    int    expandedCount = expandedHits.Length();

                    ids     = new Document[expandedCount];
                    results = new string[expandedCount];
                    scores  = new float[expandedCount];

                    for (int pos = 0; pos < expandedCount; ++pos)
                    {
                        float hitScore = expandedHits.Score(pos);
                        var   hitDoc   = expandedHits.Doc(pos);

                        results[pos] = hitDoc.Get(_fieldName);
                        ids[pos]     = hitDoc;
                        scores[pos]  = hitScore;
                    }
                }
            }
            finally
            {
                activeSearcher.Close();
            }
            return(ids.Length);
        }/*
Example #18
0
        /// <summary>
        /// Searches for the query and deletes every hit whose score equals the top score.
        /// </summary>
        /// <param name="searchQuery">Query text parsed against the main field.</param>
        /// <param name="mustContainExact">
        /// When true, a top-scoring hit is deleted only if its stored text contains the query
        /// (case-insensitive substring test).
        /// </param>
        /// <returns>The number of delete requests issued; 0 when the database is not present.</returns>
        internal int DeleteTopScoring0(string searchQuery, bool mustContainExact)
        {
            checkDbLock();
            if (!IsDbPresent)
            {
                return(0);
            }

            // find it
            writeToLog("Replacing best \"{0}\"...", searchQuery);
            IndexSearcher indexSearcher = new IndexSearcher(_directory);
            int           deleted       = 0;

            try
            {
                QueryParser queryParser        = new QueryParser(_fieldName, _analyzer);
                Query       query              = queryParser.Parse(searchQuery);
                Hits        hits               = indexSearcher.Search(query);
                string      searchQueryToLower = searchQuery.ToLower();
                int         numHits            = hits.Length();

                // Collected for every hit in case something smarter is wanted later.
                ulong[]  ids     = new ulong[numHits];
                string[] results = new string[numHits];
                float[]  scores  = new float[numHits];
                for (int i = 0; i < numHits; ++i)
                {
                    float  score    = hits.Score(i);
                    string text     = hits.Doc(i).Get(_fieldName);
                    string idAsText = hits.Doc(i).Get(MyLuceneIndexer.DOC_ID_FIELD_NAME);
                    ids[i]     = UInt64.Parse(idAsText);
                    results[i] = text;
                    scores[i]  = score;
                }

                if (numHits > 0)
                {
                    IndexWriter indexWriter = new IndexWriter(_directory, _analyzer);
                    bool        committed   = false;

                    try
                    {
                        float topscore = scores[0];
                        for (int i = 0; i < numHits; i++)
                        {
                            // Exact comparison is intended: only ties with the best hit qualify.
                            if (scores[i] != topscore)
                            {
                                continue;
                            }

                            // Basically a word-order check; a hit without stored text cannot match.
                            if (mustContainExact &&
                                (results[i] == null || !results[i].ToLower().Contains(searchQueryToLower)))
                            {
                                writeToLog("Cannot or wont delete " + searchQueryToLower);
                                continue;
                            }

                            writeToLog("DEBUG9 deleting " + searchQueryToLower);
                            indexWriter.DeleteDocuments(new Term(MyLuceneIndexer.DOC_ID_FIELD_NAME, ids[i].ToString()));
                            deleted++;
                        }
                        indexWriter.Commit();
                        committed = true;
                    }
                    finally
                    {
                        // Always release the writer (and its write lock). On failure the pending
                        // deletes are discarded rather than half-applied.
                        if (committed)
                        {
                            indexWriter.Close();
                        }
                        else
                        {
                            indexWriter.Rollback();
                        }
                    }
                }
            }
            finally
            {
                indexSearcher.Close();
            }
            return(deleted);
        }
Example #19
0
        /// <summary>
        /// Converts search hits into product records, sorts them and returns one page.
        /// </summary>
        /// <remarks>
        /// Hits scoring below 0.01 are dropped. A hit whose stored fields cannot be parsed is
        /// skipped (best effort) instead of aborting the whole search.
        /// NOTE(review): <c>level</c> and <c>score</c> are only computed when <paramref name="order"/>
        /// is 0 or less, so order 9 sorts on whatever defaults those members have - confirm intent.
        /// </remarks>
        /// <param name="hits">Lucene hits to convert.</param>
        /// <param name="key">Not used by this method.</param>
        /// <param name="order">
        /// Sort selector: 1/2 favourite count descending/ascending, 3/4 selling time descending/ascending,
        /// 5/6 sell count descending/ascending, 7/8 trade price descending/ascending,
        /// 9 level then score ascending, anything else level descending then score ascending.
        /// </param>
        /// <param name="pageNo">1-based page number.</param>
        /// <param name="pageLen">Number of items per page.</param>
        /// <param name="recCount">Receives the number of products that survived filtering, before paging.</param>
        /// <returns>The requested page of products.</returns>
        private List <ProductSimpleInfo> ProductBinding(Hits hits, string key, int order, int pageNo, int pageLen, out int recCount)
        {
            const double MinScore = 0.01;

            int hitCount = hits.Length();
            IList <ProductSimpleInfo> list = new List <ProductSimpleInfo>();

            for (int n = 0; n < hitCount; n++)
            {
                // Drop results whose relevance is too low.
                if (hits.Score(n) < MinScore)
                {
                    continue;
                }

                // Declared per iteration: a failed hit must not leave the previous product
                // behind to be added a second time.
                ProductSimpleInfo info = null;
                try
                {
                    var doc = hits.Doc(n);
                    info = new ProductSimpleInfo()
                    {
                        productID      = int.Parse(doc.Get("ProductID")),
                        productCode    = doc.Get("ProductCode"),
                        chineseName    = doc.Get("ChineseName"),
                        cadn           = doc.Get("CADN"),
                        longName       = doc.Get("LongName"),
                        pinyinName     = doc.Get("PinyinName"),
                        marketPrice    = decimal.Parse(doc.Get("MarketPrice")),
                        tradePrice     = decimal.Parse(doc.Get("TradePrice")),
                        sellCount      = int.Parse(doc.Get("SellCount")),
                        favorCount     = int.Parse(doc.Get("Favorite")),
                        productType    = short.Parse(doc.Get("ProductType")),
                        specifications = doc.Get("Specifications"),
                        images         = doc.Get("Images"),
                        actions        = short.Parse(doc.Get("Actions")),
                        comments       = int.Parse(doc.Get("Comments")),
                        selling        = int.Parse(doc.Get("Selling")),
                        manufacturer   = doc.Get("Manufacturer"),
                        sellingTime    = DateTime.Parse(doc.Get("SellingTime")),
                        recommend      = float.Parse(doc.Get("Recommend")),
                        preferential   = float.Parse(doc.Get("Preferential")),
                        brandName      = doc.Get("BrandName")
                    };
                    if (order <= 0)
                    {
                        info.level = GetProductLevel(hits.Score(n), 1);
                        info.score = GetProductLevel(info.sellCount, 0) + n;
                    }
                }
                catch
                {
                    // Deliberately best effort: a malformed index entry only costs that one result.
                }

                if (info != null)
                {
                    list.Add(info);
                }
            }
            recCount = list.Count;

            // Pick the ordering; paging is applied once below.
            IEnumerable <ProductSimpleInfo> ordered;
            switch (order)
            {
            case 1:
                // Popularity, descending
                ordered = list.OrderByDescending(i => i.favorCount);
                break;

            case 2:
                // Popularity, ascending
                ordered = list.OrderBy(i => i.favorCount);
                break;

            case 3:
                // Newest, descending
                ordered = list.OrderByDescending(i => i.sellingTime);
                break;

            case 4:
                // Newest, ascending
                ordered = list.OrderBy(i => i.sellingTime);
                break;

            case 5:
                // Sales, descending
                ordered = list.OrderByDescending(i => i.sellCount);
                break;

            case 6:
                // Sales, ascending
                ordered = list.OrderBy(i => i.sellCount);
                break;

            case 7:
                // Price, descending
                ordered = list.OrderByDescending(i => i.tradePrice);
                break;

            case 8:
                // Price, ascending
                ordered = list.OrderBy(i => i.tradePrice);
                break;

            case 9:
                // Composite, ascending
                ordered = list.OrderBy(i => i.level).ThenBy(i => i.score);
                break;

            default:
                ordered = list.OrderByDescending(i => i.level).ThenBy(i => i.score);
                break;
            }

            return(ordered.Skip(pageLen * (pageNo - 1)).Take(pageLen).ToList());
        }
Example #20
0
        /// <summary>
        /// Collects the distinct categories (typeID 2) and brands (typeID 5) referenced by the hits.
        /// </summary>
        /// <remarks>
        /// Hits scoring below 0.01 are ignored. Each document contributes up to three categories
        /// (CFID1..CFID3) and one brand. Missing, empty or non-numeric id fields are skipped
        /// rather than aborting the scan.
        /// </remarks>
        /// <param name="hits">Lucene hits to scan.</param>
        /// <returns>The de-duplicated category and brand entries, in first-seen order.</returns>
        private IList <JXSearchEntity> ProductParaList(Hits hits)
        {
            IList <JXSearchEntity> list = new List <JXSearchEntity>();

            try
            {
                int recCount = hits.Length();       // total number of hits
                for (int j = 0; j < recCount; j++)
                {
                    if (hits.Score(j) < 0.01)
                    {
                        continue;
                    }

                    var doc = hits.Doc(j);

                    AddCategoryEntity(list, doc.Get("CFID1"), doc.Get("CFName1"), doc.Get("ParentID1"));
                    AddCategoryEntity(list, doc.Get("CFID2"), doc.Get("CFName2"), doc.Get("ParentID2"));
                    AddCategoryEntity(list, doc.Get("CFID3"), doc.Get("CFName3"), doc.Get("ParentID3"));
                    AddBrandEntity(list, doc.Get("BrandID"), doc.Get("BrandName"), doc.Get("BrandLetter"));
                }
            }
            catch
            {
                // Deliberately best effort: if reading the hits fails, return what was
                // collected so far instead of failing the whole search page.
            }
            return(list);
        }

        /// <summary>
        /// Adds a category entry (typeID 2) unless the id is missing or malformed, the parent id
        /// is malformed, or the category is already in the list.
        /// </summary>
        private static void AddCategoryEntity(IList <JXSearchEntity> list, string idText, string name, string parentIdText)
        {
            int id;
            if (string.IsNullOrEmpty(idText))
            {
                return;
            }
            if (!int.TryParse(idText, out id))
            {
                return;
            }
            if (list.Any(g => g.id == id && g.typeID == 2))
            {
                return;
            }

            int parentId;
            if (!int.TryParse(parentIdText, out parentId))
            {
                return;
            }

            list.Add(new JXSearchEntity()
            {
                id          = id,
                chineseName = name,
                typeID      = 2,
                parentID    = parentId
            });
        }

        /// <summary>
        /// Adds a brand entry (typeID 5) unless the id is missing or malformed, or the brand
        /// is already in the list.
        /// </summary>
        private static void AddBrandEntity(IList <JXSearchEntity> list, string idText, string name, string letter)
        {
            int id;
            if (string.IsNullOrEmpty(idText))
            {
                return;
            }
            if (!int.TryParse(idText, out id))
            {
                return;
            }
            if (list.Any(g => g.brandID == id && g.typeID == 5))
            {
                return;
            }

            list.Add(new JXSearchEntity()
            {
                brandID   = id,
                brandName = name,
                letter    = letter,
                typeID    = 5
            });
        }
Example #21
0
        } // constructor

        /// <summary>
        /// Searches the keyword index using the keywordQuery.
        ///
        /// See http://www.dotlucene.net/documentation/QuerySyntax.html  for the format of the keywordQuery.
        ///
        /// This function will return a fully-filled array of IndexableFileInfo objects.
        /// </summary>
        /// <param name="keywordQuery">
        /// Query text, parsed against the "title" and "contents" fields with AND as the default operator.
        /// A null or empty value yields an empty result without touching the index.
        /// </param>
        /// <param name="queryForHighlighter">
        /// Query text used only to pick the highlighted fragments of "contents". When null or empty a
        /// random placeholder query is substituted, because parsing an empty query throws.
        /// </param>
        /// <returns>
        /// One entry per hit in relevance order; an empty array when the query is empty or the index
        /// cannot be opened.
        /// </returns>
        public IndexableFileInfo[] doSearch(string keywordQuery, string queryForHighlighter)
        {
            // Nothing to search for: answer before paying for opening the index.
            if (string.IsNullOrEmpty(keywordQuery))
            {
                return(new IndexableFileInfo[0]);
            }

            IndexSearcher searcher;
            IndexReader   indexReader;

            try
            {
                FSDirectory indexDir = FSDirectory.GetDirectory(this.luceneIndexDir, false);
                indexReader = IndexReader.Open(indexDir);
                searcher    = new IndexSearcher(indexReader);
            }
            catch
            {
                // if the luceneIndexDir does not contain index files (yet), IndexSearcher
                // throws a nice Exception.
                return(new IndexableFileInfo[0]);
            }
            List <IndexableFileInfo> arrayList = new List <IndexableFileInfo>();

            try
            {
                // -- weirdly enough, when the query is empty, an exception is thrown during the QueryParser.Parse
                //    this hack gets around that.
                string highlighterQueryText = string.IsNullOrEmpty(queryForHighlighter)
                    ? Guid.NewGuid().ToString()
                    : queryForHighlighter;

                // parse the query, "title" and "contents" are the fields searched
                MultiFieldQueryParser queryParser = new MultiFieldQueryParser(new string[] { "title", "contents" }, new SimpleAnalyzer());
                queryParser.SetDefaultOperator(QueryParser.AND_OPERATOR);

                Query query = queryParser.Parse(keywordQuery);

                // One analyzer instance serves both the highlighter query and every hit's token stream.
                hatWebPortalAnalyzer highlightAnalyzer    = new hatWebPortalAnalyzer();
                QueryParser          highlightQueryParser = new QueryParser("contents", highlightAnalyzer);

                Query highlighterQuery = highlightQueryParser.Parse(highlighterQueryText);

                query = searcher.Rewrite(query); // is this needed?? " Expert: called to re-write queries into primitive queries."

                // search
                Hits hits = searcher.Search(query, Sort.RELEVANCE);

                // create highlighter
                Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<strong>", "</strong>"), new QueryScorer(highlighterQuery));

                // -- go through hits and return results
                int hitCount = hits.Length();
                for (int i = 0; i < hitCount; i++)
                {
                    Document d                    = hits.Doc(i);
                    string   filename             = d.Get("filename");
                    string   plainText            = d.Get("contents");
                    string   title                = d.Get("title");
                    string   sectionName          = d.Get("SectionName");
                    string   filenameParams       = d.Get("filenameParams");
                    bool     contentIsPageSummary = Convert.ToBoolean(d.Get("contentIsPageSummary"));
                    double   score                = Convert.ToDouble(hits.Score(i));
                    DateTime lastModified         = DateTools.StringToDate(d.Get("LastModified"));

                    // Page summaries are shown verbatim; everything else gets highlighted fragments.
                    // The token stream is only built when it is used, and never over missing text
                    // (StringReader rejects null).
                    string fragment = plainText;
                    if (!contentIsPageSummary && !string.IsNullOrEmpty(plainText))
                    {
                        TokenStream tokenStream = highlightAnalyzer.TokenStream("contents", new StringReader(plainText));
                        fragment = highlighter.GetBestFragments(tokenStream, plainText, 2, "...");
                    }

                    IndexableFileInfo newHit = new IndexableFileInfo(filename, filenameParams, title, fragment, sectionName, lastModified, contentIsPageSummary, score);
                    arrayList.Add(newHit);
                } // for
            }
            finally
            {
                searcher.Close();
                indexReader.Close();
            }


            return(arrayList.ToArray());
        } // doSearch
Example #22
0
        /// <summary>
        /// Searches a site's index across several fields, optionally restricted to specific modules,
        /// and returns one page of results.
        /// </summary>
        /// <remarks>
        /// Two app settings steer the behaviour: "SearchUseBackwardCompatibilityMode" (anything other
        /// than "false" keeps the old mode, where role and publish-date filtering happens after the
        /// query on the current page only) and "SearchIncludeModuleRoleFilters" ("true" adds module
        /// role clauses for non-admin users). Highlighting is only applied outside backward
        /// compatibility mode.
        /// </remarks>
        /// <param name="siteId">Site whose index path is resolved through GetIndexPath.</param>
        /// <param name="isAdmin">When true no role filtering is applied.</param>
        /// <param name="userRoles">Roles handed to the role query builders for non-admin users.</param>
        /// <param name="queryText">Query text; null or empty yields an empty collection.</param>
        /// <param name="highlightResults">When true, the best highlighted fragment of "contents" replaces the item's Intro.</param>
        /// <param name="highlightedFragmentSize">Fragment size passed to the highlighter's fragmenter.</param>
        /// <param name="pageNumber">1-based page number; values of 1 or less start at the first hit.</param>
        /// <param name="pageSize">Number of hits per page.</param>
        /// <param name="totalHits">
        /// Receives the number of hits. In backward compatibility mode it is reduced by the number
        /// of hits filtered out while building the current page.
        /// </param>
        /// <param name="invalidQuery">Set to true when the query cannot be parsed or expands to too many clauses.</param>
        /// <param name="moduleIDs">Optional feature ids to restrict the search to; Guid.Empty entries are ignored.</param>
        /// <returns>The requested page of results; empty when there is no query, no index or the query is invalid.</returns>
        public static IndexItemCollection Search(
            int siteId,
            bool isAdmin,
            List <string> userRoles,
            string queryText,
            bool highlightResults,
            int highlightedFragmentSize,
            int pageNumber,
            int pageSize,
            out int totalHits,
            out bool invalidQuery,
            params Guid[] moduleIDs
            )
        {
            invalidQuery = false;
            totalHits    = 0;
            string indexPath            = GetIndexPath(siteId);
            IndexItemCollection results = new IndexItemCollection();

            if (string.IsNullOrEmpty(queryText))
            {
                return(results);
            }

            // Backward compatibility stays on unless explicitly switched off.
            bool useBackwardCompatibilityMode =
                ConfigurationManager.AppSettings["SearchUseBackwardCompatibilityMode"] != "false";

            bool IncludeModuleRoleFilters =
                ConfigurationManager.AppSettings["SearchIncludeModuleRoleFilters"] == "true";

            if (!IndexReader.IndexExists(indexPath))
            {
                return(results);
            }

            if (log.IsDebugEnabled)
            {
                log.Debug("Entered Search, indexPath = " + indexPath);
            }

            long startTicks = DateTime.Now.Ticks;

            try
            {
                BooleanQuery mainQuery = new BooleanQuery();

                if ((!isAdmin) && (!useBackwardCompatibilityMode))
                {
                    AddRoleQueries(userRoles, mainQuery);
                }

                if ((!isAdmin) && (IncludeModuleRoleFilters))
                {
                    AddModuleRoleQueries(userRoles, mainQuery);
                }

                // Wildcards are stripped for the Keyword field only.
                Query multiQuery = MultiFieldQueryParser.Parse(
                    new string[] { queryText, queryText, queryText, queryText, queryText, queryText.Replace("*", string.Empty) },
                    new string[] { "Title", "ModuleTitle", "contents", "PageName", "PageMetaDesc", "Keyword" },
                    new StandardAnalyzer());

                mainQuery.Add(multiQuery, BooleanClause.Occur.MUST);

                if (!useBackwardCompatibilityMode)
                {
                    // Only items whose publish window contains "now" may match.
                    Term       beginDateStart = new Term("PublishBeginDate", DateTime.MinValue.ToString("s"));
                    Term       beginDateEnd   = new Term("PublishBeginDate", DateTime.UtcNow.ToString("s"));
                    RangeQuery beginDateQuery = new RangeQuery(beginDateStart, beginDateEnd, true);
                    mainQuery.Add(beginDateQuery, BooleanClause.Occur.MUST);

                    Term       endDateStart = new Term("PublishEndDate", DateTime.UtcNow.ToString("s"));
                    Term       endDateEnd   = new Term("PublishEndDate", DateTime.MaxValue.ToString("s"));
                    RangeQuery endDateQuery = new RangeQuery(endDateStart, endDateEnd, true);
                    mainQuery.Add(endDateQuery, BooleanClause.Occur.MUST);
                }

                if (moduleIDs != null && moduleIDs.Length > 0)
                {
                    BooleanQuery featureFilter = new BooleanQuery();
                    foreach (Guid moduleId in moduleIDs)
                    {
                        if (moduleId != Guid.Empty)
                        {
                            featureFilter.Add(new TermQuery(new Term("FeatureId", moduleId.ToString())), BooleanClause.Occur.SHOULD);
                        }
                    }
                    if (featureFilter.Clauses().Count > 0)
                    {
                        mainQuery.Add(featureFilter, BooleanClause.Occur.MUST);
                    }
                }

                int itemsAdded = 0;

                IndexSearcher searcher = new IndexSearcher(indexPath);
                try
                {
                    // a 0 based collection
                    Hits hits = searcher.Search(mainQuery);

                    int startHit = 0;
                    if (pageNumber > 1)
                    {
                        startHit = ((pageNumber - 1) * pageSize);
                    }

                    totalHits = hits.Length();
                    int end = startHit + pageSize;
                    if (totalHits <= end)
                    {
                        end = totalHits;
                    }
                    int itemsToAdd = end;

                    // in backward compatibility mode if multiple pages of results are found we may not be showing every user the correct
                    // number of hits they can see as we only filter out the current page
                    // we may decrement total hits if filtering results so keep the original count
                    int actualHits = totalHits;

                    if (!useBackwardCompatibilityMode)
                    {
                        // all filtering is done by the query so the hit count is true,
                        // whereas with the old way it could be wrong since there
                        // were possibly results filtered out after the query returned.
                        QueryScorer scorer      = new QueryScorer(multiQuery);
                        Formatter   formatter   = new SimpleHTMLFormatter("<span class='searchterm'>", "</span>");
                        Highlighter highlighter = new Highlighter(formatter, scorer);
                        highlighter.SetTextFragmenter(new SimpleFragmenter(highlightedFragmentSize));

                        for (int i = startHit; i < itemsToAdd; i++)
                        {
                            var       doc       = hits.Doc(i);
                            IndexItem indexItem = new IndexItem(doc, hits.Score(i));

                            if (highlightResults)
                            {
                                string contents = doc.Get("contents");

                                // StringReader rejects null, so a hit without stored contents
                                // simply keeps its original intro.
                                if (contents != null)
                                {
                                    try
                                    {
                                        TokenStream stream = new StandardAnalyzer().TokenStream("contents", new StringReader(contents));

                                        string highlightedResult = highlighter.GetBestFragment(stream, contents);
                                        if (highlightedResult != null)
                                        {
                                            indexItem.Intro = highlightedResult;
                                        }
                                    }
                                    catch (NullReferenceException)
                                    {
                                        // Kept from the original: highlighting is cosmetic and
                                        // must not fail the search.
                                    }
                                }
                            }

                            results.Add(indexItem);
                            itemsAdded += 1;
                        }
                    }
                    else
                    {
                        // backward compatible with old indexes
                        for (int i = startHit; i < itemsToAdd; i++)
                        {
                            bool filteredOut = true;

                            if (
                                (isAdmin) ||
                                (WebUser.IsContentAdmin) ||
                                (WebUser.IsInRoles(hits.Doc(i).Get("ViewRoles")))
                                )
                            {
                                IndexItem indexItem = new IndexItem(hits.Doc(i), hits.Score(i));

                                if (
                                    (DateTime.UtcNow > indexItem.PublishBeginDate) &&
                                    (DateTime.UtcNow < indexItem.PublishEndDate)
                                    )
                                {
                                    results.Add(indexItem);
                                    // Count the item so ItemCount reflects this page in this mode too.
                                    itemsAdded += 1;
                                    filteredOut = false;
                                }
                            }

                            // filtered out a result so need to decrement
                            if (filteredOut)
                            {
                                totalHits -= 1;

                                // we are not getting as many results as the page size, so widen the
                                // window by one - but never past the last hit, which would make
                                // hits.Doc(i) go out of range.
                                if (itemsToAdd < actualHits)
                                {
                                    itemsToAdd += 1;
                                }
                            }
                        }
                    }
                }
                finally
                {
                    // Release the index even when the search itself throws (e.g. TooManyClauses).
                    searcher.Close();
                }

                results.ItemCount = itemsAdded;
                results.PageIndex = pageNumber;

                results.ExecutionTime = DateTime.Now.Ticks - startTicks;
            }
            catch (ParseException ex)
            {
                invalidQuery = true;
                log.Error("handled error for search terms " + queryText, ex);
                // these parser exceptions are generally caused by
                // spambots posting too much junk into the search form
                // heres an option to automatically ban the ip address
                HandleSpam(queryText, ex);

                return(results);
            }
            catch (BooleanQuery.TooManyClauses ex)
            {
                invalidQuery = true;
                log.Error("handled error for search terms " + queryText, ex);
                return(results);
            }

            return(results);
        }
Example #23
0
        /// <summary>
        /// Crawls up to 50 pages starting from a seed URL, indexes their content, then runs a
        /// sample query against the fresh index and prints the hits.
        /// </summary>
        /// <param name="args">Not used.</param>
        static void Main(string[] args)
        {
            String indexPath = @"C:\Users\Brandon\Desktop\Multimedia Retrieval\W3 Files\Index";
            // Analyzers build token streams which analyze text
            Analyzer    analyzer = new StandardAnalyzer();
            IndexWriter writer   = new IndexWriter(indexPath, analyzer, true);

            try
            {
                // Set the seedUrl and initialize the crawler
                String         seedUrl   = "http://sydney.edu.au/engineering/it/";
                WebCrawler     crawler   = new WebCrawler();
                Queue <String> linkQueue = new Queue <String>();

                linkQueue.Enqueue(seedUrl);
                HashSet <String> linkSet = new HashSet <String>();

                Console.Write("Sites Explored: 0");

                // Iteratively extract links from the first URL in the frontier
                // and add its content to the index
                while (linkQueue.Count != 0 && linkSet.Count < 50)
                {
                    String currentLink = linkQueue.Dequeue();
                    if (linkSet.Contains(currentLink))
                    {
                        continue;
                    }

                    try
                    {
                        String content = crawler.getUrlContent(currentLink);
                        crawler.getLinks(linkQueue, content, currentLink);
                        linkSet.Add(currentLink);
                        Document doc = new Document();
                        doc.Add(new Field("link", currentLink, Field.Store.YES, Field.Index.NOT_ANALYZED));
                        doc.Add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED));
                        writer.AddDocument(doc);

                        Console.Write("\rSites Explored: {0}", linkSet.Count);
                    }
                    catch (Exception)
                    {
                        // Best-effort crawl: a page that cannot be fetched or indexed is skipped.
                    }
                }
                writer.Optimize();
            }
            finally
            {
                // Always release the index write lock, even if crawling or optimizing fails.
                writer.Close();
            }
            Console.WriteLine();

            // Execute the search
            String      search   = "suits";
            QueryParser parser   = new QueryParser("content", analyzer);
            Query       query    = parser.Parse(search);
            var         searcher = new IndexSearcher(indexPath);

            try
            {
                Hits hits    = searcher.Search(query);
                int  results = hits.Length();

                Console.WriteLine("Found {0} results for \"{1}\"", results, search);
                for (int i = 0; i < results; i++)
                {
                    Document doc   = hits.Doc(i);
                    float    score = hits.Score(i);
                    Console.WriteLine("Result num {0}, score {1}", i + 1, score);
                    Console.WriteLine("URL: {0}", doc.Get("link"));
                }
            }
            finally
            {
                searcher.Close();
            }
        }
Example #24
0
        /// <summary>
        /// Runs a fuzzy search for every word of the search string and, for each hit, collects
        /// short text excerpts around the positions where a matching term occurs.
        /// </summary>
        /// <remarks>
        /// One <c>SearchResult</c> is appended to <c>SearchResults</c> per hit and per search word.
        /// Excerpts need the content field to be stored with term vectors including offsets;
        /// hits without that information are still reported, just without excerpts.
        /// </remarks>
        /// <param name="SearchString">Space-separated search words; lower-cased before searching.</param>
        public override void Search(string SearchString)
        {
            base.Search(SearchString);
            SearchString = SearchString.ToLower();
            var dir      = FSDirectory.GetDirectory(IndexPath, false);
            var searcher = new IndexSearcher(dir);

            try
            {
                // Reuse the searcher's own reader: document ids from the hits belong to it, and
                // opening a fresh reader per hit leaked one open reader for every result.
                var reader = searcher.GetIndexReader();
                var parser = new QueryParser(ContentField, new StandardAnalyzer());

                // Consecutive spaces would otherwise yield empty search words.
                foreach (var s in SearchString.Split(new [] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
                {
                    var query = parser.GetFuzzyQuery(ContentField, s, MinSimilarity);

                    Hits hits     = searcher.Search(query);
                    int  hitCount = hits.Length();

                    for (int i = 0; i < hitCount; i++)
                    {
                        Document doc    = hits.Doc(i);
                        var      result = new SearchResult {
                            Score = hits.Score(i), Plugin = doc.Get(PluginField)
                        };

                        // Read the text of the current document
                        string text = doc.Get(ContentField);
                        // Read all indexed words of this document
                        var tpv = reader.GetTermFreqVector(hits.Id(i), ContentField) as TermPositionVector;

                        if (tpv != null && text != null)
                        {
                            String[] DocTerms = tpv.GetTerms();
                            var      words    = new List <string>(DocTerms);

                            // Find the positions at which the search word occurs
                            for (int t = 0; t < DocTerms.Length; t++)
                            {
                                // Only terms that match the search word are of interest
                                if (!ContainsSearchString(SearchString, DocTerms[t], words))
                                {
                                    continue;
                                }

                                // Every occurrence of the term, each with its start and end offset
                                TermVectorOffsetInfo[] offsets = tpv.GetOffsets(t);
                                if (offsets == null)
                                {
                                    continue;
                                }

                                for (int j = 0; j < offsets.Length; j++)
                                {
                                    // Cut out a small piece of context so the user can make sense of the hit
                                    int start        = offsets[j].GetStartOffset();
                                    int end          = offsets[j].GetEndOffset();
                                    int contextStart = start - ContextLeftOffset;
                                    contextStart = contextStart < 0 ? 0 : contextStart;
                                    int contextEnd = end + ContextRightOffset;
                                    contextEnd = contextEnd > text.Length ? text.Length : contextEnd;
                                    // Extend to the end of the next word to make the excerpt more readable
                                    int nextEndSpace = text.IndexOf(" ", contextEnd);
                                    contextEnd = nextEndSpace > 0 ? nextEndSpace : contextEnd;
                                    // Maximum number of characters to scan leftwards for a space
                                    int leftSpaceOffset = contextStart;
                                    // Find the nearest space to the left of the excerpt start
                                    int nextStartSpace = text.LastIndexOf(" ", contextStart, leftSpaceOffset);
                                    // Without a space nearby the start stays where it is
                                    contextStart = nextStartSpace > 0 ? nextStartSpace : contextStart;
                                    int contextLength = contextEnd - contextStart;
                                    contextLength = contextLength > text.Length ? text.Length : contextLength;
                                    // Extract the excerpt and attach it to the result
                                    string context = text.Substring(contextStart, contextLength);
                                    result.Contexts.Add(context);
                                }
                            }
                        }
                        SearchResults.Add(result);
                    }
                }
            }
            finally
            {
                // Closing the searcher also closes the reader it opened on the directory.
                searcher.Close();
            }
        }
Example #25
0
        /// <summary>
        /// Queries the product index and returns one page of matching entries.
        /// </summary>
        /// <param name="fieldName">
        /// Name of the field to search. "ProductName", "Category", "BrandSysNo", "DealerSysNos" and
        /// "ProductGroupCode" each get a dedicated query shape; any other name is matched with a plain term query.
        /// </param>
        /// <param name="keywords">Keywords to search for.</param>
        /// <param name="pageIndex">Page number, starting at 1. Values that produce a negative offset are treated as the first page.</param>
        /// <param name="pageSize">Maximum number of records returned per page.</param>
        /// <param name="totalRecord">Receives the total number of hits for the query (not just the current page).</param>
        /// <returns>The index entries that fall on the requested page.</returns>
        /// <remarks>Created 2013-08-15 by Zhu Chengguo. Fuzzy product-name search added 2016-04-06 by Yang Hao.</remarks>
        public List <CBPdProductIndex> QueryDoc(string fieldName, string keywords, int pageIndex, int pageSize, out int totalRecord)
        {
            var search = new IndexSearcher(IndexStorePath);

            try
            {
                Query searchQuery;

                if (!string.IsNullOrEmpty(fieldName) && !string.IsNullOrEmpty(keywords))
                {
                    // Keyword query: every branch below adds its clauses to this root query.
                    var          query = new BooleanQuery();
                    BooleanQuery childQuery;

                    if (fieldName == "ProductName")
                    {
                        childQuery = new BooleanQuery();

                        // Analyzed match: the keywords are pre-split by GetKeyWordsSplitBySpace and then
                        // parsed with the PanGu analyzer.
                        var         keyWordsSplitBySpace   = GetKeyWordsSplitBySpace(keywords);
                        QueryParser productNameQueryParser = new QueryParser(global::Lucene.Net.Util.Version.LUCENE_29, "ProductName", new PanGuAnalyzer(true));
                        Query       productNameQuery       = productNameQueryParser.Parse(keyWordsSplitBySpace);
                        childQuery.Add(productNameQuery, BooleanClause.Occur.SHOULD);

                        string trimmedKeywords = keywords.Trim();

                        // Prefix match: typing "ja" finds both "java" and "javascript".
                        Query prefixQuery_productName = new PrefixQuery(new Term("ProductName", trimmedKeywords));
                        // Fuzzy match: finds terms similar to the keyword (e.g. "wuzza" may match "fuzzy" and "wuzzy").
                        Query fuzzyQuery_productName = new FuzzyQuery(new Term("ProductName", trimmedKeywords));
                        // Wildcard match: the keyword is used as the pattern unchanged.
                        Query wildcardQuery_productName = new WildcardQuery(new Term("ProductName", trimmedKeywords));

                        childQuery.Add(prefixQuery_productName, BooleanClause.Occur.SHOULD);
                        childQuery.Add(fuzzyQuery_productName, BooleanClause.Occur.SHOULD);
                        childQuery.Add(wildcardQuery_productName, BooleanClause.Occur.SHOULD);
                        childQuery.SetBoost(4.0F);

                        query.Add(childQuery, BooleanClause.Occur.MUST);
                    }
                    else if (fieldName == "Category")
                    {
                        childQuery = new BooleanQuery();

                        // Exact category match, boosted above the associated-category match.
                        var exactCategoryQuery = new BooleanQuery();
                        exactCategoryQuery.Add(new TermQuery(new Term("Category", keywords)),
                                               BooleanClause.Occur.SHOULD);
                        exactCategoryQuery.SetBoost(3.0F);
                        childQuery.Add(exactCategoryQuery, BooleanClause.Occur.SHOULD);

                        // The pattern "*,{0},*" looks for the keyword between commas in AssociationCategory.
                        var associatedCategoryQuery = new BooleanQuery();
                        associatedCategoryQuery.Add(new WildcardQuery(new Term("AssociationCategory", string.Format("*,{0},*", keywords))),
                                                    BooleanClause.Occur.SHOULD);
                        associatedCategoryQuery.SetBoost(2.8F);
                        childQuery.Add(associatedCategoryQuery, BooleanClause.Occur.SHOULD);

                        query.Add(childQuery, BooleanClause.Occur.MUST);
                    }
                    else if (fieldName == "BrandSysNo")
                    {
                        childQuery = new BooleanQuery();
                        childQuery.Add(new TermQuery(new Term("BrandSysNo", keywords)),
                                       BooleanClause.Occur.SHOULD);
                        childQuery.SetBoost(3.0F);
                        query.Add(childQuery, BooleanClause.Occur.MUST);
                    }
                    else if (fieldName == "DealerSysNos")
                    {
                        childQuery = new BooleanQuery();
                        childQuery.Add(new WildcardQuery(new Term("DealerSysNos", string.Format("*,{0},*", keywords))),
                                       BooleanClause.Occur.SHOULD);
                        childQuery.SetBoost(2.8F);
                        query.Add(childQuery, BooleanClause.Occur.MUST);
                    }
                    else if (fieldName == "ProductGroupCode")
                    {
                        childQuery = new BooleanQuery();
                        childQuery.Add(new WildcardQuery(new Term("ProductGroupCode", string.Format("*,{0},*", keywords))),
                                       BooleanClause.Occur.SHOULD);
                        childQuery.SetBoost(2.8F);
                        query.Add(childQuery, BooleanClause.Occur.MUST);
                    }
                    else
                    {
                        query.Add(new TermQuery(new Term(fieldName, keywords)),
                                  BooleanClause.Occur.SHOULD);
                    }

                    searchQuery = query;
                }
                else
                {
                    // NOTE(review): with no field or no keywords the method falls back to this hard-coded
                    // product-name pattern - confirm this default is still intended.
                    searchQuery = new WildcardQuery(new Term("ProductName", "*雪花秀*"));
                }

                // Sort order (a default-constructed Sort).
                var sort = new Sort();
                Hits hits = search.Search(searchQuery, sort);

                totalRecord = hits.Length();

                int startIndex = (pageIndex - 1) * pageSize;
                if (startIndex < 0)
                {
                    startIndex = 0;
                }

                // Exclusive upper bound of the page. The previous inclusive bound of startIndex + pageSize
                // returned pageSize + 1 rows and repeated the last row of a page at the top of the next one.
                int endIndex = Math.Min(startIndex + pageSize, totalRecord);

                List <CBPdProductIndex> lst = new List <CBPdProductIndex>();
                for (int i = startIndex; i < endIndex; i++)
                {
                    var doc = hits.Doc(i);
                    lst.Add(
                        new CBPdProductIndex
                    {
                        DocID = hits.Id(i),
                        Score = hits.Score(i),
                        AssociationCategory = doc.Get("AssociationCategory"),
                        Attributes          = doc.Get("Attributes"),
                        Barcode             = doc.Get("Barcode"),
                        BrandSysNo          = Convert.ToInt32(doc.Get("BrandSysNo")),
                        Category            = Convert.ToInt32(doc.Get("Category")),
                        DisplayOrder        = Convert.ToInt32(doc.Get("DisplayOrder")),
                        NameAcronymy        = doc.Get("NameAcronymy"),
                        Prices           = doc.Get("Prices"),
                        ProductImage     = doc.Get("ProductImage"),
                        ProductName      = doc.Get("ProductName"),
                        QRCode           = doc.Get("QRCode"),
                        Status           = Convert.ToInt32(doc.Get("Status")),
                        SysNo            = Convert.ToInt32(doc.Get("SysNo")),
                        BasicPrice       = Convert.ToDecimal(doc.Get("BasicPrice")),
                        Price            = Convert.ToDecimal(doc.Get("Price")),
                        DispalySymbol    = 0,
                        RankPrice        = 0.00M,
                        ProductGroupCode = Convert.ToString(doc.Get("ProductGroupCode")),
                        DealerSysNos     = doc.Get("DealerSysNos"),
                        WarehouseSysNos  = doc.Get("WarehouseSysNos")
                    });
                }

                return(lst);
            }
            finally
            {
                // Close the searcher even when parsing, searching or field conversion throws.
                search.Close();
            }
        }
Example #26
0
 /// <summary>
 /// Returns the score that the wrapped <c>hits</c> collection reports for the hit at position <paramref name="i"/>.
 /// </summary>
 /// <param name="i">Index of the hit, passed through unchanged to <c>hits.Score</c>.</param>
 /// <returns>The value returned by <c>hits.Score(i)</c>.</returns>
 public float Score(int i)
 {
     float hitScore = hits.Score(i);

     return hitScore;
 }
        /// <summary>
        /// Searches bills using the parameters of the current HTTP request.
        /// </summary>
        /// <remarks>
        /// Request parameters read: <c>session</c>, <c>q</c>, <c>start</c>, <c>count</c>, <c>chamber</c>,
        /// <c>status</c> (comma separated), <c>sponsor</c>, <c>cosponsor</c> and <c>sort</c>
        /// (<c>introduced</c>, <c>lastaction</c> or <c>hits</c>).
        /// The query text is tried as an exact bill number, as a public law number (non-API requests only)
        /// and as a U.S.C. reference; the Lucene full-text index is consulted only when none of those
        /// produced a match.
        /// </remarks>
        /// <param name="api">
        /// <c>true</c> for API callers: exact matches are returned as results instead of redirecting the
        /// browser, and the default page size is 1000 instead of 30.
        /// </param>
        /// <returns>
        /// <c>null</c> when the request was answered with a redirect; a table holding only
        /// <c>count</c> = 0 when nothing matched, the query contains <c>*</c>, or the query text could not
        /// be parsed; otherwise a table with <c>count</c>, <c>method</c> and <c>results</c>.
        /// </returns>
        public static Hashtable Search(bool api)
        {
            HttpRequest request = HttpContext.Current.Request;

            BillType type;
            int      number;
            int      session = -1;

            if (request["session"] != null && request["session"] != "")
            {
                session = int.Parse(request["session"]);
            }

            string q = request["q"];

            int start = 0, count = (!api ? 30 : 1000);

            if (request["start"] != null)
            {
                start = int.Parse(request["start"]);
            }
            if (request["count"] != null)
            {
                count = int.Parse(request["count"]);
            }

            BooleanQuery query = new BooleanQuery();

            Hashtable no_results = new Hashtable();

            no_results["count"] = 0;

            // Queries containing a wildcard character are rejected outright.
            if (q != null && q.IndexOf("*") > -1)
            {
                return(no_results);
            }

            // Interactive shortcut: "text/NNN" with a short numeric suffix is read as "text" in session NNN.
            if (!api && session == -1 && q != null)
            {
                int slash = q.IndexOf('/');
                if (slash >= q.Length - 4 && slash > 2)
                {
                    try {
                        session = int.Parse(q.Substring(slash + 1));                       // and if that worked...
                        q       = q.Substring(0, slash);
                        HttpContext.Current.Response.Redirect("billsearch.xpd?session=" + session + "&q=" + HttpUtility.UrlEncode(q));
                        return(null);
                    } catch {
                        // Best effort: if the suffix is not a number, fall through and search q as typed.
                    }
                }
            }

            if (session == -1)
            {
                session = Util.CurrentSession;
            }

            string    search_method = "search";
            ArrayList specs         = new ArrayList();
            Hashtable scores        = new Hashtable();

            // Match a bill number exactly
            if (q != null && Bills.ParseID(q, out type, out number))
            {
                if (!api)
                {
                    // Redirect the user right to the bill page.
                    // Don't even check if bill exists.
                    HttpContext.Current.Response.Redirect(
                        Bills.BillLink2(session, type, number));
                    return(null);
                }

                search_method = "search by bill number";
                scores[session + EnumsConv.BillTypeToString(type) + number] = 1.0F;
                specs.Add(new Database.AndSpec(
                              new Database.SpecEQ("session", session),
                              new Database.SpecEQ("type", EnumsConv.BillTypeToString(type)),
                              new Database.SpecEQ("number", number)));
            }

            // Match public law number exactly. This lookup only runs for non-API requests, so a hit
            // always ends in a redirect.
            if (!api && q != null && (q.StartsWith("P.L.") || q.StartsWith("PL")))
            {
                try {
                    string num = null;
                    if (q.StartsWith("P.L."))
                    {
                        num = q.Substring(4);
                    }
                    if (q.StartsWith("PL"))
                    {
                        num = q.Substring(2);
                    }
                    num = num.Replace(" ", "");

                    // The part before the dash is used as the session number for the lookup.
                    int dash = num.IndexOf('-');
                    int s    = int.Parse(num.Substring(0, dash));

                    TableRow bill = Util.Database.DBSelectFirst("billindex", "session, type, number",
                                                                new Database.SpecEQ("idx", "publiclawnumber"),
                                                                new Database.SpecEQ("session", s),
                                                                new Database.SpecEQ("value", num));

                    if (bill != null)
                    {
                        HttpContext.Current.Response.Redirect(Bills.BillLink3((int)bill["session"], (string)bill["type"], (int)bill["number"]));
                        return(null);
                    }
                } catch {
                    // Best effort: a malformed law number or failed lookup falls through to the other strategies.
                }
            }

            // Kept from the original flow; only has an effect if Util.CurrentSession returned -1 above.
            if (session == -1)
            {
                session = Util.CurrentSession;
            }

            // Match USC reference
            Regex uscexp = new Regex(@"(\d[0-9A-Za-z\-]*)\s+U\.?S\.?C\.?\s+(\d[0-9A-Za-z\-]*)((\s*\([^\) ]+\))*)",
                                     RegexOptions.IgnoreCase);
            Match uscmc = (q == null ? null : uscexp.Match(q));

            if (uscmc != null && uscmc.Success)
            {
                string title     = uscmc.Groups[1].Value;
                string section   = uscmc.Groups[2].Value;
                string paragraph = uscmc.Groups[3].Value;

                // Split "(a)(1)" style paragraph markers into pieces, trim empty pieces from both ends
                // and join the remainder with underscores.
                string[] ps  = paragraph.Split('[', '(', ')', ' ');
                int      psi = 0;
                while (psi < ps.Length - 1 && ps[psi] == "")
                {
                    psi++;
                }
                int pse = ps.Length - 1;
                while (pse > 0 && ps[pse] == "")
                {
                    pse--;
                }
                if (ps.Length != 0)
                {
                    paragraph = "_" + String.Join("_", ps, psi, pse - psi + 1);
                }

                Table table = Util.Database.DBSelect("billusc", "session, type, number",
                                                     new Database.SpecEQ("session", session),
                                                     new Database.OrSpec(
                                                         new Database.SpecEQ("ref", title + "_" + section + paragraph),
                                                         new Database.SpecStartsWith("ref", title + "_" + section + paragraph + "_")));
                foreach (TableRow bill in table)
                {
                    search_method = "search by U.S.C. section";
                    scores[(int)bill["session"] + (string)bill["type"] + (int)bill["number"]] = 1.0F;
                    specs.Add(new Database.AndSpec(
                                  new Database.SpecEQ("session", (int)bill["session"]),
                                  new Database.SpecEQ("type", (string)bill["type"]),
                                  new Database.SpecEQ("number", (int)bill["number"])));
                }
            }

            int total_count = -1;

            // Nothing matched exactly: fall back to the Lucene full-text index.
            if (specs.Count == 0)
            {
                if (q != null && q.Trim() != "")
                {
                    BooleanQuery query1 = new BooleanQuery();
                    query.Add(query1, BooleanClause.Occur.MUST);
                    try {
                        // Short titles are boosted above official titles, which are boosted above
                        // summary and full text.
                        Query query_titles2 = new QueryParser("shorttitles", new StandardAnalyzer()).Parse(q);
                        query_titles2.SetBoost((float)3);
                        query1.Add(query_titles2, BooleanClause.Occur.SHOULD);

                        Query query_titles1 = new QueryParser("officialtitles", new StandardAnalyzer()).Parse(q);
                        query_titles1.SetBoost((float)2);
                        query1.Add(query_titles1, BooleanClause.Occur.SHOULD);

                        Query query_summary = new QueryParser("summary", new StandardAnalyzer()).Parse(q);
                        query1.Add(query_summary, BooleanClause.Occur.SHOULD);

                        Query query_text = new QueryParser("fulltext", new StandardAnalyzer()).Parse(q);
                        query1.Add(query_text, BooleanClause.Occur.SHOULD);
                    } catch (Exception) {
                        // Query text that cannot be parsed yields an empty result rather than an error.
                        return(no_results);
                    }
                }

                string   chamber   = request["chamber"];
                string[] status    = request["status"] == null ? null : request["status"].Split(',');
                string   sponsor   = request["sponsor"];
                string   cosponsor = request["cosponsor"];
                string   sortParam = request["sort"];

                if (chamber != null && (chamber == "s" || chamber == "h"))
                {
                    query.Add(new WildcardQuery(new Term("type", chamber + "*")), BooleanClause.Occur.MUST);
                }
                if (status != null && status[0] != "")
                {
                    List <Term> terms = new List <Term>();
                    foreach (string s in status)
                    {
                        terms.Add(new Term("state", s));
                    }
                    MultiPhraseQuery mpq = new MultiPhraseQuery();
                    mpq.Add(terms.ToArray());
                    query.Add(mpq, BooleanClause.Occur.MUST);
                }
                if (sponsor != null && sponsor != "")
                {
                    query.Add(new TermQuery(new Term("sponsor", sponsor)), BooleanClause.Occur.MUST);
                }
                if (cosponsor != null && cosponsor != "")
                {
                    query.Add(new TermQuery(new Term("cosponsor", cosponsor)), BooleanClause.Occur.MUST);
                }

                IndexSearcher searcher = new IndexSearcher(Util.DataPath + Path.DirectorySeparatorChar + session + Path.DirectorySeparatorChar + "index.bills.lucene");

                try
                {
                    Sort sort = null;
                    if (sortParam == "introduced")
                    {
                        sort = new Sort(new SortField("introduced", SortField.STRING, true));
                    }
                    if (sortParam == "lastaction")
                    {
                        sort = new Sort(new SortField("lastaction", SortField.STRING, true));
                    }

                    Hits hits = searcher.Search(query, sort == null ? new Sort() : sort);

                    int end = hits.Length();
                    if (start + count < end)
                    {
                        end = start + count;
                    }
                    total_count = hits.Length();

                    for (int i = start; i < end; i++)
                    {
                        Document doc         = hits.Doc(i);
                        string   billsession = doc.Get("session");
                        string   billtype    = doc.Get("type");
                        string   billnumber  = doc.Get("number");

                        int istatus = (int)EnumsConv.BillStatusFromString(doc.Get("status"));

                        float score;
                        if (sort == null)             // readjust the score based on status
                        {
                            score = hits.Score(i) + istatus / (float)8 * (float).2;
                        }
                        else             // keep order from Lucene
                        {
                            score = -i;
                        }

                        scores[billsession + billtype + billnumber] = score;
                        specs.Add(new Database.AndSpec(
                                      new Database.SpecEQ("session", billsession),
                                      new Database.SpecEQ("type", billtype),
                                      new Database.SpecEQ("number", billnumber)));
                    }
                }
                finally
                {
                    // All documents have been read from the hits above, so the searcher can be released.
                    // It was previously never closed.
                    searcher.Close();
                }

                // Sorting by popularity replaces the Lucene-derived scores with the stored hit counters.
                if (sortParam == "hits" && specs.Count > 0)
                {
                    Table hitsinfo = Util.Database.DBSelect("billhits", "*",
                                                            Database.OrSpec.New((Database.Spec[])specs.ToArray(typeof(Database.Spec))));
                    foreach (TableRow billhits in hitsinfo)
                    {
                        scores["" + billhits["session"] + billhits["type"] + billhits["number"]] = (float)(int)billhits["hits1"];
                    }
                }
            }

            if (specs.Count == 0)
            {
                return(no_results);
            }

            Table billinfo = Util.Database.DBSelect("billstatus", "*",
                                                    Database.OrSpec.New((Database.Spec[])specs.ToArray(typeof(Database.Spec))));

            // Exact-match paths never ran Lucene, so the row count of the lookup is the total.
            if (total_count == -1)
            {
                total_count = billinfo.Rows;
            }

            ArrayList ret = new ArrayList();

            foreach (TableRow r in billinfo)
            {
                ret.Add(r);
            }

            // Order the rows using the scores collected above.
            BillHitComparer bhc = new BillHitComparer();

            bhc.scores = scores;
            ret.Sort(bhc);

            Hashtable ret2 = new Hashtable();

            ret2["count"]   = total_count;
            ret2["method"]  = search_method;
            ret2["results"] = ret;

            return(ret2);
        }
        //TODO: The last param, Cache isn't used.  Remove it and update dependant projects.  (Version 1.3)
        /// <summary>
        /// Runs <paramref name="query"/> against the index and returns one page of documents,
        /// optionally sorted and optionally limited to one document per "domain" field value.
        /// </summary>
        /// <param name="defaultQueryParser">Not used by this method.</param>
        /// <param name="customQueryParser">Parser used to turn <paramref name="query"/> into a Lucene query.</param>
        /// <param name="indexSearcher">Searcher the query is executed against.</param>
        /// <param name="query">Query text.</param>
        /// <param name="pageNumber">Page number, starting at 1.</param>
        /// <param name="pageSize">Maximum number of documents on a page.</param>
        /// <param name="shouldDocumentsBeClustered">
        /// When <c>true</c>, only the first document seen for each "domain" value is kept, documents without
        /// a "domain" field are dropped, and the reported total is the number of distinct domains seen.
        /// </param>
        /// <param name="sort">
        /// Comma separated list of "field direction" entries, e.g. <c>"date desc, title asc"</c>. The text is
        /// lower-cased before use. A direction other than "asc" sorts in reverse; an entry with no direction
        /// sorts ascending. When null or empty, documents are ordered by score multiplied by their
        /// "strength" field.
        /// </param>
        /// <param name="maximumNumberOfDocumentsToScore">Upper bound on how many hits are examined.</param>
        /// <param name="cache">Not used by this method.</param>
        /// <returns>The parsed query, the documents on the requested page, and the total number of hits.</returns>
        public static SearchResults <Document> GetDocuments(QueryParser defaultQueryParser, QueryParser customQueryParser, IndexSearcher indexSearcher, string query, int pageNumber, int pageSize, bool shouldDocumentsBeClustered, string sort, int maximumNumberOfDocumentsToScore, Cache cache)
        {
            Query query2 = customQueryParser.Parse(query);

            Hits hits = null;

            if (!string.IsNullOrEmpty(sort))
            {
                string[] sorts = sort.ToLower().Split(",".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);

                List <SortField> sortFields = new List <SortField>(sorts.Length);

                for (int i = 0; i < sorts.Length; i++)
                {
                    // Split on whitespace and drop empty pieces so that "a asc, b desc" (space after the
                    // comma) and doubled spaces parse correctly. The previous Split(' ')[1] read the wrong
                    // piece in those cases and threw when the direction was missing.
                    string[] parts = sorts[i].Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);

                    if (parts.Length == 0)
                    {
                        continue;
                    }

                    bool reverse = parts.Length > 1 && parts[1] != "asc";

                    sortFields.Add(new SortField(parts[0], reverse));
                }

                if (sortFields.Count != 0)
                {
                    hits = indexSearcher.Search(query2, new Sort(sortFields.ToArray()));
                }
                else
                {
                    // The sort text held no usable field (e.g. only separators): search without a sort.
                    hits = indexSearcher.Search(query2);
                }
            }
            else
            {
                hits = indexSearcher.Search(query2);
            }

            SearchResults <Document> searchResults = new SearchResults <Document>();

            searchResults.Documents = new List <Document>();
            searchResults.Query     = query2;

            if (hits.Length() != 0)
            {
                // Keys are the "domain" values already seen; values are unused.
                Dictionary <string, string> domains = new Dictionary <string, string>();

                PriorityQueue <Document> priorityQueue = new PriorityQueue <Document>();

                // Position of the first document that belongs on the requested page.
                int firstIndexOnPage = (pageNumber * pageSize) - pageSize;

                //Get the Hits!!!
                //TODO: Optimize this!!! (Version 1.3)
                for (int j = 0; j < hits.Length() && searchResults.Documents.Count < maximumNumberOfDocumentsToScore && priorityQueue.Count < maximumNumberOfDocumentsToScore; j++)
                {
                    Document document = hits.Doc(j);

                    float score = hits.Score(j);

                    // Attach the hit position and score to the returned document as stored-only fields.
                    document.Add(new Field("documentid", j.ToString(), Field.Store.YES, Field.Index.NO));
                    document.Add(new Field("relevancyscore", score.ToString(), Field.Store.YES, Field.Index.NO));

                    if (!string.IsNullOrEmpty(sort))
                    {
                        // Explicit sort: keep the order Lucene returned and page directly over the hits.
                        if (shouldDocumentsBeClustered)
                        {
                            if (document.GetField("domain") != null)
                            {
                                string domain = document.GetField("domain").StringValue();

                                if (!domains.ContainsKey(domain))
                                {
                                    domains.Add(domain, null);

                                    if (searchResults.Documents.Count < pageSize && j >= firstIndexOnPage)
                                    {
                                        searchResults.Documents.Add(document);
                                    }
                                }
                            }
                        }
                        else
                        {
                            if (searchResults.Documents.Count < pageSize && j >= firstIndexOnPage)
                            {
                                searchResults.Documents.Add(document);
                            }
                        }
                    }
                    else
                    {
                        // No explicit sort: queue by score weighted with the document's "strength" field.
                        // NOTE(review): assumes every document has a parseable "strength" field - confirm.
                        priorityQueue.Enqueue(document, score * double.Parse(document.GetField("strength").StringValue()));
                    }
                }

                if (string.IsNullOrEmpty(sort))
                {
                    for (int i = 0; i < hits.Length() && priorityQueue.Count != 0; i++)
                    {
                        Document document = priorityQueue.Dequeue();

                        if (shouldDocumentsBeClustered)
                        {
                            if (document.GetField("domain") != null)
                            {
                                string domain = document.GetField("domain").StringValue();

                                if (!domains.ContainsKey(domain))
                                {
                                    domains.Add(domain, null);

                                    if (searchResults.Documents.Count < pageSize && i >= firstIndexOnPage)
                                    {
                                        searchResults.Documents.Add(document);
                                    }
                                }
                                else
                                {
                                    // A repeated domain does not take up a position in the paging sequence.
                                    i--;
                                }
                            }
                        }
                        else
                        {
                            if (searchResults.Documents.Count < pageSize && i >= firstIndexOnPage)
                            {
                                searchResults.Documents.Add(document);
                            }
                        }
                    }
                }

                if (shouldDocumentsBeClustered)
                {
                    searchResults.TotalNumberOfHits = domains.Count;
                }
                else
                {
                    searchResults.TotalNumberOfHits = hits.Length();
                }
            }

            return(searchResults);
        }
Example #29
0
        /// <summary>
        /// Runs the current <c>searchterm</c> as a title-boosted query and writes the result to the client:
        /// status headers, the total hit count, then either a spelling suggestion (no hits) or the
        /// requested slice of results that pass the namespace filter.
        /// </summary>
        /// <param name="offset">Number of filtered matches to skip before results are sent.</param>
        /// <param name="limit">Number of filtered matches to send after the skipped ones.</param>
        /// <param name="namespaces">Filter deciding which page namespaces are included.</param>
        private void DoNormalSearch(int offset, int limit, NamespaceFilter namespaces)
        {
            string queryText = String.Format("title:({0})^4 OR ({1})", searchterm, searchterm);

            DateTime now = DateTime.UtcNow;
            Query    query;

            // A parse failure is most likely caused by stray metacharacters, so retry once with
            // every character of the search term backslash-escaped.
            try
            {
                query = state.Parse(queryText);
            }
            catch (Exception)
            {
                string escaped = "";
                foreach (char c in searchterm)
                {
                    escaped += "\\" + c;
                }
                queryText = "title:(" + escaped + ")^4 OR (" + escaped + ")";

                try
                {
                    query = state.Parse(queryText);
                }
                catch (Exception parseError)
                {
                    log.Error("Problem parsing search term: " + parseError.Message + "\n" + parseError.StackTrace);
                    return;
                }
            }

            Hits hits;

            try
            {
                hits = state.Searcher.Search(query);
            }
            catch (Exception searchError)
            {
                log.Error("Error searching: " + searchError.Message + "\n" + searchError.StackTrace);
                return;
            }

            SendHeaders(200, "OK");

            int numhits = hits.Length();

            LogRequest(searchterm, query, numhits, now);

            SendOutputLine(numhits.ToString());

            if (numhits == 0)
            {
                // Nothing found: send a URL-encoded spelling suggestion instead of results.
                SendOutputLine(HttpUtility.UrlEncode(MakeSpelFix(searchterm), Encoding.UTF8));
                return;
            }

            // Lucene's filters seem to want to run over the entire document set, which is really slow,
            // so namespaces are checked hit by hit and the scan stops once enough results were sent.
            // Upside: the first N documents come back quickly. Downside: the hit count sent above
            // covers all namespaces combined.
            int lastWanted = limit + offset;
            int matches    = 0;

            for (int i = 0; i < numhits && i < maxoffset; i++)
            {
                Document doc           = hits.Doc(i);
                string   pageNamespace = doc.Get("namespace");

                if (!namespaces.filter(pageNamespace))
                {
                    continue;
                }

                // Count every filtered match, but only send those past the requested offset.
                matches++;
                if (matches <= offset)
                {
                    continue;
                }

                string title = doc.Get("title");
                float  score = hits.Score(i);
                SendResultLine(score, pageNamespace, title);

                if (matches >= lastWanted)
                {
                    break;
                }
            }
        }
        /// <summary>
        /// Command-line test driver: builds a "more like this" query from a URL or a
        /// local file, runs it against an index, and prints up to 25 matching documents.
        /// </summary>
        /// <param name="a">
        /// Command-line arguments. Recognised options, each followed by a value:
        /// <c>-i</c> index name/path, <c>-f</c> file to parse, <c>-url</c> URL to parse.
        /// Unrecognised arguments are ignored. When both a URL and a file are available,
        /// the URL takes precedence.
        /// </param>
        public static void  Main(String[] a)
        {
            String indexName = "localhost_index";
            String fn        = "c:/Program Files/Apache Group/Apache/htdocs/manual/vhosts/index.html.en";
            Uri    url       = null;

            for (int i = 0; i < a.Length; i++)
            {
                String option = a[i];

                if (!option.Equals("-i") && !option.Equals("-f") && !option.Equals("-url"))
                {
                    // Unknown arguments are silently skipped.
                    continue;
                }

                // Every recognised option consumes the next argument as its value;
                // guard against the option being the last token on the command line.
                if (i + 1 >= a.Length)
                {
                    Console.Error.WriteLine("Missing value for option " + option);
                    return;
                }

                String value = a[++i];

                if (option.Equals("-i"))
                {
                    indexName = value;
                }
                else if (option.Equals("-f"))
                {
                    fn = value;
                }
                else
                {
                    url = new Uri(value);
                }
            }

            // Wraps stdout; intentionally not disposed so the console stream stays open.
            StreamWriter o = new StreamWriter(Console.OpenStandardOutput(), Console.Out.Encoding);
            o.AutoFlush = true;

            IndexReader r = IndexReader.Open(indexName);

            try
            {
                o.WriteLine("Open index " + indexName + " which has " + r.NumDocs() + " docs");

                LuceneMoreLikeThis mlt = new LuceneMoreLikeThis(r);

                o.WriteLine("Query generation parameters:");
                o.WriteLine(mlt.DescribeParams());
                o.WriteLine();

                Query query = null;

                if (url != null)
                {
                    o.WriteLine("Parsing URL: " + url);
                    query = mlt.Like(url);
                }
                else if (fn != null)
                {
                    o.WriteLine("Parsing file: " + fn);
                    query = mlt.Like(new FileInfo(fn));
                }

                o.WriteLine("q: " + query);
                o.WriteLine();

                IndexSearcher searcher = new IndexSearcher(indexName);

                try
                {
                    Hits hits = searcher.Search(query);
                    int  len  = hits.Length();

                    o.WriteLine("found: " + len + " documents matching");
                    o.WriteLine();

                    int shown = Math.Min(25, len);

                    for (int i = 0; i < shown; i++)
                    {
                        Document d       = hits.Doc(i);
                        String   summary = d.Get("summary");
                        o.WriteLine("score  : " + hits.Score(i));
                        o.WriteLine("url    : " + d.Get("url"));
                        o.WriteLine("\ttitle  : " + d.Get("title"));
                        if (summary != null)
                        {
                            o.WriteLine("\tsummary: " + summary);
                        }
                        o.WriteLine();
                    }
                }
                finally
                {
                    // Release the searcher's index files even if the search or output fails.
                    searcher.Close();
                }
            }
            finally
            {
                // Release the reader's index files on every exit path.
                r.Close();
            }
        }