Beispiel #1
0
        /// <summary>
        /// Searches a Lucene index on disk and returns the documents of one result page.
        /// </summary>
        /// <param name="directoryPath">File-system path of the Lucene index directory.</param>
        /// <param name="query">Query to execute.</param>
        /// <param name="sort">Sort order; when <c>null</c> the search overload without a sort is used.</param>
        /// <param name="pagerInfo">
        /// Supplies the 1-based <c>PageIndex</c> and the <c>PageSize</c>; its <c>RecordCount</c>
        /// is set to the total number of hits.
        /// </param>
        /// <param name="callback">Optional action invoked for every document that is returned.</param>
        /// <returns>The documents of the requested page; an empty list when the page holds no hits.</returns>
        public static List <Document> SearchLuceneData(string directoryPath, Query query, Sort sort, PagerInfo pagerInfo, Action <Document> callback)
        {
            List<Document> list = new List<Document>();

            FSDirectory   directory     = FSDirectory.Open(new System.IO.DirectoryInfo(directoryPath), new NoLockFactory());
            IndexReader   indexReader   = null;
            IndexSearcher indexSearcher = null;

            try
            {
                indexReader   = IndexReader.Open(directory, true);
                indexSearcher = new IndexSearcher(indexReader);

                // Never pass a hit limit of 0 (empty index) to Search.
                int maxHits = Math.Max(1, indexSearcher.MaxDoc());

                // TopFieldDocs derives from TopDocs, so both overloads can share one code path.
                TopDocs result;
                if (sort != null)
                {
                    result = indexSearcher.Search(query, null, maxHits, sort);
                }
                else
                {
                    result = indexSearcher.Search(query, null, maxHits);
                }

                pagerInfo.RecordCount = result.totalHits;

                ScoreDoc[] docs = result.scoreDocs;

                // Clamp both ends so an out-of-range page yields an empty list
                // instead of an IndexOutOfRangeException.
                int startOffset = Math.Max(0, (pagerInfo.PageIndex - 1) * pagerInfo.PageSize);
                int endOffset   = Math.Min(pagerInfo.PageIndex * pagerInfo.PageSize, docs.Length);

                for (int i = startOffset; i < endOffset; i++)
                {
                    Document doc = indexSearcher.Doc(docs[i].doc);

                    list.Add(doc);
                    if (callback != null)
                    {
                        callback(doc);
                    }
                }
            }
            finally
            {
                // Release everything even when the search or the callback throws.
                if (indexSearcher != null)
                {
                    indexSearcher.Close();
                }
                // A searcher created from an existing reader does not close that reader itself.
                if (indexReader != null)
                {
                    indexReader.Close();
                }
                directory.Close();
            }

            return list;
        }
Beispiel #2
0
    /// <summary>
    /// Runs a wildcard query against the "path" field of the index stored in the
    /// "LuceneIndex" directory and prints every hit with its score to the console.
    /// </summary>
    /// <param name="searchText">Wildcard pattern matched against the "path" field.</param>
    private static void SearchSomething(String searchText)
    {
        Directory     directory = FSDirectory.Open(new DirectoryInfo("LuceneIndex"));
        IndexSearcher searcher  = null;

        try
        {
            searcher = new IndexSearcher(directory, true);

            int maxDoc = searcher.MaxDoc();

            // Only search when the index holds documents, so the hit limit is never 0.
            if (maxDoc > 0)
            {
                BooleanQuery            booleanQuery = new BooleanQuery();
                Lucene.Net.Search.Query query1       = new WildcardQuery(new Term("path", searchText));
                booleanQuery.Add(query1, BooleanClause.Occur.SHOULD);

                TopDocs topDocs = searcher.Search(booleanQuery, maxDoc);
                int     results = topDocs.ScoreDocs.Length;

                Console.WriteLine("Found {0} results", results);
                for (int i = 0; i < results; i++)
                {
                    ScoreDoc scoreDoc = topDocs.ScoreDocs[i];
                    Document doc      = searcher.Doc(scoreDoc.Doc);
                    Console.WriteLine("Result num {0}, score {1}", i + 1, scoreDoc.Score);
                    Console.WriteLine("Text found: {0}\r\n", doc.Get("path"));
                }
            }
        }
        finally
        {
            // Close the searcher and directory even if searching or printing fails.
            if (searcher != null)
            {
                searcher.Close();
            }
            directory.Close();
        }
    }
Beispiel #3
0
        /// <summary>
        /// Executes <paramref name="query"/> against the shared index searcher and deserializes
        /// the stored "Data" field of every hit into <typeparamref name="T"/>.
        /// </summary>
        /// <typeparam name="T">Reference type the stored JSON payload is expected to deserialize to.</typeparam>
        /// <param name="query">Query to execute.</param>
        /// <param name="sortOptions">Optional single-field sort; when <c>null</c> hits are collected by score.</param>
        /// <returns>
        /// The deserialized hits. Hits without a "Data" value, or whose payload does not
        /// deserialize to <typeparamref name="T"/>, are skipped.
        /// </returns>
        public static List <T> RawQuery <T>(Query query, SortOptions sortOptions = null)
            where T : class
        {
            List<T> results = new List<T>();

            // NOTE(review): the searcher comes from GetIndexSearcher() and is not closed here;
            // ownership is assumed to lie with that helper - confirm.
            IndexSearcher searcher = GetIndexSearcher();

            int maxDoc = searcher.MaxDoc();
            if (maxDoc <= 0)
            {
                // Nothing to search in an empty index; also avoids a zero-capacity collector.
                return results;
            }

            TopDocsCollector collector;

            if (sortOptions == null)
            {
                collector = TopScoreDocCollector.create(maxDoc, true);
            }
            else
            {
                collector = TopFieldCollector.create(
                    new Sort(new SortField(sortOptions.FieldName, (int)sortOptions.FieldType, sortOptions.Ascending)),
                    maxDoc,
                    false,
                    false,
                    false,
                    true
                    );
            }

            searcher.Search(query, collector);

            foreach (ScoreDoc scoreDoc in collector.TopDocs().ScoreDocs)
            {
                Document doc  = searcher.Doc(scoreDoc.doc);
                var      data = doc.Get("Data");

                if (data == null)
                {
                    // JsonConvert.DeserializeObject throws on a null string; a document
                    // without a payload must not abort the whole query.
                    continue;
                }

                var result = JsonConvert.DeserializeObject(data, _jsonSerializerSettings) as T;

                if (result != null)
                {
                    results.Add(result);
                }
            }

            return results;
        }
Beispiel #4
0
        /// <summary>
        /// Reads every live document from the index in the "mtad" sub-directory of the
        /// current working directory and maps it to a <c>SearchItem</c>.
        /// </summary>
        /// <returns>One <c>SearchItem</c> per non-deleted document in the index.</returns>
        public static List <SearchItem> GetAllItems()
        {
            string path = Directory.GetCurrentDirectory() + @"\mtad\";

            var searchItems = new List<SearchItem>();

            // Open the index directory and create a read-only searcher on it.
            var           indexDirectory = FSDirectory.Open(new System.IO.DirectoryInfo(path));
            IndexSearcher searcher       = null;

            try
            {
                searcher = new IndexSearcher(indexDirectory, true);

                var reader = searcher.GetIndexReader();

                // MaxDoc is one greater than the largest document id, so it also counts deleted slots.
                var count = searcher.MaxDoc();

                for (int i = 0; i < count; i++)
                {
                    if (reader.IsDeleted(i))
                    {
                        // Loading a deleted document id throws, so skip those slots.
                        continue;
                    }

                    var document = searcher.Doc(i);

                    searchItems.Add(new SearchItem
                    {
                        Id        = document.GetValues("id")[0],
                        TableName = document.GetValues("tablename")[0],
                        Acronym   = document.GetValues("acronym")[0],
                        English   = document.GetValues("english")[0],
                        Chinese   = document.GetValues("chinese")[0],
                        Explain   = document.GetValues("explain")[0]
                    });
                }
            }
            finally
            {
                // The original never released the searcher or the directory.
                if (searcher != null)
                {
                    searcher.Close();
                }
                indexDirectory.Close();
            }

            return searchItems;
        }
        /// <summary>
        /// Searches <paramref name="searchfield"/> for documents matching any of the given
        /// keywords (the keywords are combined with <c>OR</c>).
        /// </summary>
        /// <param name="searcher">Searcher to query.</param>
        /// <param name="searchfield">Default field handed to the query parser.</param>
        /// <param name="keywords">Keywords to OR together; each one is passed to the parser unescaped.</param>
        /// <param name="max_doc_num">Upper bound on the number of hits returned.</param>
        /// <returns>
        /// The matching score docs; an empty array when there are no keywords or the
        /// effective hit limit is not positive.
        /// </returns>
        internal static ScoreDoc[] Search(IndexSearcher searcher, string searchfield, List <string> keywords, int max_doc_num)
        {
            int maxHits = Math.Min(searcher.MaxDoc(), max_doc_num);

            // An empty keyword list would index keywords[-1]; a non-positive hit limit
            // (e.g. empty index) must not be passed on to Search.
            if (keywords == null || keywords.Count == 0 || maxHits <= 0)
            {
                return new ScoreDoc[0];
            }

            QueryParser queryparser = new QueryParser(version, searchfield, standardAnalyzer);

            // Same text as appending keyword + " OR " in a loop, without the repeated concatenation.
            string queryStr = string.Join(" OR ", keywords.ToArray());

            Query   query = queryparser.Parse(queryStr);
            TopDocs hits  = searcher.Search(query, null, maxHits);

            return hits.scoreDocs;
        }
Beispiel #6
0
        /// <summary>
        /// Loads every document of the index in <paramref name="dir"/> and converts each one
        /// with <c>ParseCard</c>.
        /// </summary>
        /// <param name="dir">Lucene directory containing the index; it is not closed by this method.</param>
        /// <param name="processchanged">
        /// Optional progress callback, invoked after each document with the total number of
        /// documents and the 1-based count processed so far.
        /// </param>
        /// <returns>The parsed cards; an empty array when the index holds no documents.</returns>
        public CardDescription[] Read(Lucene.Net.Store.Directory dir, ReadProcessChangedInvoker processchanged)
        {
            ArrayList cards    = new ArrayList(MinCapacity);
            Searcher  searcher = new IndexSearcher(dir, true);

            try
            {
                int maxDoc = searcher.MaxDoc();

                // Only search a non-empty index, so the hit limit is never 0.
                if (maxDoc > 0)
                {
                    TopDocs    td     = searcher.Search(new MatchAllDocsQuery(), null, maxDoc);
                    ScoreDoc[] docs   = td.scoreDocs;
                    int        length = docs.Length;

                    for (int i = 0; i < length; i++)
                    {
                        Document doc = searcher.Doc(docs[i].doc);
                        cards.Add(ParseCard(doc));
                        if (processchanged != null)
                        {
                            processchanged.Invoke(length, i + 1);
                        }
                    }
                }
            }
            finally
            {
                // Release the searcher even when parsing or the progress callback throws.
                searcher.Close();
            }

            return (CardDescription[])cards.ToArray(typeof(CardDescription));
        }
Beispiel #7
0
        /// <summary>
        /// Opens the index at <c>pathIndex</c> and reports its size.
        /// </summary>
        /// <returns>
        /// The searcher's <c>MaxDoc()</c> value as a string, or "-" when the index cannot be
        /// opened or read (an <see cref="IOException"/> is raised).
        /// </returns>
        public string checkIndex()
        {
            try
            {
                searcher = new IndexSearcher(this.pathIndex);
                try
                {
                    // Read the count while the searcher is still open; the original
                    // queried it only after Close().
                    return searcher.MaxDoc().ToString();
                }
                finally
                {
                    searcher.Close();
                }
            }
            catch (IOException)
            {
                return "-";
            }
        }
Beispiel #8
0
        /// <summary>
        /// Verifies that the index at <c>pathIndex</c> can be opened. On success the document
        /// count is reported through <c>status</c>; on an <see cref="IOException"/> the index
        /// is rebuilt via <c>FncRebuildIndex</c>.
        /// </summary>
        public void checkIndex()
        {
            int documentCount;

            try
            {
                searcher = new IndexSearcher(this.pathIndex);
                try
                {
                    // Read the count while the searcher is still open; the original
                    // queried it only after Close().
                    documentCount = searcher.MaxDoc();
                }
                finally
                {
                    searcher.Close();
                }
            }
            catch (IOException)
            {
                // The index does not exist or is damaged: rebuild it instead of reporting a status.
                FncRebuildIndex(this.FilePath);
                return;
            }

            string msg = String.Format("Index is ready. It contains {0} documents.", documentCount);

            status(msg);
        }
Beispiel #9
0
        /// <summary>
        /// Runs <paramref name="query"/> on the reader held in <c>_rd</c> and returns the hits
        /// with scores normalized against the best score of the result set.
        /// </summary>
        /// <param name="query">Query to execute.</param>
        /// <param name="sort">Sort order; when <c>null</c> a default <c>Sort</c> instance is used.</param>
        /// <returns>Hits whose normalized score is at least 0.001, in collector order.</returns>
        public List <LuceneResult> Search(Query query, Sort sort)
        {
            var indexSearcher = new IndexSearcher(_rd);
            var effectiveSort = sort ?? new Sort();
            var hitCollector  = TopFieldCollector.create(effectiveSort, indexSearcher.MaxDoc(), false, true, true, sort == null);

            indexSearcher.Search(query, hitCollector);

            var topDocs  = hitCollector.TopDocs();
            var topScore = topDocs.GetMaxScore();

            // A zero maximum would cause a division by zero below; substitute 1.
            if (topScore == 0)
            {
                topScore = 1;
            }

            var matches = new List<LuceneResult>();

            foreach (var hit in topDocs.scoreDocs)
            {
                var normalized = hit.score / topScore;

                if (normalized >= 0.001f)
                {
                    matches.Add(new LuceneResult(indexSearcher.Doc(hit.doc), normalized));
                }
            }

            return matches;
        }
Beispiel #10
0
        /// <summary>
        /// Opens the Lucene index stored in <paramref name="dirname"/>, loads every document
        /// and converts each one with <c>ParseCard</c>.
        /// </summary>
        /// <param name="dirname">Path of the index directory; a trailing backslash is appended when missing.</param>
        /// <param name="processchanged">
        /// Optional progress callback, invoked after each document with the total number of
        /// documents and the 1-based count processed so far.
        /// </param>
        /// <returns>
        /// The parsed cards, or <c>null</c> when <paramref name="dirname"/> is null, empty or
        /// does not exist.
        /// </returns>
        public override CardDescription[] Read(string dirname, ReadProcessChangedInvoker processchanged)
        {
            if (string.IsNullOrEmpty(dirname) || !Directory.Exists(dirname))
            {
                return null;
            }

            if (dirname[dirname.Length - 1] != '\\')
            {
                dirname += "\\";
            }

            ArrayList cards = new ArrayList(MinCapacity);

            Lucene.Net.Store.Directory dir = new Lucene.Net.Store.SimpleFSDirectory(new DirectoryInfo(dirname), new Lucene.Net.Store.SimpleFSLockFactory());
            Searcher searcher = null;

            try
            {
                searcher = new IndexSearcher(dir, true);

                int maxDoc = searcher.MaxDoc();

                // Only search a non-empty index, so the hit limit is never 0.
                if (maxDoc > 0)
                {
                    TopDocs    td     = searcher.Search(new MatchAllDocsQuery(), null, maxDoc);
                    ScoreDoc[] docs   = td.scoreDocs;
                    int        length = docs.Length;

                    for (int i = 0; i < length; i++)
                    {
                        Document doc = searcher.Doc(docs[i].doc);
                        cards.Add(ParseCard(doc));
                        if (processchanged != null)
                        {
                            processchanged.Invoke(length, i + 1);
                        }
                    }
                }
            }
            finally
            {
                // Release the searcher and the directory this method opened, even on failure.
                if (searcher != null)
                {
                    searcher.Close();
                }
                dir.Close();
            }

            return (CardDescription[])cards.ToArray(typeof(CardDescription));
        }
        /// <summary>
        /// Parses <paramref name="queryStr"/> (with all '-' characters removed) against
        /// <paramref name="queryField"/> and returns the ids of the matching documents.
        /// </summary>
        /// <param name="searcher">Searcher to query.</param>
        /// <param name="queryStr">Query text in query-parser syntax; null, empty or blank yields no results.</param>
        /// <param name="queryField">Default field handed to the query parser.</param>
        /// <param name="docCnt">Maximum number of hits; -1 (the default) means up to <c>searcher.MaxDoc()</c>.</param>
        /// <returns>Document ids of the hits in result order; an empty list when there is nothing to search.</returns>
        public static List <int> Search(IndexSearcher searcher, string queryStr, string queryField, int docCnt = -1)
        {
            List<int> docIDs = new List<int>();

            if (queryStr == null)
            {
                // The original dereferenced queryStr before its own null/empty check.
                return docIDs;
            }

            queryStr = queryStr.Replace("-", "");
            if (queryStr.Trim().Length == 0)
            {
                // Nothing left to parse once the dashes are removed.
                return docIDs;
            }

            if (docCnt == -1)
            {
                docCnt = searcher.MaxDoc();
            }
            if (docCnt == 0)
            {
                // Empty index or explicit zero limit: no hits can be returned.
                return docIDs;
            }

            QueryParser queryparser = new QueryParser(version, queryField, standardAnalyzer);
            Query       query       = queryparser.Parse(queryStr);

            foreach (var scoreDoc in searcher.Search(query, null, docCnt).scoreDocs)
            {
                docIDs.Add(scoreDoc.doc);
            }

            return docIDs;
        }
Beispiel #12
0
        /// <summary>
        /// Static constructor: builds an in-memory (RAMDirectory) index from the rows of
        /// <c>Data</c> and opens <c>Searcher</c> on it.
        /// </summary>
        /// <remarks>
        /// A single <c>Document</c> with a fixed set of fields and token streams is reused for
        /// every row; the loop only resets their values before each <c>AddDocument</c> call, so
        /// the order of the statements inside the loop matters.
        /// </remarks>
        static TestIndex()
        {
            Directory   directory = new RAMDirectory();
            // null is passed as the second constructor argument; the path/content/externals
            // fields below are supplied as token streams.
            IndexWriter writer    = new IndexWriter(directory, null, true);

            writer.SetMaxFieldLength(MaxNumberOfTermsPerDocument);
            // Reusable token streams and fields; their text/value is replaced per row.
            var      pathTokenStream      = new PathTokenStream("");
            var      contentTokenStream   = new SimpleTokenStream("");
            var      externalsTokenStream = new PathTokenStream("");
            Field    field_id             = new Field("id", "", Field.Store.YES, Field.Index.UN_TOKENIZED);
            Field    field_rev_first      = new Field(FieldName.RevisionFirst, "", Field.Store.NO, Field.Index.UN_TOKENIZED);
            Field    field_rev_last       = new Field(FieldName.RevisionLast, "", Field.Store.NO, Field.Index.UN_TOKENIZED);
            Document doc = new Document();

            doc.Add(field_id);
            doc.Add(new Field(FieldName.Path, pathTokenStream));
            doc.Add(new Field(FieldName.Content, contentTokenStream));
            doc.Add(new Field(FieldName.Externals, externalsTokenStream));
            doc.Add(field_rev_first);
            doc.Add(field_rev_last);
            for (int i = 0; i < Data.GetLength(0); ++i)
            {
                // Column 1 of each row is the document id; it is also used as the path text.
                string id = Data[i, 1];
                field_id.SetValue(id);
                pathTokenStream.SetText(id);
                int rev_first = Revision.Head;
                if (id.StartsWith("/revisions"))
                {
                    // "/revisions" rows: no content/externals text; column 2 is parsed as the first revision.
                    contentTokenStream.SetText("");
                    externalsTokenStream.SetText("");
                    rev_first = int.Parse(Data[i, 2]);
                }
                else
                {
                    // All other rows: column 2 is the content text, column 3 the externals text.
                    contentTokenStream.SetText(Data[i, 2]);
                    externalsTokenStream.SetText(Data[i, 3]);
                }
                field_rev_first.SetValue(RevisionFieldValue(rev_first));
                field_rev_last.SetValue(HeadRevisionFieldValue());
                writer.AddDocument(doc);

                if (id.StartsWith("/revisions") && Data[i, 3] != null) // update last revision
                {
                    // Change the last revision
                    // Warning: It is not possible to load a document from the index
                    // We have to rebuild/reparse it from the scratch
                    writer.DeleteDocuments(new Term("id", id));
                    // The token streams are set again before the document is re-added.
                    pathTokenStream.SetText(id);
                    contentTokenStream.SetText("");
                    externalsTokenStream.SetText("");
                    // Column 3 is parsed as the last revision of this row.
                    int rev_last = int.Parse(Data[i, 3]);
                    field_rev_last.SetValue(RevisionFieldValue(rev_last));
                    // The replacement document gets the id "<id>@<rev_first>", and the new id
                    // is written back into Data as well.
                    id        += "@" + rev_first;
                    Data[i, 1] = id;
                    field_id.SetValue(id);
                    writer.AddDocument(doc);
                }
            }

            // delete non existent document test
            writer.DeleteDocuments(new Term("id", "bliflaiwj123dj33"));

            writer.Optimize();
            writer.Close();

            Searcher = new IndexSearcher(directory);
            // After Optimize the index is expected to hold exactly one document per Data row.
            Assert.AreEqual(Data.GetLength(0), Searcher.MaxDoc()); // smoke test for index creation
        }
Beispiel #13
0
        /// <summary>
        /// Searches the title and body fields for <paramref name="key"/>, optionally restricted
        /// to the given folders, and returns one page of highlighted results.
        /// </summary>
        /// <param name="key">
        /// Search text; it is trimmed, lower-cased and escaped before parsing. When it is null
        /// or empty but folders are supplied, the match-all query "*:*" is used instead.
        /// </param>
        /// <param name="pageIndex">1-based page number.</param>
        /// <param name="pageSize">Number of results per page.</param>
        /// <param name="folders">Optional "FolderName" values; a document must match at least one of them.</param>
        /// <returns>
        /// The requested page together with the total hit count; an empty page when the index
        /// does not exist or neither a key nor folders were supplied.
        /// </returns>
        public virtual PagedList <Models.ResultObject> Search(string key, int pageIndex, int pageSize, params string[] folders)
        {
            var           indexDirectory = FSDirectory.Open(new DirectoryInfo(indexDir));
            IndexSearcher searcher       = null;

            try
            {
                bool hasFolders = folders != null && folders.Length > 0;

                if (!IndexReader.IndexExists(indexDirectory) || (string.IsNullOrEmpty(key) && !hasFolders))
                {
                    return new PagedList<ResultObject>(new ResultObject[0], pageIndex, pageSize, 0);
                }

                var query = new BooleanQuery();

                // key may still be null here when only folders were supplied; the original
                // called key.Trim() unconditionally and threw in that case.
                key = QueryParser.Escape((key ?? string.Empty).Trim().ToLower());

                if (string.IsNullOrEmpty(key))
                {
                    key = "*:*";
                }

                // Title matches are boosted over body matches.
                QueryParser titleParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, Converter.TitleFieldName, this.Analyzer);
                var         titleQuery  = titleParser.Parse(key);

                titleQuery.SetBoost(2);
                query.Add(titleQuery, BooleanClause.Occur.SHOULD);

                QueryParser bodyParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, Converter.BodyFieldName, this.Analyzer);
                var         bodyQuery  = bodyParser.Parse(key);

                bodyQuery.SetBoost(1);
                query.Add(bodyQuery, BooleanClause.Occur.SHOULD);

                QueryWrapperFilter filter = null;

                if (hasFolders)
                {
                    var folderQuery = new BooleanQuery();
                    foreach (var folder in folders)
                    {
                        var termQuery = new TermQuery(new Term("FolderName", folder));
                        termQuery.SetBoost(3);
                        folderQuery.Add(termQuery, BooleanClause.Occur.SHOULD);
                    }

                    filter = new QueryWrapperFilter(folderQuery);
                }

                searcher = new IndexSearcher(indexDirectory, true);
                TopDocsCollector collecltor = TopScoreDocCollector.create(searcher.MaxDoc(), false);

                if (filter == null)
                {
                    searcher.Search(query, collecltor);
                }
                else
                {
                    searcher.Search(query, filter, collecltor);
                }

                Lucene.Net.Highlight.Highlighter lighter =
                    new Highlighter(new SimpleHTMLFormatter("<strong class='highlight'>", "</strong>"), new Lucene.Net.Highlight.QueryScorer((Query)query));

                var startIndex = (pageIndex - 1) * pageSize;

                List<ResultObject> results = new List<ResultObject>();

                // Results are fully materialized here, before the searcher is closed below.
                foreach (var doc in collecltor.TopDocs(startIndex, pageSize).ScoreDocs)
                {
                    var          document = searcher.Doc(doc.doc);
                    ResultObject result   = Converter.ToResultObject(lighter, document);
                    if (result != null)
                    {
                        results.Add(result);
                    }
                }

                return new PagedList<ResultObject>(results, pageIndex, pageSize, collecltor.GetTotalHits());
            }
            finally
            {
                // The original never released the searcher or the directory.
                if (searcher != null)
                {
                    searcher.Close();
                }
                indexDirectory.Close();
            }
        }
Beispiel #14
0
        /// <summary>
        /// Copies the documents of the index in <paramref name="inputFolder"/> into a new index
        /// in <paramref name="outputFolder"/>, adding "Hashtag", "Mention", "Retweet" and "Word"
        /// fields derived from each document's "Text" field.
        /// </summary>
        /// <remarks>
        /// Only documents with fewer than 5 hashtags and more than 3 words are written. The
        /// current document id is printed to the console every 10000 ids as a progress indicator.
        /// </remarks>
        /// <param name="inputFolder">Directory of the source index.</param>
        /// <param name="outputFolder">Directory of the index to write.</param>
        public static void RawIndexToIndex(string inputFolder, string outputFolder)
        {
            IndexWriter tweetWriter = new IndexWriter(new SimpleFSDirectory(new DirectoryInfo(outputFolder)),
                                                      new StandardAnalyzer(Version.LUCENE_29), new IndexWriter.MaxFieldLength(int.MaxValue));

            try
            {
                HashSet<string> stopwords = new HashSet<string>(Stopwords);

                var           directory = FSDirectory.Open(new DirectoryInfo(inputFolder));
                IndexSearcher searcher  = null;

                try
                {
                    searcher = new IndexSearcher(directory);

                    var reader = searcher.GetIndexReader();
                    int maxDoc = searcher.MaxDoc();

                    for (int i = 0; i < maxDoc; i++)
                    {
                        if (i % 10000 == 0)
                        {
                            Console.Out.WriteLine(i);
                        }

                        if (reader.IsDeleted(i))
                        {
                            // Loading a deleted document id throws, so skip those slots.
                            continue;
                        }

                        Document doc  = searcher.Doc(i);
                        string   text = doc.Get("Text");

                        if (text == null)
                        {
                            // Without a "Text" value there is nothing to analyze (Split would throw).
                            continue;
                        }

                        var      type  = AnalyzeTweet(text, stopwords);
                        var      dic   = RefineTweet(text, type, stopwords);
                        string[] items = text.Split(' ');

                        List<string> words    = new List<string>();
                        List<string> hashtags = new List<string>();
                        List<string> mentions = new List<string>();
                        List<string> retweets = new List<string>();

                        // Distribute the refined token at each position by its word type.
                        for (int j = 0; j < items.Length; j++)
                        {
                            if (!dic.ContainsKey(j))
                            {
                                continue;
                            }

                            if (type[j] == WordType.Hashtag)
                            {
                                hashtags.Add(dic[j]);
                            }
                            if (type[j] == WordType.Mention)
                            {
                                mentions.Add(dic[j]);
                            }
                            if (type[j] == WordType.Retweet)
                            {
                                retweets.Add(dic[j]);
                            }
                            if (type[j] == WordType.Word)
                            {
                                words.Add(dic[j]);
                            }
                        }

                        // string.Join yields "" for an empty list, which covers the former else-branches.
                        doc.Add(new Field("Hashtag", string.Join(" ", hashtags.ToArray()), Field.Store.YES, Field.Index.ANALYZED));
                        doc.Add(new Field("Mention", string.Join(" ", mentions.ToArray()), Field.Store.YES, Field.Index.ANALYZED));
                        doc.Add(new Field("Retweet", string.Join(" ", retweets.ToArray()), Field.Store.YES, Field.Index.ANALYZED));

                        // The stored "Word" value has always carried a leading space when non-empty;
                        // kept so the written index content stays unchanged.
                        string wordText = words.Count > 0 ? " " + string.Join(" ", words.ToArray()) : "";
                        doc.Add(new Field("Word", wordText, Field.Store.YES, Field.Index.ANALYZED));

                        if (hashtags.Count < 5 && words.Count > 3)
                        {
                            tweetWriter.AddDocument(doc);
                        }
                    }
                }
                finally
                {
                    // The original never released the source searcher or directory.
                    if (searcher != null)
                    {
                        searcher.Close();
                    }
                    directory.Close();
                }

                tweetWriter.Optimize();
            }
            finally
            {
                // Always close the writer so it is not left open when processing fails.
                tweetWriter.Close();
            }
        }