C# (CSharp) Hatfield.Web.Portal.Search.Lucene IndexableFileInfo 예제들

프로그래밍 언어: C# (CSharp)

네임스페이스/패키지 이름: Hatfield.Web.Portal.Search.Lucene

클래스/타입: IndexableFileInfo

hotexamples.com에서의 예제들: 5

C# (CSharp) Hatfield.Web.Portal.Search.Lucene IndexableFileInfo - 5개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 C# (CSharp)의 Hatfield.Web.Portal.Search.Lucene.IndexableFileInfo에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

예제 #1

파일 보기

        /// <summary>
        /// Reports whether the index already holds a document whose "filename" and
        /// "LastModified" terms (second resolution) both match <paramref name="fileInfo"/>.
        /// </summary>
        /// <param name="fileInfo">The file whose filename and last-modified stamp are looked up.</param>
        /// <returns>
        /// true when the search yields at least one hit; false when there are no hits
        /// or when the lookup throws (the exception message is written to the console).
        /// </returns>
        private bool isInIndex(IndexableFileInfo fileInfo)
        {
            IndexSearcher indexSearcher = new IndexSearcher(this.luceneIndexDir);
            bool alreadyIndexed = false;

            try
            {
                // Both clauses are mandatory: same path AND same modification stamp.
                BooleanQuery lookup = new BooleanQuery();

                Term filenameTerm = new Term("filename", fileInfo.Filename);
                lookup.Add(new TermQuery(filenameTerm), BooleanClause.Occur.MUST);

                string modifiedStamp = DateTools.DateToString(fileInfo.LastModified, DateTools.Resolution.SECOND);
                Term modifiedTerm = new Term("LastModified", modifiedStamp);
                lookup.Add(new TermQuery(modifiedTerm), BooleanClause.Occur.MUST);

                Hits matches = indexSearcher.Search(lookup);
                alreadyIndexed = matches.Length() > 0;
            }
            catch (Exception ex)
            {
                // Best effort: a failed lookup is reported and treated as "not in index".
                Console.Write(ex.Message);
            }
            finally
            {
                indexSearcher.Close();
            }

            return alreadyIndexed;
        }

예제 #2

파일 보기

        /// <summary>
        /// Indexes every entry of <paramref name="fileInfos"/> into the Lucene index at
        /// <paramref name="LuceneIndexDir"/>, then (in append mode) removes duplicate and
        /// deleted files, and finally rebuilds the spell-checker index.
        /// Returns immediately, doing nothing, when an indexing run is already flagged as in progress.
        /// </summary>
        /// <param name="LuceneIndexDir">Directory of the Lucene index.</param>
        /// <param name="SpellingIndexDir">Directory passed to the spell-checker indexing step.</param>
        /// <param name="indexCreationMode">Passed to the indexer; append mode also triggers duplicate/deleted-file cleanup.</param>
        /// <param name="fileInfos">The files to add to the index.</param>
        /// <param name="ThreadState">Not used by this method.</param>
        /// <param name="AddFileToIndex">Optional callback invoked before each file is added, with the file and the fraction (0..1) of files already processed.</param>
        public static void doIndex(string LuceneIndexDir, string SpellingIndexDir, IndexCreationMode indexCreationMode, IndexableFileInfo[] fileInfos, object ThreadState, onAddFileToIndex AddFileToIndex)
        {
            if (indexing)
            {
                return;
            }
            lock (padlock)
            {
                indexing = true;
                try
                {
                    LuceneIndexer indexer = new LuceneIndexer(LuceneIndexDir, indexCreationMode); // create new index
                    try
                    {
                        for (int i = 0; i < fileInfos.Length; i++)
                        {
                            IndexableFileInfo fi = fileInfos[i];
                            if (AddFileToIndex != null)
                            {
                                AddFileToIndex(fi, (double)i / (double)fileInfos.Length);
                            }

                            indexer.addFileInfoToIndex(fi);
                        } // for
                    }
                    finally
                    {
                        // Always release the index writer, even when a file fails to index.
                        indexer.CloseIndexWriter(OptimizeMode.DoNotOptimize);
                    }

                    if (indexCreationMode == IndexCreationMode.AppendToExistingIndex)
                    {
                        removeAllDuplicateAndDeletedFiles(fileInfos, LuceneIndexDir, indexCreationMode);
                    }

                    try
                    {
                        doSpellCheckerIndexing(LuceneIndexDir, SpellingIndexDir);
                    }
                    catch
                    {
                        // Best effort: a spell-checker indexing failure is deliberately
                        // ignored so it does not fail the main indexing run.
                    }
                }
                finally
                {
                    // Reset the flag on every exit path. Without this, one exception
                    // would leave it stuck at true and all later calls would return
                    // immediately without indexing anything.
                    indexing = false;
                }
            }
        } // doIndex

예제 #3

파일 보기

        /// <summary>
        /// Builds a Lucene document from <paramref name="fileInfo"/> and adds it to the index writer.
        /// In append mode the file is skipped when <c>isInIndex</c> reports it as already indexed.
        /// When the file exists on disk and <c>fileInfo.Contents</c> is empty, the contents are
        /// first loaded through <c>IFilterFileContents.getFileContents</c> and stored back on
        /// <paramref name="fileInfo"/>.
        /// </summary>
        /// <param name="fileInfo">The file description to index; its Contents may be filled in by this call.</param>
        public void addFileInfoToIndex(IndexableFileInfo fileInfo)
        {
            if ((_indexCreationMode == IndexCreationMode.AppendToExistingIndex) && isInIndex(fileInfo))
            {
                return;
            }

            bool fileExistsOnDisk = System.IO.File.Exists(fileInfo.Filename);

            if (fileExistsOnDisk && fileInfo.Contents == "")
            {
                fileInfo.Contents = IFilterFileContents.getFileContents(fileInfo.Filename);
            }

            Document doc = new Document();

            /* From http://www.webreference.com/programming/lucene/2/
             * Field.Keyword Isn't analyzed, but is indexed and stored in the index verbatim. This type is suitable for fields whose original value should be preserved in its entirety, such as URLs, file system paths, dates, personal names, Social Security numbers, telephone numbers, and so on. For example, we used the file system path in Indexer (listing 1.1) as a Keyword field.
             * Field.UnIndexed Is neither analyzed nor indexed, but its value is stored in the index as is. This type is suitable for fields that you need to display with search results (such as a URL or database primary key), but whose values you'll never search directly. Since the original value of a field of this type is stored in the index, this type isn't suitable for storing fields with very large values, if index size is an issue.
             * Field.UnStored The opposite of UnIndexed. This field type is analyzed and indexed but isn't stored in the index. It's suitable for indexing a large amount of text that doesn't need to be retrieved in its original form, such as bodies of web pages, or any other type of text document
             * Field.Text Is analyzed, and is indexed. This implies that fields of this type can be searched against, but be cautious about the field size. If the data indexed is a String, it's also stored; but if the data (as in our Indexer example) is from a Reader, it isn't stored. This is often a source of confusion, so take note of this difference when using Field.Text.
             */

            // -- add fields to the document
            doc.Add(new Field("contents", fileInfo.Contents, Field.Store.YES, Field.Index.TOKENIZED));          // can be searched and is analyzed
            doc.Add(new Field("filename", fileInfo.Filename, Field.Store.YES, Field.Index.UN_TOKENIZED));       // can be searched, but is not analyzed
            doc.Add(new Field("filenameParams", fileInfo.FilenameParameters, Field.Store.YES, Field.Index.NO)); // can not be searched
            doc.Add(new Field("contentIsPageSummary", Convert.ToString(fileInfo.ContentIsPageSummary), Field.Store.YES, Field.Index.NO));

            doc.Add(new Field("SectionName", fileInfo.SectionName, Field.Store.YES, Field.Index.UN_TOKENIZED));

            // Same format and resolution that isInIndex uses for its "LastModified" term lookup.
            doc.Add(new Field("LastModified", DateTools.DateToString(fileInfo.LastModified, DateTools.Resolution.SECOND), Field.Store.YES, Field.Index.UN_TOKENIZED));

            Field titleField = new Field("title", fileInfo.Title, Field.Store.YES, Field.Index.TOKENIZED);

            titleField.SetBoost(TitleFieldBoost); // default value is 1.0
            doc.Add(titleField);

            if (fileExistsOnDisk)
            {
                // The containing directory is stored once as the direct parent, and then it
                // and every ancestor up to the root are stored as "parentdirectory" values.
                System.IO.DirectoryInfo di = new System.IO.FileInfo(fileInfo.Filename).Directory;
                doc.Add(new Field("directparentdirectory", di.FullName, Field.Store.YES, Field.Index.UN_TOKENIZED));
                while (di != null)
                {
                    doc.Add(new Field("parentdirectory", di.FullName, Field.Store.YES, Field.Index.UN_TOKENIZED));
                    di = di.Parent;
                }

                // The extension becomes an exact-match index term, so it is normalised with
                // culture-invariant rules; a culture-sensitive ToLower() would produce
                // different terms depending on the thread culture (e.g. "TIF" under Turkish).
                string ext = System.IO.Path.GetExtension(fileInfo.Filename).ToLowerInvariant();
                if (ext.StartsWith(".", StringComparison.Ordinal))
                {
                    ext = ext.Substring(1);
                }
                doc.Add(new Field("filetype", ext, Field.Store.YES, Field.Index.UN_TOKENIZED));
            }

            doc.Add(new Field("dateIndexed", DateTools.DateToString(DateTime.Now, DateTools.Resolution.SECOND), Field.Store.YES, Field.Index.NO));
            // -- add the document to the index
            writer.AddDocument(doc);
        } // addFileInfoToIndex

예제 #4

파일 보기

        } // constructor

        /// <summary>
        /// Searches the "title" and "contents" fields of the keyword index using the keywordQuery.
        ///
        /// See http://www.dotlucene.net/documentation/QuerySyntax.html  for the format of the keywordQuery.
        ///
        /// This function will return a fully-filled array of IndexableFileInfo objects, ordered by relevance.
        /// Unless a hit is flagged as a page summary, its contents are replaced by the best
        /// highlighted fragments (matches wrapped in &lt;strong&gt; tags).
        /// </summary>
        /// <param name="keywordQuery">The query to run; a null or empty query yields an empty result.</param>
        /// <param name="queryForHighlighter">The query whose terms are highlighted in the returned fragments; may be empty.</param>
        /// <returns>The hits, or an empty array when the query is empty or the index cannot be opened.</returns>
        public IndexableFileInfo[] doSearch(string keywordQuery, string queryForHighlighter)
        {
            // Nothing to search for: answer before opening any index files.
            if (string.IsNullOrEmpty(keywordQuery))
            {
                return(new IndexableFileInfo[0]);
            }

            IndexSearcher searcher;
            IndexReader   indexReader = null;

            try
            {
                FSDirectory indexDir = FSDirectory.GetDirectory(this.luceneIndexDir, false);
                indexReader = IndexReader.Open(indexDir);
                searcher    = new IndexSearcher(indexReader);
            }
            catch
            {
                // if the luceneIndexDir does not contain index files (yet), opening it
                // throws an Exception; treat that as "no results".
                if (indexReader != null)
                {
                    // The reader opened but the searcher could not be created: release the reader.
                    try
                    {
                        indexReader.Close();
                    }
                    catch
                    {
                        // Already on the failure path; the empty result is returned regardless.
                    }
                }
                return(new IndexableFileInfo[0]);
            }

            List <IndexableFileInfo> results = new List <IndexableFileInfo>();

            try
            {
                string highlighterQueryText = queryForHighlighter;
                // -- weirdly enough, when the query is empty, an exception is thrown during the QueryParser.Parse
                //    this hack gets around that.
                if (string.IsNullOrEmpty(highlighterQueryText))
                {
                    highlighterQueryText = Guid.NewGuid().ToString();
                }

                // parse the query against the "title" and "contents" fields; all terms are required (AND).
                // NOTE(review): an earlier comment here said to use the StandardAnalyzer rather than the
                // SimpleAnalyzer, yet the code uses SimpleAnalyzer - confirm which is intended.
                MultiFieldQueryParser queryParser = new MultiFieldQueryParser(new string[] { "title", "contents" }, new SimpleAnalyzer());
                queryParser.SetDefaultOperator(QueryParser.AND_OPERATOR);

                Query query = queryParser.Parse(keywordQuery);

                QueryParser highlightQueryParser = new QueryParser("contents", new hatWebPortalAnalyzer());

                Query highlighterQuery = highlightQueryParser.Parse(highlighterQueryText);

                query = searcher.Rewrite(query); // is this needed?? " Expert: called to re-write queries into primitive queries."

                // search
                Hits hits = searcher.Search(query, Sort.RELEVANCE);

                // create highlighter
                Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<strong>", "</strong>"), new QueryScorer(highlighterQuery));

                // One analyzer serves every hit; each TokenStream call below still gets its own reader.
                hatWebPortalAnalyzer fragmentAnalyzer = new hatWebPortalAnalyzer();

                // -- go through hits and return results
                int hitCount = hits.Length();
                for (int i = 0; i < hitCount; i++)
                {
                    Document d                    = hits.Doc(i);
                    string   filename             = d.Get("filename");
                    string   plainText            = d.Get("contents");
                    string   title                = d.Get("title");
                    string   sectionName          = d.Get("SectionName");
                    string   filenameParams       = d.Get("filenameParams");
                    bool     contentIsPageSummary = Convert.ToBoolean(d.Get("contentIsPageSummary"));
                    double   score                = Convert.ToDouble(hits.Score(i));
                    DateTime lastModified         = DateTools.StringToDate(d.Get("LastModified"));

                    TokenStream tokenStream = fragmentAnalyzer.TokenStream("contents", new StringReader(plainText));

                    // Page summaries are returned as stored; other content is cut down
                    // to at most two highlighted fragments joined by "...".
                    string fragment = plainText;
                    if (!contentIsPageSummary)
                    {
                        fragment = highlighter.GetBestFragments(tokenStream, plainText, 2, "...");
                    }

                    IndexableFileInfo newHit = new IndexableFileInfo(filename, filenameParams, title, fragment, sectionName, lastModified, contentIsPageSummary, score);
                    results.Add(newHit);
                } // for
            }
            finally
            {
                // Close the reader even if closing the searcher throws.
                try
                {
                    searcher.Close();
                }
                finally
                {
                    indexReader.Close();
                }
            }

            return(results.ToArray());
        } // doSearch

예제 #5

파일 보기

        /// <summary>
        /// Finds indexed files whose "title" or "contents" are similar to <paramref name="title"/>,
        /// using similarity queries built with a StandardAnalyzer. The title clause is boosted
        /// by <c>LuceneIndexer.TitleFieldBoost</c>.
        /// </summary>
        /// <param name="title">The title text to find related files for; null or blank yields no results.</param>
        /// <param name="maxResultsToReturn">Upper bound on the number of results returned.</param>
        /// <returns>
        /// Up to <paramref name="maxResultsToReturn"/> related files with their stored contents,
        /// or an empty array when the title is blank or the index cannot be opened.
        /// </returns>
        public IndexableFileInfo[] getRelatedFiles(string title, int maxResultsToReturn)
        {
            // A blank title adds no clauses to the query, and a BooleanQuery without
            // clauses matches nothing, so answer without opening the index.
            if (title == null || title.Trim() == "")
            {
                return(new IndexableFileInfo[0]);
            }

            // http://blogs.intesoft.net/post/2008/04/NHibernateSearch-using-LuceneNET-Full-Text-Index-(Part-3).aspx
            Analyzer     analyzer = new StandardAnalyzer();
            BooleanQuery query    = new BooleanQuery();

            Query titleQ = Similarity.Net.SimilarityQueries.FormSimilarQuery(title, analyzer, "title", null);
            titleQ.SetBoost(LuceneIndexer.TitleFieldBoost);
            query.Add(titleQ, BooleanClause.Occur.SHOULD);

            Query contents = Similarity.Net.SimilarityQueries.FormSimilarQuery(title, analyzer, "contents", null);
            query.Add(contents, BooleanClause.Occur.SHOULD);

            // avoid the page being similar to itself!
            // query.Add(new TermQuery(new Term("title", title)), BooleanClause.Occur.MUST_NOT);

            // Alternative approach (not used here), MoreLikeThis:
            //   IndexReader ir = ...
            //   IndexSearcher is = ...
            //   MoreLikeThis mlt = new MoreLikeThis(ir);
            //   Reader target = ...   // orig source of doc you want to find similarities to
            //   Query query = mlt.Like(target);
            //   Hits hits = is.Search(query);

            IndexSearcher searcher;

            try
            {
                FSDirectory indexDir = FSDirectory.GetDirectory(this.luceneIndexDir, false);
                searcher = new IndexSearcher(indexDir);
            }
            catch
            {
                // if the luceneIndexDir does not contain index files (yet), IndexSearcher
                // throws an Exception; treat that as "no related files".
                return(new IndexableFileInfo[0]);
            }

            List <IndexableFileInfo> related = new List <IndexableFileInfo>();

            try
            {
                // The search itself runs inside the try so the searcher is closed even when it throws.
                Hits hits = searcher.Search(query);
                int  num  = Math.Min(maxResultsToReturn, hits.Length());

                for (int i = 0; i < num; i++)
                {
                    Document d                    = hits.Doc(i);
                    string   filename             = d.Get("filename");
                    string   plainText            = d.Get("contents");
                    string   doctitle             = d.Get("title");
                    string   filenameParams       = d.Get("filenameParams");
                    bool     contentIsPageSummary = Convert.ToBoolean(d.Get("contentIsPageSummary"));
                    DateTime lastModified         = DateTools.StringToDate(d.Get("LastModified"));
                    string   sectionName          = d.Get("SectionName");

                    // No highlighting here: the stored contents are returned as the fragment.
                    IndexableFileInfo newHit = new IndexableFileInfo(filename, filenameParams, doctitle, plainText, sectionName, lastModified, contentIsPageSummary);
                    related.Add(newHit);
                } // for
            }
            finally
            {
                searcher.Close();
            }

            return(related.ToArray());
        }