/// <summary>
/// Build the queries that identify the documents to remove from the existing index.
/// </summary>
/// <param name="directoryInfo">The directory information; the full name of its root is stripped from every file path.</param>
/// <param name="files">The array of all files that are to be removed.</param>
/// <param name="documents">The supported documents search filter (not read by this method body).</param>
/// <returns>One query per entry in <paramref name="files"/>, each requiring the "path" field to equal the normalised file path.</returns>
public Query[] RemoveDocuments(DirectoryInfo directoryInfo, string[] files, SupportedDocumentExtension documents)
{
    List<Query> removals = new List<Query>();

    foreach (string filePath in files)
    {
        // Normalise the path: drop the root, lower-case it, then use forward slashes.
        string withoutRoot = filePath.Replace(directoryInfo.Root.FullName, "");
        string indexedPath = withoutRoot.ToLower().Replace("\\", "/");

        // A boolean query with a single mandatory clause on the "path" field.
        Term pathTerm = new Term("path", indexedPath);
        BooleanQuery pathQuery = new BooleanQuery();
        pathQuery.Add(new TermQuery(pathTerm), BooleanClause.Occur.MUST);

        removals.Add(pathQuery);
    }

    return removals.ToArray();
}
/// <summary>
/// Add documents, together with their facet field, to the index.
/// </summary>
/// <param name="writer">The index writer.</param>
/// <param name="facetWriter">The facet index writer.</param>
/// <param name="directoryInfo">The directory information; the full name of its root is stripped from every indexed path.</param>
/// <param name="files">The list of files that are to be added; entries that do not exist on disk are skipped.</param>
/// <param name="documents">The supported documents search filter; supplies the content field options and the commit threshold.</param>
/// <param name="facetField">The facet field information, added to every document.</param>
/// <param name="config">The facet configuration information used to build each document.</param>
public void AddDocuments(Lucene.Net.Index.IndexWriter writer, DirectoryTaxonomyWriter facetWriter, DirectoryInfo directoryInfo, string[] files, SupportedDocumentExtension documents, FacetField facetField, FacetsConfig config)
{
    // Field type shared by the "path" and "modified" fields: stored and indexed as one token.
    FieldType pathFieldType = new Lucene.Net.Documents.FieldType();
    pathFieldType.Indexed = true;
    pathFieldType.Tokenized = false;
    pathFieldType.Stored = true;
    pathFieldType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;

    // Field type for the per-word content fields; tokenising and storing are caller-configured.
    FieldType contentFieldType = new Lucene.Net.Documents.FieldType();
    contentFieldType.Indexed = true;
    contentFieldType.Tokenized = documents.TokenizeContent;
    contentFieldType.Stored = documents.StoreContent;
    contentFieldType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;

    foreach (string filePath in files)
    {
        // Only files that exist are indexed.
        if (!File.Exists(filePath))
        {
            continue;
        }

        Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();

        try
        {
            FileInfo fileInfo = new FileInfo(filePath);

            // Normalise the path: drop the root, lower-case it, then use forward slashes.
            string indexedPath = filePath.Replace(directoryInfo.Root.FullName, "").ToLower().Replace("\\", "/");

            // Short date and short time of the last write, separated by a single space.
            DateTime lastWrite = fileInfo.LastWriteTime;
            string modifiedText = lastWrite.ToShortDateString() + " " + lastWrite.ToShortTimeString();

            // Add the fields.
            document.Add(facetField);
            document.Add(new Field("path", indexedPath, pathFieldType));
            document.Add(new Field("modified", modifiedText, pathFieldType));

            // Open the file and extract its text.
            // NOTE(review): OpenDocument is defined outside this block; presumably it populates _xDocument and _document - confirm.
            OpenDocument(filePath);
            string content = Nequeo.Xml.Document.ExtractContent(_xDocument);

            if (!String.IsNullOrEmpty(content))
            {
                // Split the text into words.
                string[] words = content.Words();

                if (words != null)
                {
                    // Add one "facetcontent" field for each non-empty word.
                    foreach (string rawWord in words)
                    {
                        string word = rawWord.ToLower().RemovePunctuationFromStartAndEnd();

                        if (String.IsNullOrEmpty(word))
                        {
                            continue;
                        }

                        document.Add(new Field("facetcontent", word, contentFieldType));
                    }
                }
            }

            // Add the document.
            writer.AddDocument(config.Build(facetWriter, document));
            _document.Close();

            // Commit once the accumulated file size exceeds the configured maximum.
            documents.TotalDocumentSize += fileInfo.Length;
            if (documents.TotalDocumentSize > documents.MaxDocumentSizePerCommit)
            {
                writer.Commit();
                facetWriter.Commit();
                documents.TotalDocumentSize = 0;
            }
        }
        finally
        {
            // Runs whether or not indexing succeeded; exceptions still propagate to the caller.
            CloseDocument();
        }
    }
}
/// <summary>
/// Add rich text (RTF) documents to the index.
/// </summary>
/// <param name="writer">The index writer.</param>
/// <param name="directoryInfo">The directory information; the full name of its root is stripped from every indexed path.</param>
/// <param name="files">The list of files that are to be added; entries that do not exist on disk are skipped.</param>
/// <param name="documents">The supported documents search filter; supplies the content field options and the commit threshold.</param>
public void AddDocuments(Lucene.Net.Index.IndexWriter writer, DirectoryInfo directoryInfo, string[] files, SupportedDocumentExtension documents)
{
    // Field type shared by the "path" and "modified" fields: stored and indexed as one token.
    FieldType pathFieldType = new Lucene.Net.Documents.FieldType()
    {
        Indexed = true,
        Tokenized = false,
        Stored = true,
        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
    };

    // Field type for the per-word content fields; tokenising and storing are caller-configured.
    FieldType contentFieldType = new Lucene.Net.Documents.FieldType()
    {
        Indexed = true,
        Tokenized = documents.TokenizeContent,
        Stored = documents.StoreContent,
        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
    };

    // The rich text box converts RTF markup to plain text. It is a disposable
    // control, so it is released when the method exits, including on failure.
    using (System.Windows.Forms.RichTextBox textbox = new System.Windows.Forms.RichTextBox())
    {
        foreach (string filePath in files)
        {
            // Only files that exist are indexed.
            if (!File.Exists(filePath))
            {
                continue;
            }

            Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();
            FileInfo fileInfo = new FileInfo(filePath);

            // Normalise the path: drop the root, lower-case it, then use forward slashes.
            string indexedPath = filePath.Replace(directoryInfo.Root.FullName, "").ToLower().Replace("\\", "/");
            string modifiedText = fileInfo.LastWriteTime.ToShortDateString() + " " + fileInfo.LastWriteTime.ToShortTimeString();

            // Add the fields.
            document.Add(new Field("path", indexedPath, pathFieldType));
            document.Add(new Field("modified", modifiedText, pathFieldType));

            // Read the raw RTF; the file is closed as soon as its text is in memory.
            string contentRtf;
            using (StreamReader reader = new StreamReader(filePath))
            {
                contentRtf = reader.ReadToEnd();
            }

            // Let the rich text box turn the RTF markup into plain text.
            textbox.Rtf = contentRtf;
            string content = textbox.Text;

            if (!String.IsNullOrEmpty(content))
            {
                // Split the text into words.
                string[] words = content.Words();

                if (words != null)
                {
                    // Add one "content" field for each non-empty word.
                    foreach (string rawWord in words)
                    {
                        string word = rawWord.ToLower().RemovePunctuationFromStartAndEnd();

                        if (!String.IsNullOrEmpty(word))
                        {
                            document.Add(new Field("content", word, contentFieldType));
                        }
                    }
                }
            }

            // Add the document.
            writer.AddDocument(document.Fields);

            // Commit once the accumulated file size exceeds the configured maximum.
            documents.TotalDocumentSize += fileInfo.Length;
            if (documents.TotalDocumentSize > documents.MaxDocumentSizePerCommit)
            {
                writer.Commit();
                documents.TotalDocumentSize = 0;
            }
        }
    }
}
/// <summary>
/// Add documents of every selected format found under the directory to the index.
/// </summary>
/// <param name="writer">The index writer.</param>
/// <param name="directoryInfo">The directory information where all the files that are to be added are located.</param>
/// <param name="documents">The supported documents search filter, used to indicate what files are to be added.</param>
public void AddDocuments(Lucene.Net.Index.IndexWriter writer, DirectoryInfo directoryInfo, SupportedDocumentExtension documents)
{
    Nequeo.IO.Directory directory = new Nequeo.IO.Directory();

    // Looks up the files for one document format using that format's search patterns.
    System.Func<SupportedDocuments, string[]> findFiles =
        format => directory.GetFiles(directoryInfo.FullName, documents.GetFormattedSearchPatterns(format));

    // Each selected format is handed to its own filter; formats are processed
    // in the fixed order html, pdf, rtf, txt, xml, docx.

    // Html.
    if (documents.SupportedDocuments.HasFlag(SupportedDocuments.Html))
    {
        new HtmlFilter().AddDocuments(writer, directoryInfo, findFiles(SupportedDocuments.Html), documents);
    }

    // Pdf.
    if (documents.SupportedDocuments.HasFlag(SupportedDocuments.Pdf))
    {
        new PdfFilter().AddDocuments(writer, directoryInfo, findFiles(SupportedDocuments.Pdf), documents);
    }

    // Rtf.
    if (documents.SupportedDocuments.HasFlag(SupportedDocuments.Rtf))
    {
        new RtfFilter().AddDocuments(writer, directoryInfo, findFiles(SupportedDocuments.Rtf), documents);
    }

    // Txt.
    if (documents.SupportedDocuments.HasFlag(SupportedDocuments.Txt))
    {
        new TxtFilter().AddDocuments(writer, directoryInfo, findFiles(SupportedDocuments.Txt), documents);
    }

    // Xml.
    if (documents.SupportedDocuments.HasFlag(SupportedDocuments.Xml))
    {
        new XmlFilter().AddDocuments(writer, directoryInfo, findFiles(SupportedDocuments.Xml), documents);
    }

    // Docx.
    if (documents.SupportedDocuments.HasFlag(SupportedDocuments.Docx))
    {
        new MSDocFilter().AddDocuments(writer, directoryInfo, findFiles(SupportedDocuments.Docx), documents);
    }
}