/// <summary>
        /// Builds the queries that select the index entries to delete, one query per file.
        /// </summary>
        /// <param name="directoryInfo">The directory whose root path is stripped from each file name to obtain the indexed path value.</param>
        /// <param name="files">The files whose index entries are to be removed.</param>
        /// <param name="documents">The supported documents search filter; this method does not read it.</param>
        /// <returns>One query per entry of <paramref name="files"/>, in the same order, each holding a required term clause on the "path" field.</returns>
        public Query[] RemoveDocuments(DirectoryInfo directoryInfo, string[] files, SupportedDocumentExtension documents)
        {
            Query[] removals = new Query[files.Length];
            int     slot     = 0;

            foreach (string fileName in files)
            {
                // The path key is the file name with the root removed,
                // lower-cased, and with forward slashes as separators.
                string withoutRoot = fileName.Replace(directoryInfo.Root.FullName, "");
                string pathKey     = withoutRoot.ToLower().Replace("\\", "/");

                // The document must carry exactly this path term.
                BooleanQuery mustMatchPath = new BooleanQuery();
                mustMatchPath.Add(new TermQuery(new Term("path", pathKey)), BooleanClause.Occur.MUST);

                removals[slot++] = mustMatchPath;
            }

            return removals;
        }
        // Example #2
        /// <summary>
        /// Indexes every listed file that exists on disk as one faceted document.
        /// </summary>
        /// <param name="writer">The index writer that receives each built document.</param>
        /// <param name="facetWriter">The taxonomy writer used when building each document; it is committed together with <paramref name="writer"/>.</param>
        /// <param name="directoryInfo">The directory whose root path is stripped from each file name to form the stored "path" value.</param>
        /// <param name="files">The files to add; entries that do not exist on disk are skipped.</param>
        /// <param name="documents">Supplies the tokenize/store flags for content fields and the running size counter that triggers commits.</param>
        /// <param name="facetField">The facet field added to every document.</param>
        /// <param name="config">The facet configuration used to build each document before it is written.</param>
        public void AddDocuments(Lucene.Net.Index.IndexWriter writer, DirectoryTaxonomyWriter facetWriter,
                                 DirectoryInfo directoryInfo, string[] files, SupportedDocumentExtension documents, FacetField facetField, FacetsConfig config)
        {
            // "path" and "modified" are stored, indexed and never tokenized.
            FieldType exactFieldType = new Lucene.Net.Documents.FieldType();
            exactFieldType.Indexed      = true;
            exactFieldType.Tokenized    = false;
            exactFieldType.Stored       = true;
            exactFieldType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;

            // Word fields follow the caller's tokenize/store settings.
            FieldType wordFieldType = new Lucene.Net.Documents.FieldType();
            wordFieldType.Indexed      = true;
            wordFieldType.Tokenized    = documents.TokenizeContent;
            wordFieldType.Stored       = documents.StoreContent;
            wordFieldType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;

            foreach (string fileName in files)
            {
                // Nothing to index for a file that is not there.
                if (!File.Exists(fileName))
                {
                    continue;
                }

                Lucene.Net.Documents.Document entry = new Lucene.Net.Documents.Document();

                try
                {
                    FileInfo details  = new FileInfo(fileName);
                    string   relative = fileName.Replace(directoryInfo.Root.FullName, "").ToLower();

                    // Stored path uses forward slashes; the timestamp is the short date and short time of the last write.
                    Field pathField     = new Field("path", relative.Replace("\\", "/"), exactFieldType);
                    Field modifiedField = new Field("modified", details.LastWriteTime.ToShortDateString() + " " + details.LastWriteTime.ToShortTimeString(), exactFieldType);

                    entry.Add(facetField);
                    entry.Add(pathField);
                    entry.Add(modifiedField);

                    // Open the file and pull its text out of the loaded document.
                    OpenDocument(fileName);
                    string text = Nequeo.Xml.Document.ExtractContent(_xDocument);

                    // One "facetcontent" field per non-empty, lower-cased word.
                    string[] words = String.IsNullOrEmpty(text) ? null : text.Words();
                    if (words != null)
                    {
                        foreach (string rawWord in words)
                        {
                            string word = rawWord.ToLower().RemovePunctuationFromStartAndEnd();
                            if (!String.IsNullOrEmpty(word))
                            {
                                entry.Add(new Field("facetcontent", word, wordFieldType));
                            }
                        }
                    }

                    // Write the faceted document, then close the source document.
                    writer.AddDocument(config.Build(facetWriter, entry));
                    _document.Close();

                    // Commit both writers once the accumulated file size passes the limit.
                    documents.TotalDocumentSize += details.Length;
                    if (documents.TotalDocumentSize > documents.MaxDocumentSizePerCommit)
                    {
                        writer.Commit();
                        facetWriter.Commit();
                        documents.TotalDocumentSize = 0;
                    }
                }
                finally
                {
                    // Runs whether or not indexing this file succeeded.
                    CloseDocument();
                }
            }
        }
        // Example #3
        /// <summary>
        /// Indexes every listed rich-text file that exists on disk.
        /// Each document gets a "path" field, a "modified" field and one "content"
        /// field per non-empty word of the file's plain text.
        /// </summary>
        /// <param name="writer">The index writer that receives each document and is committed once the accumulated file size exceeds the limit.</param>
        /// <param name="directoryInfo">The directory whose root path is stripped from each file name to form the stored "path" value.</param>
        /// <param name="files">The list of files that are to be added; entries that do not exist on disk are skipped.</param>
        /// <param name="documents">Supplies the tokenize/store flags for content fields and the running size counter that triggers commits.</param>
        /// <remarks>
        /// Exceptions raised while reading or indexing a file are not handled here and propagate to the caller.
        /// </remarks>
        public void AddDocuments(Lucene.Net.Index.IndexWriter writer, DirectoryInfo directoryInfo, string[] files, SupportedDocumentExtension documents)
        {
            // RichTextBox is a disposable control used only to convert RTF to plain text;
            // the using block releases it on every exit path, including exceptions.
            using (System.Windows.Forms.RichTextBox textbox = new System.Windows.Forms.RichTextBox())
            {
                FieldType pathFieldType = new Lucene.Net.Documents.FieldType()
                {
                    Indexed      = true,
                    Tokenized    = false,
                    Stored       = true,
                    IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
                };
                FieldType contentFieldType = new Lucene.Net.Documents.FieldType()
                {
                    Indexed      = true,
                    Tokenized    = documents.TokenizeContent,
                    Stored       = documents.StoreContent,
                    IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
                };

                // For each file.
                for (int i = 0; i < files.Length; i++)
                {
                    // Skip entries that are not present on disk.
                    if (!File.Exists(files[i]))
                    {
                        continue;
                    }

                    Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();
                    FileInfo fileInfo = new FileInfo(files[i]);

                    // Indexed path: root removed, lower-cased, forward slashes.
                    string file = files[i].Replace(directoryInfo.Root.FullName, "").ToLower().Replace("\\", "/");

                    Lucene.Net.Documents.Field path     = new Field("path", file, pathFieldType);
                    Lucene.Net.Documents.Field modified = new Field("modified", fileInfo.LastWriteTime.ToShortDateString() + " " + fileInfo.LastWriteTime.ToShortTimeString(), pathFieldType);

                    // Add the fields.
                    document.Add(path);
                    document.Add(modified);

                    // Read the raw RTF and release the file handle before indexing starts.
                    string contentRtf;
                    using (StreamReader stream = new StreamReader(files[i]))
                    {
                        contentRtf = stream.ReadToEnd();
                    }

                    // Let the control strip the RTF markup.
                    textbox.Rtf = contentRtf;
                    string content = textbox.Text;

                    // If content exists.
                    if (!String.IsNullOrEmpty(content))
                    {
                        // Split the text into words.
                        string[] words = content.Words();

                        // If words exist.
                        if (words != null && words.Length > 0)
                        {
                            // Add one content field per non-empty word.
                            for (int j = 0; j < words.Length; j++)
                            {
                                // Format the word.
                                string word = words[j].ToLower().RemovePunctuationFromStartAndEnd();

                                // If a word exists.
                                if (!String.IsNullOrEmpty(word))
                                {
                                    Lucene.Net.Documents.Field contentField = new Field("content", word, contentFieldType);
                                    document.Add(contentField);
                                }
                            }
                        }
                    }

                    // Add the document.
                    writer.AddDocument(document.Fields);

                    // Commit once the accumulated file size passes the configured limit.
                    documents.TotalDocumentSize += fileInfo.Length;
                    if (documents.TotalDocumentSize > documents.MaxDocumentSizePerCommit)
                    {
                        writer.Commit();
                        documents.TotalDocumentSize = 0;
                    }
                }
            }
        }
        /// <summary>
        /// Indexes the files under a directory for every document format selected
        /// in <paramref name="documents"/>, handing each format to its own filter.
        /// </summary>
        /// <param name="writer">The index writer passed through to each filter.</param>
        /// <param name="directoryInfo">The directory whose full name is searched for files and which is passed through to each filter.</param>
        /// <param name="documents">Selects the formats to index, supplies the search patterns per format, and is passed through to each filter.</param>
        public void AddDocuments(Lucene.Net.Index.IndexWriter writer, DirectoryInfo directoryInfo, SupportedDocumentExtension documents)
        {
            Nequeo.IO.Directory directory = new Nequeo.IO.Directory();

            // True when the given format flag is set on the caller's selection.
            System.Func<SupportedDocuments, bool> isSelected =
                format => documents.SupportedDocuments.HasFlag(format);

            // Files under the directory that match the search patterns of the given format.
            System.Func<SupportedDocuments, string[]> filesFor =
                format => directory.GetFiles(directoryInfo.FullName, documents.GetFormattedSearchPatterns(format));

            // Formats are processed in a fixed order: html, pdf, rtf, txt, xml, docx.
            if (isSelected(SupportedDocuments.Html))
            {
                new HtmlFilter().AddDocuments(writer, directoryInfo, filesFor(SupportedDocuments.Html), documents);
            }

            if (isSelected(SupportedDocuments.Pdf))
            {
                new PdfFilter().AddDocuments(writer, directoryInfo, filesFor(SupportedDocuments.Pdf), documents);
            }

            if (isSelected(SupportedDocuments.Rtf))
            {
                new RtfFilter().AddDocuments(writer, directoryInfo, filesFor(SupportedDocuments.Rtf), documents);
            }

            if (isSelected(SupportedDocuments.Txt))
            {
                new TxtFilter().AddDocuments(writer, directoryInfo, filesFor(SupportedDocuments.Txt), documents);
            }

            if (isSelected(SupportedDocuments.Xml))
            {
                new XmlFilter().AddDocuments(writer, directoryInfo, filesFor(SupportedDocuments.Xml), documents);
            }

            // Docx files are handled by the MS document filter.
            if (isSelected(SupportedDocuments.Docx))
            {
                new MSDocFilter().AddDocuments(writer, directoryInfo, filesFor(SupportedDocuments.Docx), documents);
            }
        }