Esempio n. 1
0
        /// <summary>
        /// Builds a Lucene document for the HTML file <paramref name="f"/>.
        /// </summary>
        /// <remarks>
        /// The document receives the fields "path", "modified", "uid", "contents",
        /// "summary" and "title", in that order.
        /// </remarks>
        /// <param name="f">The HTML file to turn into a document.</param>
        /// <returns>The populated document.</returns>
        public static Document Document(FileInfo f)
        {
            // make a new, empty document
            Document doc = new Document();

            // Add the url as a field named "path".  Use a field that is
            // indexed (i.e. searchable), but don't tokenize the field into words.
            doc.Add(new Field("path", f.FullName.Replace(dirSep, '/'), Field.Store.YES, Field.Index.NOT_ANALYZED));

            // Add the last modified date of the file a field named "modified".
            // Use a field that is indexed (i.e. searchable), but don't tokenize
            // the field into words.
            // The whole timestamp is handed to DateToString: DateTime.Millisecond is
            // only the 0-999 millisecond component and does not identify a point in time.
            doc.Add(new Field("modified", DateTools.DateToString(f.LastWriteTime, DateTools.Resolution.MINUTE), Field.Store.YES, Field.Index.NOT_ANALYZED));

            // Add the uid as a field, so that index can be incrementally maintained.
            // This field is not stored with document, it is indexed, but it is not
            // tokenized prior to indexing.
            doc.Add(new Field("uid", Uid(f), Field.Store.NO, Field.Index.NOT_ANALYZED));

            // NOTE(review): the "contents" reader is handed to the document and may be
            // read after this method returns, i.e. after fileStream has been disposed.
            // Confirm that HTMLParser has finished reading the stream by then.
            using (var fileStream = f.OpenRead())
            {
                var parser = new HTMLParser(fileStream);

                // Add the tag-stripped contents as a Reader-valued Text field so it will
                // get tokenized and indexed.
                doc.Add(new Field("contents", parser.GetReader()));

                // Add the summary as a field that is stored and returned with
                // hit documents for display.
                doc.Add(new Field("summary", parser.GetSummary(), Field.Store.YES, Field.Index.NO));

                // Add the title as a field that it can be searched and that is stored.
                doc.Add(new Field("title", parser.GetTitle(), Field.Store.YES, Field.Index.ANALYZED));

                // return the document
                return doc;
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Creates the Lucene document that represents the HTML file <paramref name="f"/>.
        /// </summary>
        /// <remarks>
        /// Fields are added in this order: "path", "modified", "uid", "contents",
        /// "summary", "title".
        /// </remarks>
        /// <param name="f">The HTML file to turn into a document.</param>
        /// <returns>The populated document.</returns>
        public static Document Document(FileInfo f)
        {
            // make a new, empty document
            Document doc = new Document();

            // Add the url as a field named "path".  Use a field that is
            // indexed (i.e. searchable), but don't tokenize the field into words.
            doc.Add(new Field("path", f.FullName.Replace(dirSep, '/'), Field.Store.YES, Field.Index.NOT_ANALYZED));

            // Add the last modified date of the file a field named "modified".
            // Use a field that is indexed (i.e. searchable), but don't tokenize
            // the field into words.
            // DateTime.Millisecond would yield only the 0-999 millisecond part of the
            // timestamp, so the complete DateTime is formatted instead.
            doc.Add(new Field("modified", DateTools.DateToString(f.LastWriteTime, DateTools.Resolution.MINUTE), Field.Store.YES, Field.Index.NOT_ANALYZED));

            // Add the uid as a field, so that index can be incrementally maintained.
            // This field is not stored with document, it is indexed, but it is not
            // tokenized prior to indexing.
            doc.Add(new Field("uid", Uid(f), Field.Store.NO, Field.Index.NOT_ANALYZED));

            // NOTE(review): the reader stored in "contents" may be consumed only after
            // this method returns and fileStream is disposed. Verify that HTMLParser no
            // longer needs the stream at that point.
            using (var fileStream = f.OpenRead())
            {
                var parser = new HTMLParser(fileStream);

                // Add the tag-stripped contents as a Reader-valued Text field so it will
                // get tokenized and indexed.
                doc.Add(new Field("contents", parser.GetReader()));

                // Add the summary as a field that is stored and returned with
                // hit documents for display.
                doc.Add(new Field("summary", parser.GetSummary(), Field.Store.YES, Field.Index.NO));

                // Add the title as a field that it can be searched and that is stored.
                doc.Add(new Field("title", parser.GetTitle(), Field.Store.YES, Field.Index.ANALYZED));

                // return the document
                return doc;
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Builds a Lucene document for the HTML file <paramref name="f"/>.
        /// </summary>
        /// <remarks>
        /// Fields are added in this order: "url", "modified", "uid", "contents",
        /// "summary", "title".
        /// </remarks>
        /// <param name="f">The HTML file to turn into a document.</param>
        /// <returns>The populated document.</returns>
        public static Document Document(System.IO.FileInfo f)
        {
            // DateTime.Ticks value of 1970-01-01 and the number of ticks per millisecond;
            // together they convert a tick count into milliseconds since the Unix epoch.
            const long unixEpochTicks = 621355968000000000;
            const long ticksPerMillisecond = 10000;

            Document result = new Document();

            // "url": UnIndexed, i.e. kept with the document but not searchable.
            string url = f.FullName.Replace(dirSep, '/');
            result.Add(Field.UnIndexed("url", url));

            // "modified": Keyword, i.e. searchable but never split into words.
            long modifiedMillis = (f.LastWriteTime.Ticks - unixEpochTicks) / ticksPerMillisecond;
            result.Add(Field.Keyword("modified", DateField.TimeToString(modifiedMillis)));

            // "uid": lets the index be maintained incrementally. Not stored,
            // indexed, and not tokenized.
            result.Add(new Field("uid", UID(f), false, true, false));

            HTMLParser htmlParser = new HTMLParser(f);

            // "contents": the tag-stripped text as a Reader-valued Text field,
            // so it is tokenized and indexed.
            result.Add(Field.Text("contents", htmlParser.GetReader()));

            // "summary": UnIndexed, stored so it can be displayed with hits.
            result.Add(Field.UnIndexed("summary", htmlParser.GetSummary()));

            // "title": its own Text field so it can be searched separately.
            result.Add(Field.Text("title", htmlParser.GetTitle()));

            return result;
        }
Esempio n. 4
0
        /// <summary>
        /// Creates the Lucene document that represents the HTML file <paramref name="f"/>.
        /// </summary>
        /// <remarks>
        /// The document receives the fields "url", "modified", "uid", "contents",
        /// "summary" and "title", in that order.
        /// </remarks>
        /// <param name="f">The HTML file to turn into a document.</param>
        /// <returns>The populated document.</returns>
        public static Document Document(System.IO.FileInfo f)
        {
            // Tick count of 1970-01-01 in DateTime terms, and ticks per millisecond.
            const long epochOffsetTicks = 621355968000000000;
            const long ticksPerMs = 10000;

            Document htmlDocument = new Document();

            // Stored-only field holding the file location with '/' separators.
            string normalizedPath = f.FullName.Replace(dirSep, '/');
            htmlDocument.Add(Field.UnIndexed("url", normalizedPath));

            // Last write time as milliseconds since the Unix epoch, added as a
            // Keyword field: searchable, but not tokenized into words.
            long lastWriteMs = (f.LastWriteTime.Ticks - epochOffsetTicks) / ticksPerMs;
            string modifiedText = DateField.TimeToString(lastWriteMs);
            htmlDocument.Add(Field.Keyword("modified", modifiedText));

            // Identifier used for incremental index maintenance: not stored,
            // indexed, not tokenized.
            Field uidField = new Field("uid", UID(f), false, true, false);
            htmlDocument.Add(uidField);

            HTMLParser source = new HTMLParser(f);

            // Tag-stripped body text, supplied through a reader so that it is
            // tokenized and indexed.
            htmlDocument.Add(Field.Text("contents", source.GetReader()));

            // Summary is stored for display alongside hits; it is not indexed.
            htmlDocument.Add(Field.UnIndexed("summary", source.GetSummary()));

            // Title is a separate Text field so it can be queried separately.
            htmlDocument.Add(Field.Text("title", source.GetTitle()));

            return htmlDocument;
        }