/// <summary>
/// Builds an index document for the HTML file <paramref name="f"/>.
/// </summary>
/// <remarks>
/// Fields added, in order: "path", "modified", "uid", "contents", "summary", "title".
/// </remarks>
/// <param name="f">The HTML file to turn into a document.</param>
/// <returns>The populated document.</returns>
public static Document Document(FileInfo f)
{
    // Make a new, empty document.
    Document doc = new Document();

    // Add the file path (directory separators normalized to '/') as a field
    // named "path". It is stored and indexed (i.e. searchable), but not
    // tokenized into words.
    doc.Add(new Field("path", f.FullName.Replace(dirSep, '/'),
                      Field.Store.YES, Field.Index.NOT_ANALYZED));

    // Add the last modified date of the file as a field named "modified",
    // stored and indexed but not tokenized.
    // The whole timestamp is passed: DateTime.Millisecond is only the 0-999
    // millisecond component, so using it would index a meaningless value.
    // NOTE(review): LastWriteTime is local time; switch to LastWriteTimeUtc if
    // the index is expected to hold UTC values - confirm against readers of this field.
    doc.Add(new Field("modified",
                      DateTools.DateToString(f.LastWriteTime, DateTools.Resolution.MINUTE),
                      Field.Store.YES, Field.Index.NOT_ANALYZED));

    // Add the uid as a field, so that the index can be incrementally maintained.
    // This field is not stored with the document; it is indexed, but it is not
    // tokenized prior to indexing.
    doc.Add(new Field("uid", Uid(f), Field.Store.NO, Field.Index.NOT_ANALYZED));

    // NOTE(review): the reader obtained from parser.GetReader() is attached to
    // the document, while fileStream is disposed when this block exits. If the
    // reader pulls from the stream lazily, consuming the document later may
    // fail - confirm against HTMLParser.
    using (var fileStream = f.OpenRead())
    {
        var parser = new HTMLParser(fileStream);

        // Add the tag-stripped contents as a Reader-valued field so it will
        // get tokenized and indexed.
        doc.Add(new Field("contents", parser.GetReader()));

        // Add the summary as a field that is stored (but not indexed) and
        // returned with hit documents for display.
        doc.Add(new Field("summary", parser.GetSummary(), Field.Store.YES, Field.Index.NO));

        // Add the title as a field that can be searched and that is stored.
        doc.Add(new Field("title", parser.GetTitle(), Field.Store.YES, Field.Index.ANALYZED));
    }

    return doc;
}
/// <summary>
/// Creates the index document describing the HTML file <paramref name="f"/>.
/// </summary>
/// <remarks>
/// The document receives the fields "path", "modified", "uid", "contents",
/// "summary" and "title", added in that order.
/// </remarks>
/// <param name="f">The HTML file to index.</param>
/// <returns>The document holding the fields listed above.</returns>
public static Document Document(FileInfo f)
{
    Document doc = new Document();

    // "path": full file name with directory separators replaced by '/'.
    // Stored and indexed (searchable) as a single, untokenized term.
    doc.Add(new Field("path", f.FullName.Replace(dirSep, '/'),
                      Field.Store.YES, Field.Index.NOT_ANALYZED));

    // "modified": last write time at minute resolution, stored and indexed
    // without tokenizing.
    // DateTime.Millisecond yields only the millisecond part (0-999) of the
    // timestamp, so the complete DateTime is handed to DateTools instead.
    // NOTE(review): LastWriteTime is local time; use LastWriteTimeUtc if the
    // index should hold UTC values - confirm with consumers of this field.
    doc.Add(new Field("modified",
                      DateTools.DateToString(f.LastWriteTime, DateTools.Resolution.MINUTE),
                      Field.Store.YES, Field.Index.NOT_ANALYZED));

    // "uid": lets the index be maintained incrementally. Indexed but neither
    // stored nor tokenized.
    doc.Add(new Field("uid", Uid(f), Field.Store.NO, Field.Index.NOT_ANALYZED));

    // NOTE(review): fileStream is disposed on leaving the using block, yet the
    // reader from parser.GetReader() stays attached to the returned document.
    // If that reader reads the stream lazily, later indexing may fail -
    // verify against HTMLParser.
    using (var fileStream = f.OpenRead())
    {
        var parser = new HTMLParser(fileStream);

        // "contents": tag-stripped text supplied as a reader so that it is
        // tokenized and indexed.
        doc.Add(new Field("contents", parser.GetReader()));

        // "summary": stored only, returned with hit documents for display.
        doc.Add(new Field("summary", parser.GetSummary(), Field.Store.YES, Field.Index.NO));

        // "title": stored and analyzed so it can be searched.
        doc.Add(new Field("title", parser.GetTitle(), Field.Store.YES, Field.Index.ANALYZED));

        return doc;
    }
}
/// <summary>
/// Builds an index document for the HTML file <paramref name="f"/>.
/// </summary>
/// <remarks>
/// Fields added, in order: "url", "modified", "uid", "contents", "summary", "title".
/// </remarks>
/// <param name="f">The HTML file to turn into a document.</param>
/// <returns>The populated document.</returns>
public static Document Document(System.IO.FileInfo f)
{
    // DateTime.Ticks value of 1970-01-01T00:00:00 and the number of ticks in
    // one millisecond; together they turn Ticks into epoch milliseconds.
    const long unixEpochTicks = 621355968000000000;
    const long ticksPerMillisecond = 10000;

    Document doc = new Document();

    // "url": stored with the document for retrieval, but not searchable
    // (UnIndexed field).
    string url = f.FullName.Replace(dirSep, '/');
    doc.Add(Field.UnIndexed("url", url));

    // "modified": Keyword field, so it is searchable but never tokenized
    // into words.
    long modifiedMillis = (f.LastWriteTime.Ticks - unixEpochTicks) / ticksPerMillisecond;
    doc.Add(Field.Keyword("modified", DateField.TimeToString(modifiedMillis)));

    // "uid": allows the index to be maintained incrementally. Not stored,
    // indexed, not tokenized.
    doc.Add(new Field("uid", UID(f), false, true, false));

    HTMLParser parser = new HTMLParser(f);

    // "contents": tag-stripped text as a Reader-valued Text field, so it is
    // tokenized and indexed.
    doc.Add(Field.Text("contents", parser.GetReader()));

    // "summary": UnIndexed field, stored and returned with hit documents
    // for display.
    doc.Add(Field.UnIndexed("summary", parser.GetSummary()));

    // "title": separate Text field, so that it can be searched separately.
    doc.Add(Field.Text("title", parser.GetTitle()));

    return doc;
}
/// <summary>
/// Creates the index document describing the HTML file <paramref name="f"/>.
/// </summary>
/// <remarks>
/// The document receives the fields "url", "modified", "uid", "contents",
/// "summary" and "title", added in that order.
/// </remarks>
/// <param name="f">The HTML file to index.</param>
/// <returns>The document holding the fields listed above.</returns>
public static Document Document(System.IO.FileInfo f)
{
    // Ticks at 1970-01-01T00:00:00, and ticks per millisecond: used to
    // express the last write time as milliseconds since that instant.
    const long epochTicks = 621355968000000000;
    const long ticksPerMs = 10000;

    Document result = new Document();

    // The url is only stored (UnIndexed), so it comes back with the document
    // but cannot be searched.
    string normalizedPath = f.FullName.Replace(dirSep, '/');
    result.Add(Field.UnIndexed("url", normalizedPath));

    // The last modified date is a Keyword field: searchable, with no attempt
    // made to tokenize it into words.
    long lastWriteMs = (f.LastWriteTime.Ticks - epochTicks) / ticksPerMs;
    string modified = DateField.TimeToString(lastWriteMs);
    result.Add(Field.Keyword("modified", modified));

    // The uid supports incremental index maintenance. It is not stored, it is
    // indexed, and it is not tokenized prior to indexing.
    result.Add(new Field("uid", UID(f), false, true, false));

    HTMLParser htmlParser = new HTMLParser(f);

    // Tag-stripped contents go in as a Reader-valued Text field so they are
    // tokenized and indexed.
    result.Add(Field.Text("contents", htmlParser.GetReader()));

    // The summary is UnIndexed: stored and returned with hit documents for
    // display.
    result.Add(Field.UnIndexed("summary", htmlParser.GetSummary()));

    // The title is its own Text field so that it can be searched separately.
    result.Add(Field.Text("title", htmlParser.GetTitle()));

    return result;
}