C# (CSharp) Lucene.Net.Demo.Html.HTMLParser 예제들

프로그래밍 언어: C# (CSharp)

hotexamples.com에서의 예제들: 4

C# (CSharp) Lucene.Net.Demo.Html.HTMLParser - 4개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, 평가가 가장 높은 C# (CSharp) Lucene.Net.Demo.Html.HTMLParser의 실제 사용 예제들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

GetReader(2)

GetSummary(2)

GetTitle(2)

예제 #1

파일 보기

파일: HTMLDocument.cs 프로젝트: mundher/lucene.net

        /// <summary>
        /// Builds a Lucene document for the HTML file <paramref name="f"/>.
        /// </summary>
        /// <remarks>
        /// The document receives the fields "path", "modified", "uid", "contents",
        /// "summary" and "title".
        /// </remarks>
        /// <param name="f">The HTML file to turn into a document.</param>
        /// <returns>The populated document.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="f"/> is null.</exception>
        public static Document Document(FileInfo f)
        {
            if (f == null)
            {
                throw new System.ArgumentNullException("f");
            }

            // make a new, empty document
            Document doc = new Document();

            // Add the url as a field named "path".  Use a field that is
            // indexed (i.e. searchable), but don't tokenize the field into words.
            doc.Add(new Field("path", f.FullName.Replace(dirSep, '/'), Field.Store.YES, Field.Index.NOT_ANALYZED));

            // Add the last modified date of the file a field named "modified".
            // Use a field that is indexed (i.e. searchable), but don't tokenize
            // the field into words.
            // The full timestamp is passed (not DateTime.Millisecond, which is only
            // the 0-999 millisecond component), and UTC is used so the indexed value
            // does not depend on the time zone of the indexing machine.
            doc.Add(new Field("modified", DateTools.DateToString(f.LastWriteTimeUtc, DateTools.Resolution.MINUTE), Field.Store.YES, Field.Index.NOT_ANALYZED));

            // Add the uid as a field, so that index can be incrementally maintained.
            // This field is not stored with document, it is indexed, but it is not
            // tokenized prior to indexing.
            doc.Add(new Field("uid", Uid(f), Field.Store.NO, Field.Index.NOT_ANALYZED));

            // NOTE(review): the stream is disposed as soon as this method returns, while
            // the "contents" field still holds a reader obtained from the parser. If that
            // reader is consumed lazily at indexing time it may hit a closed stream --
            // confirm against HTMLParser before relying on this.
            using (var fileStream = f.OpenRead())
            {
                var parser = new HTMLParser(fileStream);

                // Add the tag-stripped contents as a Reader-valued Text field so it will
                // get tokenized and indexed.
                doc.Add(new Field("contents", parser.GetReader()));

                // Add the summary as a field that is stored and returned with
                // hit documents for display.
                doc.Add(new Field("summary", parser.GetSummary(), Field.Store.YES, Field.Index.NO));

                // Add the title as a field that it can be searched and that is stored.
                doc.Add(new Field("title", parser.GetTitle(), Field.Store.YES, Field.Index.ANALYZED));

                // return the document
                return doc;
            }
        }

예제 #2

파일 보기

파일: HTMLDocument.cs 프로젝트: raol/lucene.net

        /// <summary>
        /// Builds a Lucene document for the HTML file <paramref name="f"/>.
        /// </summary>
        /// <remarks>
        /// The document receives the fields "path", "modified", "uid", "contents",
        /// "summary" and "title".
        /// </remarks>
        /// <param name="f">The HTML file to turn into a document.</param>
        /// <returns>The populated document.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="f"/> is null.</exception>
        public static Document Document(FileInfo f)
        {
            if (f == null)
            {
                throw new System.ArgumentNullException("f");
            }

            // make a new, empty document
            Document doc = new Document();

            // Add the url as a field named "path".  Use a field that is
            // indexed (i.e. searchable), but don't tokenize the field into words.
            doc.Add(new Field("path", f.FullName.Replace(dirSep, '/'), Field.Store.YES, Field.Index.NOT_ANALYZED));

            // Add the last modified date of the file a field named "modified".
            // Use a field that is indexed (i.e. searchable), but don't tokenize
            // the field into words.
            // DateTime.Millisecond is only the 0-999 millisecond component of the
            // timestamp, so the whole timestamp is converted instead; UTC keeps the
            // indexed value independent of the indexing machine's time zone.
            doc.Add(new Field("modified", DateTools.DateToString(f.LastWriteTimeUtc, DateTools.Resolution.MINUTE), Field.Store.YES, Field.Index.NOT_ANALYZED));

            // Add the uid as a field, so that index can be incrementally maintained.
            // This field is not stored with document, it is indexed, but it is not
            // tokenized prior to indexing.
            doc.Add(new Field("uid", Uid(f), Field.Store.NO, Field.Index.NOT_ANALYZED));

            // NOTE(review): the stream is disposed when this method returns, but the
            // "contents" field keeps a reader obtained from the parser. If that reader
            // is only consumed later (at indexing time) it may read from a closed
            // stream -- confirm against HTMLParser.
            using (var fileStream = f.OpenRead())
            {
                var parser = new HTMLParser(fileStream);

                // Add the tag-stripped contents as a Reader-valued Text field so it will
                // get tokenized and indexed.
                doc.Add(new Field("contents", parser.GetReader()));

                // Add the summary as a field that is stored and returned with
                // hit documents for display.
                doc.Add(new Field("summary", parser.GetSummary(), Field.Store.YES, Field.Index.NO));

                // Add the title as a field that it can be searched and that is stored.
                doc.Add(new Field("title", parser.GetTitle(), Field.Store.YES, Field.Index.ANALYZED));

                // return the document
                return doc;
            }
        }

예제 #3

파일 보기

파일: HTMLDocument.cs 프로젝트: emtees/old-code

        /// <summary>
        /// Creates a Lucene document for the HTML file <paramref name="f"/>, with the
        /// fields "url", "modified", "uid", "contents", "summary" and "title".
        /// </summary>
        /// <param name="f">The HTML file to turn into a document.</param>
        /// <returns>The populated document.</returns>
        public static Document Document(System.IO.FileInfo f)
        {
            // DateTime ticks (100 ns units) at 1970-01-01, and ticks per millisecond;
            // together they turn LastWriteTime.Ticks into milliseconds since 1970.
            const long ticksAtUnixEpoch = 621355968000000000;
            const long ticksPerMillisecond = 10000;

            Document result = new Document();

            // "url": stored with the document only (UnIndexed), so it is returned
            // with hits but cannot be searched on.
            string url = f.FullName.Replace(dirSep, '/');
            result.Add(Field.UnIndexed("url", url));

            // "modified": a Keyword field, so it is searchable but never split
            // into words.
            long modifiedMillis = (f.LastWriteTime.Ticks - ticksAtUnixEpoch) / ticksPerMillisecond;
            result.Add(Field.Keyword("modified", DateField.TimeToString(modifiedMillis)));

            // "uid": lets the index be maintained incrementally. Not stored,
            // indexed, not tokenized.
            result.Add(new Field("uid", UID(f), false, true, false));

            HTMLParser htmlParser = new HTMLParser(f);

            // "contents": the tag-stripped text as a Reader-valued Text field, so it
            // is tokenized and indexed.
            result.Add(Field.Text("contents", htmlParser.GetReader()));

            // "summary": UnIndexed, i.e. stored and returned with hits for display.
            result.Add(Field.UnIndexed("summary", htmlParser.GetSummary()));

            // "title": its own Text field so it can be searched separately.
            result.Add(Field.Text("title", htmlParser.GetTitle()));

            return result;
        }

예제 #4

파일 보기

파일: HTMLDocument.cs 프로젝트: yonder/mono

        /// <summary>
        /// Produces the Lucene document that represents the HTML file
        /// <paramref name="f"/>. Fields added, in order: "url", "modified", "uid",
        /// "contents", "summary", "title".
        /// </summary>
        /// <param name="f">The HTML file to turn into a document.</param>
        /// <returns>The populated document.</returns>
        public static Document Document(System.IO.FileInfo f)
        {
            // Value of DateTime.Ticks at 1970-01-01 (ticks are 100 ns units).
            const long epochTicks = 621355968000000000;
            // Number of ticks in one millisecond.
            const long ticksPerMs = 10000;

            Document htmlDoc = new Document();

            // The url is kept as an UnIndexed field: stored with the document,
            // not searchable.
            htmlDoc.Add(Field.UnIndexed("url", f.FullName.Replace(dirSep, '/')));

            // The last-modified time goes into a Keyword field: searchable, but no
            // attempt is made to tokenize it into words.
            long ticksSinceEpoch = f.LastWriteTime.Ticks - epochTicks;
            string modified = DateField.TimeToString(ticksSinceEpoch / ticksPerMs);
            htmlDoc.Add(Field.Keyword("modified", modified));

            // The uid allows incremental index maintenance; it is not stored,
            // is indexed, and is not tokenized.
            htmlDoc.Add(new Field("uid", UID(f), false, true, false));

            HTMLParser source = new HTMLParser(f);

            // Tag-stripped contents as a Reader-valued Text field (tokenized and indexed).
            htmlDoc.Add(Field.Text("contents", source.GetReader()));

            // Summary as an UnIndexed field (stored and returned with hits for display).
            htmlDoc.Add(Field.UnIndexed("summary", source.GetSummary()));

            // Title as a separate Text field so it can be searched on its own.
            htmlDoc.Add(Field.Text("title", source.GetTitle()));

            return htmlDoc;
        }