Example #1
 public override void OnIndexEntryCreated(string entryKey, Lucene.Net.Documents.Document document)
 {
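     // Index-entry callbacks can fire from multiple indexing threads, so access to the shared DataTable is serialized.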
     lock (parent.DataTable)
     {
         parent.DataTable.Rows.Add(entryKey, document.GetField("Project").StringValue);
     }
 }
Example #2
        public virtual void  TestRAMDirectoryString()
        {
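            // indexDir and docsToAdd are assumed to be initialized by the test fixture's setup, which writes docsToAdd documents to disk.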
            MockRAMDirectory ramDir = new MockRAMDirectory(indexDir.FullName);

            // Check size
            Assert.AreEqual(ramDir.SizeInBytes(), ramDir.GetRecomputedSizeInBytes());

            // open reader to test document count
            IndexReader reader = IndexReader.Open(ramDir);

            Assert.AreEqual(docsToAdd, reader.NumDocs());

            // open a searcher to check that all docs are there
            IndexSearcher searcher = new IndexSearcher(reader);

            // search for all documents
            for (int i = 0; i < docsToAdd; i++)
            {
                Document doc = searcher.Doc(i);
                Assert.IsTrue(doc.GetField("content") != null);
            }

            // cleanup
            reader.Close();
            searcher.Close();
        }
Example #3
        protected override unsafe Document DirectGet(Lucene.Net.Documents.Document input, string id, IState state)
        {
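            // The reduce result is stored as a binary field on the Lucene document; pin it and wrap it as a blittable JSON object.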
            var reduceValue = input.GetField(Constants.Documents.Indexing.Fields.ReduceKeyValueFieldName).GetBinaryValue(state);

            var result = new BlittableJsonReaderObject((byte *)_context.PinObjectAndGetAddress(reduceValue), reduceValue.Length, _context);

            return new Document
            {
                Data = result
            };
        }
Example #4
 public static void Extract()
 {
     if (LuceneService.DirReader != null)
     {
         // Iterate over every document in the index, extracting NER tags, question flags, and key phrases.
         for (int i = 0; i < LuceneService.DirReader.MaxDoc; i++)
         {
             Lucene.Net.Documents.Document document = LuceneService.DirReader.Document(i);
             CoreDocument coredoc = GetAnnotatedDocument(document.GetField(ProjectInfo.TextFieldKey).GetStringValue());
             ExtractNERTags(coredoc, document);
             //IsQuestionList.Add(document.GetField("id").GetStringValue(), DetectQuestion(coredoc));
             if (DetectQuestion(coredoc))
             {
                 IsQuestionList.Add(document.GetField("id").GetInt32Value().Value);
             }
             ExtractKeyPhrases(coredoc, document.GetField("id").GetInt32Value().Value);
             System.Console.WriteLine(i);
         }
     }
 }
Example #5
 public virtual void  Test()
 {
     Assert.IsTrue(dir != null);
     Assert.IsTrue(fieldInfos != null);
     try
     {
         FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
         Assert.IsTrue(reader != null);
         Assert.IsTrue(reader.Size() == 1);
         Document doc = reader.Doc(0);
         Assert.IsTrue(doc != null);
         Assert.IsTrue(doc.GetField("textField1") != null);
         Field field = doc.GetField("textField2");
         Assert.IsTrue(field != null);
         Assert.IsTrue(field.IsTermVectorStored());
         reader.Close();
     }
     catch (System.IO.IOException e)
     {
         System.Console.Error.WriteLine(e.StackTrace);
         Assert.IsTrue(false);
     }
 }
Example #6
        protected override unsafe Document DirectGet(Lucene.Net.Documents.Document input, string id, DocumentFields fields, IState state)
        {
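            // Unlike the variant above, this copies the stored reduce value into an unmanaged buffer owned by the context before wrapping it.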
            var reduceValue = input.GetField(Constants.Documents.Indexing.Fields.ReduceKeyValueFieldName).GetBinaryValue(state);

            var allocation = _context.GetMemory(reduceValue.Length);

            UnmanagedWriteBuffer buffer = new UnmanagedWriteBuffer(_context, allocation);

            buffer.Write(reduceValue, 0, reduceValue.Length);

            var result = new BlittableJsonReaderObject(allocation.Address, reduceValue.Length, _context, buffer);

            return new Document
            {
                Data = result
            };
        }
Example #7
        private bool VerifyIndex(Directory directory, int startAt)
        {
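            // Every stored "count" field should equal the document's position plus startAt; any mismatch marks the index as failed.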
            bool        fail   = false;
            IndexReader reader = IndexReader.Open(directory, true, null);

            int max = reader.MaxDoc;

            for (int i = 0; i < max; i++)
            {
                Document temp = reader.Document(i, null);
                //System.Console.Out.WriteLine("doc " + i + "=" + temp.GetField("count").StringValue(null));
                //compare the index doc number to the value that it should be
                if (!temp.GetField("count").StringValue(null).Equals((i + startAt) + ""))
                {
                    fail = true;
                    System.Console.Out.WriteLine("Document " + (i + startAt) + " is returning document " + temp.GetField("count").StringValue(null));
                }
            }
            reader.Close();
            return(fail);
        }
Example #8
        private static void ExtractNouns(CoreDocument coredoc, Lucene.Net.Documents.Document document)
        {
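            // Walk every token of every sentence and keep those whose POS tag contains "NN" (noun variants).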
            List <string> nouns = new List <string>();

            for (int i = 0; i < coredoc.sentences().size(); i++)
            {
                CoreSentence sent = (CoreSentence)coredoc.sentences().get(i);
                for (int j = 0; j < sent.tokens().size(); j++)
                {
                    // Condition: if the word is a noun (posTag starts with "NN")
                    if (sent.posTags() != null && sent.posTags().get(j) != null)
                    {
                        string posTags = sent.posTags().get(j).ToString();
                        if (posTags.Contains("NN"))
                        {
                            var noun = sent.tokens().get(j).ToString();
                            noun = noun.Remove(noun.Length - 2);
                            nouns.Add(noun);
                        }
                    }
                }
            }
            NounPhrases.Add(document.GetField("id").GetInt32Value().Value, nouns);
        }
Example #9
        public virtual void  searchIndex(System.String dirName, System.String oldName)
        {
            //QueryParser parser = new QueryParser("contents", new WhitespaceAnalyzer());
            //Query query = parser.parse("handle:1");

            dirName = FullDir(dirName);

            Directory     dir      = FSDirectory.Open(new System.IO.FileInfo(dirName));
            IndexSearcher searcher = new IndexSearcher(dir);
            IndexReader   reader   = searcher.GetIndexReader();

            _TestUtil.CheckIndex(dir);

            for (int i = 0; i < 35; i++)
            {
                if (!reader.IsDeleted(i))
                {
                    Document d = reader.Document(i);
                    System.Collections.IList fields = d.GetFields();
                    if (!oldName.StartsWith("19.") && !oldName.StartsWith("20.") && !oldName.StartsWith("21.") && !oldName.StartsWith("22."))
                    {
                        if (d.GetField("content3") == null)
                        {
                            Assert.AreEqual(5, fields.Count);
                            Field f = (Field)d.GetField("id");
                            Assert.AreEqual("" + i, f.StringValue());

                            f = (Field)d.GetField("utf8");
                            Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue());

                            f = (Field)d.GetField("autf8");
                            Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue());

                            f = (Field)d.GetField("content2");
                            Assert.AreEqual("here is more content with aaa aaa aaa", f.StringValue());

                            f = (Field)d.GetField("fie\u2C77ld");
                            Assert.AreEqual("field with non-ascii name", f.StringValue());
                        }
                    }
                }
                // Only ID 7 is deleted
                else
                {
                    Assert.AreEqual(7, i);
                }
            }

            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;

            // First document should be #21 since its norm was
            // increased:
            Document d2 = searcher.Doc(hits[0].doc);

            Assert.AreEqual("21", d2.Get("id"), "didn't get the right document first");

            TestHits(hits, 34, searcher.GetIndexReader());

            if (!oldName.StartsWith("19.") && !oldName.StartsWith("20.") && !oldName.StartsWith("21.") && !oldName.StartsWith("22."))
            {
                // Test on indices >= 2.3
                hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs;
                Assert.AreEqual(34, hits.Length);
                hits = searcher.Search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs;
                Assert.AreEqual(34, hits.Length);
                hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs;
                Assert.AreEqual(34, hits.Length);
            }

            searcher.Close();
            dir.Close();
        }
Example #10
            public void array_is_flat()
            {
                var result = _document.GetField("ListOfItems").StringValue();

                result.ShouldEqual("One Two");
            }
Example #11
		public virtual void  TestIndexStoreCombos()
		{
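			// A stored field can carry one value (the binary slice here) while indexing a completely different token stream.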
			MockRAMDirectory dir = new MockRAMDirectory();
			IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
			byte[] b = new byte[50];
			for (int i = 0; i < 50; i++)
				b[i] = (byte) (i + 77);
			
			Document doc = new Document();
			Field f = new Field("binary", b, 10, 17, Field.Store.YES);
			f.SetTokenStream(new WhitespaceTokenizer(new System.IO.StringReader("doc1field1")));
			Field f2 = new Field("string", "value", Field.Store.YES, Field.Index.ANALYZED);
			f2.SetTokenStream(new WhitespaceTokenizer(new System.IO.StringReader("doc1field2")));
			doc.Add(f);
			doc.Add(f2);
			w.AddDocument(doc);
			
			// add 2 docs to test in-memory merging
			f.SetTokenStream(new WhitespaceTokenizer(new System.IO.StringReader("doc2field1")));
			f2.SetTokenStream(new WhitespaceTokenizer(new System.IO.StringReader("doc2field2")));
			w.AddDocument(doc);
			
			// force segment flush so we can force a segment merge with doc3 later.
			w.Commit();
			
			f.SetTokenStream(new WhitespaceTokenizer(new System.IO.StringReader("doc3field1")));
			f2.SetTokenStream(new WhitespaceTokenizer(new System.IO.StringReader("doc3field2")));
			
			w.AddDocument(doc);
			w.Commit();
			w.Optimize(); // force segment merge.
			
			IndexReader ir = IndexReader.Open(dir);
			doc = ir.Document(0);
			f = doc.GetField("binary");
			b = f.GetBinaryValue();
			Assert.IsTrue(b != null);
			Assert.AreEqual(17, b.Length);
			Assert.AreEqual(87, b[0]);
			
			Assert.IsTrue(ir.Document(0).GetFieldable("binary").IsBinary());
			Assert.IsTrue(ir.Document(1).GetFieldable("binary").IsBinary());
			Assert.IsTrue(ir.Document(2).GetFieldable("binary").IsBinary());
			
			Assert.AreEqual("value", ir.Document(0).Get("string"));
			Assert.AreEqual("value", ir.Document(1).Get("string"));
			Assert.AreEqual("value", ir.Document(2).Get("string"));
			
			
			// test that the terms were indexed.
			Assert.IsTrue(ir.TermDocs(new Term("binary", "doc1field1")).Next());
			Assert.IsTrue(ir.TermDocs(new Term("binary", "doc2field1")).Next());
			Assert.IsTrue(ir.TermDocs(new Term("binary", "doc3field1")).Next());
			Assert.IsTrue(ir.TermDocs(new Term("string", "doc1field2")).Next());
			Assert.IsTrue(ir.TermDocs(new Term("string", "doc2field2")).Next());
			Assert.IsTrue(ir.TermDocs(new Term("string", "doc3field2")).Next());
			
			ir.Close();
			dir.Close();
		}
Example #12
		public virtual void  TestMergeCompressedFields()
		{
			System.IO.FileInfo indexDir = new System.IO.FileInfo(System.IO.Path.Combine(SupportClass.AppSettings.Get("tempDir", ""), "mergecompressedfields"));
			Directory dir = FSDirectory.Open(indexDir);
			try
			{
				for (int i = 0; i < 5; i++)
				{
					// Must make a new writer & doc each time, w/
					// different fields, so bulk merge of stored fields
					// cannot run:
					IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), i == 0, IndexWriter.MaxFieldLength.UNLIMITED);
					try
					{
						w.SetMergeFactor(5);
						w.SetMergeScheduler(new SerialMergeScheduler());
						Document doc = new Document();
						doc.Add(new Field("test1", "this is some data that will be compressed this this this", Field.Store.COMPRESS, Field.Index.NO));
						doc.Add(new Field("test2", new byte[20], Field.Store.COMPRESS));
						doc.Add(new Field("field" + i, "random field", Field.Store.NO, Field.Index.ANALYZED));
						w.AddDocument(doc);
					}
					finally
					{
						w.Close();
					}
				}
				
				byte[] cmp = new byte[20];
				
				IndexReader r = IndexReader.Open(dir);
				try
				{
					for (int i = 0; i < 5; i++)
					{
						Document doc = r.Document(i);
						Assert.AreEqual(doc.GetField("test1").StringValue(), "this is some data that will be compressed this this this");
						byte[] b = doc.GetField("test2").BinaryValue();
                        Assert.AreEqual(b.Length, cmp.Length);
                        for (int j = 0; j < b.Length; j++)
                            Assert.AreEqual(b[j], cmp[j]);
					}
				}
				finally
				{
					r.Close();
				}
			}
			finally
			{
				dir.Close();
				_TestUtil.RmDir(indexDir);
			}
		}
Example #13
		public virtual void  TestBinaryFieldOffsetLength()
		{
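			// Store only bytes [10, 27) of the buffer: the Field records the offset and length, but GetBinaryValue() returns the whole backing array.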
			MockRAMDirectory dir = new MockRAMDirectory();
			IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
			byte[] b = new byte[50];
			for (int i = 0; i < 50; i++)
				b[i] = (byte) (i + 77);
			
			Document doc = new Document();
			Field f = new Field("binary", b, 10, 17, Field.Store.YES);
			byte[] bx = f.GetBinaryValue();
			Assert.IsTrue(bx != null);
			Assert.AreEqual(50, bx.Length);
			Assert.AreEqual(10, f.GetBinaryOffset());
			Assert.AreEqual(17, f.GetBinaryLength());
			doc.Add(f);
			w.AddDocument(doc);
			w.Close();
			
			IndexReader ir = IndexReader.Open(dir);
			doc = ir.Document(0);
			f = doc.GetField("binary");
			b = f.GetBinaryValue();
			Assert.IsTrue(b != null);
			Assert.AreEqual(17, b.Length);
			Assert.AreEqual(87, b[0]);
			ir.Close();
			dir.Close();
		}
Example #14
        private static void ExtractNERTags(CoreDocument coredoc, Lucene.Net.Documents.Document document)
        {
            // Entity mentions are recorded per sentence, so the same tag can appear repeatedly when a document has more than one sentence.
            if (coredoc != null)
            {
                List nerList = coredoc.entityMentions();
                if (nerList.size() > 0)
                {
                    for (int j = 0; j < nerList.size(); j++)
                    {
                        CoreEntityMention em = (CoreEntityMention)nerList.get(j);
                        // These repeated checks could be collapsed into a switch on entityType().
                        if (em.entityType() == "DATE")
                        {
                            var datekey = document.GetField("id").GetInt32Value().Value;
                            if (!DateList.ContainsKey(datekey))
                            {
                                DateList.Add(datekey, em.text());
                            }
                            else
                            {
                                DateList.TryUpdate(datekey, DateList[datekey] + ", " + em.text());
                            }
                        }
                        if (em.entityType() == "TIME")
                        {
                            var timekey = document.GetField("id").GetInt32Value().Value;
                            if (!TimeList.ContainsKey(timekey))
                            {
                                TimeList.Add(timekey, em.text());
                            }
                            else
                            {
                                TimeList.TryUpdate(timekey, TimeList[timekey] + ", " + em.text());
                            }
                        }

                        if (em.entityType() == "LOCATION")
                        {
                            var lockey = document.GetField("id").GetInt32Value().Value;
                            if (!LocList.ContainsKey(lockey))
                            {
                                LocList.Add(lockey, em.text());
                            }
                            else
                            {
                                LocList.TryUpdate(lockey, LocList[lockey] + ", " + em.text());
                            }
                        }
                        if (em.entityType() == "ORGANIZATION")
                        {
                            var orgkey = document.GetField("id").GetInt32Value().Value;
                            if (!OrgList.ContainsKey(orgkey))
                            {
                                OrgList.Add(orgkey, em.text());
                            }
                            else
                            {
                                OrgList.TryUpdate(orgkey, OrgList[orgkey] + ", " + em.text());
                            }
                        }

                        if (em.entityType() == "URL")
                        {
                            var urlkey = document.GetField("id").GetInt32Value().Value;
                            if (!URLList.ContainsKey(urlkey))
                            {
                                URLList.Add(urlkey, em.text());
                            }
                            else
                            {
                                URLList.TryUpdate(urlkey, URLList[urlkey] + ", " + em.text());
                            }
                        }
                    }
                }
            }
        }
Example #15
 public override void OnIndexEntryCreated(string indexName, string entryKey, Lucene.Net.Documents.Document document)
 {
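     // Note: unlike Example #1, access to the shared DataTable is not synchronized here.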
     DataTable.Rows.Add(entryKey, document.GetField("Project").StringValue());
 }