예제 #1
0
        protected virtual void  SetUp()
        {
            for (int i = 0; i < testFields.Length; i++)
            {
                fieldInfos.Add(testFields[i], true, true);
            }

            try
            {
                System.Array.Sort(testTerms);
                for (int j = 0; j < 5; j++)
                {
                    writer = new TermVectorsWriter(dir, seg, fieldInfos);
                    writer.OpenDocument();

                    for (int k = 0; k < testFields.Length; k++)
                    {
                        writer.OpenField(testFields[k]);
                        for (int i = 0; i < testTerms.Length; i++)
                        {
                            writer.AddTerm(testTerms[i], i);
                        }
                        writer.CloseField();
                    }
                    writer.CloseDocument();
                    writer.Close();
                }
            }
            catch (System.IO.IOException e)
            {
                System.Console.Error.WriteLine(e.StackTrace);
                Assert.IsTrue(false);
            }
        }
예제 #2
0
 public virtual void  TestWriter()
 {
     try
     {
         TermVectorsWriter writer = new TermVectorsWriter(dir, seg, fieldInfos);
         writer.OpenDocument();
         Assert.IsTrue(writer.IsDocumentOpen() == true);
         WriteField(writer, testFields[0]);
         writer.CloseDocument();
         writer.Close();
         Assert.IsTrue(writer.IsDocumentOpen() == false);
         //Check to see the files were created
         Assert.IsTrue(dir.FileExists(seg + TermVectorsWriter.TVD_EXTENSION));
         Assert.IsTrue(dir.FileExists(seg + TermVectorsWriter.TVX_EXTENSION));
         //Now read it back in
         TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
         Assert.IsTrue(reader != null);
         CheckTermVector(reader, 0, testFields[0]);
     }
     catch (System.IO.IOException e)
     {
         System.Console.Error.WriteLine(e.StackTrace);
         Assert.IsTrue(false);
     }
 }
예제 #3
0
		protected virtual void  SetUp()
		{
			for (int i = 0; i < testFields.Length; i++)
			{
				fieldInfos.Add(testFields[i], true, true);
			}
			
			try
			{
				System.Array.Sort(testTerms);
				for (int j = 0; j < 5; j++)
				{
					writer = new TermVectorsWriter(dir, seg, fieldInfos);
					writer.OpenDocument();
					
					for (int k = 0; k < testFields.Length; k++)
					{
						writer.OpenField(testFields[k]);
						for (int i = 0; i < testTerms.Length; i++)
						{
							writer.AddTerm(testTerms[i], i);
						}
						writer.CloseField();
					}
					writer.CloseDocument();
					writer.Close();
				}
			}
			catch (System.IO.IOException e)
			{
				System.Console.Error.WriteLine(e.StackTrace);
				Assert.IsTrue(false);
			}
		}
예제 #4
0
        private void  WriteDocument(TermVectorsWriter writer, int numFields)
        {
            writer.OpenDocument();
            Assert.IsTrue(writer.IsDocumentOpen() == true);

            for (int j = 0; j < numFields; j++)
            {
                WriteField(writer, testFields[j]);
            }
            writer.CloseDocument();
            Assert.IsTrue(writer.IsDocumentOpen() == false);
        }
예제 #5
0
		private void  WritePostings(Posting[] postings, System.String segment)
		{
			IndexOutput freq = null, prox = null;
			TermInfosWriter tis = null;
			TermVectorsWriter termVectorWriter = null;
			try
			{
				//open files for inverse index storage
				freq = directory.CreateOutput(segment + ".frq");
				prox = directory.CreateOutput(segment + ".prx");
				tis = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);
				TermInfo ti = new TermInfo();
				System.String currentField = null;
				
				for (int i = 0; i < postings.Length; i++)
				{
					Posting posting = postings[i];
					
					// add an entry to the dictionary with pointers to prox and freq files
					ti.Set(1, freq.GetFilePointer(), prox.GetFilePointer(), - 1);
					tis.Add(posting.term, ti);
					
					// add an entry to the freq file
					int postingFreq = posting.freq;
					if (postingFreq == 1)
					// optimize freq=1
						freq.WriteVInt(1);
					// set low bit of doc num.
					else
					{
						freq.WriteVInt(0); // the document number
						freq.WriteVInt(postingFreq); // frequency in doc
					}
					
					int lastPosition = 0; // write positions
					int[] positions = posting.positions;
					for (int j = 0; j < postingFreq; j++)
					{
						// use delta-encoding
						int position = positions[j];
						prox.WriteVInt(position - lastPosition);
						lastPosition = position;
					}
					// check to see if we switched to a new field
					System.String termField = posting.term.Field();
					if (currentField != termField)
					{
						// changing field - see if there is something to save
						currentField = termField;
						FieldInfo fi = fieldInfos.FieldInfo(currentField);
						if (fi.storeTermVector)
						{
							if (termVectorWriter == null)
							{
								termVectorWriter = new TermVectorsWriter(directory, segment, fieldInfos);
								termVectorWriter.OpenDocument();
							}
							termVectorWriter.OpenField(currentField);
						}
						else if (termVectorWriter != null)
						{
							termVectorWriter.CloseField();
						}
					}
					if (termVectorWriter != null && termVectorWriter.IsFieldOpen())
					{
						termVectorWriter.AddTerm(posting.term.Text(), postingFreq, posting.positions, posting.offsets);
					}
				}
				if (termVectorWriter != null)
					termVectorWriter.CloseDocument();
			}
			finally
			{
				// make an effort to close all streams we can but remember and re-throw
				// the first exception encountered in this process
				System.IO.IOException keep = null;
				if (freq != null)
					try
					{
						freq.Close();
					}
					catch (System.IO.IOException e)
					{
						if (keep == null)
							keep = e;
					}
				if (prox != null)
					try
					{
						prox.Close();
					}
					catch (System.IO.IOException e)
					{
						if (keep == null)
							keep = e;
					}
				if (tis != null)
					try
					{
						tis.Close();
					}
					catch (System.IO.IOException e)
					{
						if (keep == null)
							keep = e;
					}
				if (termVectorWriter != null)
					try
					{
						termVectorWriter.Close();
					}
					catch (System.IO.IOException e)
					{
						if (keep == null)
							keep = e;
					}
				if (keep != null)
				{
					throw new System.IO.IOException(keep.StackTrace);
				}
			}
		}
예제 #6
0
		public virtual void  TestWriter()
		{
			try
			{
				TermVectorsWriter writer = new TermVectorsWriter(dir, seg, fieldInfos);
				writer.OpenDocument();
				Assert.IsTrue(writer.IsDocumentOpen() == true);
				WriteField(writer, testFields[0]);
				writer.CloseDocument();
				writer.Close();
				Assert.IsTrue(writer.IsDocumentOpen() == false);
				//Check to see the files were created
				Assert.IsTrue(dir.FileExists(seg + TermVectorsWriter.TVD_EXTENSION));
				Assert.IsTrue(dir.FileExists(seg + TermVectorsWriter.TVX_EXTENSION));
				//Now read it back in
				TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
				Assert.IsTrue(reader != null);
				CheckTermVector(reader, 0, testFields[0]);
			}
			catch (System.IO.IOException e)
			{
				System.Console.Error.WriteLine(e.StackTrace);
				Assert.IsTrue(false);
			}
		}
예제 #7
0
		private void  WriteDocument(TermVectorsWriter writer, int numFields)
		{
			writer.OpenDocument();
			Assert.IsTrue(writer.IsDocumentOpen() == true);
			
			for (int j = 0; j < numFields; j++)
			{
				WriteField(writer, testFields[j]);
			}
			writer.CloseDocument();
			Assert.IsTrue(writer.IsDocumentOpen() == false);
		}
예제 #8
0
        private void  WritePostings(Posting[] postings, System.String segment)
        {
            IndexOutput       freq = null, prox = null;
            TermInfosWriter   tis              = null;
            TermVectorsWriter termVectorWriter = null;

            try
            {
                //open files for inverse index storage
                freq = directory.CreateOutput(segment + ".frq");
                prox = directory.CreateOutput(segment + ".prx");
                tis  = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);
                TermInfo      ti           = new TermInfo();
                System.String currentField = null;

                for (int i = 0; i < postings.Length; i++)
                {
                    Posting posting = postings[i];

                    // add an entry to the dictionary with pointers to prox and freq files
                    ti.Set(1, freq.GetFilePointer(), prox.GetFilePointer(), -1);
                    tis.Add(posting.term, ti);

                    // add an entry to the freq file
                    int postingFreq = posting.freq;
                    if (postingFreq == 1)
                    {
                        // optimize freq=1
                        freq.WriteVInt(1);
                    }
                    // set low bit of doc num.
                    else
                    {
                        freq.WriteVInt(0);                         // the document number
                        freq.WriteVInt(postingFreq);               // frequency in doc
                    }

                    int   lastPosition = 0;                   // write positions
                    int[] positions    = posting.positions;
                    for (int j = 0; j < postingFreq; j++)
                    {
                        // use delta-encoding
                        int position = positions[j];
                        prox.WriteVInt(position - lastPosition);
                        lastPosition = position;
                    }
                    // check to see if we switched to a new field
                    System.String termField = posting.term.Field();
                    if (currentField != termField)
                    {
                        // changing field - see if there is something to save
                        currentField = termField;
                        FieldInfo fi = fieldInfos.FieldInfo(currentField);
                        if (fi.storeTermVector)
                        {
                            if (termVectorWriter == null)
                            {
                                termVectorWriter = new TermVectorsWriter(directory, segment, fieldInfos);
                                termVectorWriter.OpenDocument();
                            }
                            termVectorWriter.OpenField(currentField);
                        }
                        else if (termVectorWriter != null)
                        {
                            termVectorWriter.CloseField();
                        }
                    }
                    if (termVectorWriter != null && termVectorWriter.IsFieldOpen())
                    {
                        termVectorWriter.AddTerm(posting.term.Text(), postingFreq, posting.positions, posting.offsets);
                    }
                }
                if (termVectorWriter != null)
                {
                    termVectorWriter.CloseDocument();
                }
            }
            finally
            {
                // make an effort to close all streams we can but remember and re-throw
                // the first exception encountered in this process
                System.IO.IOException keep = null;
                if (freq != null)
                {
                    try
                    {
                        freq.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (prox != null)
                {
                    try
                    {
                        prox.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (tis != null)
                {
                    try
                    {
                        tis.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (termVectorWriter != null)
                {
                    try
                    {
                        termVectorWriter.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (keep != null)
                {
                    throw new System.IO.IOException(keep.StackTrace);
                }
            }
        }