Exemple #1
0
 /// <summary>
 /// Writes one document holding a single field through a TermVectorsWriter,
 /// verifies that the files named seg + TVD_EXTENSION and seg + TVX_EXTENSION
 /// exist afterwards, and then checks the stored vector through a
 /// TermVectorsReader opened on the same directory and segment.
 /// </summary>
 /// <remarks>
 /// An IOException raised anywhere in the sequence fails the test; the failure
 /// message carries the exception text so the cause shows up in the test report.
 /// </remarks>
 public virtual void  TestWriter()
 {
     try
     {
         TermVectorsWriter writer = new TermVectorsWriter(dir, seg, fieldInfos);
         writer.OpenDocument();
         Assert.IsTrue(writer.IsDocumentOpen());
         WriteField(writer, testFields[0]);
         writer.CloseDocument();
         writer.Close();
         // Once closed, the writer must no longer report an open document.
         Assert.IsTrue(!writer.IsDocumentOpen());
         //Check to see the files were created
         Assert.IsTrue(dir.FileExists(seg + TermVectorsWriter.TVD_EXTENSION));
         Assert.IsTrue(dir.FileExists(seg + TermVectorsWriter.TVX_EXTENSION));
         //Now read it back in
         TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
         Assert.IsTrue(reader != null);
         CheckTermVector(reader, 0, testFields[0]);
     }
     catch (System.IO.IOException e)
     {
         System.Console.Error.WriteLine(e.StackTrace);
         // Fail with the exception text instead of a bare "expected true" message.
         Assert.Fail("Unexpected IOException: " + e);
     }
 }
Exemple #2
0
        /// <summary>
        /// Copies the term vectors of every non-deleted document, reader by reader,
        /// into a new TermVectorsWriter created for the target segment.
        /// </summary>
        /// <remarks>
        /// After each copied document, checkAbort (when set) is notified with a
        /// work amount of 300. The writer is closed even when copying fails.
        /// </remarks>
        /// <exception cref="System.IO.IOException">Declared by the original doc comment.</exception>
        private void  MergeVectors()
        {
            TermVectorsWriter vectorsOut = new TermVectorsWriter(directory, segment, fieldInfos);

            try
            {
                int readerIndex = 0;
                while (readerIndex < readers.Count)
                {
                    IndexReader source   = (IndexReader)readers[readerIndex];
                    int         docLimit = source.MaxDoc();

                    for (int doc = 0; doc < docLimit; doc++)
                    {
                        // Only live documents are carried over.
                        if (!source.IsDeleted(doc))
                        {
                            vectorsOut.AddAllDocVectors(source.GetTermFreqVectors(doc));
                            if (checkAbort != null)
                            {
                                checkAbort.Work(300);
                            }
                        }
                    }

                    readerIndex++;
                }
            }
            finally
            {
                vectorsOut.Close();
            }
        }
Exemple #3
0
 /// <summary>
 /// Writes ten documents through one TermVectorsWriter (each via
 /// WriteDocument(writer, testFields.Length)), then opens a TermVectorsReader
 /// on the same directory and segment and repeatedly spot-checks two vectors.
 /// </summary>
 /// <remarks>
 /// An IOException in either phase fails the test; the failure message carries
 /// the exception text so the cause shows up in the test report.
 /// </remarks>
 public virtual void  TestMultipleDocuments()
 {
     try
     {
         TermVectorsWriter writer = new TermVectorsWriter(dir, seg, fieldInfos);
         Assert.IsTrue(writer != null);
         for (int i = 0; i < 10; i++)
         {
             WriteDocument(writer, testFields.Length);
         }
         writer.Close();
     }
     catch (System.IO.IOException e)
     {
         System.Console.Error.WriteLine(e.StackTrace);
         // Fail with the exception text instead of a bare "expected true" message.
         Assert.Fail("Unexpected IOException while writing: " + e);
     }
     //Do some arbitrary tests
     try
     {
         TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
         // The reader reference does not change inside the loop, so check it once.
         Assert.IsTrue(reader != null);
         for (int i = 0; i < 10; i++)
         {
             CheckTermVector(reader, 5, testFields[0]);
             CheckTermVector(reader, 2, testFields[2]);
         }
     }
     catch (System.IO.IOException e)
     {
         System.Console.Error.WriteLine(e.StackTrace);
         Assert.Fail("Unexpected IOException while reading: " + e);
     }
 }
Exemple #4
0
        /// <summary>
        /// Prepares the fixture: registers every entry of testFields in fieldInfos,
        /// sorts testTerms, and then five times creates a TermVectorsWriter on
        /// (dir, seg, fieldInfos) and writes one document in which every test field
        /// receives every test term.
        /// </summary>
        /// <remarks>
        /// Each term is added with its index in the sorted array as the second
        /// argument of AddTerm (presumably the term frequency; confirm against
        /// TermVectorsWriter). An IOException fails the setup with the exception
        /// text in the failure message.
        /// </remarks>
        protected virtual void  SetUp()
        {
            for (int i = 0; i < testFields.Length; i++)
            {
                fieldInfos.Add(testFields[i], true, true);
            }

            try
            {
                System.Array.Sort(testTerms);
                for (int j = 0; j < 5; j++)
                {
                    // A fresh writer is opened and closed on every pass.
                    writer = new TermVectorsWriter(dir, seg, fieldInfos);
                    writer.OpenDocument();

                    for (int k = 0; k < testFields.Length; k++)
                    {
                        writer.OpenField(testFields[k]);
                        for (int i = 0; i < testTerms.Length; i++)
                        {
                            writer.AddTerm(testTerms[i], i);
                        }
                        writer.CloseField();
                    }
                    writer.CloseDocument();
                    writer.Close();
                }
            }
            catch (System.IO.IOException e)
            {
                System.Console.Error.WriteLine(e.StackTrace);
                // Fail with the exception text instead of a bare "expected true" message.
                Assert.Fail("Unexpected IOException during SetUp: " + e);
            }
        }
		/// <summary>
		/// Prepares the fixture: registers every entry of testFields in fieldInfos,
		/// sorts testTerms, and then five times creates a TermVectorsWriter on
		/// (dir, seg, fieldInfos) and writes one document in which every test field
		/// receives every test term.
		/// </summary>
		/// <remarks>
		/// Each term is added with its index in the sorted array as the second
		/// argument of AddTerm (presumably the term frequency; confirm against
		/// TermVectorsWriter). An IOException fails the setup with the exception
		/// text in the failure message.
		/// </remarks>
		protected virtual void  SetUp()
		{
			for (int i = 0; i < testFields.Length; i++)
			{
				fieldInfos.Add(testFields[i], true, true);
			}
			
			try
			{
				System.Array.Sort(testTerms);
				for (int j = 0; j < 5; j++)
				{
					// A fresh writer is opened and closed on every pass.
					writer = new TermVectorsWriter(dir, seg, fieldInfos);
					writer.OpenDocument();
					
					for (int k = 0; k < testFields.Length; k++)
					{
						writer.OpenField(testFields[k]);
						for (int i = 0; i < testTerms.Length; i++)
						{
							writer.AddTerm(testTerms[i], i);
						}
						writer.CloseField();
					}
					writer.CloseDocument();
					writer.Close();
				}
			}
			catch (System.IO.IOException e)
			{
				System.Console.Error.WriteLine(e.StackTrace);
				// Fail with the exception text instead of a bare "expected true" message.
				Assert.Fail("Unexpected IOException during SetUp: " + e);
			}
		}
        /// <summary> Merge the TermVectors from each of the segments into the new one.</summary>
        /// <remarks>
        /// For every reader, the entry at the same position in matchingSegmentReaders
        /// is consulted: when it is non-null and its original term-vectors reader can
        /// read raw documents, that reader is handed to the copy helper; otherwise
        /// null is passed. Readers with deletions go through CopyVectorsWithDeletions,
        /// the others through CopyVectorsNoDeletions. The writer is always closed.
        /// Afterwards the length of the vectors index file must equal
        /// 4 + 16 * mergedDocs bytes.
        /// </remarks>
        /// <exception cref="System.IO.IOException">Declared by the original doc comment.</exception>
        /// <exception cref="System.InvalidOperationException">
        /// The vectors index file does not have the expected length. This type derives
        /// from System.SystemException, so handlers written for that type still match.
        /// </exception>
        private void  MergeVectors()
        {
            TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos);

            try
            {
                int idx = 0;
                foreach (IndexReader reader in readers)
                {
                    SegmentReader     matchingSegmentReader = matchingSegmentReaders[idx++];
                    TermVectorsReader matchingVectorsReader = null;
                    if (matchingSegmentReader != null)
                    {
                        TermVectorsReader vectorsReader = matchingSegmentReader.GetTermVectorsReaderOrig();

                        // If the TV* files are an older format then they cannot read raw docs:
                        if (vectorsReader != null && vectorsReader.CanReadRawDocs())
                        {
                            matchingVectorsReader = vectorsReader;
                        }
                    }

                    if (reader.HasDeletions())
                    {
                        CopyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, reader);
                    }
                    else
                    {
                        CopyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, reader);
                    }
                }
            }
            finally
            {
                termVectorsWriter.Close();
            }

            System.String fileName = segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
            long          tvxSize  = directory.FileLength(fileName);

            // The cast keeps the size computation in 64-bit arithmetic.
            if (4 + ((long)mergedDocs) * 16 != tvxSize)
            {
                // This is most likely a bug in Sun JRE 1.6.0_04/_05;
                // we detect that the bug has struck, here, and
                // throw an exception to prevent the corruption from
                // entering the index.  See LUCENE-1282 for
                // details.
                throw new System.InvalidOperationException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
            }
        }
Exemple #7
0
		/// <summary> Merge the TermVectors from each of the segments into the new one.</summary>
		/// <remarks>
		/// For every reader, the entry at the same position in matchingSegmentReaders
		/// is consulted: when it is non-null and its original term-vectors reader can
		/// read raw documents, that reader is handed to the copy helper; otherwise
		/// null is passed. Readers with deletions go through CopyVectorsWithDeletions,
		/// the others through CopyVectorsNoDeletions. The writer is always closed.
		/// Afterwards the length of the vectors index file must equal
		/// 4 + 16 * mergedDocs bytes.
		/// </remarks>
		/// <exception cref="System.IO.IOException">Declared by the original doc comment.</exception>
		/// <exception cref="System.InvalidOperationException">
		/// The vectors index file does not have the expected length. This type derives
		/// from System.SystemException, so handlers written for that type still match.
		/// </exception>
		private void  MergeVectors()
		{
			TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos);
			
			try
			{
				int idx = 0;
				foreach (IndexReader reader in readers)
				{
					SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
					TermVectorsReader matchingVectorsReader = null;
					if (matchingSegmentReader != null)
					{
						TermVectorsReader vectorsReader = matchingSegmentReader.GetTermVectorsReaderOrig();
						
						// If the TV* files are an older format then they cannot read raw docs:
						if (vectorsReader != null && vectorsReader.CanReadRawDocs())
						{
							matchingVectorsReader = vectorsReader;
						}
					}
					
					if (reader.HasDeletions())
					{
						CopyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, reader);
					}
					else
					{
						CopyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, reader);
					}
				}
			}
			finally
			{
				termVectorsWriter.Close();
			}
			
			System.String fileName = segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
			long tvxSize = directory.FileLength(fileName);
			
			// The cast keeps the size computation in 64-bit arithmetic.
			if (4 + ((long) mergedDocs) * 16 != tvxSize)
			{
				// This is most likely a bug in Sun JRE 1.6.0_04/_05;
				// we detect that the bug has struck, here, and
				// throw an exception to prevent the corruption from
				// entering the index.  See LUCENE-1282 for
				// details.
				throw new System.InvalidOperationException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
			}
		}
		/// <summary>
		/// Writes one document holding a single field through a TermVectorsWriter,
		/// verifies that the files named seg + TVD_EXTENSION and seg + TVX_EXTENSION
		/// exist afterwards, and then checks the stored vector through a
		/// TermVectorsReader opened on the same directory and segment.
		/// </summary>
		/// <remarks>
		/// An IOException raised anywhere in the sequence fails the test; the failure
		/// message carries the exception text so the cause shows up in the test report.
		/// </remarks>
		public virtual void  TestWriter()
		{
			try
			{
				TermVectorsWriter writer = new TermVectorsWriter(dir, seg, fieldInfos);
				writer.OpenDocument();
				Assert.IsTrue(writer.IsDocumentOpen());
				WriteField(writer, testFields[0]);
				writer.CloseDocument();
				writer.Close();
				// Once closed, the writer must no longer report an open document.
				Assert.IsTrue(!writer.IsDocumentOpen());
				//Check to see the files were created
				Assert.IsTrue(dir.FileExists(seg + TermVectorsWriter.TVD_EXTENSION));
				Assert.IsTrue(dir.FileExists(seg + TermVectorsWriter.TVX_EXTENSION));
				//Now read it back in
				TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
				Assert.IsTrue(reader != null);
				CheckTermVector(reader, 0, testFields[0]);
			}
			catch (System.IO.IOException e)
			{
				System.Console.Error.WriteLine(e.StackTrace);
				// Fail with the exception text instead of a bare "expected true" message.
				Assert.Fail("Unexpected IOException: " + e);
			}
		}
		/// <summary>
		/// Writes ten documents through one TermVectorsWriter (each via
		/// WriteDocument(writer, testFields.Length)), then opens a TermVectorsReader
		/// on the same directory and segment and repeatedly spot-checks two vectors.
		/// </summary>
		/// <remarks>
		/// An IOException in either phase fails the test; the failure message carries
		/// the exception text so the cause shows up in the test report.
		/// </remarks>
		public virtual void  TestMultipleDocuments()
		{
			
			try
			{
				TermVectorsWriter writer = new TermVectorsWriter(dir, seg, fieldInfos);
				Assert.IsTrue(writer != null);
				for (int i = 0; i < 10; i++)
				{
					WriteDocument(writer, testFields.Length);
				}
				writer.Close();
			}
			catch (System.IO.IOException e)
			{
				System.Console.Error.WriteLine(e.StackTrace);
				// Fail with the exception text instead of a bare "expected true" message.
				Assert.Fail("Unexpected IOException while writing: " + e);
			}
			//Do some arbitrary tests
			try
			{
				TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
				// The reader reference does not change inside the loop, so check it once.
				Assert.IsTrue(reader != null);
				for (int i = 0; i < 10; i++)
				{
					CheckTermVector(reader, 5, testFields[0]);
					CheckTermVector(reader, 2, testFields[2]);
				}
			}
			catch (System.IO.IOException e)
			{
				System.Console.Error.WriteLine(e.StackTrace);
				Assert.Fail("Unexpected IOException while reading: " + e);
			}
		}
        /// <summary>
        /// Copies the term vectors of every non-deleted document, reader by reader,
        /// into a new TermVectorsWriter created for the target segment.
        /// </summary>
        /// <remarks>The writer is closed even when copying fails.</remarks>
        /// <exception cref="System.IO.IOException">Declared by the original doc comment.</exception>
        private void MergeVectors()
        {
            TermVectorsWriter vectorsOut = new TermVectorsWriter(directory, segment, fieldInfos);

            try
            {
                int readerIndex = 0;
                while (readerIndex < readers.Count)
                {
                    IndexReader source = (IndexReader) readers[readerIndex];
                    int docLimit = source.MaxDoc();

                    for (int doc = 0; doc < docLimit; doc++)
                    {
                        // Only live documents are carried over.
                        if (!source.IsDeleted(doc))
                        {
                            vectorsOut.AddAllDocVectors(source.GetTermFreqVectors(doc));
                        }
                    }

                    readerIndex++;
                }
            }
            finally
            {
                vectorsOut.Close();
            }
        }
        /// <summary> Merge the TermVectors from each of the segments into the new one.</summary>
        /// <remarks>
        /// For each reader, the entry at the same index in matchingSegmentReaders decides
        /// the copy strategy: when it is non-null and its termVectorsReaderOrig can read
        /// raw documents, runs of live documents are copied in bulk via RawDocs /
        /// AddRawDocuments; otherwise each live document's vectors are fetched with
        /// GetTermFreqVectors and written with AddAllDocVectors. Deleted documents are
        /// skipped. checkAbort, when set, is notified with 300 units per copied document.
        /// The writer is closed in all cases.
        /// </remarks>
        /// <throws>  IOException </throws>
        private void MergeVectors()
        {
            TermVectorsWriter termVectorsWriter =
              new TermVectorsWriter(directory, segment, fieldInfos);

            try
            {
                for (int r = 0; r < readers.Count; r++)
                {
                    // Work out whether a raw-capable vectors reader exists for reader r.
                    SegmentReader matchingSegmentReader = matchingSegmentReaders[r];
                    TermVectorsReader matchingVectorsReader;
                    bool hasMatchingReader;
                    if (matchingSegmentReader != null)
                    {
                        matchingVectorsReader = matchingSegmentReader.termVectorsReaderOrig;

                        // If the TV* files are an older format then they
                        // cannot read raw docs:
                        if (matchingVectorsReader != null && !matchingVectorsReader.CanReadRawDocs())
                        {
                            matchingVectorsReader = null;
                            hasMatchingReader = false;
                        }
                        else
                            hasMatchingReader = matchingVectorsReader != null;

                    }
                    else
                    {
                        hasMatchingReader = false;
                        matchingVectorsReader = null;
                    }
                    IndexReader reader = (IndexReader)readers[r];
                    bool hasDeletions = reader.HasDeletions();
                    int maxDoc = reader.MaxDoc();
                    // No increment clause: every branch of the body advances docNum itself.
                    for (int docNum = 0; docNum < maxDoc; )
                    {
                        // skip deleted docs
                        if (!hasDeletions || !reader.IsDeleted(docNum))
                        {
                            if (hasMatchingReader)
                            {
                                // We can optimize this case (doing a bulk
                                // byte copy) since the field numbers are
                                // identical
                                int start = docNum;
                                int numDocs = 0;
                                // Grow the run [start, start + numDocs) one document at a time.
                                // It ends at maxDoc, at the next deleted document (which is
                                // stepped over by the extra docNum++), or once numDocs reaches
                                // MAX_RAW_MERGE_DOCS.
                                do
                                {
                                    docNum++;
                                    numDocs++;
                                    if (docNum >= maxDoc)
                                        break;
                                    if (hasDeletions && matchingSegmentReader.IsDeleted(docNum))
                                    {
                                        docNum++;
                                        break;
                                    }
                                } while (numDocs < MAX_RAW_MERGE_DOCS);

                                // Copy the whole run in one call pair.
                                matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
                                termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
                                if (checkAbort != null)
                                    checkAbort.Work(300 * numDocs);
                            }
                            else
                            {
                                // NOTE: it's very important to first assign
                                // to vectors then pass it to
                                // termVectorsWriter.addAllDocVectors; see
                                // LUCENE-1282
                                TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
                                termVectorsWriter.AddAllDocVectors(vectors);
                                docNum++;
                                if (checkAbort != null)
                                    checkAbort.Work(300);
                            }
                        }
                        else
                            docNum++;
                    }
                }
            }
            finally
            {
                termVectorsWriter.Close();
            }

            // The index file length is still queried, but with the check below commented
            // out the value is not used for anything else in this method.
            long tvxSize = directory.FileLength(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);

            // {{dougsale-2.4.0}
            // this shouldn't be a problem for us - if it is,
            // then it's not a JRE bug
            //if (4 + mergedDocs * 16 != tvxSize)
            //  // This is most likely a bug in Sun JRE 1.6.0_04/_05;
            //  // we detect that the bug has struck, here, and
            //  // throw an exception to prevent the corruption from
            //  // entering the index.  See LUCENE-1282 for
            //  // details.
            //  throw new RuntimeException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + "; now aborting this merge to prevent index corruption");
        }
Exemple #12
0
        /// <summary> Merge the TermVectors from each of the segments into the new one.</summary>
        /// <remarks>
        /// For each reader, the entry at the same index in matchingSegmentReaders decides
        /// the copy strategy: when it is non-null and its termVectorsReaderOrig can read
        /// raw documents, runs of live documents are copied in bulk via RawDocs /
        /// AddRawDocuments; otherwise each live document's vectors are fetched with
        /// GetTermFreqVectors and written with AddAllDocVectors. Deleted documents are
        /// skipped. checkAbort, when set, is notified with 300 units per copied document.
        /// The writer is closed in all cases.
        /// </remarks>
        /// <throws>  IOException </throws>
        private void MergeVectors()
        {
            TermVectorsWriter termVectorsWriter =
                new TermVectorsWriter(directory, segment, fieldInfos);

            try
            {
                for (int r = 0; r < readers.Count; r++)
                {
                    // Work out whether a raw-capable vectors reader exists for reader r.
                    SegmentReader     matchingSegmentReader = matchingSegmentReaders[r];
                    TermVectorsReader matchingVectorsReader;
                    bool hasMatchingReader;
                    if (matchingSegmentReader != null)
                    {
                        matchingVectorsReader = matchingSegmentReader.termVectorsReaderOrig;

                        // If the TV* files are an older format then they
                        // cannot read raw docs:
                        if (matchingVectorsReader != null && !matchingVectorsReader.CanReadRawDocs())
                        {
                            matchingVectorsReader = null;
                            hasMatchingReader     = false;
                        }
                        else
                        {
                            hasMatchingReader = matchingVectorsReader != null;
                        }
                    }
                    else
                    {
                        hasMatchingReader     = false;
                        matchingVectorsReader = null;
                    }
                    IndexReader reader       = (IndexReader)readers[r];
                    bool        hasDeletions = reader.HasDeletions();
                    int         maxDoc       = reader.MaxDoc();
                    // No increment clause: every branch of the body advances docNum itself.
                    for (int docNum = 0; docNum < maxDoc;)
                    {
                        // skip deleted docs
                        if (!hasDeletions || !reader.IsDeleted(docNum))
                        {
                            if (hasMatchingReader)
                            {
                                // We can optimize this case (doing a bulk
                                // byte copy) since the field numbers are
                                // identical
                                int start   = docNum;
                                int numDocs = 0;
                                // Grow the run [start, start + numDocs) one document at a time.
                                // It ends at maxDoc, at the next deleted document (which is
                                // stepped over by the extra docNum++), or once numDocs reaches
                                // MAX_RAW_MERGE_DOCS.
                                do
                                {
                                    docNum++;
                                    numDocs++;
                                    if (docNum >= maxDoc)
                                    {
                                        break;
                                    }
                                    if (hasDeletions && matchingSegmentReader.IsDeleted(docNum))
                                    {
                                        docNum++;
                                        break;
                                    }
                                } while (numDocs < MAX_RAW_MERGE_DOCS);

                                // Copy the whole run in one call pair.
                                matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
                                termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
                                if (checkAbort != null)
                                {
                                    checkAbort.Work(300 * numDocs);
                                }
                            }
                            else
                            {
                                // NOTE: it's very important to first assign
                                // to vectors then pass it to
                                // termVectorsWriter.addAllDocVectors; see
                                // LUCENE-1282
                                TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
                                termVectorsWriter.AddAllDocVectors(vectors);
                                docNum++;
                                if (checkAbort != null)
                                {
                                    checkAbort.Work(300);
                                }
                            }
                        }
                        else
                        {
                            docNum++;
                        }
                    }
                }
            }
            finally
            {
                termVectorsWriter.Close();
            }

            // The index file length is still queried, but with the check below commented
            // out the value is not used for anything else in this method.
            long tvxSize = directory.FileLength(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);

            // {{dougsale-2.4.0}
            // this shouldn't be a problem for us - if it is,
            // then it's not a JRE bug
            //if (4 + mergedDocs * 16 != tvxSize)
            //  // This is most likely a bug in Sun JRE 1.6.0_04/_05;
            //  // we detect that the bug has struck, here, and
            //  // throw an exception to prevent the corruption from
            //  // entering the index.  See LUCENE-1282 for
            //  // details.
            //  throw new RuntimeException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + "; now aborting this merge to prevent index corruption");
        }
		/// <summary> Merge the TermVectors from each of the segments into the new one.</summary>
		/// <remarks>
		/// Copies the term vectors of every non-deleted document, reader by reader,
		/// into a new TermVectorsWriter; checkAbort (when set) is notified with 300
		/// units per copied document, and the writer is always closed. A debug-only
		/// assertion then checks that the vectors index file is 4 + 8 * mergedDocs
		/// bytes long.
		/// </remarks>
		/// <exception cref="System.IO.IOException">Declared by the original doc comment.</exception>
		private void  MergeVectors()
		{
			TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos);
			
			try
			{
				for (int r = 0; r < readers.Count; r++)
				{
					IndexReader reader = (IndexReader) readers[r];
					int maxDoc = reader.MaxDoc();
					for (int docNum = 0; docNum < maxDoc; docNum++)
					{
						// skip deleted docs
						if (reader.IsDeleted(docNum))
						{
							continue;
						}
						termVectorsWriter.AddAllDocVectors(reader.GetTermFreqVectors(docNum));
						if (checkAbort != null)
						{
							checkAbort.Work(300);
						}
					}
				}
			}
			finally
			{
				termVectorsWriter.Close();
			}

			// The expected size is computed in 64-bit arithmetic: the file length is a
			// long, and a 32-bit product could wrap for very large mergedDocs values and
			// make the assertion fire spuriously. Everything stays inside the
			// Debug.Assert call so release builds still evaluate none of it.
			System.Diagnostics.Debug.Assert(4 + ((long) mergedDocs) * 8 == directory.FileLength(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION),
				"after MergeVectors: tvx size mismatch: " + mergedDocs + " docs vs " +
				directory.FileLength(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION) +
				" length in bytes of " + segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
		}