internal override void CloseDocStore(SegmentWriteState state)
{
    lock (this)
    {
        if (tvx != null)
        {
            // At least one doc in this run had term vectors enabled
            Fill(state.numDocsInStore - docWriter.DocStoreOffset);
            tvx.Close();
            tvf.Close();
            tvd.Close();
            tvx = null;

            System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);
            System.String fileName = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;

            // The .tvx file holds a 4-byte format header plus two 8-byte pointers
            // (into .tvd and .tvf) per document, hence 4 + numDocsInStore * 16.
            if (4 + ((long) state.numDocsInStore) * 16 != state.directory.FileLength(fileName))
            {
                throw new System.SystemException("after flush: tvx size mismatch: " + state.numDocsInStore +
                    " docs vs " + state.directory.FileLength(fileName) + " length in bytes of " + fileName +
                    " file exists?=" + state.directory.FileExists(fileName));
            }

            state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
            state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
            state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);

            docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
            docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
            docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);

            lastDocID = 0;
        }
    }
}
public void Dispose()
{
    // Move to a protected method if this class becomes unsealed.
    if (doClose)
    {
        try
        {
            if (fieldsStream != null)
            {
                try { fieldsStream.Close(); }
                finally { fieldsStream = null; }
            }
        }
        catch (System.IO.IOException)
        {
            try
            {
                if (indexStream != null)
                {
                    try { indexStream.Close(); }
                    finally { indexStream = null; }
                }
            }
            catch (System.IO.IOException)
            {
                // Ignore so we throw only the first IOException hit
            }
            throw;
        }
        finally
        {
            if (indexStream != null)
            {
                try { indexStream.Close(); }
                finally { indexStream = null; }
            }
        }
    }
}
public void Dispose()
{
    // Move to a protected method if this class becomes unsealed.
    if (out_Renamed != null)
    {
        out_Renamed.Close();
    }
}
public override void Close()
{
    _fileMutex.WaitOne();
    try
    {
        var fileName = _name;

        // Make sure it's all written out.
        // We only check for null here in case Close is called multiple times.
        if (_cacheDirIndexOutput != null)
        {
            _cacheDirIndexOutput.Flush();
            _cacheDirIndexOutput.Close();

            IndexInput cacheInput = null;
            try
            {
                cacheInput = CacheDirectory.OpenInput(fileName);
            }
            catch (IOException)
            {
                // This occurs if the file doesn't exist. We previously threw in that case,
                // so keep doing that for now; it is also quicker than first checking for
                // existence and then opening.
                throw;
            }

            if (cacheInput != null)
            {
                IndexOutput masterOutput = null;
                try
                {
                    masterOutput = MasterDirectory.CreateOutput(fileName);
                    cacheInput.CopyTo(masterOutput, fileName);
                }
                finally
                {
                    masterOutput?.Close();
                    cacheInput?.Close();
                }
            }

#if FULLDEBUG
            Trace.WriteLine($"CLOSED WRITESTREAM {_name}");
#endif

            // clean up
            _cacheDirIndexOutput = null;
        }

        GC.SuppressFinalize(this);
    }
    finally
    {
        _fileMutex.ReleaseMutex();
    }
}
private void MergeNorms()
{
    byte[] normBuffer = null;
    IndexOutput output = null;
    try
    {
        int numFieldInfos = fieldInfos.Size();
        for (int i = 0; i < numFieldInfos; i++)
        {
            FieldInfo fi = fieldInfos.FieldInfo(i);
            if (fi.isIndexed && !fi.omitNorms)
            {
                if (output == null)
                {
                    output = directory.CreateOutput(segment + "." + IndexFileNames.NORMS_EXTENSION);
                    output.WriteBytes(NORMS_HEADER, NORMS_HEADER.Length);
                }
                foreach (IndexReader reader in readers)
                {
                    int maxDoc = reader.MaxDoc;
                    if (normBuffer == null || normBuffer.Length < maxDoc)
                    {
                        // the buffer is too small for the current segment
                        normBuffer = new byte[maxDoc];
                    }
                    reader.Norms(fi.name, normBuffer, 0);
                    if (!reader.HasDeletions)
                    {
                        // optimized case for segments without deleted docs
                        output.WriteBytes(normBuffer, maxDoc);
                    }
                    else
                    {
                        // this segment has deleted docs, so we have to
                        // check for every doc if it is deleted or not
                        for (int k = 0; k < maxDoc; k++)
                        {
                            if (!reader.IsDeleted(k))
                            {
                                output.WriteByte(normBuffer[k]);
                            }
                        }
                    }
                    checkAbort.Work(maxDoc);
                }
            }
        }
    }
    finally
    {
        if (output != null)
        {
            output.Close();
        }
    }
}
public override void Close()
{
    _fileMutex.WaitOne();
    try
    {
        string fileName = _name;

        // make sure it's all written out
        _indexOutput.Flush();
        long originalLength = _indexOutput.Length();
        _indexOutput.Close();

        Stream blobStream;

        // optionally put a compressor around the blob stream
        if (_azureDirectory.ShouldCompressFile(_name))
        {
            blobStream = CompressStream(fileName, originalLength);
        }
        else
        {
            blobStream = new StreamInput(CacheDirectory.OpenInput(fileName));
        }

        try
        {
            // push the blobStream up to the cloud
            _blob.UploadFromStream(blobStream);

            // set the metadata with the original index file properties
            _blob.Metadata["CachedLength"] = originalLength.ToString();
            _blob.Metadata["CachedLastModified"] = CacheDirectory.FileModified(fileName).ToString();
            _blob.SetMetadata();

#if FULLDEBUG
            Trace.WriteLine($"PUT {blobStream.Length} bytes to {_name} in cloud");
#endif
        }
        finally
        {
            blobStream.Dispose();
        }

#if FULLDEBUG
        Trace.WriteLine($"CLOSED WRITESTREAM {_name}");
#endif

        // clean up
        _indexOutput = null;
        _blobContainer = null;
        _blob = null;
        GC.SuppressFinalize(this);
    }
    finally
    {
        _fileMutex.ReleaseMutex();
    }
}
public void Write(Directory d, System.String name)
{
    IndexOutput output = d.CreateOutput(name);
    try
    {
        Write(output);
    }
    finally
    {
        output.Close();
    }
}
/// <summary>Close all streams. </summary>
public void Dispose()
{
    // Move to a protected method if this class becomes unsealed.
    // Make an effort to close all streams we can, but remember and re-throw
    // the first exception encountered in the process.
    System.IO.IOException keep = null;
    if (tvx != null)
    {
        try { tvx.Close(); }
        catch (System.IO.IOException e) { keep = e; }
    }
    if (tvd != null)
    {
        try { tvd.Close(); }
        catch (System.IO.IOException e) { if (keep == null) keep = e; }
    }
    if (tvf != null)
    {
        try { tvf.Close(); }
        catch (System.IO.IOException e) { if (keep == null) keep = e; }
    }
    if (keep != null)
    {
        // Re-throw the first exception, keeping the original as the inner exception
        // rather than burying it as the message of a new IOException.
        throw new System.IO.IOException(keep.Message, keep);
    }
}
public override void Close()
{
    _fileMutex.WaitOne();
    try
    {
        string fileName = _name;

        // make sure it's all written out
        if (_indexOutput != null)
        {
            _indexOutput.Flush();
            _indexOutput.Close();
        }

        if (CacheDirectory.FileExists(fileName))
        {
            // open a stream to read the cache file and push it up to the master
            using (var cacheStream = new StreamInput(CacheDirectory.OpenInput(fileName)))
            using (var masterStream = new StreamOutput(MasterDirectory.CreateOutput(fileName)))
            {
                cacheStream.CopyTo(masterStream);
                masterStream.Flush();
                Trace.WriteLine(string.Format("PUT {1} bytes to {0} in cloud", _name, cacheStream.Length));
            }

            // Sync the last file write times - at least get them close.
            // TODO: The alternative would be to force both directory instances to be FSDirectory,
            // or try casting the master directory to FSDirectory to get the raw FileInfo and manually
            // set the last-modified time - this should work though.
            MasterDirectory.TouchFile(fileName);
            CacheDirectory.TouchFile(fileName);

#if FULLDEBUG
            Debug.WriteLine(string.Format("CLOSED WRITESTREAM {0}", _name));
#endif
        }

        // clean up
        _indexOutput = null;
        GC.SuppressFinalize(this);
    }
    finally
    {
        _fileMutex.ReleaseMutex();
    }
}
/// <summary>Writes this vector to the file <c>name</c> in Directory
/// <c>d</c>, in a format that can be read by the constructor
/// <see cref="BitVector(Directory, String)" />.
/// </summary>
public void Write(Directory d, System.String name)
{
    IndexOutput output = d.CreateOutput(name);
    try
    {
        if (IsSparse())
        {
            // a sparse bit-set is saved more efficiently as d-gaps
            WriteDgaps(output);
        }
        else
        {
            WriteBits(output);
        }
    }
    finally
    {
        output.Close();
    }
}
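// Usage sketch (illustrative, not part of the original source): persisting a deletions
// bit vector with Write(Directory, String) and loading it back through the
// BitVector(Directory, String) constructor referenced in the doc comment above.
// The Set(int)/Get(int) members, file name, and helper name are assumptions for the example.
internal static void BitVectorRoundTripSketch(Directory directory, int maxDoc)
{
    var deleted = new BitVector(maxDoc);
    deleted.Set(42);                      // mark doc 42 as deleted
    deleted.Write(directory, "_1.del");   // picks d-gaps or plain bits internally

    var reloaded = new BitVector(directory, "_1.del");
    System.Diagnostics.Debug.Assert(reloaded.Get(42));
}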
/// <summary>
/// This will sync the requested file from master storage to the local fast cache storage
/// </summary>
/// <param name="fileName"></param>
private void SyncLocally(string fileName)
{
    // get the master file stream
    IndexInput masterInput = null;
    try
    {
        masterInput = MasterDirectory.OpenInput(fileName);
    }
    catch (IOException ex)
    {
        // This will be a file not found (FileNotFoundException).
        // TODO: It has been seen that OpenInput on the master can throw because a Lucene file is not found - which is very odd.
        // We need to check whether the master is being written to first, before the sync dir. If the file exists in neither
        // the master nor the sync dir, then something has gone wrong that shouldn't happen, and we'll need to deal with it
        // differently because the index will be in a state where it's just not readable.
        // Hrmmm, what to do? There's actually nothing that can be done :/ If we returned false here, the instance of this item
        // would be null, which would then cause exceptions further on and take down the app pool anyway. I've looked through
        // the Lucene source and there is no safety net against this situation; it just happily throws exceptions on a
        // background thread.
        _loggingService.Log(new LogEntry(LogLevel.Error, ex, "File not found"));
        throw; // rethrow without resetting the stack trace
    }

    if (masterInput != null)
    {
        IndexOutput cacheOutput = null;
        try
        {
            cacheOutput = CacheDirectory.CreateOutput(fileName);
            masterInput.CopyTo(cacheOutput, fileName);
        }
        finally
        {
            cacheOutput?.Close();
            masterInput?.Close();
        }
    }
}
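// Sketch (assumption, not the project's actual helper): the CopyTo(IndexOutput, string)
// calls used above presumably resolve to an extension method that streams an IndexInput
// into an IndexOutput through a fixed-size buffer, roughly like this. The class and
// parameter names here are illustrative only.
internal static class IndexInputCopyExtensionsSketch
{
    public static void CopyTo(this IndexInput input, IndexOutput output, string fileName)
    {
        // fileName is kept only to match the call sites above; a real helper might use it
        // for error reporting.
        var buffer = new byte[16384];
        long remaining = input.Length();
        while (remaining > 0)
        {
            int chunk = (int) Math.Min(buffer.Length, remaining);
            input.ReadBytes(buffer, 0, chunk);
            output.WriteBytes(buffer, 0, chunk);
            remaining -= chunk;
        }
        output.Flush();
    }
}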
/// <summary>Merge files with the extensions added up to now.
/// All files with these extensions are combined sequentially into the
/// compound stream. After a successful merge, the source files
/// are deleted.
/// </summary>
/// <throws>IllegalStateException if close() had been called before, or
/// if no file has been added to this object
/// </throws>
public void Dispose()
{
    // Extract into a protected method if this class ever becomes unsealed.
    // TODO: Dispose shouldn't throw exceptions!
    if (merged)
    {
        throw new SystemException("Merge already performed");
    }
    if (entries.Count == 0)
    {
        throw new SystemException("No entries to merge have been defined");
    }

    merged = true;

    // open the compound stream
    IndexOutput os = null;
    try
    {
        os = directory.CreateOutput(fileName);

        // Write the number of entries
        os.WriteVInt(entries.Count);

        // Write the directory with all offsets at 0.
        // Remember the positions of directory entries so that we can
        // adjust the offsets later
        long totalSize = 0;
        foreach (FileEntry fe in entries)
        {
            fe.directoryOffset = os.FilePointer;
            os.WriteLong(0); // for now
            os.WriteString(fe.file);
            totalSize += directory.FileLength(fe.file);
        }

        // Pre-allocate size of file as optimization --
        // this can potentially help IO performance as
        // we write the file and also later during
        // searching. It also uncovers a disk-full
        // situation earlier and hopefully without
        // actually filling disk to 100%:
        long finalLength = totalSize + os.FilePointer;
        os.SetLength(finalLength);

        // Open the files and copy their data into the stream.
        // Remember the locations of each file's data section.
        var buffer = new byte[16384];
        foreach (FileEntry fe in entries)
        {
            fe.dataOffset = os.FilePointer;
            CopyFile(fe, os, buffer);
        }

        // Write the data offsets into the directory of the compound stream
        foreach (FileEntry fe in entries)
        {
            os.Seek(fe.directoryOffset);
            os.WriteLong(fe.dataOffset);
        }

        System.Diagnostics.Debug.Assert(finalLength == os.Length);

        // Close the output stream. Set os to null before trying to
        // close so that if an exception occurs during the close, the
        // finally clause below will not attempt to close the stream
        // a second time.
        IndexOutput tmp = os;
        os = null;
        tmp.Close();
    }
    finally
    {
        if (os != null)
        {
            try { os.Close(); }
            catch (System.IO.IOException) { }
        }
    }
}
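// Usage sketch (assumed API based on the standard Lucene.Net CompoundFileWriter, not
// quoted from this source): the writer is created for a target .cfs file, the per-segment
// files are registered with AddFile, and Dispose() above performs the actual merge and
// writes the compound stream. Segment and file names here are illustrative.
internal static void BuildCompoundFileSketch(Directory dir)
{
    var writer = new CompoundFileWriter(dir, "_1.cfs");
    writer.AddFile("_1.fdt"); // stored fields data
    writer.AddFile("_1.fdx"); // stored fields index
    writer.AddFile("_1.nrm"); // norms
    writer.Dispose();         // merges all registered files into _1.cfs
}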
/// <summary>
/// Closes the sql index output.
/// </summary>
public override void Close()
{
    string fileName = _fileName;

    // make sure it's all written out
    _indexOutput.Flush();
    long originalLength = _indexOutput.Length();
    _indexOutput.Close();

    Stream fileStream = new StreamInput(CacheDirectory.OpenInput(fileName));
    try
    {
        // push the file stream up to the db
        ICommandBuilder builder = _sqlStorageProviderUtility.GetCommandBuilder2();
        DbConnection connection = builder.GetConnection(_connString);
        DbTransaction transaction = _sqlStorageProviderUtility.BeginTransaction(connection);

        QueryBuilder queryBuilder = new QueryBuilder(builder);

        //bool fileExists = FileExists(transaction, _wiki, _fileName);

        // To achieve decent performance, an UPDATE query is issued if the file exists,
        // otherwise an INSERT query is issued
        string query;
        List<Parameter> parameters;

        byte[] fileData = null;
        int size = Tools.ReadStream(fileStream, ref fileData, MaxFileSize);
        if (size < 0)
        {
            _sqlStorageProviderUtility.RollbackTransaction(transaction);
            throw new ArgumentException("Source Stream contains too much data", "sourceStream");
        }

        //if(fileExists) {
        //    query = queryBuilder.Update("SearchIndex", new string[] { "Size", "LastModified", "Data" }, new string[] { "Size", "LastModified", "Data" });
        //    query = queryBuilder.Where(query, "Wiki", WhereOperator.Equals, "Wiki");
        //    query = queryBuilder.AndWhere(query, "Name", WhereOperator.Equals, "Name");
        //    parameters = new List<Parameter>(5);
        //    parameters.Add(new Parameter(ParameterType.String, "Wiki", _wiki));
        //    parameters.Add(new Parameter(ParameterType.Int64, "Size", (long)originalLength));
        //    parameters.Add(new Parameter(ParameterType.DateTime, "LastModified", DateTime.Now.ToUniversalTime()));
        //    parameters.Add(new Parameter(ParameterType.ByteArray, "Data", fileData));
        //    parameters.Add(new Parameter(ParameterType.String, "Name", _fileName));
        //}
        //else {
        query = queryBuilder.InsertInto("SearchIndex",
            new string[] { "Wiki", "Name", "Size", "LastModified", "Data" },
            new string[] { "Wiki", "Name", "Size", "LastModified", "Data" });
        parameters = new List<Parameter>(5);
        parameters.Add(new Parameter(ParameterType.String, "Wiki", _wiki));
        parameters.Add(new Parameter(ParameterType.String, "Name", _fileName));
        parameters.Add(new Parameter(ParameterType.Int64, "Size", (long)originalLength));
        parameters.Add(new Parameter(ParameterType.DateTime, "LastModified", DateTime.Now.ToUniversalTime()));
        parameters.Add(new Parameter(ParameterType.ByteArray, "Data", fileData));
        //}

        DbCommand command = builder.GetCommand(transaction, query, parameters);

        int rows = _sqlStorageProviderUtility.ExecuteNonQuery(command, false);
        if (rows == 1)
        {
            _sqlStorageProviderUtility.CommitTransaction(transaction);
        }
        else
        {
            _sqlStorageProviderUtility.RollbackTransaction(transaction);
        }
    }
    finally
    {
        fileStream.Dispose();
    }

    // clean up
    _indexOutput = null;
    GC.SuppressFinalize(this);
}
internal void FinishCommit(Directory dir)
{
    if (pendingSegnOutput == null)
    {
        throw new System.SystemException("prepareCommit was not called");
    }

    bool success = false;
    try
    {
        pendingSegnOutput.FinishCommit();
        pendingSegnOutput.Close();
        pendingSegnOutput = null;
        success = true;
    }
    finally
    {
        if (!success)
        {
            RollbackCommit(dir);
        }
    }

    // NOTE: if we crash here, we have left a segments_N
    // file in the directory in a possibly corrupt state (if
    // some bytes made it to stable storage and others
    // didn't). But, the segments_N file includes checksum
    // at the end, which should catch this case. So when a
    // reader tries to read it, it will throw a
    // CorruptIndexException, which should cause the retry
    // logic in SegmentInfos to kick in and load the last
    // good (previous) segments_N-1 file.
    System.String fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation);

    success = false;
    try
    {
        dir.Sync(fileName);
        success = true;
    }
    finally
    {
        if (!success)
        {
            try
            {
                dir.DeleteFile(fileName);
            }
            catch (System.Exception)
            {
                // Suppress so we keep throwing the original exception
            }
        }
    }

    lastGeneration = generation;

    try
    {
        IndexOutput genOutput = dir.CreateOutput(IndexFileNames.SEGMENTS_GEN);
        try
        {
            genOutput.WriteInt(FORMAT_LOCKLESS);
            genOutput.WriteLong(generation);
            genOutput.WriteLong(generation);
        }
        finally
        {
            genOutput.Close();
        }
    }
    catch (System.Exception)
    {
        // It's OK if we fail to write this file since it's
        // used only as one of the retry fallbacks.
    }
}
/// <summary>Produce _X.nrm if any document had a field with norms
/// not disabled
/// </summary>
public override void Flush(IDictionary<InvertedDocEndConsumerPerThread, ICollection<InvertedDocEndConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    IDictionary<FieldInfo, IList<NormsWriterPerField>> byField = new HashMap<FieldInfo, IList<NormsWriterPerField>>();

    // Typically, each thread will have encountered the same
    // field. So first we collate by field, ie, all
    // per-thread field instances that correspond to the
    // same FieldInfo
    foreach (var entry in threadsAndFields)
    {
        ICollection<InvertedDocEndConsumerPerField> fields = entry.Value;
        IEnumerator<InvertedDocEndConsumerPerField> fieldsIt = fields.GetEnumerator();
        var fieldsToRemove = new HashSet<NormsWriterPerField>();
        while (fieldsIt.MoveNext())
        {
            NormsWriterPerField perField = (NormsWriterPerField) fieldsIt.Current;

            if (perField.upto > 0)
            {
                // It has some norms
                IList<NormsWriterPerField> l = byField[perField.fieldInfo];
                if (l == null)
                {
                    l = new List<NormsWriterPerField>();
                    byField[perField.fieldInfo] = l;
                }
                l.Add(perField);
            }
            else
            {
                // Remove this field since we haven't seen it
                // since the previous flush
                fieldsToRemove.Add(perField);
            }
        }
        foreach (var field in fieldsToRemove)
        {
            fields.Remove(field);
        }
    }

    System.String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;
    state.flushedFiles.Add(normsFileName);
    IndexOutput normsOut = state.directory.CreateOutput(normsFileName);

    try
    {
        normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);

        int numField = fieldInfos.Size();
        int normCount = 0;

        for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++)
        {
            FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

            IList<NormsWriterPerField> toMerge = byField[fieldInfo];
            int upto = 0;
            if (toMerge != null)
            {
                int numFields = toMerge.Count;

                normCount++;

                NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
                int[] uptos = new int[numFields];

                for (int j = 0; j < numFields; j++)
                {
                    fields[j] = toMerge[j];
                }

                int numLeft = numFields;

                while (numLeft > 0)
                {
                    System.Diagnostics.Debug.Assert(uptos[0] < fields[0].docIDs.Length, " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.Length));

                    int minLoc = 0;
                    int minDocID = fields[0].docIDs[uptos[0]];

                    for (int j = 1; j < numLeft; j++)
                    {
                        int docID = fields[j].docIDs[uptos[j]];
                        if (docID < minDocID)
                        {
                            minDocID = docID;
                            minLoc = j;
                        }
                    }

                    System.Diagnostics.Debug.Assert(minDocID < state.numDocs);

                    // Fill hole
                    for (; upto < minDocID; upto++)
                    {
                        normsOut.WriteByte(defaultNorm);
                    }

                    normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]);
                    (uptos[minLoc])++;
                    upto++;

                    if (uptos[minLoc] == fields[minLoc].upto)
                    {
                        fields[minLoc].Reset();
                        if (minLoc != numLeft - 1)
                        {
                            fields[minLoc] = fields[numLeft - 1];
                            uptos[minLoc] = uptos[numLeft - 1];
                        }
                        numLeft--;
                    }
                }

                // Fill final hole with defaultNorm
                for (; upto < state.numDocs; upto++)
                {
                    normsOut.WriteByte(defaultNorm);
                }
            }
            else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
            {
                normCount++;
                // Fill entire field with default norm:
                for (; upto < state.numDocs; upto++)
                {
                    normsOut.WriteByte(defaultNorm);
                }
            }

            System.Diagnostics.Debug.Assert(4 + normCount * state.numDocs == normsOut.FilePointer, ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocs) + " actual=" + normsOut.FilePointer);
        }
    }
    finally
    {
        normsOut.Close();
    }
}
public override void Close()
{
    _fileMutex.WaitOne();
    try
    {
        string fileName = _name;

        // make sure it's all written out
        _indexOutput.Flush();
        long originalLength = _indexOutput.Length();
        _indexOutput.Close();

        Stream blobStream;
#if COMPRESSBLOBS
        // optionally put a compressor around the blob stream
        if (_azureDirectory.ShouldCompressFile(_name))
        {
            // unfortunately, DeflateStream doesn't allow seek, and we need a seekable stream
            // to pass to the blob storage stuff, so we compress into a memory stream
            MemoryStream compressedStream = new MemoryStream();

            try
            {
                IndexInput indexInput = CacheDirectory.OpenInput(fileName);
                using (DeflateStream compressor = new DeflateStream(compressedStream, CompressionMode.Compress, true))
                {
                    // compress to compressedOutputStream
                    byte[] bytes = new byte[indexInput.Length()];
                    indexInput.ReadBytes(bytes, 0, (int) bytes.Length);
                    compressor.Write(bytes, 0, (int) bytes.Length);
                }
                indexInput.Close();

                // seek back to the beginning of the compressed stream
                compressedStream.Seek(0, SeekOrigin.Begin);

                Debug.WriteLine(string.Format("COMPRESSED {0} -> {1} {2}% to {3}",
                    originalLength,
                    compressedStream.Length,
                    ((float) compressedStream.Length / (float) originalLength) * 100,
                    _name));
            }
            catch
            {
                // release the compressed stream resources if an error occurs
                compressedStream.Dispose();
                throw;
            }

            blobStream = compressedStream;
        }
        else
#endif
        {
            blobStream = new StreamInput(CacheDirectory.OpenInput(fileName));
        }

        try
        {
            // push the blobStream up to the cloud
            _blob.UploadFromStream(blobStream);

            // set the metadata with the original index file properties
            _blob.Metadata["CachedLength"] = originalLength.ToString();
            _blob.Metadata["CachedLastModified"] = CacheDirectory.FileModified(fileName).ToString();
            _blob.SetMetadata();

            Debug.WriteLine(string.Format("PUT {1} bytes to {0} in cloud", _name, blobStream.Length));
        }
        finally
        {
            blobStream.Dispose();
        }

#if FULLDEBUG
        Debug.WriteLine(string.Format("CLOSED WRITESTREAM {0}", _name));
#endif

        // clean up
        _indexOutput = null;
        _blobContainer = null;
        _blob = null;
        GC.SuppressFinalize(this);
    }
    finally
    {
        _fileMutex.ReleaseMutex();
    }
}
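// Sketch (assumption, not the library's actual read path): a blob compressed with
// DeflateStream as above would be inflated on download roughly like this before being
// written into the local cache directory. The helper name and parameters are illustrative.
internal static MemoryStream DecompressBlobSketch(Stream compressedBlobStream)
{
    var inflated = new MemoryStream();
    using (var decompressor = new DeflateStream(compressedBlobStream, CompressionMode.Decompress))
    {
        decompressor.CopyTo(inflated); // Stream.CopyTo, available since .NET 4
    }
    inflated.Seek(0, SeekOrigin.Begin);
    return inflated;
}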