/// <summary>Creates a meta index entry from already-known values.</summary>
/// <param name="metaName">Name stored in the entry.</param>
/// <param name="compressionAlgorithm">Compression algorithm stored in the entry.</param>
/// <param name="region">Block region stored in the entry.</param>
public MetaIndexEntry(
    string metaName,
    Compression.Algorithm compressionAlgorithm,
    BCFile.BlockRegion region)
{
    // The three assignments are independent of one another.
    this.region = region;
    this.compressionAlgorithm = compressionAlgorithm;
    this.metaName = metaName;
}
/// <summary>
/// Builds a block reader over <paramref name="region"/>, using the given
/// compression algorithm together with this reader's input stream and configuration.
/// </summary>
/// <param name="compressAlgo">Compression algorithm passed to the block state.</param>
/// <param name="region">Region passed to the block state.</param>
/// <returns>A new BlockReader wrapping a freshly created RBlockState.</returns>
/// <exception cref="System.IO.IOException"/>
private BCFile.Reader.BlockReader CreateReader(
    Compression.Algorithm compressAlgo,
    BCFile.BlockRegion region)
{
    return new BCFile.Reader.BlockReader(
        new BCFile.Reader.RBlockState(compressAlgo, @in, region, conf));
}
/// <summary>Stream access to a Data Block.</summary>
/// <param name="blockIndex">0-based data block index.</param>
/// <returns>BlockReader input stream for reading the data block.</returns>
/// <exception cref="System.IndexOutOfRangeException">
/// <paramref name="blockIndex"/> is negative or not less than the block count.
/// </exception>
/// <exception cref="System.IO.IOException"/>
public virtual BCFile.Reader.BlockReader GetDataBlock(int blockIndex)
{
    int numBlocks = GetBlockCount();
    if (blockIndex < 0 || blockIndex >= numBlocks)
    {
        // .NET composite formatting uses {n} placeholders; Java-style "%d"
        // would be emitted verbatim and the values would be lost.
        throw new IndexOutOfRangeException(
            string.Format("blockIndex={0}, numBlocks={1}", blockIndex, numBlocks));
    }

    BCFile.BlockRegion region = dataIndex.GetBlockRegionList()[blockIndex];
    return CreateReader(dataIndex.GetDefaultCompressionAlgorithm(), region);
}
/// <summary>Stream access to a Meta Block.</summary>
/// <param name="name">Name used to look up the meta block in the meta index.</param>
/// <returns>BlockReader input stream for reading the meta block.</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Org.Apache.Hadoop.IO.File.Tfile.MetaBlockDoesNotExist">
/// The meta index has no entry for <paramref name="name"/>.
/// </exception>
public virtual BCFile.Reader.BlockReader GetMetaBlock(string name)
{
    BCFile.MetaIndexEntry entry = metaIndex.GetMetaByName(name);
    if (entry == null)
    {
        throw new MetaBlockDoesNotExist("name=" + name);
    }

    // Each meta block carries its own compression algorithm and region.
    BCFile.BlockRegion metaRegion = entry.GetRegion();
    return CreateReader(entry.GetCompressionAlgorithm(), metaRegion);
}
/// <summary>
/// Deserializes a data index: first the name of the default compression
/// algorithm, then a variable-length count, then that many block regions.
/// </summary>
/// <param name="reader">Source the serialized index is read from.</param>
/// <exception cref="System.IO.IOException"/>
public DataIndex(BinaryReader reader)
{
    // Read everything from the supplied reader; the body previously referred
    // to an identifier (@in) that is not this constructor's parameter.
    defaultCompressionAlgorithm =
        Compression.GetCompressionAlgorithmByName(Utils.ReadString(reader));

    int n = Utils.ReadVInt(reader);
    listRegions = new AList<BCFile.BlockRegion>(n);
    for (int i = 0; i < n; i++)
    {
        listRegions.AddItem(new BCFile.BlockRegion(reader));
    }
}
/// <summary>
/// Deserializes a meta index entry: the prefixed meta name, the compression
/// algorithm name, then the block region.
/// </summary>
/// <param name="reader">Source the serialized entry is read from.</param>
/// <exception cref="System.IO.IOException">
/// The stored name does not start with the expected prefix, or reading fails.
/// </exception>
public MetaIndexEntry(BinaryReader reader)
{
    // Read from the supplied reader; the body previously referred to an
    // identifier (@in) that is not this constructor's parameter.
    string fullMetaName = Utils.ReadString(reader);

    // Ordinal comparison: the prefix is a fixed on-disk marker, so the check
    // must not vary with the current culture.
    if (!fullMetaName.StartsWith(defaultPrefix, StringComparison.Ordinal))
    {
        throw new IOException("Corrupted Meta region Index");
    }

    // Strip the prefix; the helper takes (begin, end) indices.
    metaName = Runtime.Substring(fullMetaName, defaultPrefix.Length, fullMetaName.Length);
    compressionAlgorithm =
        Compression.GetCompressionAlgorithmByName(Utils.ReadString(reader));
    region = new BCFile.BlockRegion(reader);
}
/// <summary>
/// Sets up the read state for one block: obtains a decompressor from the
/// algorithm and opens a decompression stream bounded to the region's
/// offset and compressed size.
/// </summary>
/// <param name="compressionAlgo">Algorithm that supplies the decompressor and stream.</param>
/// <param name="fsin">Underlying file input stream.</param>
/// <param name="region">Region whose offset and compressed size bound the read.</param>
/// <param name="conf">Configuration used to obtain the input buffer size.</param>
/// <exception cref="System.IO.IOException"/>
public RBlockState(
    Compression.Algorithm compressionAlgo,
    FSDataInputStream fsin,
    BCFile.BlockRegion region,
    Configuration conf)
{
    this.compressAlgo = compressionAlgo;
    this.region = region;
    this.decompressor = compressionAlgo.GetDecompressor();
    try
    {
        BoundedRangeFileInputStream boundedInput = new BoundedRangeFileInputStream(
            fsin,
            this.region.GetOffset(),
            this.region.GetCompressedSize());
        this.@in = compressAlgo.CreateDecompressionStream(
            boundedInput,
            decompressor,
            TFile.GetFSInputBufferSize(conf));
    }
    catch (IOException)
    {
        // Hand the decompressor back before propagating the failure.
        compressAlgo.ReturnDecompressor(decompressor);
        throw;
    }
}
/// <summary>Dump information about TFile.</summary>
/// <remarks>
/// Writes, in order: an aligned table of summary properties, a table with one
/// row per data block (including a sample of the block's end key), and a table
/// with one row per meta block.
/// </remarks>
/// <param name="file">Path string of the TFile</param>
/// <param name="out">Writer that receives the report.</param>
/// <param name="conf">The configuration object.</param>
/// <exception cref="System.IO.IOException"/>
public static void DumpInfo(string file, TextWriter @out, Configuration conf)
{
    int maxKeySampleLen = 16;
    Path path = new Path(file);
    FileSystem fs = path.GetFileSystem(conf);
    long length = fs.GetFileStatus(path).GetLen();
    FSDataInputStream fsdis = fs.Open(path);
    // NOTE(review): if this constructor throws, fsdis is not closed because the
    // try/finally below has not been entered yet - confirm whether that matters.
    TFile.Reader reader = new TFile.Reader(fsdis, length, conf);
    try
    {
        // ---- Summary properties -------------------------------------------
        LinkedHashMap<string, string> properties = new LinkedHashMap<string, string>();
        int blockCnt = reader.readerBCF.GetBlockCount();
        int metaBlkCnt = reader.readerBCF.metaIndex.index.Count;
        properties["BCFile Version"] = reader.readerBCF.version.ToString();
        properties["TFile Version"] = reader.tfileMeta.version.ToString();
        properties["File Length"] = System.Convert.ToString(length);
        properties["Data Compression"] = reader.readerBCF.GetDefaultCompressionName();
        properties["Record Count"] = System.Convert.ToString(reader.GetEntryCount());
        properties["Sorted"] = bool.ToString(reader.IsSorted());
        if (reader.IsSorted())
        {
            properties["Comparator"] = reader.GetComparatorName();
        }
        properties["Data Block Count"] = Extensions.ToString(blockCnt);

        long dataSize = 0;
        long dataSizeUncompressed = 0;
        if (blockCnt > 0)
        {
            for (int i = 0; i < blockCnt; ++i)
            {
                BCFile.BlockRegion region = reader.readerBCF.dataIndex.GetBlockRegionList()[i];
                dataSize += region.GetCompressedSize();
                dataSizeUncompressed += region.GetRawSize();
            }
            properties["Data Block Bytes"] = System.Convert.ToString(dataSize);
            if (reader.readerBCF.GetDefaultCompressionName() != "none")
            {
                properties["Data Block Uncompressed Bytes"] = System.Convert.ToString(dataSizeUncompressed);
                // {0:F1} is the .NET spelling of one fixed decimal place; a
                // Java-style "%.1f" would be written out literally.
                properties["Data Block Compression Ratio"] =
                    string.Format("1:{0:F1}", (double)dataSizeUncompressed / dataSize);
            }
        }

        properties["Meta Block Count"] = Extensions.ToString(metaBlkCnt);
        long metaSize = 0;
        long metaSizeUncompressed = 0;
        if (metaBlkCnt > 0)
        {
            ICollection<BCFile.MetaIndexEntry> metaBlks = reader.readerBCF.metaIndex.index.Values;
            bool calculateCompression = false;
            foreach (BCFile.MetaIndexEntry metaBlk in metaBlks)
            {
                metaSize += metaBlk.GetRegion().GetCompressedSize();
                metaSizeUncompressed += metaBlk.GetRegion().GetRawSize();
                if (metaBlk.GetCompressionAlgorithm() != Compression.Algorithm.None)
                {
                    calculateCompression = true;
                }
            }
            properties["Meta Block Bytes"] = System.Convert.ToString(metaSize);
            if (calculateCompression)
            {
                properties["Meta Block Uncompressed Bytes"] = System.Convert.ToString(metaSizeUncompressed);
                properties["Meta Block Compression Ratio"] =
                    string.Format("1:{0:F1}", (double)metaSizeUncompressed / metaSize);
            }
        }

        properties["Meta-Data Size Ratio"] = string.Format("1:{0:F1}", (double)dataSize / metaSize);
        // Whatever is neither data nor meta block bytes is split into the fixed
        // header/trailer pieces (miscSize) and the remainder (metaIndexSize).
        long leftOverBytes = length - dataSize - metaSize;
        long miscSize = BCFile.Magic.Size() * 2 + long.Size / byte.Size + Utils.Version.Size();
        long metaIndexSize = leftOverBytes - miscSize;
        properties["Meta Block Index Bytes"] = System.Convert.ToString(metaIndexSize);
        properties["Headers Etc Bytes"] = System.Convert.ToString(miscSize);

        // ---- Output the properties table ----------------------------------
        int maxKeyLength = 0;
        ICollection<KeyValuePair<string, string>> entrySet = properties;
        foreach (KeyValuePair<string, string> property in entrySet)
        {
            if (property.Key.Length > maxKeyLength)
            {
                maxKeyLength = property.Key.Length;
            }
        }
        foreach (KeyValuePair<string, string> property in entrySet)
        {
            @out.Printf("%s : %s%n",
                TFileDumper.Align.Format(property.Key, maxKeyLength, TFileDumper.Align.Left),
                property.Value);
        }
        @out.WriteLine();

        reader.CheckTFileDataIndex();

        // ---- Data block table ---------------------------------------------
        if (blockCnt > 0)
        {
            string blkID = "Data-Block";
            int blkIDWidth = TFileDumper.Align.CalculateWidth(blkID, blockCnt);
            int blkIDWidth2 = TFileDumper.Align.CalculateWidth(string.Empty, blockCnt);
            string offset = "Offset";
            int offsetWidth = TFileDumper.Align.CalculateWidth(offset, length);
            string blkLen = "Length";
            int blkLenWidth = TFileDumper.Align.CalculateWidth(blkLen, dataSize / blockCnt * 10);
            string rawSize = "Raw-Size";
            int rawSizeWidth = TFileDumper.Align.CalculateWidth(rawSize, dataSizeUncompressed / blockCnt * 10);
            string records = "Records";
            int recordsWidth = TFileDumper.Align.CalculateWidth(records, reader.GetEntryCount() / blockCnt * 10);
            string endKey = "End-Key";
            int endKeyWidth = Math.Max(endKey.Length, maxKeySampleLen * 2 + 5);

            @out.Printf("%s %s %s %s %s %s%n",
                TFileDumper.Align.Format(blkID, blkIDWidth, TFileDumper.Align.Center),
                TFileDumper.Align.Format(offset, offsetWidth, TFileDumper.Align.Center),
                TFileDumper.Align.Format(blkLen, blkLenWidth, TFileDumper.Align.Center),
                TFileDumper.Align.Format(rawSize, rawSizeWidth, TFileDumper.Align.Center),
                TFileDumper.Align.Format(records, recordsWidth, TFileDumper.Align.Center),
                TFileDumper.Align.Format(endKey, endKeyWidth, TFileDumper.Align.Left));

            for (int i = 0; i < blockCnt; ++i)
            {
                BCFile.BlockRegion region = reader.readerBCF.dataIndex.GetBlockRegionList()[i];
                TFile.TFileIndexEntry indexEntry = reader.tfileIndex.GetEntry(i);
                @out.Printf("%s %s %s %s %s ",
                    TFileDumper.Align.Format(
                        TFileDumper.Align.Format(i, blkIDWidth2, TFileDumper.Align.ZeroPadded),
                        blkIDWidth, TFileDumper.Align.Left),
                    TFileDumper.Align.Format(region.GetOffset(), offsetWidth, TFileDumper.Align.Left),
                    TFileDumper.Align.Format(region.GetCompressedSize(), blkLenWidth, TFileDumper.Align.Left),
                    TFileDumper.Align.Format(region.GetRawSize(), rawSizeWidth, TFileDumper.Align.Left),
                    TFileDumper.Align.Format(indexEntry.kvEntries, recordsWidth, TFileDumper.Align.Left));

                // Show at most maxKeySampleLen bytes of the key: as text when
                // every sampled byte passes the check below, otherwise as hex.
                byte[] key = indexEntry.key;
                bool asAscii = true;
                int sampleLen = Math.Min(maxKeySampleLen, key.Length);
                for (int j = 0; j < sampleLen; ++j)
                {
                    byte b = key[j];
                    if ((((sbyte)b) < 32 && b != 9) || (b == 127))
                    {
                        asAscii = false;
                    }
                }
                if (!asAscii)
                {
                    @out.Write("0X");
                    for (int j = 0; j < sampleLen; ++j)
                    {
                        // Index with the sample position j. Using the block
                        // counter i here would print one byte repeatedly and
                        // could read past the end of the key.
                        byte b = key[j];
                        @out.Printf("%X", b);
                    }
                }
                else
                {
                    @out.Write(new string(key, 0, sampleLen, Charsets.Utf8));
                }
                if (sampleLen < key.Length)
                {
                    @out.Write("...");
                }
                @out.WriteLine();
            }
        }
        @out.WriteLine();

        // ---- Meta block table ---------------------------------------------
        if (metaBlkCnt > 0)
        {
            string name = "Meta-Block";
            int maxNameLen = 0;
            ICollection<KeyValuePair<string, BCFile.MetaIndexEntry>> metaBlkEntrySet =
                reader.readerBCF.metaIndex.index;
            // foreach keeps the loop bound to the collection being measured;
            // the hand-written iterator form tested a different variable than
            // the one it declared.
            foreach (KeyValuePair<string, BCFile.MetaIndexEntry> metaEntry in metaBlkEntrySet)
            {
                if (metaEntry.Key.Length > maxNameLen)
                {
                    maxNameLen = metaEntry.Key.Length;
                }
            }
            int nameWidth = Math.Max(name.Length, maxNameLen);
            string offset = "Offset";
            int offsetWidth = TFileDumper.Align.CalculateWidth(offset, length);
            string blkLen = "Length";
            int blkLenWidth = TFileDumper.Align.CalculateWidth(blkLen, metaSize / metaBlkCnt * 10);
            string rawSize = "Raw-Size";
            int rawSizeWidth = TFileDumper.Align.CalculateWidth(rawSize, metaSizeUncompressed / metaBlkCnt * 10);
            string compression = "Compression";
            int compressionWidth = compression.Length;

            @out.Printf("%s %s %s %s %s%n",
                TFileDumper.Align.Format(name, nameWidth, TFileDumper.Align.Center),
                TFileDumper.Align.Format(offset, offsetWidth, TFileDumper.Align.Center),
                TFileDumper.Align.Format(blkLen, blkLenWidth, TFileDumper.Align.Center),
                TFileDumper.Align.Format(rawSize, rawSizeWidth, TFileDumper.Align.Center),
                TFileDumper.Align.Format(compression, compressionWidth, TFileDumper.Align.Left));

            foreach (KeyValuePair<string, BCFile.MetaIndexEntry> metaEntry in metaBlkEntrySet)
            {
                string blkName = metaEntry.Value.GetMetaName();
                BCFile.BlockRegion region = metaEntry.Value.GetRegion();
                string blkCompression = metaEntry.Value.GetCompressionAlgorithm().GetName();
                @out.Printf("%s %s %s %s %s%n",
                    TFileDumper.Align.Format(blkName, nameWidth, TFileDumper.Align.Left),
                    TFileDumper.Align.Format(region.GetOffset(), offsetWidth, TFileDumper.Align.Left),
                    TFileDumper.Align.Format(region.GetCompressedSize(), blkLenWidth, TFileDumper.Align.Left),
                    TFileDumper.Align.Format(region.GetRawSize(), rawSizeWidth, TFileDumper.Align.Left),
                    TFileDumper.Align.Format(blkCompression, compressionWidth, TFileDumper.Align.Left));
            }
        }
    }
    finally
    {
        IOUtils.Cleanup(Log, reader, fsdis);
    }
}
/// <summary>Appends a block region to this index's region list.</summary>
/// <param name="region">Region to append.</param>
public virtual void AddBlockRegion(BCFile.BlockRegion region)
{
    this.listRegions.AddItem(region);
}