예제 #1
0
        /**
         * Builds an inclusion mask over the flattened columns of a schema.
         *
         * The returned array has one element per column id, i.e. schema.getMaximumId() + 1
         * elements. For every selected top-level field, the elements from the field's own id
         * through its maximum id (the field and everything nested under it) are set to true.
         *
         * For example:
         * selectedColumns - a,b,c
         * allColumns - a,b,c,d
         * If column c is a complex type, say list<string>, and the other columns are primitives,
         * the result will be [false, true, true, true, true, false].
         * Index 0 is the root struct, which is left false; indexes 1 and 2 correspond to columns
         * a and b; indexes 3 and 4 correspond to column c (list<string> flattens to 2 columns);
         * index 5 corresponds to column d, which was not selected.
         *
         * Special cases:
         * - "*" selects every column, including the root at index 0.
         * - a null selection, or a schema whose category is not STRUCT, selects nothing.
         * - names that findColumn does not resolve are ignored.
         * - whitespace around each name is ignored and blank entries are skipped, so
         *   "a, b" and "a,,b" behave like "a,b".
         *
         * @param selectedColumns - comma separated list of selected column names, or "*"
         * @param schema       - object schema
         * @return - bool array with true value set for the specified column names
         */
        public static bool[] includeColumns(string selectedColumns, TypeDescription schema)
        {
            int numFlattenedCols = schema.getMaximumId();

            bool[] results = new bool[numFlattenedCols + 1];
            if ("*".Equals(selectedColumns))
            {
                for (int i = 0; i < results.Length; i++)
                {
                    results[i] = true;
                }
                return(results);
            }
            if (selectedColumns != null &&
                schema.getCategory() == Category.STRUCT)
            {
                IList <string>          fieldNames = schema.getFieldNames();
                IList <TypeDescription> fields     = schema.getChildren();
                foreach (string rawColumn in selectedColumns.Split((',')))
                {
                    // Tolerate "a, b" style lists: without trimming, " b" would be looked up
                    // verbatim and, when not found, silently dropped from the selection.
                    string column = rawColumn.Trim();
                    if (column.Length == 0)
                    {
                        continue;
                    }

                    TypeDescription col = findColumn(column, fieldNames, fields);
                    if (col != null)
                    {
                        // Mark the field itself and all of its nested children.
                        for (int i = col.getId(); i <= col.getMaximumId(); ++i)
                        {
                            results[i] = true;
                        }
                    }
                }
            }
            return(results);
        }
예제 #2
0
        /**
         * Initializes a writer from the supplied settings and registers it with the
         * memory manager.
         *
         * Most arguments are copied straight into same-named fields. Derived state:
         * - buildIndex is true when rowIndexStride is greater than zero.
         * - codec is obtained from createCodec(compress).
         * - the bufferSize field is the result of getEstimatedBufferSize for the stripe size,
         *   the number of flattened columns (schema.getMaximumId() + 1) and the requested
         *   bufferSize, so it may differ from the argument.
         * - bloomFilterColumns is all false for OrcFile.Version.V_0_11; otherwise it is the
         *   mask produced by OrcUtils.includeColumns(bloomFilterColumnNames, schema).
         *
         * @param stream - stored as the base stream of this writer
         * @param path - stored, and used as the key when registering with memoryManager
         * @param options - stored as-is
         * @param inspector - passed to createTreeWriter together with schema
         * @param schema - schema of the rows; also determines the number of flattened columns
         * @param stripeSize - stored as both the default and the adjusted stripe size, and
         *                     passed to memoryManager.addWriter
         * @param compress - compression kind; used to create the codec
         * @param bufferSize - requested buffer size, input to getEstimatedBufferSize
         * @param rowIndexStride - values greater than zero enable the row index
         * @param memoryManager - manager this writer registers itself with
         * @param addBlockPadding - stored as-is
         * @param version - file version; V_0_11 disables bloom filters
         * @param callback - stored as-is
         * @param encodingStrategy - stored as-is
         * @param compressionStrategy - stored as-is
         * @param paddingTolerance - stored as-is
         * @param blockSizeValue - stored as the blockSize field
         * @param bloomFilterColumnNames - column selection handed to OrcUtils.includeColumns
         * @param bloomFilterFpp - stored as-is (presumably the bloom filter false positive
         *                         probability - TODO confirm)
         * @throws ArgumentException - when the row index is enabled and rowIndexStride is
         *                             below MIN_ROW_INDEX_STRIDE
         */
        public WriterImpl(
            Stream stream,
            string path,
            OrcFile.WriterOptions options,
            ObjectInspector inspector,
            TypeDescription schema,
            long stripeSize,
            CompressionKind compress,
            int bufferSize,
            int rowIndexStride,
            MemoryManager memoryManager,
            bool addBlockPadding,
            OrcFile.Version version,
            OrcFile.WriterCallback callback,
            OrcFile.EncodingStrategy encodingStrategy,
            OrcFile.CompressionStrategy compressionStrategy,
            double paddingTolerance,
            long blockSizeValue,
            string bloomFilterColumnNames,
            double bloomFilterFpp)
        {
            this.baseStream = stream;
            // NOTE(review): the factory receives `this` before the remaining fields are
            // assigned - confirm StreamFactory does not read writer state in its constructor.
            this.streamFactory = new StreamFactory(this);
            this.path = path;
            this.options = options;
            this.callback = callback;
            this.schema = schema;
            // Both stripe sizes start out equal to the requested stripe size.
            this.adjustedStripeSize = stripeSize;
            this.defaultStripeSize = stripeSize;
            this.version = version;
            this.encodingStrategy = encodingStrategy;
            this.compressionStrategy = compressionStrategy;
            this.addBlockPadding = addBlockPadding;
            this.blockSize = blockSizeValue;
            this.paddingTolerance = paddingTolerance;
            this.compress = compress;
            this.rowIndexStride = rowIndexStride;
            this.memoryManager = memoryManager;
            // A stride of zero or less disables the row index.
            buildIndex = rowIndexStride > 0;
            codec = createCodec(compress);
            // One entry per flattened column id, including the root (ids run 0..maximumId).
            int numColumns = schema.getMaximumId() + 1;
            // The field holds the estimate, not necessarily the requested bufferSize argument.
            this.bufferSize = getEstimatedBufferSize(defaultStripeSize, numColumns, bufferSize);
            if (version == OrcFile.Version.V_0_11)
            {
                /* do not write bloom filters for ORC v11: leave every column flag false */
                this.bloomFilterColumns = new bool[schema.getMaximumId() + 1];
            }
            else
            {
                this.bloomFilterColumns =
                    OrcUtils.includeColumns(bloomFilterColumnNames, schema);
            }
            this.bloomFilterFpp = bloomFilterFpp;
            treeWriter = createTreeWriter(inspector, schema, streamFactory, false);
            // The stride is validated only after the codec and tree writer have been created;
            // the check applies only when the row index is enabled.
            if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE)
            {
                throw new ArgumentException("Row stride must be at least " +
                    MIN_ROW_INDEX_STRIDE);
            }

            // ensure that we are able to handle callbacks before we register ourselves:
            // registration is the last step, after all state above has been initialized
            memoryManager.addWriter(path, stripeSize, this);
        }
예제 #3
0
 /**
  * Builds an inclusion mask over the flattened columns of a schema.
  *
  * The returned array has one element per column id, i.e. schema.getMaximumId() + 1
  * elements. For every selected top-level field, the elements from the field's own id
  * through its maximum id (the field and everything nested under it) are set to true.
  *
  * For example:
  * selectedColumns - a,b,c
  * allColumns - a,b,c,d
  * If column c is a complex type, say list<string>, and the other columns are primitives,
  * the result will be [false, true, true, true, true, false].
  * Index 0 is the root struct, which is left false; indexes 1 and 2 correspond to columns
  * a and b; indexes 3 and 4 correspond to column c (list<string> flattens to 2 columns);
  * index 5 corresponds to column d, which was not selected.
  *
  * Special cases:
  * - "*" selects every column, including the root at index 0.
  * - a null selection, or a schema whose category is not STRUCT, selects nothing.
  * - names that findColumn does not resolve are ignored.
  * - whitespace around each name is ignored and blank entries are skipped, so
  *   "a, b" and "a,,b" behave like "a,b".
  *
  * @param selectedColumns - comma separated list of selected column names, or "*"
  * @param schema       - object schema
  * @return - bool array with true value set for the specified column names
  */
 public static bool[] includeColumns(string selectedColumns, TypeDescription schema)
 {
     int numFlattenedCols = schema.getMaximumId();
     bool[] results = new bool[numFlattenedCols + 1];
     if ("*".Equals(selectedColumns))
     {
         for (int i = 0; i < results.Length; i++)
         {
             results[i] = true;
         }
         return results;
     }
     if (selectedColumns != null &&
         schema.getCategory() == Category.STRUCT)
     {
         IList<string> fieldNames = schema.getFieldNames();
         IList<TypeDescription> fields = schema.getChildren();
         foreach (string rawColumn in selectedColumns.Split((',')))
         {
             // Tolerate "a, b" style lists: without trimming, " b" would be looked up
             // verbatim and, when not found, silently dropped from the selection.
             string column = rawColumn.Trim();
             if (column.Length == 0)
             {
                 continue;
             }

             TypeDescription col = findColumn(column, fieldNames, fields);
             if (col != null)
             {
                 // Mark the field itself and all of its nested children.
                 for (int i = col.getId(); i <= col.getMaximumId(); ++i)
                 {
                     results[i] = true;
                 }
             }
         }
     }
     return results;
 }