Beispiel #1
0
        /// <summary>
        /// Builds the list of ORC types that describes an ACID event wrapping rows
        /// of the given type description.
        /// </summary>
        /// <param name="typeDescr">
        /// Type description of the row; passed to
        /// <c>OrcUtils.appendOrcTypesRebuildSubtypes</c>, which appends to the list.
        /// </param>
        /// <returns>
        /// The outer event STRUCT first, then one type for every event field except
        /// the last (ROW), followed by whatever the OrcUtils call appends for the row.
        /// </returns>
        public static List <OrcProto.Type> createEventSchema(TypeDescription typeDescr)
        {
            List <OrcProto.Type> eventTypes = new List <OrcProto.Type>();
            OrcProto.Type.Builder builder   = OrcProto.Type.CreateBuilder();

            // Outer STRUCT: one named field per ACID event column, with subtype
            // ids numbered 1..N in field order (id 0 is this struct itself).
            builder.SetKind(OrcProto.Type.Types.Kind.STRUCT);
            builder.AddRangeFieldNames(acidEventFieldNames);
            int fieldCount = acidEventFieldNames.Length;
            for (int subtypeId = 1; subtypeId <= fieldCount; subtypeId++)
            {
                builder.AddSubtypes((uint)subtypeId);
            }
            eventTypes.Add(builder.Build());

            // Every event field except the trailing ROW gets a bare type of its
            // kind; the same builder is cleared and reused for each one.
            int leadingFieldCount = acidEventOrcTypeKinds.Length - 1;
            for (int field = 0; field < leadingFieldCount; field++)
            {
                builder.Clear();
                builder.SetKind(acidEventOrcTypeKinds[field]);
                eventTypes.Add(builder.Build());
            }

            // The ROW types come from the caller's type description.
            OrcUtils.appendOrcTypesRebuildSubtypes(eventTypes, typeDescr);
            return eventTypes;
        }
Beispiel #2
0
        /// <summary>
        /// Checks that the overlapping file types can be read with the requested
        /// schema types and packages both into a <c>TreeReaderSchema</c>.
        /// </summary>
        /// <remarks>
        /// A differing kind is accepted only when it widens an integer: SHORT to INT
        /// or LONG, and INT to LONG. When <c>checkAcidSchema</c> reports an ACID file,
        /// the row is taken from the ROW field of the outer STRUCT and the returned
        /// schema types are prefixed with copies of the file's leading ACID types.
        /// </remarks>
        /// <param name="fileTypes">Types as recorded in the file.</param>
        /// <param name="schemaTypes">Types of the schema the caller wants to read.</param>
        /// <returns>
        /// A schema carrying <paramref name="fileTypes"/>, the (possibly ACID-prefixed)
        /// schema types, and the subtype index at which the row struct starts.
        /// </returns>
        /// <exception cref="IOException">
        /// A compared pair of types differs in kind and is not one of the supported
        /// integer promotions.
        /// </exception>
        public static TreeReaderFactory.TreeReaderSchema validateAndCreate(
            IList <OrcProto.Type> fileTypes,
            IList <OrcProto.Type> schemaTypes)
        {
            bool isAcid = checkAcidSchema(fileTypes);

            // Default to the non-ACID layout, where the row struct is the root of
            // the type list. For ACID, the row is the ROW field in the outer STRUCT.
            int rowSubtype = 0;
            IList <OrcProto.Type> rowSchema = fileTypes;
            if (isAcid)
            {
                rowSubtype = OrcRecordUpdater.ROW + 1;
                rowSchema  = fileTypes.subList(rowSubtype, fileTypes.Count);
            }

            // Only the overlap is checked; additional columns will be defaulted to NULL.
            int overlap = Math.Min(rowSchema[0].SubtypesCount, schemaTypes[0].SubtypesCount);

            // Type promotion check. Only integer promotions that work "implicitly"
            // are supported (short -> int -> bigint), because the same integer
            // readers are used for those types; other promotions might require an
            // explicit conversion in the future.
            //
            // NOTE(review): entries are paired by flat list position, beginning at
            // the row struct entry itself (offset 0) and stopping before offset
            // `overlap` -- confirm this indexing is intended for the last column
            // and for nested types.
            for (int offset = 0; offset < overlap; offset++)
            {
                OrcProto.Type.Types.Kind fileKind = fileTypes[rowSubtype + offset].Kind;
                OrcProto.Type.Types.Kind readKind = schemaTypes[offset].Kind;
                if (fileKind == readKind)
                {
                    continue;
                }

                // SHORT may be read as INT or LONG.
                bool widensShort =
                    fileKind == OrcProto.Type.Types.Kind.SHORT &&
                    (readKind == OrcProto.Type.Types.Kind.INT ||
                     readKind == OrcProto.Type.Types.Kind.LONG);

                // INT may be read as LONG.
                bool widensInt =
                    fileKind == OrcProto.Type.Types.Kind.INT &&
                    readKind == OrcProto.Type.Types.Kind.LONG;

                if (!(widensShort || widensInt))
                {
                    throw new IOException("ORC does not support type conversion from " +
                                          fileKind.ToString() + " to " + readKind.ToString());
                }
            }

            // Non-ACID readers use the requested schema as-is.
            IList <OrcProto.Type> fullSchemaTypes = schemaTypes;
            if (isAcid)
            {
                fullSchemaTypes = new List <OrcProto.Type>();

                // Copy the ACID struct type (subtype 0, field names "operation"
                // through "row") and the types of every field EXCEPT ROW, which
                // must be last.
                for (int acidIndex = 0; acidIndex < rowSubtype; acidIndex++)
                {
                    fullSchemaTypes.Add(fileTypes[acidIndex].ToBuilder().Build());
                }

                // Then add the row struct type from the requested schema.
                OrcUtils.appendOrcTypesRebuildSubtypes(fullSchemaTypes, schemaTypes, 0);
            }

            // The inner (row) struct sits at the same subtype index computed above.
            return new TreeReaderFactory.TreeReaderSchema().
                   fileTypes(fileTypes).
                   schemaTypes(fullSchemaTypes).
                   innerStructSubtype(rowSubtype);
        }