Example #1
0
 /**
  * Writes the ORC type for a schema node into the footer builder, followed by the
  * types of all of its children (recursively, parent before children).
  *
  * @param builder - footer builder that receives the types
  * @param schema  - schema node to write
  * @throws ArgumentException when the node's category is not one of the handled cases
  */
 private static void writeTypes(OrcProto.Footer.Builder builder,
                                TypeDescription schema)
 {
     OrcProto.Type.Builder protoType = OrcProto.Type.CreateBuilder();
     IList<TypeDescription> subSchemas = schema.getChildren();
     var category = schema.getCategory();

     // Step 1: translate the schema category into the protobuf kind.
     OrcProto.Type.Types.Kind kind;
     switch (category)
     {
         case Category.BOOLEAN: kind = OrcProto.Type.Types.Kind.BOOLEAN; break;
         case Category.BYTE: kind = OrcProto.Type.Types.Kind.BYTE; break;
         case Category.SHORT: kind = OrcProto.Type.Types.Kind.SHORT; break;
         case Category.INT: kind = OrcProto.Type.Types.Kind.INT; break;
         case Category.LONG: kind = OrcProto.Type.Types.Kind.LONG; break;
         case Category.FLOAT: kind = OrcProto.Type.Types.Kind.FLOAT; break;
         case Category.DOUBLE: kind = OrcProto.Type.Types.Kind.DOUBLE; break;
         case Category.STRING: kind = OrcProto.Type.Types.Kind.STRING; break;
         case Category.CHAR: kind = OrcProto.Type.Types.Kind.CHAR; break;
         case Category.VARCHAR: kind = OrcProto.Type.Types.Kind.VARCHAR; break;
         case Category.BINARY: kind = OrcProto.Type.Types.Kind.BINARY; break;
         case Category.TIMESTAMP: kind = OrcProto.Type.Types.Kind.TIMESTAMP; break;
         case Category.DATE: kind = OrcProto.Type.Types.Kind.DATE; break;
         case Category.DECIMAL: kind = OrcProto.Type.Types.Kind.DECIMAL; break;
         case Category.LIST: kind = OrcProto.Type.Types.Kind.LIST; break;
         case Category.MAP: kind = OrcProto.Type.Types.Kind.MAP; break;
         case Category.STRUCT: kind = OrcProto.Type.Types.Kind.STRUCT; break;
         case Category.UNION: kind = OrcProto.Type.Types.Kind.UNION; break;
         default:
             throw new ArgumentException("Unknown category: " + category);
     }
     protoType.Kind = kind;

     // Step 2: fill in the attributes that only some categories carry.
     if (category == Category.CHAR || category == Category.VARCHAR)
     {
         protoType.MaximumLength = (uint)schema.getMaxLength();
     }
     else if (category == Category.DECIMAL)
     {
         protoType.Precision = (uint)schema.getPrecision();
         protoType.Scale = (uint)schema.getScale();
     }
     else if (category == Category.LIST)
     {
         // A list references only its first child.
         protoType.AddSubtypes((uint)subSchemas[0].getId());
     }
     else if (category == Category.MAP ||
              category == Category.STRUCT ||
              category == Category.UNION)
     {
         for (int i = 0; i < subSchemas.Count; i++)
         {
             protoType.AddSubtypes((uint)subSchemas[i].getId());
         }
         if (category == Category.STRUCT)
         {
             foreach (string fieldName in schema.getFieldNames())
             {
                 protoType.AddFieldNames(fieldName);
             }
         }
     }

     // The node's own type is added before any of its descendants.
     builder.AddTypes(protoType);
     if (subSchemas == null)
     {
         return;
     }
     for (int i = 0; i < subSchemas.Count; i++)
     {
         writeTypes(builder, subSchemas[i]);
     }
 }
Example #2
0
 /**
  * Builds a bool array that flags the selected columns. The array has one slot per
  * flattened column id of the schema, i.e. schema.getMaximumId() + 1 slots.
  *
  * Passing "*" flags every slot, including index 0. Otherwise, when the schema is a
  * struct, every name in the comma separated list that findColumn resolves flags the
  * id range [getId(), getMaximumId()] of that column; names that do not resolve are
  * skipped. A null selection or a non-struct schema yields an all-false array.
  *
  * For example:
  * selectedColumns - a,b,c
  * allColumns - a,b,c,d
  * If column c is a complex type, say list<string>, and the other columns are primitives,
  * the result will be [false, true, true, true, true, false].
  * Index 0 is the root element of the struct, which stays false; indexes 1 and 2
  * correspond to columns a and b; indexes 3 and 4 correspond to column c, because a
  * flattened list<string> takes 2 columns; index 5 corresponds to column d.
  *
  * @param selectedColumns - comma separated list of selected column names, or "*"
  * @param schema       - object schema
  * @return - bool array with true value set for the specified column names
  */
 public static bool[] includeColumns(string selectedColumns, TypeDescription schema)
 {
     bool[] included = new bool[schema.getMaximumId() + 1];

     // Wildcard: everything is selected.
     if (selectedColumns == "*")
     {
         for (int slot = included.Length - 1; slot >= 0; slot--)
         {
             included[slot] = true;
         }
         return included;
     }

     // Named selection is only resolved against the fields of a struct.
     if (selectedColumns == null ||
         schema.getCategory() != Category.STRUCT)
     {
         return included;
     }

     IList<string> names = schema.getFieldNames();
     IList<TypeDescription> columns = schema.getChildren();
     string[] requested = selectedColumns.Split(',');
     foreach (string name in requested)
     {
         TypeDescription match = findColumn(name, names, columns);
         if (match == null)
         {
             continue;
         }
         // Flag the column itself and everything nested beneath it.
         for (int id = match.getId(); id <= match.getMaximumId(); id++)
         {
             included[id] = true;
         }
     }
     return included;
 }
Example #3
0
 /**
  * Appends the ORC protobuf type for a schema node to the result list, then appends
  * the types of its children recursively. Subtype references are taken from the ids
  * reported by each child's getId().
  *
  * @param result    - list that receives the built types
  * @param typeDescr - schema node to convert
  * @throws ArgumentException when the node's category is not one of the handled cases
  */
 private static void appendOrcTypes(List<OrcProto.Type> result, TypeDescription typeDescr)
 {
     OrcProto.Type.Builder protoType = OrcProto.Type.CreateBuilder();
     IList<TypeDescription> subSchemas = typeDescr.getChildren();
     var category = typeDescr.getCategory();

     // Resolve the protobuf kind that matches the schema category.
     OrcProto.Type.Types.Kind kind;
     switch (category)
     {
         case Category.BOOLEAN: kind = OrcProto.Type.Types.Kind.BOOLEAN; break;
         case Category.BYTE: kind = OrcProto.Type.Types.Kind.BYTE; break;
         case Category.SHORT: kind = OrcProto.Type.Types.Kind.SHORT; break;
         case Category.INT: kind = OrcProto.Type.Types.Kind.INT; break;
         case Category.LONG: kind = OrcProto.Type.Types.Kind.LONG; break;
         case Category.FLOAT: kind = OrcProto.Type.Types.Kind.FLOAT; break;
         case Category.DOUBLE: kind = OrcProto.Type.Types.Kind.DOUBLE; break;
         case Category.STRING: kind = OrcProto.Type.Types.Kind.STRING; break;
         case Category.CHAR: kind = OrcProto.Type.Types.Kind.CHAR; break;
         case Category.VARCHAR: kind = OrcProto.Type.Types.Kind.VARCHAR; break;
         case Category.BINARY: kind = OrcProto.Type.Types.Kind.BINARY; break;
         case Category.TIMESTAMP: kind = OrcProto.Type.Types.Kind.TIMESTAMP; break;
         case Category.DATE: kind = OrcProto.Type.Types.Kind.DATE; break;
         case Category.DECIMAL: kind = OrcProto.Type.Types.Kind.DECIMAL; break;
         case Category.LIST: kind = OrcProto.Type.Types.Kind.LIST; break;
         case Category.MAP: kind = OrcProto.Type.Types.Kind.MAP; break;
         case Category.STRUCT: kind = OrcProto.Type.Types.Kind.STRUCT; break;
         case Category.UNION: kind = OrcProto.Type.Types.Kind.UNION; break;
         default:
             throw new ArgumentException("Unknown category: " + category);
     }
     protoType.SetKind(kind);

     // Populate the attributes that only certain categories carry.
     switch (category)
     {
         case Category.CHAR:
         case Category.VARCHAR:
             protoType.SetMaximumLength((uint)typeDescr.getMaxLength());
             break;
         case Category.DECIMAL:
             protoType.SetPrecision((uint)typeDescr.getPrecision());
             protoType.SetScale((uint)typeDescr.getScale());
             break;
         case Category.LIST:
             // A list references only its first child.
             protoType.AddSubtypes((uint)subSchemas[0].getId());
             break;
         case Category.MAP:
         case Category.STRUCT:
         case Category.UNION:
             for (int i = 0; i < subSchemas.Count; i++)
             {
                 protoType.AddSubtypes((uint)subSchemas[i].getId());
             }
             if (category == Category.STRUCT)
             {
                 foreach (string fieldName in typeDescr.getFieldNames())
                 {
                     protoType.AddFieldNames(fieldName);
                 }
             }
             break;
     }

     // The node's own type precedes the types of its descendants.
     result.Add(protoType.Build());
     if (subSchemas == null)
     {
         return;
     }
     for (int i = 0; i < subSchemas.Count; i++)
     {
         appendOrcTypes(result, subSchemas[i]);
     }
 }
Example #4
0
        /**
         * Appends the ORC protobuf types for a schema node and its descendants to the
         * result list, parent first.
         *
         * NOTE: This method ignores the subtype numbers in the TypeDescription and rebuilds
         * the subtype numbers based on the length of the result list being appended.
         *
         * @param result    - list that receives the built types
         * @param typeDescr - schema node to convert
         */
        public static void appendOrcTypesRebuildSubtypes(
            IList<OrcProto.Type> result,
            TypeDescription typeDescr)
        {
            // Index this node's type occupies (or will occupy) in the result list.
            int selfIndex = result.Count;
            OrcProto.Type.Builder protoType = OrcProto.Type.CreateBuilder();
            IList<TypeDescription> subSchemas = typeDescr.getChildren();
            switch (typeDescr.getCategory())
            {
                case Category.BOOLEAN:
                    protoType.SetKind(OrcProto.Type.Types.Kind.BOOLEAN);
                    break;
                case Category.BYTE:
                    protoType.SetKind(OrcProto.Type.Types.Kind.BYTE);
                    break;
                case Category.SHORT:
                    protoType.SetKind(OrcProto.Type.Types.Kind.SHORT);
                    break;
                case Category.INT:
                    protoType.SetKind(OrcProto.Type.Types.Kind.INT);
                    break;
                case Category.LONG:
                    protoType.SetKind(OrcProto.Type.Types.Kind.LONG);
                    break;
                case Category.FLOAT:
                    protoType.SetKind(OrcProto.Type.Types.Kind.FLOAT);
                    break;
                case Category.DOUBLE:
                    protoType.SetKind(OrcProto.Type.Types.Kind.DOUBLE);
                    break;
                case Category.STRING:
                    protoType.SetKind(OrcProto.Type.Types.Kind.STRING);
                    break;
                case Category.CHAR:
                    protoType.SetKind(OrcProto.Type.Types.Kind.CHAR);
                    protoType.SetMaximumLength((uint)typeDescr.getMaxLength());
                    break;
                case Category.VARCHAR:
                    protoType.SetKind(OrcProto.Type.Types.Kind.VARCHAR);
                    protoType.SetMaximumLength((uint)typeDescr.getMaxLength());
                    break;
                case Category.BINARY:
                    protoType.SetKind(OrcProto.Type.Types.Kind.BINARY);
                    break;
                case Category.TIMESTAMP:
                    protoType.SetKind(OrcProto.Type.Types.Kind.TIMESTAMP);
                    break;
                case Category.DATE:
                    protoType.SetKind(OrcProto.Type.Types.Kind.DATE);
                    break;
                case Category.DECIMAL:
                    protoType.SetKind(OrcProto.Type.Types.Kind.DECIMAL);
                    protoType.SetPrecision((uint)typeDescr.getPrecision());
                    protoType.SetScale((uint)typeDescr.getScale());
                    break;
                case Category.LIST:
                    {
                        // The element type is appended directly after the list type,
                        // so its index is known before recursing.
                        int elementIndex = selfIndex + 1;
                        protoType.SetKind(OrcProto.Type.Types.Kind.LIST);
                        protoType.AddSubtypes((uint)elementIndex);
                        result.Add(protoType.Build());
                        appendOrcTypesRebuildSubtypes(result, subSchemas[0]);
                        return;
                    }
                case Category.MAP:
                    {
                        // Reserve the MAP slot; it is filled in once both child
                        // positions are known.
                        result.Add(null);

                        // First child starts right after the placeholder.
                        appendOrcTypesRebuildSubtypes(result, subSchemas[0]);
                        int secondIndex = result.Count;
                        appendOrcTypesRebuildSubtypes(result, subSchemas[1]);

                        protoType.SetKind(OrcProto.Type.Types.Kind.MAP);
                        protoType.AddSubtypes((uint)selfIndex + 1);
                        protoType.AddSubtypes((uint)secondIndex);
                        result[selfIndex] = protoType.Build();
                        return;
                    }
                case Category.STRUCT:
                    {
                        IList<string> names = typeDescr.getFieldNames();

                        // Reserve the STRUCT slot ahead of its fields.
                        result.Add(null);

                        // Remember where each field's type starts in the list.
                        List<int> fieldPositions = new List<int>(names.Count);
                        foreach (TypeDescription fieldSchema in subSchemas)
                        {
                            fieldPositions.Add(result.Count);
                            appendOrcTypesRebuildSubtypes(result, fieldSchema);
                        }

                        protoType.SetKind(OrcProto.Type.Types.Kind.STRUCT);
                        for (int f = 0; f < names.Count; f++)
                        {
                            protoType.AddSubtypes((uint)fieldPositions[f]);
                            protoType.AddFieldNames(names[f]);
                        }
                        result[selfIndex] = protoType.Build();
                        return;
                    }
                case Category.UNION:
                    {
                        // Reserve the UNION slot ahead of its alternatives.
                        result.Add(null);

                        // Each alternative starts at the current end of the list;
                        // record that position, then append the alternative.
                        foreach (TypeDescription alternative in subSchemas)
                        {
                            protoType.AddSubtypes((uint)result.Count);
                            appendOrcTypesRebuildSubtypes(result, alternative);
                        }

                        protoType.SetKind(OrcProto.Type.Types.Kind.UNION);
                        result[selfIndex] = protoType.Build();
                        return;
                    }
                default:
                    throw new ArgumentException("Unknown category: " + typeDescr.getCategory());
            }

            // Only categories without children reach this point.
            result.Add(protoType.Build());
        }