/// <summary>
/// Decodes a buffer of length-prefixed elements and appends each one to <paramref name="destination"/>.
/// </summary>
/// <remarks>
/// Every element is a 4-byte length read with <c>BitConverter.ToInt32</c>, followed by that many
/// payload bytes. Elements are appended as strings when the column is annotated UTF8 or JSON, or
/// when the TreatByteArrayAsString option is set; otherwise each payload is copied into its own byte[].
/// The original author noted that date strings are not handled correctly on this path because of a
/// type mismatch.
/// </remarks>
/// <param name="data">Raw buffer holding the length-prefixed elements.</param>
/// <param name="schemaElement">Column schema whose annotation selects string or byte[] output.</param>
/// <param name="destination">List that receives the decoded elements.</param>
private void ReadByteArray(byte[] data, SchemaElement schemaElement, IList destination)
{
    // UTF8 and JSON are both stored as binary data, so they share the string decoding path.
    bool decodeAsText = schemaElement.IsAnnotatedWith(Thrift.ConvertedType.UTF8) ||
                        schemaElement.IsAnnotatedWith(Thrift.ConvertedType.JSON) ||
                        _options.TreatByteArrayAsString;

    int offset = 0;
    while (offset < data.Length)
    {
        int size = BitConverter.ToInt32(data, offset);
        offset += 4; // step over the length prefix

        if (decodeAsText)
        {
            destination.Add(UTF8.GetString(data, offset, size));
        }
        else
        {
            byte[] element = new byte[size];
            Array.Copy(data, offset, element, 0, size);
            destination.Add(element);
        }

        offset += size; // step over the payload to the next length prefix
    }
}
        /// <summary>
        /// Decodes consecutive fixed-width values from <paramref name="data"/> and appends them to
        /// <paramref name="destination"/>.
        /// </summary>
        /// <remarks>
        /// DECIMAL-annotated columns produce <see cref="decimal"/> values (converted through BigDecimal);
        /// INTERVAL-annotated columns produce Interval values. Columns carrying neither annotation are
        /// ignored and nothing is appended.
        /// </remarks>
        /// <param name="data">Raw buffer; each value occupies <c>schema.Thrift.Type_length</c> bytes.</param>
        /// <param name="schema">Column schema supplying the annotation and the fixed type length.</param>
        /// <param name="destination">List that receives the decoded values.</param>
        /// <exception cref="ArgumentException">
        /// The schema's type length is zero or negative while <paramref name="data"/> is non-empty.
        /// </exception>
        private static void ReadFixedLenByteArray(byte[] data, SchemaElement schema, IList destination)
        {
            bool isDecimal = schema.IsAnnotatedWith(Thrift.ConvertedType.DECIMAL);
            if (!isDecimal && !schema.IsAnnotatedWith(Thrift.ConvertedType.INTERVAL))
            {
                return;
            }

            int typeLength = schema.Thrift.Type_length;
            if (data.Length > 0 && typeLength <= 0)
            {
                // A zero stride would never advance the loops below and would grow the list without bound.
                throw new ArgumentException(
                    $"fixed length byte array column declares an invalid type length of {typeLength}", nameof(schema));
            }

            if (isDecimal)
            {
                // One scratch buffer is reused for every element.
                // NOTE(review): assumes BigDecimal does not retain a reference to it - confirm.
                byte[] itemData = ByteGarbage.GetByteArray(typeLength);
                for (int i = 0; i < data.Length; i += typeLength)
                {
                    Array.Copy(data, i, itemData, 0, typeLength);

                    decimal dc = new BigDecimal(itemData, schema.Thrift);
                    destination.Add(dc);
                }
            }
            else
            {
                for (int i = 0; i < data.Length; i += typeLength)
                {
                    // Three consecutive 32-bit integers, presumably months / days / milliseconds;
                    // they are read straight from the source buffer, so no temporary arrays are needed.
                    destination.Add(new Interval(
                                        BitConverter.ToInt32(data, i),
                                        BitConverter.ToInt32(data, i + 4),
                                        BitConverter.ToInt32(data, i + 8)));
                }
            }
        }
 /// <summary>
 /// Writes the values in <paramref name="data"/> to <paramref name="writer"/>, picking the element
 /// type and encoding from the converted-type annotation on <paramref name="schema"/>.
 /// </summary>
 /// <remarks>
 /// The list type expected per annotation is: DATE - List&lt;DateTimeOffset&gt;,
 /// DECIMAL - List&lt;decimal&gt;, INT_8 - List&lt;byte&gt;, UINT_8 - List&lt;sbyte&gt;,
 /// anything else - List&lt;int&gt;. Passing any other list type fails the cast.
 /// </remarks>
 /// <param name="writer">Destination for the encoded values.</param>
 /// <param name="schema">Column schema that determines how values are interpreted.</param>
 /// <param name="data">Values to write; must be the list type matching the annotation.</param>
 /// <exception cref="ParquetException">A DECIMAL value overflows while being scaled into an int.</exception>
 private static void WriteInt32(BinaryWriter writer, SchemaElement schema, IList data)
 {
    if (schema.IsAnnotatedWith(Thrift.ConvertedType.DATE))
    {
       List<DateTimeOffset> dates = (List<DateTimeOffset>)data;
       foreach (DateTimeOffset date in dates)
       {
          int dayCount = (int)date.ToUnixDays();
          // NOTE(review): the stored value is one more than ToUnixDays() returns - confirm the offset is intended.
          writer.Write(dayCount + 1);
       }
       return;
    }

    if (schema.IsAnnotatedWith(Thrift.ConvertedType.DECIMAL))
    {
       List<decimal> decimals = (List<decimal>)data;
       double multiplier = Math.Pow(10, schema.Thrift.Scale);
       foreach (decimal value in decimals)
       {
          try
          {
             // Shift the decimal point right by the column scale and keep the integer part.
             int scaled = (int)(value * (decimal)multiplier);
             writer.Write(scaled);
          }
          catch (OverflowException)
          {
             throw new ParquetException(
                $"value '{value}' is too large to fit into scale {schema.Thrift.Scale} and precision {schema.Thrift.Precision}");
          }
       }
       return;
    }

    if (schema.IsAnnotatedWith(Thrift.ConvertedType.INT_8))
    {
       // NOTE(review): INT_8 is paired with unsigned bytes here and UINT_8 with signed ones below - confirm.
       List<byte> bytes = (List<byte>)data;
       foreach (byte b in bytes)
       {
          writer.Write(b);
       }
       return;
    }

    if (schema.IsAnnotatedWith(Thrift.ConvertedType.UINT_8))
    {
       List<sbyte> signedBytes = (List<sbyte>)data;
       foreach (sbyte sb in signedBytes)
       {
          writer.Write(sb);
       }
       return;
    }

    // No recognised annotation: plain 32-bit integers.
    List<int> ints = (List<int>)data;
    foreach (int value in ints)
    {
       writer.Write(value);
    }
 }
        /// <summary>
        /// Decodes consecutive 64-bit integers from <paramref name="data"/> and appends them to
        /// <paramref name="destination"/>.
        /// </summary>
        /// <remarks>
        /// When the schema's element type is <see cref="DateTimeOffset"/>, each value is converted with
        /// FromUnixTime() and <paramref name="destination"/> must be a List&lt;DateTimeOffset&gt;.
        /// DECIMAL-annotated columns are multiplied by 10^-Scale and appended as <see cref="decimal"/>.
        /// Everything else is appended as <see cref="long"/>.
        /// </remarks>
        /// <param name="data">Raw buffer read in 8-byte steps.</param>
        /// <param name="schema">Column schema that determines how values are interpreted.</param>
        /// <param name="destination">List that receives the decoded values.</param>
        private static void ReadLong(byte[] data, SchemaElement schema, IList destination)
        {
            if (schema.ElementType == typeof(DateTimeOffset))
            {
                List<DateTimeOffset> timestamps = (List<DateTimeOffset>)destination;

                int offset = 0;
                while (offset < data.Length)
                {
                    long raw = BitConverter.ToInt64(data, offset);
                    timestamps.Add(raw.FromUnixTime());
                    offset += 8;
                }
                return;
            }

            if (schema.IsAnnotatedWith(Thrift.ConvertedType.DECIMAL))
            {
                // Multiplying by 10^-Scale moves the decimal point left by Scale digits.
                decimal multiplier = (decimal)Math.Pow(10, -schema.Thrift.Scale);

                int offset = 0;
                while (offset < data.Length)
                {
                    long raw = BitConverter.ToInt64(data, offset);
                    decimal scaled = raw * multiplier;
                    destination.Add(scaled);
                    offset += 8;
                }
                return;
            }

            for (int offset = 0; offset < data.Length; offset += 8)
            {
                long raw = BitConverter.ToInt64(data, offset);
                destination.Add(raw);
            }
        }
Example #5
0
 /// <summary>
 /// Decodes values from <paramref name="data"/> and appends them to <paramref name="destination"/>,
 /// interpreting them according to the converted-type annotation on <paramref name="schema"/>.
 /// </summary>
 /// <remarks>
 /// DATE and DECIMAL columns, and columns without a recognised annotation, are read in 4-byte steps.
 /// INT_8 and UINT_8 columns are read one byte per value: INT_8 values are appended as
 /// <see cref="byte"/>, UINT_8 values as <see cref="sbyte"/>.
 /// </remarks>
 /// <param name="data">Raw buffer holding the encoded values.</param>
 /// <param name="schema">Column schema that determines how values are interpreted.</param>
 /// <param name="destination">List that receives the decoded values.</param>
 private static void ReadInt32(byte[] data, SchemaElement schema, IList destination)
 {
     if (schema.IsAnnotatedWith(Thrift.ConvertedType.DATE))
     {
         for (int i = 0; i < data.Length; i += 4)
         {
             int iv = BitConverter.ToInt32(data, i);
             destination.Add(new DateTimeOffset(iv.FromUnixTime(), TimeSpan.Zero));
         }
     }
     else if (schema.IsAnnotatedWith(Thrift.ConvertedType.DECIMAL))
     {
         // Multiplying by 10^-Scale moves the decimal point left by Scale digits.
         decimal scaleFactor = (decimal)Math.Pow(10, -schema.Thrift.Scale);
         for (int i = 0; i < data.Length; i += 4)
         {
             int     iv = BitConverter.ToInt32(data, i);
             decimal dv = iv * scaleFactor;
             destination.Add(dv);
         }
     }
     else if (schema.IsAnnotatedWith(Thrift.ConvertedType.INT_8))
     {
         // NOTE(review): INT_8 yields unsigned bytes while UINT_8 below yields signed ones - confirm the pairing.
         foreach (byte byteValue in data)
         {
             destination.Add(byteValue);
         }
     }
     else if (schema.IsAnnotatedWith(Thrift.ConvertedType.UINT_8))
     {
         foreach (byte byteValue in data)
         {
             // Reinterpret the raw byte as a signed value. Convert.ToSByte would throw
             // OverflowException for any byte above 127, i.e. for every negative sbyte
             // that was written as its two's-complement byte.
             destination.Add(unchecked((sbyte)byteValue));
         }
     }
     else
     {
         for (int i = 0; i < data.Length; i += 4)
         {
             int iv = BitConverter.ToInt32(data, i);
             destination.Add(iv);
         }
     }
 }
        /// <summary>
        /// Decodes fixed-width DECIMAL values from <paramref name="data"/> and appends them to
        /// <paramref name="destination"/> as <see cref="decimal"/>.
        /// </summary>
        /// <remarks>
        /// Columns that are not annotated as DECIMAL are ignored and nothing is appended.
        /// </remarks>
        /// <param name="data">Raw buffer; each value occupies <c>schema.Thrift.Type_length</c> bytes.</param>
        /// <param name="schema">Column schema supplying the annotation, type length, scale and precision.</param>
        /// <param name="destination">List that receives the decoded values.</param>
        /// <exception cref="ArgumentException">
        /// The schema's type length is zero or negative while <paramref name="data"/> is non-empty.
        /// </exception>
        private static void ReadFixedLenByteArray(byte[] data, SchemaElement schema, IList destination)
        {
            // The annotation does not change between elements, so decide once up front.
            if (data.Length == 0 || !schema.IsAnnotatedWith(Thrift.ConvertedType.DECIMAL))
            {
                return;
            }

            int typeLength = schema.Thrift.Type_length;
            if (typeLength <= 0)
            {
                // A zero stride would never advance the loop below.
                throw new ArgumentException(
                    $"fixed length byte array column declares an invalid type length of {typeLength}", nameof(schema));
            }

            for (int i = 0; i < data.Length; i += typeLength)
            {
                // The width comes from the schema rather than a fixed 16 bytes, because writers
                // such as Spark use varying fixed lengths for decimals.
                byte[] valueBytes = new byte[typeLength];
                Array.Copy(data, i, valueBytes, 0, typeLength);

                // BigInteger(byte[]) expects little-endian input; the stored bytes are presumably
                // big-endian, hence the in-place reversal of the private copy.
                Array.Reverse(valueBytes);
                var bigDecimal = new BigDecimal(new BigInteger(valueBytes), schema.Thrift.Scale, schema.Thrift.Precision);

                decimal dc = (decimal)bigDecimal;
                destination.Add(dc);
            }
        }
 /// <summary>
 /// Decodes consecutive 32-bit integers from <paramref name="data"/> and appends them to
 /// <paramref name="destination"/>.
 /// </summary>
 /// <remarks>
 /// For DATE-annotated columns each integer is converted with FromUnixTime() and wrapped in a
 /// <see cref="DateTimeOffset"/> with a zero offset; otherwise the raw <see cref="int"/> is appended.
 /// </remarks>
 /// <param name="data">Raw buffer read in 4-byte steps.</param>
 /// <param name="schema">Column schema that determines how values are interpreted.</param>
 /// <param name="destination">List that receives the decoded values.</param>
 private static void ReadInt32(byte[] data, SchemaElement schema, IList destination)
 {
     // The annotation is the same for every element, so it is looked up once.
     bool asDate = schema.IsAnnotatedWith(Thrift.ConvertedType.DATE);

     int offset = 0;
     while (offset < data.Length)
     {
         int raw = BitConverter.ToInt32(data, offset);
         if (asDate)
         {
             destination.Add(new DateTimeOffset(raw.FromUnixTime(), TimeSpan.Zero));
         }
         else
         {
             destination.Add(raw);
         }

         offset += 4;
     }
 }
 /// <summary>
 /// Writes the values in <paramref name="data"/> to <paramref name="writer"/> as 64-bit integers.
 /// </summary>
 /// <remarks>
 /// For TIMESTAMP_MILLIS columns <paramref name="data"/> must be a List&lt;DateTimeOffset&gt; and each
 /// value is converted with ToUnixTime(); otherwise it must be a List&lt;long&gt; written as-is.
 /// </remarks>
 /// <param name="writer">Destination for the encoded values.</param>
 /// <param name="schema">Column schema that determines how values are interpreted.</param>
 /// <param name="data">Values to write; must be the list type matching the annotation.</param>
 private static void WriteLong(BinaryWriter writer, SchemaElement schema, IList data)
 {
     if (!schema.IsAnnotatedWith(Thrift.ConvertedType.TIMESTAMP_MILLIS))
     {
         List<long> numbers = (List<long>)data;
         foreach (long number in numbers)
         {
             writer.Write(number);
         }
         return;
     }

     List<DateTimeOffset> timestamps = (List<DateTimeOffset>)data;
     foreach (DateTimeOffset timestamp in timestamps)
     {
         // Keep the value in a long so the 8-byte Write overload is always the one selected.
         long ticks = timestamp.ToUnixTime();
         writer.Write(ticks);
     }
 }
 /// <summary>
 /// Writes the values in <paramref name="data"/> to <paramref name="writer"/> as 32-bit integers.
 /// </summary>
 /// <remarks>
 /// For DATE columns <paramref name="data"/> must be a List&lt;DateTimeOffset&gt; and each value is
 /// converted with ToUnixDays() and narrowed to an int; otherwise it must be a List&lt;int&gt; written as-is.
 /// </remarks>
 /// <param name="writer">Destination for the encoded values.</param>
 /// <param name="schema">Column schema that determines how values are interpreted.</param>
 /// <param name="data">Values to write; must be the list type matching the annotation.</param>
 private static void WriteInt32(BinaryWriter writer, SchemaElement schema, IList data)
 {
     if (!schema.IsAnnotatedWith(Thrift.ConvertedType.DATE))
     {
         List<int> numbers = (List<int>)data;
         foreach (int number in numbers)
         {
             writer.Write(number);
         }
         return;
     }

     List<DateTimeOffset> dates = (List<DateTimeOffset>)data;
     foreach (DateTimeOffset date in dates)
     {
         int dayCount = (int)date.ToUnixDays();
         writer.Write(dayCount);
     }
 }
        /// <summary>
        /// Decodes consecutive 64-bit integers from <paramref name="data"/> and appends them to
        /// <paramref name="destination"/>.
        /// </summary>
        /// <remarks>
        /// For TIMESTAMP_MILLIS columns each value is converted with FromUnixTime() and
        /// <paramref name="destination"/> must be a List&lt;DateTimeOffset&gt;; otherwise the raw
        /// <see cref="long"/> is appended.
        /// </remarks>
        /// <param name="data">Raw buffer read in 8-byte steps.</param>
        /// <param name="schema">Column schema that determines how values are interpreted.</param>
        /// <param name="destination">List that receives the decoded values.</param>
        private static void ReadLong(byte[] data, SchemaElement schema, IList destination)
        {
            if (!schema.IsAnnotatedWith(Thrift.ConvertedType.TIMESTAMP_MILLIS))
            {
                for (int offset = 0; offset < data.Length; offset += 8)
                {
                    long raw = BitConverter.ToInt64(data, offset);
                    destination.Add(raw);
                }
                return;
            }

            // The cast happens before any decoding, so a wrong list type fails even for an empty buffer.
            List<DateTimeOffset> timestamps = (List<DateTimeOffset>)destination;

            int position = 0;
            while (position < data.Length)
            {
                long raw = BitConverter.ToInt64(data, position);
                timestamps.Add(raw.FromUnixTime());
                position += 8;
            }
        }