/// <summary>Writes the "name" column, passed through <c>Reverse</c>, into the "reversed" column.</summary>
/// <param name="input">Row supplying the "name" column.</param>
/// <param name="output">Updatable row that receives the "reversed" column.</param>
/// <returns>A read-only snapshot of <paramref name="output"/>.</returns>
public override IRow Process(IRow input, IUpdatableRow output)
        {
            string name         = input.Get<string>("name");
            string reversedName = Reverse(name);

            output.Set<string>("reversed", reversedName);
            return output.AsReadOnly();
        }
Beispiel #2
0
        /// <summary>
        /// Streams the input as XML and yields one row for each element named <c>elementName</c>.
        /// Extract is called at least once per vertex.
        /// </summary>
        /// <remarks>
        /// See https://docs.microsoft.com/en-us/azure/data-lake-analytics/data-lake-analytics-u-sql-programmability-guide#use-user-defined-extractors
        /// </remarks>
        /// <param name="input">Wrapper for a Stream.</param>
        /// <param name="output">IUpdatableRow uses a mutable builder pattern --
        /// individual fields are set with IUpdatableRow.Set and an immutable IRow
        /// is then built by calling IUpdatableRow.AsReadOnly.</param>
        /// <returns>A sequence of IRows; column 0 of each holds one element's XML on a single line.</returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            // An XmlReader streams the document, keeping memory usage to a minimum.
            using (XmlReader xml = XmlReader.Create(input.BaseStream))
            {
                xml.MoveToContent();

                // Advance to each following element with the configured name.
                while (xml.ReadToFollowing(this.elementName))
                {
                    // Reading through a subtree reader decouples the read from the outer reader's
                    // position, so ReadOuterXml() does not move the outer reader forward and
                    // ReadToFollowing() does not skip rows.
                    using (XmlReader element = xml.ReadSubtree())
                    {
                        element.MoveToContent();

                        string outerXml   = element.ReadOuterXml();
                        string compactXml = XElement.Parse(outerXml).ToString(SaveOptions.DisableFormatting);

                        // Replace CRLF, LF and CR with a space so the XML fits in one row.
                        string singleLine = compactXml.Replace("\r\n", " ").Replace('\n', ' ').Replace('\r', ' ');

                        output.Set<string>(0, singleLine);

                        // Build the immutable IRow for this element.
                        yield return output.AsReadOnly();
                    }
                }
            }
        }
Beispiel #3
0
        /// <summary>
        /// Reads the next JSON token and, when the column is required, stores it as a nullable double.
        /// </summary>
        /// <remarks>
        /// The token is read from <paramref name="jsonReader"/> whether or not the column is required.
        /// Integer and floating-point tokens are converted to <c>double?</c>, the same type the
        /// null branch writes, so every branch sets the column with one consistent type and no
        /// precision is lost through an intermediate <c>float</c>.
        /// </remarks>
        /// <param name="jsonReader">Reader positioned just before the property value.</param>
        /// <param name="output">Row that receives the value.</param>
        /// <param name="columnInfo">Target column index and whether the column is required.</param>
        /// <exception cref="Exception">The column is required and the token is not an integer, a float or null.</exception>
        public static void ExtractPropertyDoubleOpt(JsonTextReader jsonReader, IUpdatableRow output, ColumnInfo columnInfo)
        {
            jsonReader.Read();

            if (!columnInfo.IsRequired)
            {
                return;
            }

            switch (jsonReader.TokenType)
            {
                case JsonToken.Integer:
                    output.Set(columnInfo.Idx, (double?)(long)jsonReader.Value);
                    break;

                case JsonToken.Float:
                    output.Set(columnInfo.Idx, (double?)(double)jsonReader.Value);
                    break;

                case JsonToken.Null:
                    output.Set(columnInfo.Idx, (double?)null);
                    break;

                default:
                    throw new Exception("wrong data type");
            }
        }
Beispiel #4
0
        /// <summary>
        /// Loads a single JSON document from the stream and yields one row for each object
        /// that <c>SelectChildren</c> returns for <c>rowpath</c>.
        /// </summary>
        /// <param name="inputStream">Stream containing the JSON text.</param>
        /// <param name="output">Row builder that is filled and snapshotted for each object.</param>
        /// <returns>One row per selected object; nothing when the stream is empty or does not start with an object.</returns>
        protected virtual IEnumerable<IRow> Extract(Stream inputStream, IUpdatableRow output)
        {
            // Json.Net
            using (var json = new JsonTextReader(new StreamReader(inputStream)))
            {
                // Nothing to emit unless a first token exists and it opens an object.
                bool startsWithObject = json.Read() && json.TokenType == JsonToken.StartObject;
                if (!startsWithObject)
                {
                    yield break;
                }

                JToken document = JToken.Load(json);

                // Every selected object becomes a row; its fields become columns.
                foreach (JObject rowObject in SelectChildren(document, this.rowpath))
                {
                    this.JObjectToRow(rowObject, output);

                    yield return output.AsReadOnly();
                }
            }
        }
        /// <summary>
        /// Reads a tab-separated partition descriptor (id, from, to) from the first input line,
        /// then yields one row per record returned by the provider for that range.
        /// </summary>
        /// <param name="input">Input whose first line holds the partition descriptor.</param>
        /// <param name="output">Row builder reused for every emitted row.</param>
        /// <returns>Rows carrying the extractor id, the partition id and three values from the source.</returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            string partitionId;
            string rangeFrom;
            string rangeTo;

            // 1. Collect the partition information from the first line of the input.
            using (var header = new StreamReader(input.BaseStream))
            {
                string[] fields = header.ReadLine().Split('\t');
                partitionId = fields[0];
                rangeFrom   = fields[1];
                rangeTo     = fields[2];
            }

            // 2. Read the data source using the partition information.
            using (var source = ProviderFactory.CreateInstance(_cnxString, rangeFrom, rangeTo))
            {
                foreach (var record in source.Rows)
                {
                    output.Set("extractor_id", _id);
                    output.Set("partition_id", partitionId);
                    output.Set("partition", record[0]);
                    output.Set("value1", record[1]);
                    output.Set("value2", record[2]);

                    yield return output.AsReadOnly();
                }
            }

            // Add some latency to the data read.
            Thread.Sleep(10000);
        }
Beispiel #6
0
        /// <summary>
        /// Projects a colour row: upper-cased name, a "Y"/"N" transparency flag, then the id and RGB value.
        /// </summary>
        /// <param name="inRow">Input row; cast to <c>SqlIpRow</c>.</param>
        /// <param name="outRow">Output row; cast to <c>SqlIpUpdatableRow</c>.</param>
        /// <returns>A read-only snapshot of the output row.</returns>
        public override IRow Process(IRow inRow, IUpdatableRow outRow)
        {
            var source = (ScopeEngineManaged.SqlIpRow)inRow;
            var target = (ScopeEngineManaged.SqlIpUpdatableRow)outRow;

            // Index into exceptionsInfo of the output expression currently being evaluated;
            // it is advanced after each output column is written.
            int exceptionIndex = 0;

            try
            {
                int    colorId         = source.GetInternal<System.Int32>(0);
                string colorName       = source.GetInternal<System.String>(1);
                string colorRgb        = source.GetInternal<System.String>(2);
                string transparentFlag = source.GetInternal<System.String>(3);

                target.SetInternal(0, colorName.ToUpper());
                exceptionIndex += 1;

                bool isTransparent = transparentFlag.ToUpper() == "T";
                target.SetInternal(1, isTransparent ? "Y" : "N");
                exceptionIndex += 1;

                target.SetInternal(2, colorId);
                exceptionIndex += 1;

                target.SetInternal(3, colorRgb);
                exceptionIndex += 1;
            }
            catch (Exception exception)
            {
                ScopeEngineManaged.UserExceptionHelper.WrapUserExpressionException(exceptionsInfo[exceptionIndex], ScopeEngineManaged.SqlHelper.Dump(source), exception);
            }

            return target.AsReadOnly();
        }
Beispiel #7
0
            /// <summary>
            /// Reads the start time, end time and start value from the input row and delegates to
            /// <c>locf</c> with the type argument matching the declared type of the start-value column.
            /// </summary>
            /// <param name="input">Row providing the start, end and start-value columns.</param>
            /// <param name="output">Row builder handed on to <c>locf</c>.</param>
            /// <returns>
            /// The rows produced by <c>locf</c> for bool, int, double and string columns;
            /// an empty sequence (never <c>null</c>) for any other column type.
            /// </returns>
            public override IEnumerable<IRow> Apply(IRow input, IUpdatableRow output)
            {
                DateTime startTime = input.Get<DateTime>(startColumn);
                DateTime endTime   = input.Get<DateTime>(endColumn);
                var      valueType = input.Schema.First(x => x.Name == startValueColumn).Type;

                if (valueType == typeof(bool))
                {
                    return locf<bool>(startTime, endTime, input.Get<bool>(startValueColumn), output);
                }

                if (valueType == typeof(int))
                {
                    return locf<int>(startTime, endTime, input.Get<int>(startValueColumn), output);
                }

                if (valueType == typeof(double))
                {
                    return locf<double>(startTime, endTime, input.Get<double>(startValueColumn), output);
                }

                if (valueType == typeof(string))
                {
                    return locf<string>(startTime, endTime, input.Get<string>(startValueColumn), output);
                }

                // Unsupported column type: hand back an empty sequence rather than null so that
                // enumerating the result cannot fail with a NullReferenceException.
                return Enumerable.Empty<IRow>();
            }
        /// <summary>
        /// Reads an Avro container from the input and yields one row per object,
        /// copying the object's member for every column of the output schema by column name.
        /// </summary>
        /// <param name="input">Input holding the Avro container; an input of length 0 yields nothing.</param>
        /// <param name="output">Row builder whose schema determines which members are read.</param>
        /// <returns>One row per object in the container.</returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            if (input.Length == 0)
            {
                yield break;
            }

            // Created from avroSchema but not used further in this method.
            var serializer = AvroSerializer.CreateGeneric(avroSchema);

            using (var container = AvroContainer.CreateGenericReader(input.BaseStream))
            using (var records = new SequentialReader<dynamic>(container))
            {
                foreach (var record in records.Objects)
                {
                    foreach (var column in output.Schema)
                    {
                        output.Set(column.Name, record[column.Name]);
                    }

                    yield return output.AsReadOnly();
                }
            }
        }
 /// <summary>Copies "DepID" and "DepName" from the input row and sets "HelloWorld" to the <c>hw</c> value.</summary>
 /// <param name="input">Row providing "DepID" and "DepName".</param>
 /// <param name="output">Row builder receiving the three columns.</param>
 /// <returns>A read-only snapshot of <paramref name="output"/>.</returns>
 public override IRow Process(IRow input, IUpdatableRow output)
 {
     int departmentId = input.Get<int>("DepID");
     output.Set<int>("DepID", departmentId);

     string departmentName = input.Get<string>("DepName");
     output.Set<string>("DepName", departmentName);

     output.Set<string>("HelloWorld", hw);

     return output.AsReadOnly();
 }
        /// <summary>
        /// Splits the input on the row delimiter, parses each trimmed row with
        /// <c>LogRowParser</c> and yields its twelve fields as string columns 0-11.
        /// </summary>
        /// <param name="input">Input that is split into rows using <c>_row_delim</c>.</param>
        /// <param name="output">Row builder reused for every emitted row.</param>
        /// <returns>One row per input row.</returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            // Read the input row by row.
            foreach (Stream current in input.Split(_row_delim))
            {
                using (StreamReader streamReader = new StreamReader(current, this._encoding))
                {
                    string line = streamReader.ReadToEnd().Trim();

                    // The parser's result is used directly; no placeholder instance is allocated first.
                    LogRowElements parts = new LogRowParser().ParseElements(line);

                    output.Set<string>(0, parts.IP);
                    output.Set<string>(1, parts.Identity);
                    output.Set<string>(2, parts.UserId);
                    output.Set<string>(3, parts.Timestamp);
                    output.Set<string>(4, parts.Offset);
                    output.Set<string>(5, parts.RequestMessage);
                    output.Set<string>(6, parts.StatusCode);
                    output.Set<string>(7, parts.Size);
                    output.Set<string>(8, parts.Referer);
                    output.Set<string>(9, parts.URL);
                    output.Set<string>(10, parts.UserAgent);
                    output.Set<string>(11, parts.Forwarded);

                    yield return output.AsReadOnly();
                }
            }
        }
Beispiel #11
0
        /// <summary>
        /// Copies the eight user columns to output positions 0-7, replacing the country with its
        /// entry in <c>CountryTranslation</c> when one exists.
        /// </summary>
        /// <remarks>
        /// The translation uses a single <c>TryGetValue</c> lookup. A null country is passed
        /// through unchanged instead of being used as a lookup key.
        /// </remarks>
        /// <param name="input">Row providing the named user columns.</param>
        /// <param name="output">Row builder receiving the columns by position.</param>
        /// <returns>A read-only snapshot of <paramref name="output"/>.</returns>
        public override IRow Process(IRow input, IUpdatableRow output)
        {
            string userId     = input.Get<string>("UserID");
            string name       = input.Get<string>("Name");
            string address    = input.Get<string>("Address");
            string city       = input.Get<string>("City");
            string state      = input.Get<string>("State");
            string postalCode = input.Get<string>("PostalCode");
            string country    = input.Get<string>("Country");
            string phone      = input.Get<string>("Phone");

            string translatedCountry;
            if (country != null && CountryTranslation.TryGetValue(country, out translatedCountry))
            {
                country = translatedCountry;
            }

            output.Set<string>(0, userId);
            output.Set<string>(1, name);
            output.Set<string>(2, address);
            output.Set<string>(3, city);
            output.Set<string>(4, state);
            output.Set<string>(5, postalCode);
            output.Set<string>(6, country);
            output.Set<string>(7, phone);
            return output.AsReadOnly();
        }
Beispiel #12
0
        /// <summary>
        /// Slices a fixed-width line into columns using <c>_fieldMap</c>
        /// (each entry's key is used as the start offset and its value as the length).
        /// </summary>
        /// <remarks>
        /// Entries are matched to columns by position. A field that extends past the end of the
        /// line is skipped. For a column inside the schema whose type is not string, the text is
        /// converted with the type's <see cref="TypeConverter"/> when one can convert from string,
        /// and nothing is written otherwise. In all remaining cases the raw text is written.
        /// </remarks>
        /// <param name="line">The fixed-width source line.</param>
        /// <param name="row">Row that receives the field values.</param>
        protected virtual void LineToRow(string line, IUpdatableRow row)
        {
            int columnIndex = -1;

            foreach (var field in _fieldMap)
            {
                columnIndex++;

                // The line is too short to contain this field.
                if (line.Length < field.Key + field.Value)
                {
                    continue;
                }

                bool needsConversion = columnIndex < row.Schema.Count
                                       && row.Schema[columnIndex].Type != typeof(string);

                if (!needsConversion)
                {
                    row.Set(columnIndex, line.Substring(field.Key, field.Value));
                    continue;
                }

                var converter = TypeDescriptor.GetConverter(row.Schema[columnIndex].Type);
                if (converter != null && converter.CanConvertFrom(typeof(string)))
                {
                    row.Set(columnIndex, converter.ConvertFromString(line.Substring(field.Key, field.Value)));
                }
            }
        }
Beispiel #13
0
        /// <summary>
        /// Applies <c>JsonFunctions.JsonTuple</c> to the location, device and custom columns:
        /// the whole location and device values, then "dimensions[0]", "dimensions[1]" and
        /// "dimensions[2]" of the custom value.
        /// </summary>
        /// <param name="inRow">Input row; cast to <c>SqlIpRow</c>.</param>
        /// <param name="outRow">Output row; cast to <c>SqlIpUpdatableRow</c>.</param>
        /// <returns>A read-only snapshot of the output row.</returns>
        public override IRow Process(IRow inRow, IUpdatableRow outRow)
        {
            var source = (ScopeEngineManaged.SqlIpRow)inRow;
            var target = (ScopeEngineManaged.SqlIpUpdatableRow)outRow;

            // Index into exceptionsInfo of the output expression currently being evaluated;
            // it is advanced after each output column is written.
            int exceptionIndex = 0;

            try
            {
                string locationJson = source.GetInternal<System.String>(0);
                string deviceJson   = source.GetInternal<System.String>(1);
                string customJson   = source.GetInternal<System.String>(2);

                target.SetInternal(0, Microsoft.Analytics.Samples.Formats.Json.JsonFunctions.JsonTuple(locationJson));
                exceptionIndex += 1;

                target.SetInternal(1, Microsoft.Analytics.Samples.Formats.Json.JsonFunctions.JsonTuple(deviceJson));
                exceptionIndex += 1;

                target.SetInternal(2, Microsoft.Analytics.Samples.Formats.Json.JsonFunctions.JsonTuple(customJson, "dimensions[0]"));
                exceptionIndex += 1;

                target.SetInternal(3, Microsoft.Analytics.Samples.Formats.Json.JsonFunctions.JsonTuple(customJson, "dimensions[1]"));
                exceptionIndex += 1;

                target.SetInternal(4, Microsoft.Analytics.Samples.Formats.Json.JsonFunctions.JsonTuple(customJson, "dimensions[2]"));
                exceptionIndex += 1;
            }
            catch (Exception exception)
            {
                ScopeEngineManaged.UserExceptionHelper.WrapUserExpressionException(exceptionsInfo[exceptionIndex], ScopeEngineManaged.SqlHelper.Dump(source), exception);
            }

            return target.AsReadOnly();
        }
Beispiel #14
0
        /// <summary>
        /// Treats every UTF-8 input line as one JSON object and yields a row per line,
        /// filling string and DateTime columns from the property named like the column.
        /// </summary>
        /// <remarks>Columns of any other type are not written by this method.</remarks>
        /// <param name="input">Input read line by line.</param>
        /// <param name="output">Row builder whose schema selects the properties to read.</param>
        /// <returns>One row per input line.</returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            using (StreamReader lines = new StreamReader(input.BaseStream, Encoding.UTF8))
            {
                string json;
                while ((json = lines.ReadLine()) != null)
                {
                    var document = JsonConvert.DeserializeObject<JObject>(json);

                    foreach (var column in output.Schema)
                    {
                        if (column.Type == typeof(string))
                        {
                            output.Set(column.Name, document[column.Name].ToString());
                        }
                        else if (column.Type == typeof(DateTime))
                        {
                            output.Set(column.Name, DateTime.Parse(document[column.Name].ToString()));
                        }
                    }

                    yield return output.AsReadOnly();
                }
            }
        }
Beispiel #15
0
        /// <summary>
        /// Diagnostic extractor: allocates byte buffers one at a time and reports in a single
        /// output row how much was allocated and whether an allocation failed.
        /// The <paramref name="input"/> is never read.
        /// </summary>
        /// <param name="input">Unused by this method.</param>
        /// <param name="outputrow">Row builder receiving the measurement columns.</param>
        /// <returns>Exactly one row.</returns>
        public override IEnumerable <IRow> Extract(IUnstructuredReader input, IUpdatableRow outputrow)
        {
            // Baseline: managed memory in use (after a forced collection) and the configured limit.
            outputrow.Set <long>("GC_TotalMem_Start", GC.GetTotalMemory(true));
            outputrow.Set <long>("MaxUDOMemory", MyLimits.MaxUdoMemory);

            var buff_idx = 0;     // number of buffers successfully allocated so far
            var failed   = false; // set when any exception interrupts the allocation loop
            var gc_mem   = GC.GetTotalMemory(true); // last successfully measured memory figure

            try
            {
                // Allocate up to no_buff buffers of `increment` bytes each, re-measuring after every one.
                while (buff_idx < no_buff)
                {
                    alloc_mem[buff_idx]    = new byte[increment];
                    alloc_mem[buff_idx][0] = 1; // to avoid it being optimized away
                    buff_idx++;
                    gc_mem = GC.GetTotalMemory(true);
                }
            }
            catch (Exception e)
            {
                // Any exception ends the loop; its message is reported in the "error" column.
                failed = true;
                outputrow.Set <string>("error", e.Message);
            }
            outputrow.Set <long>("GC_TotalMem_End", gc_mem);
            outputrow.Set <bool>("failed", failed);
            // NOTE(review): the product is computed before the conversion to long; if `increment`
            // is an int this could overflow for large totals -- confirm the declared type.
            outputrow.Set <long>("alloc_sz", buff_idx * increment);

            yield return(outputrow.AsReadOnly());
        }
Beispiel #16
0
 /// <summary>Writes <paramref name="value"/> to the column at <c>columnInfo.Idx</c>, but only when the column is marked as required.</summary>
 /// <typeparam name="T">Type of the value being stored.</typeparam>
 /// <param name="row">Row to update.</param>
 /// <param name="columnInfo">Supplies the column index and the required flag.</param>
 /// <param name="value">Value to store.</param>
 public static void Set<T>(this IUpdatableRow row, ColumnInfo columnInfo, T value)
 {
     if (!columnInfo.IsRequired)
     {
         return;
     }

     row.Set(columnInfo.Idx, value);
 }
Beispiel #17
0
        /// <summary>
        /// Walks the rows in the order supplied, counting "start" operations up and every other
        /// operation down (never below zero), and emits the highest count reached.
        /// </summary>
        /// <param name="input">Rows with a "timestamp" and an "op" column.</param>
        /// <param name="output">Row builder receiving "cohort" (always "FOO") and "max".</param>
        /// <returns>A single row holding the peak count.</returns>
        public override IEnumerable<IRow> Reduce(IRowset input, IUpdatableRow output)
        {
            int active = 0;
            int peak   = 0;

            foreach (var row in input.Rows)
            {
                // The timestamp is read from the row but takes no part in the calculation.
                var timestamp = row.Get<DateTime>("timestamp");

                bool isStart = row.Get<string>("op") == "start";
                active = isStart ? active + 1 : System.Math.Max(active - 1, 0);

                peak = System.Math.Max(peak, active);
            }

            output.Set<string>("cohort", "FOO");
            output.Set<int>("max", peak);

            yield return output.AsReadOnly();
        }
Beispiel #18
0
        /// <summary>
        /// Looks up the country and the US state for the row's latitude/longitude and writes
        /// their names to "country" and "USstates", using an empty string when a lookup result
        /// or its name is null.
        /// </summary>
        /// <param name="input">Row providing the latitude and longitude columns.</param>
        /// <param name="output">Row builder receiving the two name columns.</param>
        /// <returns>A read-only snapshot of <paramref name="output"/>.</returns>
        public override IRow Process(IRow input, IUpdatableRow output)
        {
            double latitude  = input.Get<double>(latColumn);
            double longitude = input.Get<double>(lonColumn);

            GeoLocation position = new GeoLocation { Longitude = longitude, Latitude = latitude };

            var country = _service.FindCountry(position);
            var usState = _service.FindUsaState(position);

            string countryName = "";
            if (country != null && country.Name != null)
            {
                countryName = country.Name;
            }
            output.Set<string>("country", countryName);

            string stateName = "";
            if (usState != null && usState.Name != null)
            {
                stateName = usState.Name;
            }
            output.Set<string>("USstates", stateName);

            return output.AsReadOnly();
        }
        /// <summary>
        /// Runs <c>MyProcessor</c> (constructed with <c>floor: 4</c>) over one row of schema
        /// "a:int, b:int" with values (2, 3) and checks the resulting row.
        /// </summary>
        public void TestMyProcessor()
        {
            // Schema: "a:int, b:int"
            USqlColumn<int> col1    = new USqlColumn<int>("a");
            USqlColumn<int> col2    = new USqlColumn<int>("b");
            List<IColumn>   columns = new List<IColumn> { col1, col2 };
            USqlSchema      schema  = new USqlSchema(columns);

            // Generate one row with the specified column values.
            object[]      values = new object[] { 2, 3 };
            IRow          input  = new USqlRow(schema, values);
            IUpdatableRow output = input.AsUpdatable();

            // Create the UDO instance and run it.
            MyProcessor processor = new MyProcessor(floor: 4);
            IRow        newOutput = processor.Process(input, output);

            // Verify results. AreEqual reports the expected and actual values on failure,
            // which a bare IsTrue(a == b) cannot do.
            Assert.AreEqual(2, newOutput.Schema.Count);
            Assert.AreEqual(2, newOutput.Get<int>(0));
            Assert.AreEqual(4, newOutput.Get<int>(1));
        }
Beispiel #20
0
        /// <summary>
        /// Sets "Category" to the first entry of <c>categoryMapper</c> that lists a prefix of the
        /// row's "Tag", or to "other" when no prefix matches.
        /// </summary>
        /// <remarks>
        /// The incoming "Category" value is not consulted. Prefixes are compared ordinally, and a
        /// null tag is categorised as "other" instead of causing a NullReferenceException.
        /// </remarks>
        /// <param name="input">Row providing the "Tag" column.</param>
        /// <param name="output">Row builder receiving the "Category" column.</param>
        /// <returns>A read-only snapshot of <paramref name="output"/>.</returns>
        public override IRow Process(IRow input, IUpdatableRow output)
        {
            var tag = input.Get<string>("Tag");
            string category = "other";

            if (tag != null)
            {
                foreach (var mapping in categoryMapper)
                {
                    var matched = false;
                    foreach (var prefix in mapping.Value)
                    {
                        if (tag.StartsWith(prefix, System.StringComparison.Ordinal))
                        {
                            matched = true;
                            break;
                        }
                    }

                    // The first category with a matching prefix wins.
                    if (matched)
                    {
                        category = mapping.Key;
                        break;
                    }
                }
            }

            output.Set("Category", category);
            return output.AsReadOnly();
        }
        /// <summary>
        /// Loads the whole input as an XML document and yields one row per node selected by
        /// <c>rowPath</c>. Extract is called at least once per vertex.
        /// </summary>
        /// <remarks>
        /// Each column is read from the XPath that <c>columnPaths</c> maps to the column name, or
        /// from the column name itself when no mapping exists. That XPath is resolved once per
        /// column before the rows are processed, not once per row.
        /// </remarks>
        /// <param name="input">Wrapper for a Stream</param>
        /// <param name="output">IUpdatableRow uses a mutable builder pattern --
        /// set individual fields with IUpdatableRow.Set, then build an immutable IRow by
        /// calling IUpdatableRow.AsReadOnly.</param>
        /// <returns>A sequence of IRows.</returns>
        /// <exception cref="ArgumentException">A requested column is not of type string.</exception>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            // Make sure that all requested columns are of type string
            IColumn column = output.Schema.FirstOrDefault(col => col.Type != typeof(string));

            if (column != null)
            {
                throw new ArgumentException(string.Format("Column '{0}' must be of type 'string', not '{1}'", column.Name, column.Type.Name));
            }

            XmlDocument xmlDocument = new XmlDocument();

            xmlDocument.Load(input.BaseStream);

            // Pair every column name with the XPath it is read from. This does not depend on
            // the row, so it is computed a single time.
            var columnXPaths = new List<KeyValuePair<string, string>>();
            foreach (IColumn col in output.Schema)
            {
                var explicitColumnMapping = this.columnPaths.FirstOrDefault(columnPath => columnPath.Value == col.Name);
                columnXPaths.Add(new KeyValuePair<string, string>(col.Name, explicitColumnMapping.Key ?? col.Name));
            }

            foreach (XmlNode xmlNode in xmlDocument.DocumentElement.SelectNodes(this.rowPath))
            {
                // IUpdatableRow implements a builder pattern to save memory allocations,
                // so call output.Set in a loop
                foreach (KeyValuePair<string, string> columnXPath in columnXPaths)
                {
                    XmlNode xml = xmlNode.SelectSingleNode(columnXPath.Value);
                    output.Set(columnXPath.Key, xml == null ? null : xml.InnerXml);
                }

                // then call output.AsReadOnly to build an immutable IRow.
                yield return output.AsReadOnly();
            }
        }
Beispiel #22
0
        /// <summary>
        /// Copies the input into a seekable in-memory stream, reads it as an Avro data file and
        /// yields one row per record, with every output column set from the record field of the
        /// same name.
        /// </summary>
        /// <remarks>
        /// A row is emitted once per record, after all of its columns have been set, and the
        /// file reader is disposed when enumeration ends.
        /// </remarks>
        /// <param name="input">Input holding the Avro data file.</param>
        /// <param name="output">Row builder whose schema selects the record fields to read.</param>
        /// <returns>One row per Avro record.</returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            var avschema = Avro.Schema.Parse(avroSchema);

            using (var ms = new MemoryStream())
            {
                CreateSeekableStream(input, ms);
                ms.Position = 0;

                using (var fileReader = DataFileReader<GenericRecord>.OpenReader(ms, avschema))
                {
                    while (fileReader.HasNext())
                    {
                        var avroRecord = fileReader.Next();

                        foreach (var column in output.Schema)
                        {
                            // A null field value is stored as null, exactly like any other value.
                            output.Set<object>(column.Name, avroRecord[column.Name]);
                        }

                        // Emit only after every column of this record has been populated.
                        yield return output.AsReadOnly();
                    }
                }
            }
        }
Beispiel #23
0
        /// <summary>
        /// Scans the input token by token; every object encountered is loaded and one row is
        /// yielded for each object that <c>SelectChildren</c> returns for <c>rowpath</c>.
        /// </summary>
        /// <param name="input">Input containing the JSON text.</param>
        /// <param name="output">Row builder filled by <c>JObjectToRow</c>.</param>
        /// <returns>One row per selected object.</returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            // Json.Net
            using (var json = new JsonTextReader(new StreamReader(input.BaseStream)))
            {
                // Parse the JSON one token at a time.
                while (json.Read())
                {
                    // Only the start of an object begins a new document to load.
                    if (json.TokenType != JsonToken.StartObject)
                    {
                        continue;
                    }

                    JToken document = JToken.Load(json);

                    // Every selected object becomes a row; its fields become columns.
                    foreach (JObject rowObject in SelectChildren(document, this.rowpath))
                    {
                        this.JObjectToRow(rowObject, output);

                        yield return output.AsReadOnly();
                    }
                }
            }
        }
        /// <summary>
        /// Joins the two rowsets on "employee_id": for every left row, one output row is yielded
        /// per right row with the same id, carrying the id, the right row's "employee_name" and
        /// the left row's "department_name".
        /// </summary>
        /// <remarks>
        /// The right rowset is indexed by employee id, so each left row costs one lookup rather
        /// than a scan of all right rows. Output order is unchanged: left-row order, and for each
        /// left row the right-row order of its matches.
        /// </remarks>
        /// <param name="left">Rows providing "employee_id" and "department_name".</param>
        /// <param name="right">Rows providing "employee_id" and "employee_name".</param>
        /// <param name="output">Row builder reused for every emitted row.</param>
        /// <returns>The joined rows.</returns>
        public override IEnumerable<IRow> Combine(IRowset left, IRowset right, IUpdatableRow output)
        {
            // employee_id -> employee names, in the order the right rowset supplied them
            var namesByEmployeeId = new Dictionary<int, List<string>>();

            foreach (var rightRow in right.Rows)
            {
                int id = rightRow.Get<int>("employee_id");

                List<string> names;
                if (!namesByEmployeeId.TryGetValue(id, out names))
                {
                    names = new List<string>();
                    namesByEmployeeId.Add(id, names);
                }

                names.Add(rightRow.Get<string>("employee_name"));
            }

            foreach (var leftRow in left.Rows)
            {
                int employeeId = leftRow.Get<int>("employee_id");

                List<string> matches;
                if (!namesByEmployeeId.TryGetValue(employeeId, out matches))
                {
                    continue;
                }

                string departmentName = leftRow.Get<string>("department_name");

                foreach (string employeeName in matches)
                {
                    output.Set("employee_id", employeeId);
                    output.Set("employee_name", employeeName);
                    output.Set("department_name", departmentName);
                    yield return output.AsReadOnly();
                }
            }
        }
Beispiel #25
0
        /// <summary>
        /// Splits the set number at its first "-": the part before it goes to column 0 and the
        /// part after it to column 1 (the whole value and null when there is no "-"), followed by
        /// the inventory id and version.
        /// </summary>
        /// <param name="inRow">Input row; cast to <c>SqlIpRow</c>.</param>
        /// <param name="outRow">Output row; cast to <c>SqlIpUpdatableRow</c>.</param>
        /// <returns>A read-only snapshot of the output row.</returns>
        public override IRow Process(IRow inRow, IUpdatableRow outRow)
        {
            var source = (ScopeEngineManaged.SqlIpRow)inRow;
            var target = (ScopeEngineManaged.SqlIpUpdatableRow)outRow;

            // Index into exceptionsInfo of the output expression currently being evaluated;
            // it is advanced after each output column is written.
            int exceptionIndex = 0;

            try
            {
                int    inventoryId      = source.GetInternal<System.Int32>(0);
                int    inventoryVersion = source.GetInternal<System.Int32>(1);
                string setNumber        = source.GetInternal<System.String>(2);

                // Position of the first "-", or a negative value when there is none.
                int dashAt = setNumber.IndexOf("-");

                target.SetInternal(0, dashAt >= 0 ? setNumber.Substring(0, dashAt) : setNumber);
                exceptionIndex += 1;

                target.SetInternal(1, dashAt >= 0 ? setNumber.Substring(dashAt + 1) : null);
                exceptionIndex += 1;

                target.SetInternal(2, inventoryId);
                exceptionIndex += 1;

                target.SetInternal(3, inventoryVersion);
                exceptionIndex += 1;
            }
            catch (Exception exception)
            {
                ScopeEngineManaged.UserExceptionHelper.WrapUserExpressionException(exceptionsInfo[exceptionIndex], ScopeEngineManaged.SqlHelper.Dump(source), exception);
            }

            return target.AsReadOnly();
        }
        /// <summary>
        /// Collects every input row as a <c>StolenVehicleRecord</c>, sorts the records and emits
        /// the first one only when it has no recovery date.
        /// </summary>
        /// <remarks>
        /// The sort uses <c>StolenVehicleRecord</c>'s own ordering -- presumably most recent
        /// DateStolen first; confirm against that type.
        /// </remarks>
        /// <param name="input">Rows with "VehicleRegistration", "DateStolen" and "DateRecovered" string columns.</param>
        /// <param name="output">Row builder receiving "VehicleRegistration" and "DateStolen".</param>
        /// <returns>At most one row.</returns>
        public override IEnumerable<IRow> Reduce(IRowset input, IUpdatableRow output)
        {
            // Cache all rows of the input rowset as records.
            var records = input.Rows
                               .Select(row => new StolenVehicleRecord(
                                           row.Get<string>("VehicleRegistration"),
                                           row.Get<string>("DateStolen"),
                                           row.Get<string>("DateRecovered")))
                               .ToList();

            // No records means there is nothing to report.
            if (records.Count == 0)
            {
                yield break;
            }

            records.Sort();

            // After sorting, the first record is the one that decides the outcome.
            var latest = records[0];

            // A record with a recovery date means the vehicle is no longer considered stolen.
            if (latest.DateRecovered == null)
            {
                output.Set<string>("VehicleRegistration", latest.VehicleRegistration);
                output.Set<DateTime>("DateStolen", latest.DateStolen);
                yield return output.AsReadOnly();
            }
        }
        /// <summary>
        /// Serialises the object, runs <c>JsonFunctions.JsonTuple</c> over it with the path
        /// "$.data.author", prints every returned key/value pair to the console and stores the
        /// "data.genre" entry in the "contexts.data.genre" column.
        /// </summary>
        /// <param name="obj">JSON object to map.</param>
        /// <param name="output">Row builder receiving the column.</param>
        private void mapToColumns(JObject obj, IUpdatableRow output)
        {
            string serialized = JsonConvert.SerializeObject(obj);
            var    genre      = JsonFunctions.JsonTuple(serialized, "$.data.author");

            // Dump every extracted entry to the console.
            foreach (var key in genre.Keys)
            {
                Console.WriteLine($"{key}: {genre[key]}");
            }

            // NOTE(review): the tuple is extracted with the path "$.data.author" while the value
            // stored below is looked up under "data.genre" -- confirm this is intended.
            // Open item carried over from the original notes: the value ends up null for the
            // second object; add it conditionally once that is sorted out.
            output.Set("contexts.data.genre", genre["data.genre"]);
        }
        /// <summary>
        /// Folds all rows of the group into one array of integer slots and emits a single row
        /// containing the slots from <c>ColNames.lf_1</c> onward, each written as a string
        /// under the matching name in <c>colNames</c>.
        /// </summary>
        /// <remarks>
        /// The id, loc, fs, tr and st slots are filled from the first row only. For every row the
        /// slots selected by its event type and resource type are set to 1, and the slot selected
        /// by its log feature is set to the row's volume.
        /// </remarks>
        /// <param name="input">Rows with string columns id, loc, fs, tr, st, et, vol, lf and rt.</param>
        /// <param name="output">Row builder receiving the slot columns.</param>
        /// <returns>Exactly one row.</returns>
        public override IEnumerable<IRow> Reduce(IRowset input, IUpdatableRow output)
        {
            int[] colValues  = new int[colNames.Length];
            bool  isFirstRow = true;

            foreach (IRow row in input.Rows)
            {
                if (isFirstRow)
                {
                    colValues[(int)ColNames.id]  = int.Parse(row.Get<string>("id").ToString());
                    colValues[(int)ColNames.loc] = location.GetValue(row.Get<string>("loc").ToString());
                    colValues[(int)ColNames.fs]  = int.Parse(row.Get<string>("fs").ToString());
                    colValues[(int)ColNames.tr]  = int.Parse(row.Get<string>("tr").ToString());
                    colValues[(int)ColNames.st]  = sevType.GetValue(row.Get<string>("st").ToString());
                    isFirstRow = false;
                }

                var eventSlot = eventType.GetValue(row.Get<string>("et").ToString());
                colValues[eventSlot] = 1;

                int volume = int.Parse(row.Get<string>("vol").ToString());

                var featureSlot = logFeature.GetValue(row.Get<string>("lf").ToString());
                colValues[featureSlot] = volume;

                var resourceSlot = resType.GetValue(row.Get<string>("rt").ToString());
                colValues[resourceSlot] = 1;
            }

            // Write output, starting at the lf_1 slot.
            for (int slot = (int)ColNames.lf_1; slot < colValues.Length; slot++)
            {
                output.Set(colNames[slot], colValues[slot].ToString());
            }

            yield return output.AsReadOnly();
        }
Beispiel #29
0
        /// <summary>
        /// Splits the input into rows on "\r\n" and each row into columns on <c>_col_delim</c>.
        /// The second input column is treated as a full name and split on spaces into a
        /// first-name and a last-name output column.
        /// </summary>
        /// <remarks>
        /// When the name contains no space, the last-name column is set to null instead of the
        /// read past the end of the split result failing with an IndexOutOfRangeException.
        /// </remarks>
        /// <param name="input">Input to split into rows.</param>
        /// <param name="output">Row builder reused for every emitted row.</param>
        /// <returns>One row per input row.</returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            // Read the input row by row.
            foreach (Stream current in input.Split(_encoding.GetBytes("\r\n")))
            {
                using (StreamReader streamReader = new StreamReader(current, this._encoding))
                {
                    string line = streamReader.ReadToEnd().Trim();

                    // Split the row by the column delimiter.
                    string[] parts = line.Split(this._col_delim);
                    int      count = 0;
                    foreach (string part in parts)
                    {
                        // The second input column holds the name and fills two output columns.
                        if (count == 1)
                        {
                            string[] name = part.Trim().Split(' ');
                            output.Set<string>(count, name[0]);
                            count += 1;
                            output.Set<string>(count, name.Length > 1 ? name[1] : null);
                        }
                        else
                        {
                            output.Set<string>(count, part);
                        }
                        count += 1;
                    }
                }
                yield return output.AsReadOnly();
            }
        }
Beispiel #30
0
        public void TestMyProcessor()
        {
            // Input schema for the processor: two int columns named "col1" and "col2".
            var schema = new USqlSchema(new List <IColumn> {
                new USqlColumn <int>("col1"),
                new USqlColumn <int>("col2")
            });

            // A single fake input row with both columns set to 0; the updatable output row
            // is derived from it.
            IRow          input  = new USqlRow(schema, new object[2] { 0, 0 });
            IUpdatableRow output = input.AsUpdatable();

            // Run the processor under test on the fake row.
            IRow result = new MyProcessor().Process(input, output);

            // The result must still have two columns, holding 1 and 5 respectively.
            Assert.IsTrue(result.Schema.Count == 2);
            Assert.IsTrue(result.Get <int>(0) == 1);
            Assert.IsTrue(result.Get <int>(1) == 5);
        }
Beispiel #31
0
        /// <summary>Expands the XML text held in one column of the input row into output rows.</summary>
        /// <param name="input">Row whose column named by <c>xmlColumnName</c> contains the XML document.</param>
        /// <param name="output">Reusable row builder; every column in its schema must be of type string.</param>
        /// <returns>One row for each node selected by <c>rowPath</c> under the document element.</returns>
        /// <exception cref="ArgumentException">An output column is not of type string.</exception>
        /// <remarks>Because applier constructor arguments cannot depend on column references,
        /// the column to parse is identified by name and its value is fetched with IRow.Get.</remarks>
        public override IEnumerable<IRow> Apply(IRow input, IUpdatableRow output)
        {
            // Reject the first requested column that is not a string.
            foreach (IColumn candidate in output.Schema)
            {
                if (candidate.Type != typeof(string))
                {
                    throw new ArgumentException(string.Format("Column '{0}' must be of type 'string', not '{1}'", candidate.Name, candidate.Type.Name));
                }
            }

            var document = new XmlDocument();
            document.LoadXml(input.Get<string>(this.xmlColumnName));

            XmlNodeList rowNodes = document.DocumentElement.SelectNodes(this.rowPath);
            foreach (XmlNode rowNode in rowNodes)
            {
                // The same IUpdatableRow is refilled for every node.
                foreach (IColumn col in output.Schema)
                {
                    // An entry of columnPaths whose value equals the column name supplies the
                    // path to query; without one, the column name itself is used as the path.
                    var mapping = this.columnPaths.FirstOrDefault(entry => entry.Value == col.Name);
                    XmlNode match = rowNode.SelectSingleNode(mapping.Key ?? col.Name);

                    string content = null;
                    if (match != null)
                    {
                        content = match.InnerXml;
                    }

                    output.Set(mapping.Value ?? col.Name, content);
                }

                yield return output.AsReadOnly();
            }
        }
Beispiel #32
0
        /// <summary>
        /// Reads the file stripe by stripe and yields one row per stripe row, filling only
        /// the columns returned by GetIntersectedColumnMetadata for the output schema.
        /// </summary>
        /// <param name="output">Reusable row builder that receives the column values.</param>
        /// <param name="input">Seekable stream containing the file.</param>
        /// <returns>The rows of every stripe, in stripe order.</returns>
        /// <exception cref="ArgumentOutOfRangeException">The stream is not seekable.</exception>
        private static IEnumerable <IRow> ExtractInternal(IUpdatableRow output, Stream input)
        {
            if (!input.CanSeek)
            {
                throw new ArgumentOutOfRangeException(nameof(input), "Input stream must be seekable for ORC reader. Enable the hack to copy to a Memory Stream or to a non-Persisted Memory Mapped file. The hack is the default setting.");
            }

            using (var tail = new FileTail(input))
            {
                var stripeCollection = tail.GetStripeCollection();
                var projectedColumns = GetIntersectedColumnMetadata(output.Schema, tail).ToArray();

                foreach (var stripe in stripeCollection)
                {
                    var stripeColumns = ReadStripe(stripe, projectedColumns).ToArray();

                    int rowIndex = 0;
                    while (rowIndex < (int)stripe.NumRows)
                    {
                        foreach (var stripeColumn in stripeColumns)
                        {
                            var targetIndex = stripeColumn.Item1.USqlProjectionColumnIndex;

                            // Fall back to the column default when there is no data for this
                            // column or the stored value is null.
                            var cell = stripeColumn.Item2?.GetValue(rowIndex) ?? stripeColumn.Item1.USqlProjectionColumn.DefaultValue;
                            output.Set(targetIndex, cell);
                        }

                        yield return(output.AsReadOnly());
                        rowIndex++;
                    }
                }
            }
        }
Beispiel #33
0
 // void OutputValueAtCol_I(string c, int i, IUpdatableRow outputrow)
 //
 // Converts the text c to the declared type of column i of the output row and stores it there.
 // Maps, int arrays and strings have optional quotes removed first; an empty text becomes null
 // for maps, arrays and the nullable int/long/DateTime types. Any other column type receives
 // the text unchanged.
 private void OutputValueAtCol_I(string c, int i, IUpdatableRow outputrow)
 {
     Type target = outputrow.Schema[i].Type;

     if (target == typeof(SqlMap<string, string>))
     {
         c = DriverFunctions.RemoveOptionalQuotes(c);
         SqlMap<string, string> map = null;
         if (!String.IsNullOrEmpty(c))
         {
             map = DriverFunctions.ReadStringMap(c, this._map_item_delim, this._map_kv_delim);
         }
         outputrow.Set<SqlMap<string, string>>(i, map);
         return;
     }

     if (target == typeof(SqlArray<int>))
     {
         c = DriverFunctions.RemoveOptionalQuotes(c);
         SqlArray<int> items = null;
         if (!String.IsNullOrEmpty(c))
         {
             items = DriverFunctions.ReadIntArray(c, this._array_item_delim);
         }
         outputrow.Set<SqlArray<int>>(i, items);
         return;
     }

     if (target == typeof(int))
     {
         outputrow.Set<int>(i, Convert.ToInt32(c));
         return;
     }

     if (target == typeof(int?))
     {
         int? parsed = null;
         if (c != "")
         {
             parsed = Convert.ToInt32(c);
         }
         outputrow.Set<int?>(i, parsed);
         return;
     }

     if (target == typeof(long))
     {
         outputrow.Set<long>(i, Convert.ToInt64(c));
         return;
     }

     if (target == typeof(long?))
     {
         long? parsed = null;
         if (c != "")
         {
             parsed = Convert.ToInt64(c);
         }
         outputrow.Set<long?>(i, parsed);
         return;
     }

     if (target == typeof(DateTime))
     {
         outputrow.Set<DateTime>(i, Convert.ToDateTime(c));
         return;
     }

     if (target == typeof(DateTime?))
     {
         DateTime? parsed = null;
         if (c != "")
         {
             parsed = Convert.ToDateTime(c);
         }
         outputrow.Set<DateTime?>(i, parsed);
         return;
     }

     if (target == typeof(string))
     {
         outputrow.Set<string>(i, DriverFunctions.RemoveOptionalQuotes(c));
         return;
     }

     // Unrecognised column type: pass the raw text through.
     outputrow.Set<string>(i, c);
 }
 // Reads the input stream one text line at a time. Each line is handed to LineToRow to
 // fill the reusable output row, and output.AsReadOnly() is yielded once per line.
 public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
 {
     using (var lines = new StreamReader(input.BaseStream))
     {
         for (string current = lines.ReadLine(); current != null; current = lines.ReadLine())
         {
             LineToRow(current, output);
             yield return output.AsReadOnly();
         }
     }
 }
Beispiel #35
0
        /// <summary>
        /// Reads a UTF-8 stream of concatenated JSON documents and emits one row per document.
        /// The output column receives a map holding, in order of preference: the flattened
        /// document, the gzip-compressed document under the key "!CompressedRow", or an error
        /// message under the key "!RowExtractorError" when both exceed the row size limit.
        /// </summary>
        /// <param name="input">Reader over the raw JSON input.</param>
        /// <param name="output">Reusable row builder; only <c>_outputColumnName</c> is written.</param>
        /// <returns>One row per JSON document in the input.</returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            using (var json = new JsonTextReader(new StreamReader(input.BaseStream, Encoding.UTF8)))
            {
                // Allow several top-level documents in the same stream.
                json.SupportMultipleContent = true;

                while (json.Read())
                {
                    var document = JToken.ReadFrom(json);

                    var flatSize = 0;
                    var flattened = GHInsights.USql.Utility.FlattenJson(document, ref flatSize);

                    SqlMap<string, byte[]> payload;
                    if (flatSize < (_dataLakeMaxRowSize))
                    {
                        payload = new SqlMap<string, byte[]>(flattened);
                    }
                    else
                    {
                        // Too large when flattened: try the gzip-compressed compact JSON text.
                        var gzipped = GHInsights.USql.Utility.GzipByteArray(Encoding.UTF8.GetBytes(document.ToString(Formatting.None)));

                        if (gzipped.Length < (_dataLakeMaxRowSize))
                        {
                            payload = new SqlMap<string, byte[]>(new Dictionary<string, byte[]> { { "!CompressedRow", gzipped } });
                        }
                        else
                        {
                            // Still too large: record an error row instead of throwing.
                            var message = $"Resulting SqlMap is too large: OriginalSize:{flatSize} CompressedSize: {gzipped.Length} - {document.ToString(Formatting.None).Substring(0, 100)}";
                            payload = new SqlMap<string, byte[]>(new Dictionary<string, byte[]> { { "!RowExtractorError", Encoding.UTF8.GetBytes(message) } });
                        }
                    }

                    output.Set(_outputColumnName, payload);
                    yield return output.AsReadOnly();
                }
            }
        }
Beispiel #36
0
        /// <summary>
        /// Derives image properties and a thumbnail from the bytes in the "image_data" column
        /// and writes each of them to its output column via SetColumnIfExists.
        /// </summary>
        /// <param name="input">Row containing the image bytes in "image_data".</param>
        /// <param name="output">Reusable row builder that receives the derived values.</param>
        /// <returns>The populated output row.</returns>
        public override IRow Process(IRow input, IUpdatableRow output)
        {
            // Load the image into memory only once per row and reuse it for every column.
            using (StreamImage picture = new StreamImage(input.Get<byte[]>("image_data")))
            {
                output.SetColumnIfExists("equipment_make", picture.getStreamImageProperty(ImageProperties.equipment_make));
                output.SetColumnIfExists("equipment_model", picture.getStreamImageProperty(ImageProperties.equipment_model));
                output.SetColumnIfExists("description", picture.getStreamImageProperty(ImageProperties.description));
                output.SetColumnIfExists("copyright", picture.getStreamImageProperty(ImageProperties.copyright));
                output.SetColumnIfExists("thumbnail", picture.scaleStreamImageTo(150, 150));
            }

            return output.AsReadOnly();
        }
Beispiel #37
0
        /// <summary>
        /// Merges the [begin, end] intervals of one reduce group into non-overlapping intervals.
        /// </summary>
        /// <param name="input">Rows with DateTime columns "begin" and "end". The reducer requires
        /// the rows to be PRESORTED on begin and READONLY on the reduce key.</param>
        /// <param name="output">Reusable row builder; its "begin" and "end" columns are written.</param>
        /// <returns>One row per merged interval; nothing when the rowset is empty.</returns>
        public override IEnumerable<IRow> Reduce(IRowset input, IUpdatableRow output)
        {
            // Current merged interval. The initial values are placeholders that are
            // overwritten by the first row.
            bool first_row_processed = false;
            var begin = DateTime.MaxValue;
            var end = DateTime.MinValue;

            foreach (var row in input.Rows)
            {
                var b = row.Get<DateTime>("begin");
                var e = row.Get<DateTime>("end");

                // If the end is just a time and not a date, it can be earlier than the begin,
                // indicating it is on the next day; move it to the next day in that case.
                if (e < b) { e = e.AddDays(1); }

                if (!first_row_processed)
                {
                    // The first row opens the first interval.
                    first_row_processed = true;
                    begin = b;
                    end = e;
                }
                else if (b <= end)
                {
                    // The row starts inside the current interval: extend it if the row ends later.
                    if (e > end) { end = e; }
                }
                else
                {
                    // Gap found: output the finished interval and start a new one.
                    output.Set<DateTime>("begin", begin);
                    output.Set<DateTime>("end", end);
                    yield return output.AsReadOnly();
                    begin = b;
                    end = e;
                }
            }

            // Output the last interval, but only if at least one row was seen; otherwise the
            // placeholder values would be emitted as a bogus interval.
            if (first_row_processed)
            {
                output.Set<DateTime>("begin", begin);
                output.Set<DateTime>("end", end);
                yield return output.AsReadOnly();
            }
        }
        /// <summary>
        /// Streams the XML input and emits one row per <c>row</c> element, reading each output
        /// column from the attribute with the same name.
        /// </summary>
        /// <param name="input">Reader over the raw XML input.</param>
        /// <param name="output">Reusable row builder whose schema names the attributes to read.</param>
        /// <returns>One row per element whose local name is "row".</returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            using (XmlReader xml = XmlReader.Create(input.BaseStream))
            {
                while (xml.Read())
                {
                    // Only <row> elements produce output.
                    if (xml.NodeType != XmlNodeType.Element || xml.LocalName != "row")
                    {
                        continue;
                    }

                    foreach (IColumn column in output.Schema)
                    {
                        string attributeText = xml.GetAttribute(column.Name);

                        // A missing attribute falls back to the column default.
                        if (attributeText == null)
                        {
                            output.Set(column.Name, column.DefaultValue);
                            continue;
                        }

                        // Non-string columns go through the type's TypeConverter.
                        if (column.Type != typeof(string))
                        {
                            var converter = TypeDescriptor.GetConverter(column.Type);
                            var converted = converter.ConvertFromString(attributeText);

                            output.Set(column.Name, converted);
                            continue;
                        }

                        // Strings are simplified and, when their UTF-8 size exceeds the limit
                        // (noted as 128kB in the original code), shortened to fit.
                        string text = Simplify(attributeText);
                        if (Encoding.UTF8.GetByteCount(text) > Constants.Limits.StringSizeInBytes)
                        {
                            text = ShortenWithinBoundries(text);
                        }

                        output.Set(column.Name, text);
                    }

                    yield return output.AsReadOnly();
                }
            }
        }
Beispiel #39
0
 // IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow outputrow)
 //
 // Extract implementation of DriverExtractor. The input is split into rows on the row
 // delimiter; each row is split into fields on the column delimiter and every field is
 // written to the output column at the same position through OutputValueAtCol_I.
 public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow outputrow)
 {
     foreach (Stream rowStream in input.Split(this._row_delim))
     {
         using (StreamReader rowReader = new StreamReader(rowStream, this._encoding))
         {
             string rowText = rowReader.ReadToEnd();
             string[] fields = rowText.Split(new string[]{this._col_delim}, StringSplitOptions.None);

             int position = 0;
             foreach (string field in fields)
             {
                 this.OutputValueAtCol_I(field, position, outputrow);
                 position++;
             }
         }

         yield return outputrow.AsReadOnly();
     }
 }
Beispiel #40
0
        /// <summary>
        /// Reads the Avro container in the input stream and emits one row per object, copying
        /// the field named after each output column into that column.
        /// </summary>
        /// <param name="input">Reader over the Avro container.</param>
        /// <param name="output">Reusable row builder whose column names select the fields.</param>
        /// <returns>One row per object in the container.</returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            // The serializer is created from avroSchema but not used further in this method.
            var serializer = AvroSerializer.CreateGeneric(avroSchema);

            using (var container = AvroContainer.CreateGenericReader(input.BaseStream))
            using (var records = new SequentialReader<dynamic>(container))
            {
                foreach (var record in records.Objects)
                {
                    foreach (var column in output.Schema)
                    {
                        var fieldName = column.Name;
                        output.Set(fieldName, record[fieldName]);
                    }

                    yield return output.AsReadOnly();
                }
            }
        }
Beispiel #41
0
        /// <summary>
        /// Parses the whole input as one JSON document and emits one row for every object
        /// returned by SelectChildren for the configured row path.
        /// </summary>
        /// <param name="input">Reader over the raw JSON input.</param>
        /// <param name="output">Reusable row builder filled by JObjectToRow.</param>
        /// <returns>One row per selected JSON object; nothing for an empty input.</returns>
        public override IEnumerable<IRow>       Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            // Json.Net fails when asked to parse an empty stream, so an empty input file
            // simply produces no rows.
            if (input.Length == 0)
            {
                yield break;
            }

            // Json.Net
            using(var reader = new JsonTextReader(new StreamReader(input.BaseStream)))
            {
                // Parse Json
                var root = JToken.ReadFrom(reader);

                // Rows
                //  All objects are represented as rows
                foreach(JObject o in SelectChildren(root, this.rowpath))
                {
                    // All fields are represented as columns
                    this.JObjectToRow(o, output);

                    yield return output.AsReadOnly();
                }
            }
        }
Beispiel #42
0
    /// <summary>
    /// Loads the whole input as an XML document and emits one row for every node selected by
    /// the configured XPath, reading each output column from the child node of the same name.
    /// </summary>
    /// <param name="input">Reader over the raw XML input.</param>
    /// <param name="output">Reusable row builder whose column names select the child nodes.</param>
    /// <returns>One row per node matched by <c>m_XPath</c> under the document element.</returns>
    public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
    {
        XmlDocument xmlDocument = new XmlDocument();
        xmlDocument.Load(input.BaseStream);
        foreach (XmlNode xmlNode in xmlDocument.DocumentElement.SelectNodes(this.m_XPath))
        {
            foreach (IColumn col in output.Schema)
            {
                XmlNode xml = xmlNode.SelectSingleNode(col.Name);

                // The same output row is reused for every node, so a column whose child node
                // is missing must be reset to its default; otherwise the value written for
                // the previous node would leak into this row.
                object val = xml != null
                    ? Convert.ChangeType(xml.InnerXml, col.Type)
                    : col.DefaultValue;
                output.Set(col.Name, val);
            }

            yield return output.AsReadOnly();
        }
    }
        /// <summary>
        /// Cuts fixed-position fields out of <paramref name="line"/> and writes them to the
        /// row columns in field-map order. Each <c>_fieldMap</c> entry gives the start offset
        /// (Key) and the length (Value) of one field.
        /// </summary>
        /// <param name="line">The text line to slice.</param>
        /// <param name="row">Row builder that receives one value per field.</param>
        protected virtual void LineToRow(string line, IUpdatableRow row)
        {
            int column = 0;
            foreach(var field in _fieldMap)
            {
                // Fields that would reach past the end of the line are skipped; the column
                // position still advances.
                if (line.Length >= field.Key + field.Value)
                {
                    bool needsConversion = column < row.Schema.Count && row.Schema[column].Type != typeof(string);
                    if (!needsConversion)
                    {
                        row.Set(column, line.Substring(field.Key, field.Value));
                    }
                    else
                    {
                        // Non-string columns are converted through the type's TypeConverter;
                        // nothing is written when the converter cannot convert from string.
                        var converter = TypeDescriptor.GetConverter(row.Schema[column].Type);
                        if (converter != null && converter.CanConvertFrom(typeof(string)))
                        {
                            row.Set(column, converter.ConvertFromString(line.Substring(field.Key, field.Value)));
                        }
                    }
                }

                column++;
            }
        }
Beispiel #44
0
        // IRow Process(IRow input, IUpdatableRow output)
        //
        // Actual implementation of the user-defined processor. Overrides the Process method of IProcessor.
        //
        // Works in three passes:
        //   1. Collect the names of the input columns that are maps with string keys.
        //   2. Fill every non-map output column, either by copying the input column of the same
        //      name or by pulling the value stored under that name out of one of the maps.
        //   3. Copy the map columns to the output, without the keys that pass 2 pulled out.
        public override IRow Process(IRow input, IUpdatableRow output)
        {
            // Pass 1: names of the input columns whose type is a generic SqlMap with a string key type.
            List<string> list = new List<string>();
            foreach (var current in input.Schema)
            {
                if (current.Type.IsGenericType && current.Type.GetGenericTypeDefinition() == typeof(SqlMap) && current.Type.GetGenericArguments()[0] == typeof(string))
                {
                    list.Add(current.Name);
                }
            }

            // Map column name -> keys of that map that were turned into output columns.
            Dictionary<string, ArrayList> maps_to_be_changed = new Dictionary<string, ArrayList>();

            // Pass 2: populate the output columns that are not map columns.
            foreach (var current2 in output.Schema)
            {
                // flag is true when this output column is one of the map columns found in pass 1.
                bool flag = list.Contains(current2.Name);
                if (-1 < input.Schema.IndexOf(current2.Name) && !flag)
                {
                    // The column exists in the input: copy its value unchanged.
                    output.Set<object>(current2.Name, input.Get<object>(current2.Name));
                }
                else if (!flag)
                {
                    // The column does not exist in the input: search the map columns, in order,
                    // for a key equal to the output column name.
                    foreach (string current3 in list)
                    {
                        SqlMap<string, string> sqlMap = input.Get<SqlMap<string, string>>(current3);
                        SqlArray<string> sqlArray = null;
                        List<string> list2 = null;
                        if (sqlMap != null)
                        {
                            sqlArray = sqlMap.Keys;
                            if (sqlMap.Values != null)
                            {
                                list2 = sqlMap.Values.ToList<string>();
                            }
                        }
                        // Position of the key within the map's key list, or -1 when the map is
                        // null or does not contain the key.
                        // NOTE(review): list2[num] below assumes Keys and Values are in the same order - confirm.
                        int num = (sqlArray == null) ? -1 : sqlArray.ToList<string>().IndexOf(current2.Name);
                        if (num != -1)
                        {
                            output.Set<string>(current2.Name, list2[num]);
                            // Remember the key so that pass 3 can drop it from the map.
                            if (maps_to_be_changed.Keys.Contains(current3))
                            {
                                maps_to_be_changed[current3].Add(current2.Name);
                            }
                            else
                            {
                                maps_to_be_changed.Add(current3, new ArrayList
                                {
                                    current2.Name
                                });
                            }
                            // The first map that contains the key wins.
                            break;
                        }
                        // This map does not contain the key: set the type's default value (a new
                        // instance for value types, null otherwise). A later map may overwrite it.
                        output.Set<object>(current2.Name, current2.Type.IsValueType ? Activator.CreateInstance(current2.Type) : null);
                    }
                }
            }

            // Pass 3: write the map columns themselves.
            using (IEnumerator<IColumn> enumerator = output.Schema.GetEnumerator())
            {
                while (enumerator.MoveNext())
                {
                    IColumn out_col = enumerator.Current;
                    bool flag = list.Contains(out_col.Name);
                    if (flag)
                    {
                        SqlMap<string, string> sqlMap = input.Get<SqlMap<string, string>>(out_col.Name);
                        if (maps_to_be_changed != null && maps_to_be_changed.Keys.Contains(out_col.Name))
                        {
                            // Rebuild the map without the keys that became columns in pass 2.
                            sqlMap = new SqlMap<string, string>(
                                from kvp in sqlMap
                                where !maps_to_be_changed[out_col.Name].Contains(kvp.Key)
                                select kvp);
                        }
                        output.Set<SqlMap<string, string>>(out_col.Name, sqlMap);
                    }
                }
            }
            return output.AsReadOnly();
        }
Beispiel #45
0
        /// <summary>Extract is called at least once per instance. Streams the XML input with an
        /// XmlReader and a three-state parser (Row, Column, Data) and yields one row for every
        /// element whose name equals <c>rowPath</c>.</summary>
        /// <param name="input">Wrapper for a Stream</param>
        /// <param name="output">IUpdatableRow uses a mutable builder pattern --
        /// set individual fields with IUpdatableRow.Set, then build an immutable IRow by
        /// calling IUpdatableRow.AsReadOnly.</param>
        /// <returns>IEnumerable of IRow, one IRow per SQLIP row.</returns>
        /// <exception cref="ArgumentException">An output column is not of type string, or the
        /// document ends while a row is still being read.</exception>
		public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
		{
            // Make sure that all requested columns are of type string
            IColumn column = output.Schema.FirstOrDefault(col => col.Type != typeof(string));
            if (column != null)
            {
                throw new ArgumentException(string.Format("Column '{0}' must be of type 'string', not '{1}'", column.Name, column.Type.Name));
            }

			// Parsing starts in the Row state, i.e. looking for the next row element.
			var state = new ParseState();
			state.ClearAndJump(ParseLocation.Row);
			using (var reader = XmlReader.Create(input.BaseStream))
			{
				// Every node read from the XML is handled according to the current state.
				while (reader.Read())
				{
					switch (state.Location)
					{
                        case ParseLocation.Row:
                            // when looking for a new row, we are only interested in elements
                            // whose name matches the requested row element
                            if (reader.NodeType == XmlNodeType.Element && reader.Name == this.rowPath)
                            {
                                // when found, clear the IUpdatableRow's memory
                                // (there is no provided Clear method)
                                for (int i = 0; i < output.Schema.Count; i++)
                                {
                                    output.Set<string>(i, null);
                                }

                                state.ClearAndJump(ParseLocation.Column);
                            }

                            break;
                        case ParseLocation.Column:
                            // When looking for a new column, we are interested in elements
                            // whose name is a key in the columnPaths map or
                            // whose name is in the requested output schema.
                            // This indicates a column whose value needs to be read,
                            // so prepare for reading it by clearing elementValue.
                            if (reader.NodeType == XmlNodeType.Element
                                && (this.columnPaths.ContainsKey(reader.Name)
                                    || output.Schema.Select(c => c.Name).Contains(reader.Name)))
                            {
                                if (reader.IsEmptyElement)
                                {
                                    // For an empty element, write the (just cleared) element value
                                    // and immediately jump to looking for the next column.
                                    // NOTE(review): relies on the columnPaths indexer returning null
                                    // for element names that have no mapping - confirm for its type.
                                    output.Set(this.columnPaths[reader.Name] ?? reader.Name, state.ReadElementValue());
                                    state.ClearAndJump(ParseLocation.Column);
                                }
                                else
                                {
                                    // Remember which element is being read so that its matching
                                    // end element can be recognised in the Data state.
                                    state.Location = ParseLocation.Data;
                                    state.ElementName = reader.Name;
                                    state.ClearElementValue();
                                }
                            }
                            else if (reader.NodeType == XmlNodeType.EndElement && reader.Name == this.rowPath)
                            {
                                // The other interesting case is an end element whose name matches
                                // the current row element. This indicates the end of a row,
                                // so yield the now-complete row and jump to looking for
                                // another row.
                                yield return output.AsReadOnly();
                                state.ClearAndJump(ParseLocation.Row);
                            }

                            break;
                        case ParseLocation.Data:
                            // Most of the code for reading the value of a column
                            // deals with re-creating the inner XML from discrete elements.
                            // The only jump occurs when the reader hits an end element
                            // whose name matches the current column. In this case, we
                            // need to write the accumulated value to the appropriate
                            // column in the output row.
                            switch (reader.NodeType)
                            {
                                case XmlNodeType.EndElement:
                                    if (reader.Name == state.ElementName)
                                    {
                                        // End of the column element: store the accumulated value
                                        // under the mapped column name, or the element name if unmapped.
                                        output.Set(this.columnPaths[state.ElementName] ?? state.ElementName, state.ReadElementValue());
                                        state.ClearAndJump(ParseLocation.Column);
                                    }
                                    else
                                    {
                                        // End of a nested element: close it in the re-created XML.
                                        state.ElementWriter.WriteEndElement();
                                    }

                                    break;
                                case XmlNodeType.Element:
                                    // Nested element: copy its start tag and attributes; an empty
                                    // element has no separate end node, so close it right away.
                                    state.ElementWriter.WriteStartElement(reader.Name);
                                    state.ElementWriter.WriteAttributes(reader, false);
                                    if (reader.IsEmptyElement)
                                    {
                                        state.ElementWriter.WriteEndElement();
                                    }

                                    break;
                                case XmlNodeType.CDATA:
                                    state.ElementWriter.WriteCData(reader.Value);
                                    break;
                                case XmlNodeType.Comment:
                                    state.ElementWriter.WriteComment(reader.Value);
                                    break;
                                case XmlNodeType.ProcessingInstruction:
                                    state.ElementWriter.WriteProcessingInstruction(reader.Name, reader.Value);
                                    break;
                                default:
                                    // Every other node type is written as plain string content.
                                    state.ElementWriter.WriteString(reader.Value);
                                    break;
                            }

                            break;
                        default:
                            throw new NotImplementedException("StreamFromXml has not implemented a new member of the ParseLocation enum");
                    }
				}

                // Ending the document in any state other than Row means a row or column
                // element was still open when the input ran out.
                if (state.Location != ParseLocation.Row)
				{
					throw new ArgumentException("XML document ended without proper closing tags");
				}
			}
		}
Beispiel #46
0
        /// <summary>
        /// Fills every column of <paramref name="row"/> from the property of
        /// <paramref name="o"/> that has the same name.
        /// </summary>
        /// <param name="o">JSON object supplying the values; it may contain more or fewer
        /// properties than the row has columns.</param>
        /// <param name="row">Row builder whose schema decides which properties are read.</param>
        protected virtual void                  JObjectToRow(JObject o, IUpdatableRow row)
        {
            foreach(var column in row.Schema)
            {
                JToken property;
                object cell;

                if(o.TryGetValue(column.Name, out property) && property != null)
                {
                    // All data conversions are delegated to Json.Net. Conversions it does not
                    // support need an explicit projection in the script, e.g.
                    //      SELECT DateTime.Parse(datetime) AS datetime, ...
                    // The conversion can return null even for non-nullable types (sbyte),
                    // so the column default is substituted to keep the value storable.
                    cell = JsonFunctions.ConvertToken(property, column.Type) ?? column.DefaultValue;
                }
                else
                {
                    // No matching property: the column keeps its default value.
                    cell = column.DefaultValue;
                }

                row.Set<object>(column.Name, cell);
            }
        }
Beispiel #47
0
 /// <summary>
 /// Passes the input stream to ImageOps.GetByteArrayforImage and emits a single
 /// row whose column 0 holds the resulting byte array.
 /// </summary>
 /// <param name="input">Reader whose BaseStream supplies the data.</param>
 /// <param name="output">Row builder; column 0 is set before the row is emitted.</param>
 /// <returns>A sequence containing exactly one row.</returns>
 public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
 {
     output.Set<byte[]>(0, ImageOps.GetByteArrayforImage(input.BaseStream));
     yield return output.AsReadOnly();
 }
Beispiel #48
0
 /// <summary>
 /// Actual implementation of the user-defined processor; overrides the Process
 /// method of IProcessor. Replaces the "country" value with its entry in
 /// EnglishCountryNames.CountryTranslation when one exists; otherwise the value
 /// is written back unchanged.
 /// </summary>
 /// <param name="input">Row providing the "country" column.</param>
 /// <param name="output">Row builder receiving the (possibly translated) "country" value.</param>
 /// <returns>The read-only row built from <paramref name="output"/>.</returns>
 public override IRow Process(IRow input, IUpdatableRow output)
 {
     string country = input.Get<string>("country");

     // A null country cannot be used as a lookup key (Dictionary<,> throws on a
     // null key), so it is passed through untouched. TryGetValue performs the
     // membership test and the fetch in a single lookup.
     string translated;
     if (country != null
         && EnglishCountryNames.CountryTranslation.TryGetValue(country, out translated))
     {
         country = translated;
     }

     output.Set<string>("country", country);
     return output.AsReadOnly();
 }
        /// <summary>
        /// Streams a JSON document token by token and emits a row each time an
        /// outermost JSON object closes.
        /// </summary>
        /// <remarks>
        /// <para>Property names are matched against the output schema by exact name;
        /// values of properties without a matching column are skipped.</para>
        /// <para>Scalar values are converted with the column type's TypeConverter when
        /// it can convert from the reader's value type, otherwise the raw value is set.
        /// A JSON null sets the column's default value.</para>
        /// <para>When a matched column is of type string and its value is an object or
        /// array, the nested structure is re-serialized to JSON text and stored as
        /// the column value.</para>
        /// <para>NOTE(review): <paramref name="output"/> is never cleared between rows
        /// here, so a column missing from a later object presumably keeps the value of
        /// an earlier one; confirm against the IUpdatableRow semantics.</para>
        /// </remarks>
        /// <param name="input">Reader whose BaseStream holds the JSON text.</param>
        /// <param name="output">Row builder that is filled and emitted per object.</param>
        /// <returns>A lazily produced sequence of rows.</returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            if (input.Length == 0)
                yield break;

            using (var reader = new JsonTextReader(new StreamReader(input.BaseStream)))
            {
                // Column the next value belongs to (null while no schema column is pending).
                IColumn currentColumn = null;
                // valueBuilder/writer are non-null only while a nested object/array is
                // being re-serialized into a string column.
                StringBuilder valueBuilder = null;
                JsonTextWriter writer = null;
                // "Global" counters track nesting depth in the whole document;
                // "local" counters track nesting depth inside the captured value.
                var startedGlobalObjects = 0;
                var startedLocalObjects = 0;
                var startedGlobalArrays = 0;
                var startedLocalArrays = 0;

                // Read() returning false is the only termination condition. The reader
                // must not be polled again afterwards without advancing it, because
                // the token type would never change.
                while (reader.Read())
                {
                    switch (reader.TokenType)
                    {
                        case JsonToken.StartArray:
                            startedGlobalArrays++;
                            if (currentColumn != null && currentColumn.Type == typeof(string))
                            {
                                // Start capturing on the first nested token of a string column.
                                if (writer == null)
                                {
                                    valueBuilder = new StringBuilder();
                                    writer = new JsonTextWriter(new StringWriter(valueBuilder));
                                }
                                startedLocalArrays++;
                                writer.WriteStartArray();
                            }
                            break;
                        case JsonToken.EndArray:
                            startedGlobalArrays--;
                            if (writer != null)
                            {
                                startedLocalArrays--;
                                writer.WriteEndArray();
                            }
                            // Captured value is complete once all local nesting is closed.
                            if (currentColumn != null && valueBuilder != null
                                && startedLocalArrays == 0 && startedLocalObjects == 0)
                            {
                                output.Set(currentColumn.Name, valueBuilder.ToString());
                                writer = null;
                                valueBuilder = null;
                                currentColumn = null;
                            }
                            // Closing the outermost array ends the extraction.
                            if (startedGlobalArrays == 0)
                            {
                                yield break;
                            }
                            break;

                        case JsonToken.StartObject:
                            startedGlobalObjects++;
                            if (currentColumn != null && currentColumn.Type == typeof(string))
                            {
                                // Start capturing on the first nested token of a string column.
                                if (writer == null)
                                {
                                    valueBuilder = new StringBuilder();
                                    writer = new JsonTextWriter(new StringWriter(valueBuilder));
                                }
                                startedLocalObjects++;
                                writer.WriteStartObject();
                            }
                            break;
                        case JsonToken.EndObject:
                            startedGlobalObjects--;
                            if (writer != null)
                            {
                                startedLocalObjects--;
                                writer.WriteEndObject();
                            }
                            // Captured value is complete once all local nesting is closed.
                            if (currentColumn != null && valueBuilder != null
                                && startedLocalArrays == 0 && startedLocalObjects == 0)
                            {
                                output.Set(currentColumn.Name, valueBuilder.ToString());
                                writer = null;
                                valueBuilder = null;
                                currentColumn = null;
                            }
                            // An outermost object has closed: emit the row built so far.
                            if (startedGlobalObjects == 0)
                                yield return output.AsReadOnly();
                            break;

                        case JsonToken.PropertyName:
                            if (writer != null)
                            {
                                // Inside a captured value: copy the name through verbatim.
                                writer.WritePropertyName(reader.Value.ToString());
                            }
                            else
                            {
                                var currentPropertyName = reader.Value.ToString();
                                currentColumn = output.Schema
                                    .FirstOrDefault(s => s.Name == currentPropertyName);
                                // No matching column: skip this property's value entirely.
                                if (currentColumn == null)
                                    reader.Skip();
                            }
                            break;

                        case JsonToken.String:
                        case JsonToken.Boolean:
                        case JsonToken.Bytes:
                        case JsonToken.Date:
                        case JsonToken.Integer:
                        case JsonToken.Float:
                            if (writer != null)
                            {
                                writer.WriteValue(reader.Value);
                            }
                            else if (currentColumn != null)
                            {
                                // Prefer the column type's converter; fall back to the raw value.
                                var typeConverter = TypeDescriptor.GetConverter(currentColumn.Type);
                                if (typeConverter != null && typeConverter.CanConvertFrom(reader.ValueType))
                                {
                                    output.Set(currentColumn.Name, typeConverter.ConvertFrom(reader.Value));
                                }
                                else
                                    output.Set(currentColumn.Name, reader.Value);
                                currentColumn = null;
                            }
                            break;
                        case JsonToken.Null:
                            if (writer != null)
                            {
                                writer.WriteNull();
                            }
                            else if (currentColumn != null)
                            {
                                output.Set(currentColumn.Name, currentColumn.DefaultValue);
                                currentColumn = null;
                            }
                            break;

                        // The remaining tokens only matter while a value is being captured.
                        case JsonToken.StartConstructor:
                            writer?.WriteStartConstructor(reader.Value.ToString());
                            break;
                        case JsonToken.EndConstructor:
                            writer?.WriteEndConstructor();
                            break;
                        case JsonToken.Comment:
                            writer?.WriteComment(reader.Value.ToString());
                            break;
                        case JsonToken.Raw:
                            writer?.WriteRaw(reader.Value.ToString());
                            break;
                        case JsonToken.None:
                        case JsonToken.Undefined:
                            // ignore
                            break;
                        default:
                            throw new NotImplementedException();
                    }
                }
            }
        }