/// <summary>
        /// Inner-joins the two rowsets on <c>employee_id</c> and emits one output row
        /// for every left/right pair whose ids are equal.
        /// </summary>
        /// <param name="left">Rowset read for <c>employee_id</c> and <c>department_name</c>.</param>
        /// <param name="right">Rowset read for <c>employee_id</c> and <c>employee_name</c>; it is buffered completely before the left side is enumerated.</param>
        /// <param name="output">Reusable row that receives <c>employee_id</c>, <c>employee_name</c> and <c>department_name</c>.</param>
        /// <returns>A read-only snapshot of <paramref name="output"/> per matching pair.</returns>
        public override IEnumerable<IRow> Combine(IRowset left, IRowset right, IUpdatableRow output)
        {
            // Index the right side by employee_id so that each left row costs a single
            // hash lookup instead of a scan over every buffered right row.
            var namesById = new Dictionary<int, List<string>>();

            foreach (var rightRow in right.Rows)
            {
                int rightId = rightRow.Get<int>("employee_id");
                string rightName = rightRow.Get<string>("employee_name");

                List<string> names;
                if (!namesById.TryGetValue(rightId, out names))
                {
                    names = new List<string>();
                    namesById.Add(rightId, names);
                }

                names.Add(rightName);
            }

            foreach (var leftRow in left.Rows)
            {
                int leftId = leftRow.Get<int>("employee_id");

                List<string> matches;
                if (!namesById.TryGetValue(leftId, out matches))
                {
                    continue;
                }

                // Names were appended in right-side order, so matches for one left row
                // are emitted in the same order the nested-loop version produced.
                foreach (string matchedName in matches)
                {
                    output.Set("employee_id", leftId);
                    output.Set("employee_name", matchedName);
                    output.Set("department_name", leftRow.Get<string>("department_name"));
                    yield return output.AsReadOnly();
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Reads Avro records from the input and emits one output row per record,
        /// filling every column of the output schema from the record field of the same name.
        /// </summary>
        /// <param name="input">Unstructured input handed to <c>CreateSeekableStream</c>.</param>
        /// <param name="output">Reusable row whose schema determines which record fields are read.</param>
        /// <returns>A read-only snapshot of <paramref name="output"/> for each Avro record.</returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            var avschema = Avro.Schema.Parse(avroSchema);

            using (var ms = new MemoryStream())
            {
                // CreateSeekableStream (defined elsewhere) receives the input and the
                // memory stream; the stream is rewound before the Avro reader opens it.
                CreateSeekableStream(input, ms);
                ms.Position = 0;

                var fileReader = DataFileReader<GenericRecord>.OpenReader(ms, avschema);

                while (fileReader.HasNext())
                {
                    var avroRecord = fileReader.Next();

                    foreach (var column in output.Schema)
                    {
                        var fieldValue = avroRecord[column.Name];

                        if (fieldValue != null)
                        {
                            output.Set(column.Name, fieldValue);
                        }
                        else
                        {
                            output.Set<object>(column.Name, null);
                        }
                    }

                    // Emit once per record, after all columns are populated. Yielding
                    // inside the column loop produced one partly filled row per column.
                    yield return output.AsReadOnly();
                }
            }
        }
Beispiel #3
0
        /// <summary>
        /// Copies fixed-position fields out of <paramref name="line"/> into <paramref name="row"/>.
        /// Each entry of <c>_fieldMap</c> supplies a start offset (Key) and a length (Value);
        /// the n-th entry is written to the n-th column.
        /// </summary>
        /// <param name="line">Text line the fields are cut from.</param>
        /// <param name="row">Row that receives the values.</param>
        protected virtual void LineToRow(string line, IUpdatableRow row)
        {
            // Incremented at the top of every iteration, so it always equals the
            // zero-based position of the current map entry, skipped or not.
            int column = -1;

            foreach (var field in _fieldMap)
            {
                column++;

                // Field does not fit inside the line: leave the column untouched.
                if (line.Length < field.Key + field.Value)
                {
                    continue;
                }

                bool needsConversion = column < row.Schema.Count && row.Schema[column].Type != typeof(string);

                if (!needsConversion)
                {
                    row.Set(column, line.Substring(field.Key, field.Value));
                    continue;
                }

                var converter = TypeDescriptor.GetConverter(row.Schema[column].Type);

                // No usable string converter: the column is silently left as it was.
                if (converter == null || !converter.CanConvertFrom(typeof(string)))
                {
                    continue;
                }

                row.Set(column, converter.ConvertFromString(line.Substring(field.Key, field.Value)));
            }
        }
Beispiel #4
0
            /// <summary>
            /// Splits the time range read from the start and end columns into one slice per
            /// calendar day and emits a row for each slice.
            /// </summary>
            /// <param name="input">Row providing the <c>startColumn</c> and <c>endColumn</c> values.</param>
            /// <param name="output">Reusable row that receives <c>startTimeOfDay</c> and <c>endTimeOfDay</c>.</param>
            /// <returns>One read-only row per calendar day from the start date through the end date.</returns>
            public override IEnumerable<IRow> Apply(IRow input, IUpdatableRow output)
            {
                DateTime rangeStart = input.Get<DateTime>(startColumn);
                DateTime rangeEnd = input.Get<DateTime>(endColumn);
                DateTime firstDay = rangeStart.Date;
                DateTime lastDay = rangeEnd.Date;

                // Offset from midnight to 23:59:59 of the same day.
                TimeSpan endOfDayOffset = new TimeSpan(23, 59, 59);

                DateTime day = firstDay;
                while (day <= lastDay)
                {
                    // The first slice starts at the actual start time; later ones at midnight.
                    DateTime sliceStart = (day == firstDay) ? rangeStart : day;

                    // The last slice ends at the actual end time; earlier ones at 23:59:59.
                    DateTime sliceEnd = (day == lastDay) ? rangeEnd : day + endOfDayOffset;

                    output.Set<DateTime>("startTimeOfDay", sliceStart);
                    output.Set<DateTime>("endTimeOfDay", sliceEnd);
                    yield return output.AsReadOnly();

                    day = day.AddDays(1);
                }
            }
        /// <summary>
        /// Materialises every input row as a <c>StolenVehicleRecord</c>, sorts the records and
        /// emits the first one if it has no recovery date.
        /// </summary>
        /// <param name="input">Rows providing <c>VehicleRegistration</c>, <c>DateStolen</c> and <c>DateRecovered</c> as strings.</param>
        /// <param name="output">Reusable row that receives <c>VehicleRegistration</c> and <c>DateStolen</c>.</param>
        /// <returns>At most one read-only row.</returns>
        public override IEnumerable<IRow> Reduce(IRowset input, IUpdatableRow output)
        {
            var records = input.Rows
                .Select(row => new StolenVehicleRecord(
                    row.Get<string>("VehicleRegistration"),
                    row.Get<string>("DateStolen"),
                    row.Get<string>("DateRecovered")))
                .ToList();

            // Nothing cached means there is nothing to report.
            if (records.Count == 0)
            {
                yield break;
            }

            // Ordering comes from StolenVehicleRecord's own comparison.
            // NOTE(review): presumably newest DateStolen first - confirm against that type.
            records.Sort();

            var leading = records[0];

            // A recovery date on the leading record means the vehicle is no longer reported.
            if (leading.DateRecovered != null)
            {
                yield break;
            }

            output.Set<string>("VehicleRegistration", leading.VehicleRegistration);
            output.Set<DateTime>("DateStolen", leading.DateStolen);
            yield return output.AsReadOnly();
        }
Beispiel #6
0
        /// <summary>
        /// Reads the input as a sequence of binary blocks of at most <c>_blocksz</c> bytes and
        /// emits one row per block: global position, number of bytes, and the bytes themselves.
        /// </summary>
        /// <param name="input">Reader providing <c>Length</c>, <c>Start</c> and <c>BaseStream</c>.</param>
        /// <param name="outputrow">Reusable row; columns 0, 1 and 2 are written by ordinal.</param>
        /// <returns>One read-only row per block read.</returns>
        /// <exception cref="Exception">A read returned no bytes or more bytes than requested.</exception>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow outputrow)
        {
            long remaining = input.Length;
            long extentStart = input.Start;
            Stream stream = input.BaseStream;

            byte[] block = new byte[this._blocksz];

            while (remaining > 0)
            {
                long blockPosition = extentStart + stream.Position;

                // Never request more than what is left of input.Length, so the read
                // cannot run past the range this extractor was given.
                int wanted = (int)Math.Min(this._blocksz, remaining);

                // Array.Resize leaves the array alone when it already has this length.
                Array.Resize(ref block, wanted);

                int received = stream.Read(block, 0, wanted);
                if (!(received > 0 && received <= wanted))
                {
                    throw new Exception(string.Format("Unexpected amount of {2} bytes was read starting at global stream position {1}. Expected to read {0} bytes.",
                                                      wanted, blockPosition, received));
                }

                // Trim to what was actually delivered (a short read is allowed).
                Array.Resize(ref block, received);
                remaining -= received;

                outputrow.Set<long>(0, blockPosition);
                outputrow.Set<long>(1, received);
                outputrow.Set<byte[]>(2, block);
                yield return outputrow.AsReadOnly();
            }
        }
Beispiel #7
0
        /// <summary>
        /// Splits the input into CRLF-terminated records, splits each record on the column
        /// delimiter, and writes the parts to consecutive output columns. The second part is
        /// additionally split on a space and occupies two columns.
        /// </summary>
        /// <param name="input">Unstructured input to split into records.</param>
        /// <param name="output">Reusable row written by ordinal.</param>
        /// <returns>One read-only row per record.</returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            foreach (Stream record in input.Split(_encoding.GetBytes("\r\n")))
            {
                using (StreamReader recordReader = new StreamReader(record, this._encoding))
                {
                    string text = recordReader.ReadToEnd().Trim();
                    string[] parts = text.Split(this._col_delim);

                    // Next output ordinal to write; it advances by two for the name part.
                    int column = 0;

                    for (int p = 0; p < parts.Length; p++)
                    {
                        if (column != 1)
                        {
                            output.Set<string>(column++, parts[p]);
                            continue;
                        }

                        // Ordinal 1 is only reached by the second part: split it into
                        // two pieces, which land in ordinals 1 and 2.
                        string[] pieces = parts[p].Trim().Split(' ');
                        output.Set<string>(column++, pieces[0]);
                        output.Set<string>(column++, pieces[1]);
                    }
                }

                yield return output.AsReadOnly();
            }
        }
Beispiel #8
0
        /// <summary>
        /// Advances the JSON reader by one token and, when the column is flagged
        /// <c>IsRequired</c>, stores that token in the column as a nullable double.
        /// </summary>
        /// <param name="jsonReader">Reader positioned just before the value token.</param>
        /// <param name="output">Row that receives the value at <c>columnInfo.Idx</c>.</param>
        /// <param name="columnInfo">Supplies the target ordinal and the <c>IsRequired</c> flag.</param>
        /// <exception cref="Exception">The token is neither a number nor null.</exception>
        public static void ExtractPropertyDoubleOpt(JsonTextReader jsonReader, IUpdatableRow output, ColumnInfo columnInfo)
        {
            jsonReader.Read();

            // NOTE(review): the token is consumed either way, but the column is only
            // written when IsRequired is set - confirm this is the intended meaning of the flag.
            if (columnInfo.IsRequired)
            {
                switch (jsonReader.TokenType)
                {
                // All three cases store double? so the value type is the same as the
                // null case; the previous float casts dropped precision and did not
                // match that type.
                case JsonToken.Integer:
                    output.Set(columnInfo.Idx, (double?)(long)jsonReader.Value);
                    break;

                case JsonToken.Float:
                    output.Set(columnInfo.Idx, (double?)(double)jsonReader.Value);
                    break;

                case JsonToken.Null:
                    output.Set(columnInfo.Idx, (double?)null);
                    break;

                default:
                    throw new Exception("wrong data type");
                }
            }
        }
Beispiel #9
0
        /// <summary>
        /// Reads the input as UTF-8 text with one JSON object per line and emits one row per
        /// line. Only <c>string</c> and <c>DateTime</c> columns are populated, each from the
        /// JSON property that has the column's name.
        /// </summary>
        /// <param name="input">Reader whose <c>BaseStream</c> holds the text.</param>
        /// <param name="output">Reusable row whose schema selects the properties to read.</param>
        /// <returns>One read-only row per input line.</returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            using (StreamReader textReader = new StreamReader(input.BaseStream, Encoding.UTF8))
            {
                for (string json = textReader.ReadLine(); json != null; json = textReader.ReadLine())
                {
                    var document = JsonConvert.DeserializeObject<JObject>(json);

                    foreach (var column in output.Schema)
                    {
                        // Columns of any other type are left untouched.
                        if (column.Type == typeof(string))
                        {
                            output.Set(column.Name, document[column.Name].ToString());
                        }
                        else if (column.Type == typeof(DateTime))
                        {
                            output.Set(column.Name, DateTime.Parse(document[column.Name].ToString()));
                        }
                    }

                    yield return output.AsReadOnly();
                }
            }
        }
        /// <summary>
        /// Reads a tab-separated partition descriptor (id, from, to) from the first line of the
        /// input, then streams rows from the provider created for that range.
        /// </summary>
        /// <param name="input">Reader whose <c>BaseStream</c> starts with the descriptor line.</param>
        /// <param name="output">Reusable row that receives <c>extractor_id</c>, <c>partition_id</c>, <c>partition</c>, <c>value1</c> and <c>value2</c>.</param>
        /// <returns>One read-only row per provider row.</returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            string[] descriptor;

            // Step 1: the first line carries the partition information.
            using (var header = new StreamReader(input.BaseStream))
            {
                descriptor = header.ReadLine().Split('\t');
            }

            string partitionId = descriptor[0];
            string rangeFrom = descriptor[1];
            string rangeTo = descriptor[2];

            // Step 2: read the data source for that range.
            using (var source = ProviderFactory.CreateInstance(_cnxString, rangeFrom, rangeTo))
            {
                foreach (var record in source.Rows)
                {
                    output.Set("extractor_id", _id);
                    output.Set("partition_id", partitionId);
                    output.Set("partition", record[0]);
                    output.Set("value1", record[1]);
                    output.Set("value2", record[2]);

                    yield return output.AsReadOnly();
                }
            }

            // Deliberate ten-second pause once the last row has been handed out.
            Thread.Sleep(10000);
        }
 /// <summary>
 /// Copies <c>DepID</c> and <c>DepName</c> from the input row and adds the value of
 /// <c>hw</c> as the <c>HelloWorld</c> column.
 /// </summary>
 /// <param name="input">Row providing <c>DepID</c> and <c>DepName</c>.</param>
 /// <param name="output">Reusable row that receives the three columns.</param>
 /// <returns>A read-only snapshot of <paramref name="output"/>.</returns>
 public override IRow Process(IRow input, IUpdatableRow output)
 {
     int departmentId = input.Get<int>("DepID");
     output.Set<int>("DepID", departmentId);

     string departmentName = input.Get<string>("DepName");
     output.Set<string>("DepName", departmentName);

     output.Set<string>("HelloWorld", hw);

     IRow result = output.AsReadOnly();
     return result;
 }
Beispiel #12
0
        /// <summary>
        /// Looks up the country and US state for the coordinates in the input row and writes
        /// their names to the output, using an empty string when a lookup yields nothing.
        /// </summary>
        /// <param name="input">Row providing the latitude and longitude columns.</param>
        /// <param name="output">Reusable row that receives <c>country</c> and <c>USstates</c>.</param>
        /// <returns>A read-only snapshot of <paramref name="output"/>.</returns>
        public override IRow Process(IRow input, IUpdatableRow output)
        {
            double latitude = input.Get<double>(latColumn);
            double longitude = input.Get<double>(lonColumn);

            GeoLocation point = new GeoLocation {
                Longitude = longitude, Latitude = latitude
            };

            var countryHit = _service.FindCountry(point);
            var stateHit = _service.FindUsaState(point);

            // A missing result and a result without a name both map to "".
            string countryName = (countryHit != null && countryHit.Name != null) ? countryHit.Name : "";
            output.Set<string>("country", countryName);

            string stateName = (stateHit != null && stateHit.Name != null) ? stateHit.Name : "";
            output.Set<string>("USstates", stateName);

            return output.AsReadOnly();
        }
Beispiel #13
0
        /// <summary>
        /// Walks the rows in the order given, counting up on <c>"start"</c> operations and down
        /// (never below zero) on anything else, and emits the highest count reached.
        /// </summary>
        /// <param name="input">Rows providing <c>timestamp</c> and <c>op</c>.</param>
        /// <param name="output">Reusable row that receives <c>cohort</c> and <c>max</c>.</param>
        /// <returns>Exactly one read-only row.</returns>
        public override IEnumerable<IRow> Reduce(IRowset input, IUpdatableRow output)
        {
            int open = 0;
            int peak = 0;

            foreach (var row in input.Rows)
            {
                // The timestamp is read but not used in the computation; the read is kept
                // so a missing or mistyped column still fails as before.
                row.Get<DateTime>("timestamp");

                string operation = row.Get<string>("op");

                open = (operation == "start")
                    ? open + 1
                    : System.Math.Max(0, open - 1);

                if (open > peak)
                {
                    peak = open;
                }
            }

            output.Set<string>("cohort", "FOO");
            output.Set<int>("max", peak);

            yield return output.AsReadOnly();
        }
Beispiel #14
0
        /// <summary>
        /// Parses the XML held in the configured input column and emits one row for every
        /// node selected by <c>rowPath</c>.
        /// </summary>
        /// <param name="input">Row whose <c>xmlColumnName</c> column holds the XML text.</param>
        /// <param name="output">Reusable row; every column in its schema must be of type string.</param>
        /// <returns>One read-only row per selected node.</returns>
        /// <exception cref="ArgumentException">The output schema has a column that is not a string.</exception>
        /// <remarks>
        /// The columns <c>ElementName</c>, <c>ChildName</c> and <c>GrandChildName</c> receive node
        /// names. Every other column receives the inner XML of the node found by the XPath mapped
        /// to it in <c>columnPaths</c>, or by the column name itself when no mapping exists.
        /// </remarks>
        public override IEnumerable<IRow> Apply(IRow input, IUpdatableRow output)
        {
            // Reject schemas that contain anything other than string columns.
            IColumn offending = output.Schema.FirstOrDefault(c => c.Type != typeof(string));
            if (offending != null)
            {
                throw new ArgumentException(string.Format("Column '{0}' must be of type 'string', not '{1}'", offending.Name, offending.Type.Name));
            }

            var document = new XmlDocument();
            document.LoadXml(input.Get<string>(this.xmlColumnName));

            // Register any caller-supplied namespace prefixes for the XPath queries.
            var namespaces = new XmlNamespaceManager(document.NameTable);
            if (this.namespaceDecls != null)
            {
                foreach (var declaration in this.namespaceDecls)
                {
                    namespaces.AddNamespace(declaration.Key, declaration.Value);
                }
            }

            XmlNodeList rowNodes = document.DocumentElement.SelectNodes(this.rowPath, namespaces);

            foreach (XmlNode rowNode in rowNodes)
            {
                foreach (IColumn target in output.Schema)
                {
                    string columnName = target.Name;

                    if (columnName == "ElementName")
                    {
                        output.Set<string>("ElementName", rowNode.Name);
                    }
                    else if (columnName == "ChildName")
                    {
                        output.Set<string>("ChildName", rowNode.FirstChild?.Name);
                    }
                    else if (columnName == "GrandChildName")
                    {
                        output.Set<string>("GrandChildName", rowNode.FirstChild?.FirstChild?.Name);
                    }
                    else
                    {
                        // With no mapping, FirstOrDefault yields a default pair whose Key and
                        // Value are null, so the column name is used as both XPath and target.
                        var mapping = this.columnPaths.FirstOrDefault(path => path.Value == columnName);
                        var selected = rowNode.SelectSingleNode(mapping.Key ?? columnName, namespaces);
                        output.Set(mapping.Value ?? columnName, selected?.InnerXml);
                    }
                }

                yield return output.AsReadOnly();
            }
        }
Beispiel #15
0
        /// <summary>
        /// Merges overlapping [begin, end] intervals and emits one row per merged interval.
        /// </summary>
        /// <param name="input">Rows providing <c>begin</c> and <c>end</c> as <c>DateTime</c>.</param>
        /// <param name="output">Reusable row that receives <c>begin</c> and <c>end</c>.</param>
        /// <returns>One read-only row per merged interval; a final row is always emitted.</returns>
        /// <remarks>
        /// The method does not sort: merging is only correct when rows arrive ordered by
        /// <c>begin</c>. An <c>end</c> earlier than its <c>begin</c> is moved forward by one day.
        /// </remarks>
        public override IEnumerable<IRow> Reduce(IRowset input, IUpdatableRow output)
        {
            bool haveInterval = false;

            // Placeholders until the first row is seen.
            DateTime mergedBegin = DateTime.MaxValue;
            DateTime mergedEnd = DateTime.MinValue;

            foreach (var row in input.Rows)
            {
                DateTime rowBegin = row.Get<DateTime>("begin");
                DateTime rowEnd = row.Get<DateTime>("end");

                // An end before its begin is treated as falling on the following day.
                if (rowEnd < rowBegin)
                {
                    rowEnd = rowEnd.AddDays(1);
                }

                if (!haveInterval)
                {
                    // The first row seeds the running interval.
                    haveInterval = true;
                    mergedBegin = rowBegin;
                    mergedEnd = rowEnd;
                    continue;
                }

                if (rowBegin > mergedEnd)
                {
                    // Gap found: flush the running interval and start a new one.
                    output.Set<DateTime>("begin", mergedBegin);
                    output.Set<DateTime>("end", mergedEnd);
                    yield return output.AsReadOnly();

                    mergedBegin = rowBegin;
                    mergedEnd = rowEnd;
                }
                else if (rowEnd > mergedEnd)
                {
                    // Overlap that reaches further: extend the running interval.
                    mergedEnd = rowEnd;
                }
            }

            // Flush whatever interval is still open.
            output.Set<DateTime>("begin", mergedBegin);
            output.Set<DateTime>("end", mergedEnd);
            yield return output.AsReadOnly();
        }
Beispiel #16
0
            /// <summary>
            /// Writes this object's session id, timestamp, action count and pdrop value into
            /// the columns named by the corresponding <c>*Column</c> members.
            /// </summary>
            /// <param name="row">Row to populate.</param>
            /// <returns>A read-only snapshot of <paramref name="row"/>.</returns>
            public IRow Apply(IUpdatableRow row)
            {
                row.Set(SessionIdColumn, SessionId);
                row.Set(TimestampColumn, Timestamp);
                row.Set(NumActionsColumn, NumActions);
                row.Set(PdropColumn, pdrop);

                IRow snapshot = row.AsReadOnly();
                return snapshot;
            }
Beispiel #17
0
        /// <summary>
        /// Writes <c>col1 + 1</c> and <c>col2 + 5</c> from the input row to the output columns
        /// of the same names.
        /// </summary>
        /// <param name="input">Row providing the integer columns <c>col1</c> and <c>col2</c>.</param>
        /// <param name="output">Reusable row that receives the adjusted values.</param>
        /// <returns>A read-only snapshot of <paramref name="output"/>.</returns>
        public override IRow Process(IRow input, IUpdatableRow output)
        {
            int first = input.Get<int>("col1");
            int second = input.Get<int>("col2");

            int firstShifted = first + 1;
            int secondShifted = second + 5;

            output.Set<int>("col1", firstShifted);
            output.Set<int>("col2", secondShifted);

            return output.AsReadOnly();
        }
Beispiel #18
0
        /// <summary>
        /// Converts the text <paramref name="c"/> to the type of column <paramref name="i"/>
        /// and stores it there.
        /// </summary>
        /// <param name="c">Raw text of the value.</param>
        /// <param name="i">Ordinal of the target column.</param>
        /// <param name="outputrow">Row that receives the converted value.</param>
        /// <remarks>
        /// Map and array columns become null when the unquoted text is null or empty; nullable
        /// numeric and date columns become null when the text is exactly <c>""</c>. A column of
        /// any type not listed here is written with the raw text as a string.
        /// </remarks>
        private void OutputValueAtCol_I(string c, int i, IUpdatableRow outputrow)
        {
            Type target = outputrow.Schema[i].Type;

            if (target == typeof(SqlMap<string, string>))
            {
                c = DriverFunctions.RemoveOptionalQuotes(c);
                SqlMap<string, string> map = null;
                if (!String.IsNullOrEmpty(c))
                {
                    map = DriverFunctions.ReadStringMap(c, this._map_item_delim, this._map_kv_delim);
                }
                outputrow.Set<SqlMap<string, string>>(i, map);
                return;
            }

            if (target == typeof(SqlArray<int>))
            {
                c = DriverFunctions.RemoveOptionalQuotes(c);
                SqlArray<int> array = null;
                if (!String.IsNullOrEmpty(c))
                {
                    array = DriverFunctions.ReadIntArray(c, this._array_item_delim);
                }
                outputrow.Set<SqlArray<int>>(i, array);
                return;
            }

            if (target == typeof(int))
            {
                outputrow.Set<int>(i, Convert.ToInt32(c));
                return;
            }

            if (target == typeof(int?))
            {
                int? parsed = null;
                if (c != "")
                {
                    parsed = Convert.ToInt32(c);
                }
                outputrow.Set<int?>(i, parsed);
                return;
            }

            if (target == typeof(long))
            {
                outputrow.Set<long>(i, Convert.ToInt64(c));
                return;
            }

            if (target == typeof(long?))
            {
                long? parsed = null;
                if (c != "")
                {
                    parsed = Convert.ToInt64(c);
                }
                outputrow.Set<long?>(i, parsed);
                return;
            }

            if (target == typeof(DateTime))
            {
                outputrow.Set<DateTime>(i, Convert.ToDateTime(c));
                return;
            }

            if (target == typeof(DateTime?))
            {
                DateTime? parsed = null;
                if (c != "")
                {
                    parsed = Convert.ToDateTime(c);
                }
                outputrow.Set<DateTime?>(i, parsed);
                return;
            }

            if (target == typeof(string))
            {
                outputrow.Set<string>(i, DriverFunctions.RemoveOptionalQuotes(c));
                return;
            }

            // Unrecognised column type: store the raw text unchanged.
            outputrow.Set<string>(i, c);
        }
        /// <summary>
        /// Splits the input into rows on <c>_rowDelim</c>, cuts each row into fixed-width fields
        /// using the widths in <c>_colWidths</c>, parses every field to the type of the output
        /// column at the same position, and emits one row per input row.
        /// </summary>
        /// <param name="input">Unstructured input to split into rows.</param>
        /// <param name="output">Reusable row written by ordinal; its schema selects the parse type.</param>
        /// <returns>One read-only row per input row.</returns>
        /// <exception cref="Exception">A column has a type other than String, Int32, Double, Single or DateTime.</exception>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            foreach (var currentLine in input.Split(_rowDelim))
            {
                using (var lineReader = new StreamReader(currentLine, _encoding))
                {
                    var line = lineReader.ReadToEnd();

                    // Character offset of the next field within the line.
                    var startParse = 0;

                    // Ordinal of the output column for the current field.
                    var i = 0;
                    foreach (var colWidth in _colWidths)
                    {
                        // Cut out the characters that belong to this fixed-width column.
                        var charsToRead = int.Parse(colWidth.Value);
                        var value       = line.Substring(startParse, charsToRead);

                        // Dispatch on the CLR type name of the target column.
                        switch (output.Schema[i].Type.Name)
                        {
                        case "String":
                            output.Set(i, value);
                            break;

                        case "Int32":
                            output.Set(i, int.Parse(value));
                            break;

                        case "Double":
                            output.Set(i, double.Parse(value));
                            break;

                        // typeof(float).Name is "Single"; with only the "Float" label, float
                        // columns always fell through to the exception below.
                        case "Single":
                        case "Float":
                            output.Set(i, float.Parse(value));
                            break;

                        case "DateTime":
                            output.Set(i, DateTime.Parse(value));
                            break;

                        default:
                            throw new Exception("Unknown data type specified: " + output.Schema[i].Type.Name);
                        }

                        // Advance to the start of the next column.
                        startParse += charsToRead;
                        i++;
                    }

                    yield return output.AsReadOnly();
                }
            }
        }
Beispiel #20
0
        /// <summary>
        /// Decodes the <c>geohash7</c> value of the input row and writes the hash together with
        /// the first two decoded components as <c>lat</c> and <c>lon</c>.
        /// </summary>
        /// <param name="input">Row providing the <c>geohash7</c> string.</param>
        /// <param name="output">Reusable row that receives <c>geohash7</c>, <c>lat</c> and <c>lon</c>.</param>
        /// <returns>A read-only snapshot of <paramref name="output"/>.</returns>
        public override IRow Process(IRow input, IUpdatableRow output)
        {
            string hash = input.Get<string>("geohash7");

            // Element 0 is stored as lat and element 1 as lon.
            var decoded = Geohash.Decode(hash);

            output.Set<string>("geohash7", hash);
            output.Set<double>("lat", decoded[0]);
            output.Set<double>("lon", decoded[1]);

            IRow result = output.AsReadOnly();
            return result;
        }
Beispiel #21
0
        /// <summary>
        /// Reads the input line by line and fills the output columns described by
        /// <c>fileTemplate.FileColumns</c>, either by fixed character position or by position in
        /// the delimited line, depending on <c>fileTemplate.FileType</c>.
        /// </summary>
        /// <param name="input">Reader whose <c>BaseStream</c> holds the text.</param>
        /// <param name="output">Reusable row written by field name.</param>
        /// <returns>One read-only row per input line, including lines that failed to parse.</returns>
        /// <remarks>
        /// Any exception raised while filling a row is swallowed. The row is still emitted with
        /// whatever values were set before the failure, plus values left over from earlier lines.
        /// </remarks>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            // Comma unless the template is delimited and names its own separator.
            char separator = (fileTemplate.FileType == "Delimited")
                ? (char)fileTemplate.FileDelimiterASCIIValue
                : ',';

            using (StreamReader reader = new StreamReader(input.BaseStream))
            {
                while (reader.Peek() >= 0)
                {
                    try
                    {
                        string text = reader.ReadLine();
                        string fileType = fileTemplate.FileType;

                        if (fileType == "FixedPosition")
                        {
                            for (int c = 0; c < fileTemplate.FileColumns.Count; c++)
                            {
                                var column = fileTemplate.FileColumns[c];

                                // Position -1 marks a column with no source; it gets "".
                                string value = (column.Position == -1)
                                    ? ""
                                    : text.Substring(column.Position, column.Length);
                                output.Set<string>(column.Field, value);
                            }
                        }
                        else if (fileType == "Delimited")
                        {
                            string[] items = text.Split(separator);
                            for (int c = 0; c < fileTemplate.FileColumns.Count; c++)
                            {
                                var column = fileTemplate.FileColumns[c];

                                // Delimited positions are 1-based.
                                string value = (column.Position == -1)
                                    ? ""
                                    : items[column.Position - 1];
                                output.Set<string>(column.Field, value);
                            }
                        }
                    }
                    catch (Exception)
                    {
                        // Best effort: a malformed line must not stop the extraction.
                    }

                    // Kept outside the try block: C# forbids yield return inside a try
                    // that has a catch clause.
                    yield return output.AsReadOnly();
                }
            }
        }
Beispiel #22
0
        /// <summary>
        /// Opens the input as a PDF and emits one row per page with the page number in
        /// column 0 and the result of <c>ExtractText</c> for that page in column 1.
        /// </summary>
        /// <param name="input">Reader whose <c>BaseStream</c> holds the PDF.</param>
        /// <param name="output">Reusable row written by ordinal.</param>
        /// <returns>One read-only row per page, in page order.</returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            var pdf = new PdfReader(input.BaseStream);

            // Page numbers start at 1.
            var pageNumber = 1;
            while (pageNumber <= pdf.NumberOfPages)
            {
                output.Set(0, pageNumber);
                output.Set(1, ExtractText(pdf, pageNumber));
                yield return output.AsReadOnly();

                pageNumber++;
            }
        }
Beispiel #23
0
        /// <summary>
        /// Runs the tagger on the image bytes of the input row and writes the number of tags and
        /// a <c>key:value;key:value</c> listing of them to the output.
        /// </summary>
        /// <param name="input">Row providing the image bytes in the <c>imgColName</c> column.</param>
        /// <param name="output">Reusable row that receives the count and the tag string.</param>
        /// <returns>A read-only snapshot of <paramref name="output"/>.</returns>
        public override IRow Process(IRow input, IUpdatableRow output)
        {
            byte[] image = input.Get<byte[]>(this.imgColName);
            var tags = this.tagger.ProduceTags(image);

            // Render every tag as "key:value", then join the pieces with ';'.
            var rendered = from tag in tags
                           select string.Format("{0}:{1}", tag.Key, tag.Value);
            string joined = string.Join(";", rendered);

            output.Set<int>(this.numColName, tags.Count);
            output.Set<string>(this.tagColName, joined);

            return output.AsReadOnly();
        }
Beispiel #24
0
        /// <summary>
        /// Builds a short name ("F.Lastname") from the first and last name, parses the id to an
        /// integer, and copies zipcode and country unchanged.
        /// </summary>
        /// <param name="input">Row providing <c>first_name</c>, <c>last_name</c>, <c>id</c>, <c>zipcode</c> and <c>country</c> as strings.</param>
        /// <param name="output">Reusable row that receives <c>name</c>, <c>id</c>, <c>zipcode</c> and <c>country</c>.</param>
        /// <returns>A read-only snapshot of <paramref name="output"/>.</returns>
        public override IRow Process(IRow input, IUpdatableRow output)
        {
            string given = input.Get<string>("first_name");
            string family = input.Get<string>("last_name");

            // First character of the given name, a dot, then the family name.
            string initial = given.Substring(0, 1);
            string shortName = string.Concat(initial, ".", family);
            output.Set<string>("name", shortName);

            int id = int.Parse(input.Get<string>("id"));
            output.Set<int>("id", id);

            string zipcode = input.Get<string>("zipcode");
            output.Set<string>("zipcode", zipcode);

            string country = input.Get<string>("country");
            output.Set<string>("country", country);

            return output.AsReadOnly();
        }
Beispiel #25
0
        /// <summary>
        /// Reads a stream of concatenated JSON documents and emits one row per document, storing
        /// the document in the output column as a <c>SqlMap&lt;string, byte[]&gt;</c>.
        /// </summary>
        /// <param name="input">Reader whose <c>BaseStream</c> holds UTF-8 JSON.</param>
        /// <param name="output">Reusable row; only <c>_outputColumnName</c> is written.</param>
        /// <returns>One read-only row per JSON document.</returns>
        /// <remarks>
        /// The map holds the flattened document when its reported size is below
        /// <c>_dataLakeMaxRowSize</c>. Otherwise it holds a single <c>!CompressedRow</c> entry with
        /// the gzipped JSON, or a single <c>!RowExtractorError</c> entry when even the compressed
        /// form is too large.
        /// </remarks>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            using (var reader = new JsonTextReader(new StreamReader(input.BaseStream, Encoding.UTF8)))
            {
                // Allow several root-level documents in one stream.
                reader.SupportMultipleContent = true;

                while (reader.Read())
                {
                    var row = JToken.ReadFrom(reader);

                    var size = 0;
                    var flattened = GHInsights.USql.Utility.FlattenJson(row, ref size);

                    SqlMap<string, byte[]> payload;

                    if (size < (_dataLakeMaxRowSize))
                    {
                        payload = new SqlMap<string, byte[]>(flattened);
                    }
                    else
                    {
                        // Too large as a flat map: try the gzipped compact JSON instead.
                        var compressedData = GHInsights.USql.Utility.GzipByteArray(Encoding.UTF8.GetBytes(row.ToString(Formatting.None)));

                        var wrapper = new Dictionary<string, byte[]>();
                        if (compressedData.Length < (_dataLakeMaxRowSize))
                        {
                            wrapper.Add("!CompressedRow", compressedData);
                        }
                        else
                        {
                            // Still too large: record an error marker with both sizes and the
                            // first 100 characters of the document rather than failing.
                            wrapper.Add(
                                "!RowExtractorError",
                                Encoding.UTF8.GetBytes($"Resulting SqlMap is too large: OriginalSize:{size} CompressedSize: {compressedData.Length} - {row.ToString(Formatting.None).Substring(0, 100)}"));
                        }

                        payload = new SqlMap<string, byte[]>(wrapper);
                    }

                    output.Set(_outputColumnName, payload);

                    yield return output.AsReadOnly();
                }
            }
        }
        /// <summary>
        /// Combines visit rows (left) with IP range rows (right), adding a "Location" string to each visit.
        /// </summary>
        /// <remarks>
        /// The right rowset is read once into memory. For every left row the first range whose
        /// [ip_start_int, ip_end_int] interval contains "IPInt" is used. "Location" is
        /// "city, state, country", or "city, country" when the state is null or empty, or an empty
        /// string when no range matches.
        /// </remarks>
        /// <param name="left">Rows with "Year", "Month", "Day", "TotalVisits" and "IPInt".</param>
        /// <param name="right">Rows with "ip_start_int", "ip_end_int", "country", "state" and "city".</param>
        /// <param name="output">Reusable row that receives the copied columns plus "Location".</param>
        /// <returns>One output row per left row.</returns>
        public override IEnumerable <IRow> Combine(IRowset left, IRowset right, IUpdatableRow output)
        {
            // Materialize the ranges once; the right rowset is scanned for every left row otherwise.
            var ipRanges = (from ip in right.Rows
                            select new
                            {
                                IPStart = ip.Get <long>("ip_start_int"),
                                IPEnd = ip.Get <long>("ip_end_int"),
                                country = ip.Get <string>("country"),
                                state = ip.Get <string>("state"),
                                city = ip.Get <string>("city")
                            }).ToList();

            foreach (var row in left.Rows)
            {
                output.Set <int>("Year", row.Get <int>("Year"));
                output.Set <int>("Month", row.Get <int>("Month"));
                output.Set <int>("Day", row.Get <int>("Day"));
                output.Set <long?>("TotalVisits", row.Get <long?>("TotalVisits"));
                long ipValue = row.Get <long>("IPInt");

                // Only the first matching range is needed, so stop scanning as soon as it is found.
                // FirstOrDefault yields null when nothing matches (anonymous types are reference types).
                var match = ipRanges.FirstOrDefault(range => ipValue >= range.IPStart && ipValue <= range.IPEnd);

                string location = "";
                if (match != null)
                {
                    location = String.IsNullOrEmpty(match.state)
                        ? String.Format("{0}, {1}", match.city, match.country)
                        : String.Format("{0}, {1}, {2}", match.city, match.state, match.country);
                }

                output.Set <string>("Location", location);
                yield return(output.AsReadOnly());
            }
        }
Beispiel #27
0
        /// <summary>
        /// Turns each JSON value in the input stream into one row whose <c>_outputColumnName</c>
        /// column carries a <c>SqlMap&lt;string, byte[]&gt;</c>.
        /// </summary>
        /// <remarks>
        /// Three outcomes are possible per value, checked in this order against
        /// <c>_dataLakeMaxRowSize</c>: the flattened JSON, the gzipped JSON stored under
        /// "!CompressedRow", or an error text stored under "!RowExtractorError".
        /// </remarks>
        /// <param name="input">Reader whose base stream is decoded as UTF-8 JSON.</param>
        /// <param name="output">Reusable row that receives the map.</param>
        /// <returns>One row per JSON value read from the stream.</returns>
        public override IEnumerable <IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            using (var json = new JsonTextReader(new StreamReader(input.BaseStream, Encoding.UTF8)))
            {
                // The stream may contain more than one top-level JSON value.
                json.SupportMultipleContent = true;

                while (json.Read())
                {
                    var document = JToken.ReadFrom(json);

                    var flatSize    = 0;
                    var flatColumns = GHInsights.USql.Utility.FlattenJson(document, ref flatSize);

                    // Common case: the flattened form is below the limit.
                    if (flatSize < (_dataLakeMaxRowSize))
                    {
                        output.Set(_outputColumnName, new SqlMap <string, byte[]>(flatColumns));
                        yield return(output.AsReadOnly());
                        continue;
                    }

                    var rawJson = Encoding.UTF8.GetBytes(document.ToString(Formatting.None));
                    var packed  = GHInsights.USql.Utility.GzipByteArray(rawJson);

                    // Fallback: ship the whole document gzipped under a marker key.
                    if (packed.Length < (_dataLakeMaxRowSize))
                    {
                        var packedRow = new Dictionary <string, byte[]>
                        {
                            ["!CompressedRow"] = packed
                        };
                        output.Set(_outputColumnName, new SqlMap <string, byte[]>(packedRow));
                        yield return(output.AsReadOnly());
                        continue;
                    }

                    // Last resort: emit a diagnostic row rather than failing the extraction.
                    var failure = new Dictionary <string, byte[]>
                    {
                        ["!RowExtractorError"] = Encoding.UTF8.GetBytes($"Resulting SqlMap is too large: OriginalSize:{flatSize} CompressedSize: {packed.Length} - {document.ToString(Formatting.None).Substring(0, 100)}")
                    };
                    output.Set(_outputColumnName, new SqlMap <string, byte[]>(failure));
                    yield return(output.AsReadOnly());
                }
            }
        }
Beispiel #28
0
        /// <summary>
        /// Copies the eight string columns of a user row to output positions 0-7, replacing the
        /// country with its entry in <c>CountryTranslation</c> when one exists.
        /// </summary>
        /// <param name="input">Row with "UserID", "Name", "Address", "City", "State", "PostalCode", "Country" and "Phone".</param>
        /// <param name="output">Row whose columns 0-7 are written in that same order.</param>
        /// <returns>The read-only view of the populated output row.</returns>
        public override IRow Process(IRow input, IUpdatableRow output)
        {
            // Position of "Country" both in the list below and in the output row.
            const int countryOrdinal = 6;

            string[] columnNames =
            {
                "UserID", "Name", "Address", "City", "State", "PostalCode", "Country", "Phone"
            };

            // Read every input column first, in output order.
            string[] values = new string[columnNames.Length];
            for (int i = 0; i < columnNames.Length; i++)
            {
                values[i] = input.Get <string>(columnNames[i]);
            }

            // Swap the country for its translation when the table knows it.
            if (CountryTranslation.Keys.Contains(values[countryOrdinal]))
            {
                values[countryOrdinal] = CountryTranslation[values[countryOrdinal]];
            }

            for (int ordinal = 0; ordinal < values.Length; ordinal++)
            {
                output.Set <string>(ordinal, values[ordinal]);
            }

            return(output.AsReadOnly());
        }
        /// <summary>
        /// Extracts tab-separated lines into rows with the columns "Market", "Query" and "Latency".
        /// </summary>
        /// <remarks>
        /// Each line must contain at least three tab-separated fields and the third must be
        /// convertible by <c>Convert.ToInt64</c>; otherwise the indexing or conversion throws.
        /// The reader is disposed when enumeration completes or is abandoned, which also closes
        /// <c>input.BaseStream</c>.
        /// </remarks>
        /// <param name="input">Reader whose base stream supplies the text lines.</param>
        /// <param name="output">Reusable row that receives the three columns.</param>
        /// <returns>One row per input line.</returns>
        public override IEnumerable <IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            char   column_delimiter = '\t';
            string line;

            // Wrap the reader in a using block so it is released deterministically instead of
            // being left for the garbage collector.
            using (var reader = new StreamReader(input.BaseStream))
            {
                while ((line = reader.ReadLine()) != null)
                {
                    var tokens = line.Split(column_delimiter);
                    output.Set("Market", tokens[0]);
                    output.Set("Query", tokens[1]);
                    output.Set("Latency", Convert.ToInt64(tokens[2]));
                    yield return(output.AsReadOnly());
                }
            }
        }
Beispiel #30
0
        /// <summary>
        /// Merges overlapping [begin, end] intervals of one reduce group into disjoint intervals.
        /// </summary>
        /// <remarks>
        /// Requires that the reducer is PRESORTED on "begin" and READONLY on the reduce key.
        /// An "end" that is earlier than its "begin" is treated as falling on the next day and is
        /// moved forward by one day before merging. An empty input produces no rows.
        /// </remarks>
        /// <param name="input">Rows with DateTime columns "begin" and "end", ordered by "begin".</param>
        /// <param name="output">Reusable row that receives the merged "begin" and "end".</param>
        /// <returns>One row per merged interval, in input order.</returns>
        public override IEnumerable<IRow> Reduce(IRowset input, IUpdatableRow output)
        {
            bool haveInterval = false;
            var begin = DateTime.MaxValue; // placeholder until the first row is seen
            var end = DateTime.MinValue;   // placeholder until the first row is seen

            foreach (var row in input.Rows)
            {
                var b = row.Get<DateTime>("begin");
                var e = row.Get<DateTime>("end");

                // If the end is just a time and not a date it can be earlier than the begin,
                // indicating it is on the next day; fix it up in that case.
                if (e < b) { e = e.AddDays(1); }

                if (!haveInterval)
                {
                    // The first row opens the first interval.
                    haveInterval = true;
                    begin = b;
                    end = e;
                    continue;
                }

                if (b <= end)
                {
                    // Still inside the current interval: extend it if this row ends later.
                    if (e > end) { end = e; }
                    continue;
                }

                // Gap found: emit the finished interval and start a new one.
                output.Set<DateTime>("begin", begin);
                output.Set<DateTime>("end", end);
                yield return output.AsReadOnly();
                begin = b;
                end = e;
            }

            // Without any input there is no interval; emitting the placeholders would produce
            // a bogus row running from DateTime.MaxValue to DateTime.MinValue.
            if (!haveInterval)
            {
                yield break;
            }

            // Emit the last open interval.
            output.Set<DateTime>("begin", begin);
            output.Set<DateTime>("end", end);
            yield return output.AsReadOnly();
        }
        /// <summary>
        /// Collapses all rows of one reduce group into a single row of integer features,
        /// written to the output as strings.
        /// </summary>
        /// <param name="input">Rows of the group. "id", "loc", "fs", "tr" and "st" are read from
        /// the first row only; "et", "vol", "lf" and "rt" are read from every row.</param>
        /// <param name="output">Row that receives the columns from index <c>ColNames.lf_1</c>
        /// up to the end of <c>colNames</c>.</param>
        /// <returns>Exactly one row.</returns>
        public override IEnumerable<IRow> Reduce(IRowset input, IUpdatableRow output)
        {
            int[] features = new int[colNames.Length];
            bool headerCaptured = false;

            foreach (IRow current in input.Rows)
            {
                // The per-group attributes are taken from the first row only.
                if (!headerCaptured)
                {
                    features[(int)ColNames.id] = int.Parse(current.Get<string>("id").ToString());
                    features[(int)ColNames.loc] = location.GetValue(current.Get<string>("loc").ToString());
                    features[(int)ColNames.fs] = int.Parse(current.Get<string>("fs").ToString());
                    features[(int)ColNames.tr] = int.Parse(current.Get<string>("tr").ToString());
                    features[(int)ColNames.st] = sevType.GetValue(current.Get<string>("st").ToString());
                    headerCaptured = true;
                }

                // Mark the slot selected by the event type.
                var eventSlot = eventType.GetValue(current.Get<string>("et").ToString());
                features[eventSlot] = 1;

                // Store the volume in the slot selected by the log feature.
                int volume = int.Parse(current.Get<string>("vol").ToString());
                var logSlot = logFeature.GetValue(current.Get<string>("lf").ToString());
                features[logSlot] = volume;

                // Mark the slot selected by the resource type.
                var resourceSlot = resType.GetValue(current.Get<string>("rt").ToString());
                features[resourceSlot] = 1;
            }

            // Write output, starting at the first log-feature column.
            int slot = (int)ColNames.lf_1;
            while (slot < features.Length)
            {
                output.Set(colNames[slot], features[slot].ToString());
                slot++;
            }

            yield return output.AsReadOnly();
        }
Beispiel #32
0
        /// https://docs.microsoft.com/en-us/azure/data-lake-analytics/data-lake-analytics-u-sql-programmability-guide#use-user-defined-extractors
        /// <summary>
        /// Streams the input XML and emits one row per element named <c>elementName</c>,
        /// with the element's outer XML as a single-line string in column 0.
        /// </summary>
        /// <param name="input">Wrapper for the stream containing the XML document.</param>
        /// <param name="output">Reusable row builder; column 0 is set for each element and
        /// a read-only row is produced with <c>AsReadOnly</c>.</param>
        /// <returns>A sequence of rows, one per matching element.</returns>
        public override IEnumerable <IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            // A forward-only XmlReader avoids loading the whole document at once.
            using (XmlReader documentReader = XmlReader.Create(input.BaseStream))
            {
                documentReader.MoveToContent();

                // Advance to each following element with the configured name.
                while (documentReader.ReadToFollowing(this.elementName))
                {
                    // Read the element through its own subtree reader so that ReadOuterXml()
                    // does not advance the outer reader past the next matching element.
                    using (XmlReader elementReader = documentReader.ReadSubtree())
                    {
                        elementReader.MoveToContent();

                        string outerXml = elementReader.ReadOuterXml();
                        string compactXml = XElement.Parse(outerXml).ToString(SaveOptions.DisableFormatting);

                        // Replace CRLF, LF and CR with a space so the value fits on one row.
                        string singleLine = compactXml.Replace("\r\n", " ")
                                                      .Replace('\n', ' ')
                                                      .Replace('\r', ' ');

                        output.Set <string>(0, singleLine);

                        yield return(output.AsReadOnly());
                    }
                }
            }
        }
        /// <summary>
        /// Loads the whole input as an XML document and emits one row per node selected by
        /// <c>rowPath</c>, filling every (string) column with the inner XML of the matching child node.
        /// </summary>
        /// <param name="input">Wrapper for the stream containing the XML document.</param>
        /// <param name="output">Reusable row builder; every schema column must be of type string.</param>
        /// <returns>A sequence of rows, one per selected node.</returns>
        /// <exception cref="ArgumentException">A requested column is not of type string.</exception>
        public override IEnumerable <IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            // Only string columns can be produced, so reject any other requested type.
            IColumn offending = output.Schema.FirstOrDefault(candidate => candidate.Type != typeof(string));
            if (offending != null)
            {
                throw new ArgumentException(string.Format("Column '{0}' must be of type 'string', not '{1}'", offending.Name, offending.Type.Name));
            }

            XmlDocument document = new XmlDocument();
            document.Load(input.BaseStream);

            XmlNodeList rowNodes = document.DocumentElement.SelectNodes(this.rowPath);
            foreach (XmlNode rowNode in rowNodes)
            {
                // The output row is reused, so every column is assigned for every node.
                foreach (IColumn schemaColumn in output.Schema)
                {
                    // Look for a columnPaths entry whose value is this column's name; when there is
                    // none the default entry has null Key/Value and the column name is used for both.
                    var mapping = this.columnPaths.FirstOrDefault(entry => entry.Value == schemaColumn.Name);

                    string nodePath = mapping.Key ?? schemaColumn.Name;
                    XmlNode valueNode = rowNode.SelectSingleNode(nodePath);

                    string targetColumn = mapping.Value ?? schemaColumn.Name;
                    string innerXml = valueNode == null ? null : valueNode.InnerXml;
                    output.Set(targetColumn, innerXml);
                }

                yield return(output.AsReadOnly());
            }
        }
        /// <summary>
        /// Streams the input XML and emits one row per element whose local name is "row",
        /// reading each output column from the attribute of the same name.
        /// </summary>
        /// <remarks>
        /// A missing attribute yields the column's default value. String values are passed through
        /// <c>Simplify</c> and shortened with <c>ShortenWithinBoundries</c> when their UTF-8 size
        /// exceeds <c>Constants.Limits.StringSizeInBytes</c>. Other types are converted with the
        /// <c>TypeConverter</c> registered for the column type.
        /// </remarks>
        /// <param name="input">Wrapper for the stream containing the XML document.</param>
        /// <param name="output">Reusable row builder whose schema lists the attributes to read.</param>
        /// <returns>A sequence of rows, one per "row" element.</returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            using (XmlReader xml = XmlReader.Create(input.BaseStream))
            {
                while (xml.Read())
                {
                    // Skip everything that is not a <row> start tag.
                    if (xml.NodeType != XmlNodeType.Element || xml.LocalName != "row")
                    {
                        continue;
                    }

                    foreach (IColumn column in output.Schema)
                    {
                        string attributeText = xml.GetAttribute(column.Name);

                        if (attributeText == null)
                        {
                            output.Set(column.Name, column.DefaultValue);
                            continue;
                        }

                        if (column.Type != typeof(string))
                        {
                            var converter = TypeDescriptor.GetConverter(column.Type);
                            var converted = converter.ConvertFromString(attributeText);

                            output.Set(column.Name, converted);
                            continue;
                        }

                        string text = Simplify(attributeText);

                        // Keep the value within the string size limit (128kB per the original comment).
                        if (Encoding.UTF8.GetByteCount(text) > Constants.Limits.StringSizeInBytes)
                        {
                            text = ShortenWithinBoundries(text);
                        }

                        output.Set(column.Name, text);
                    }

                    yield return output.AsReadOnly();
                }
            }
        }
Beispiel #35
0
        /// <summary>
        /// Reads an Avro object container from the input stream and emits one row per record,
        /// copying the field named after each output column into that column.
        /// </summary>
        /// <remarks>
        /// Records are read as <c>dynamic</c>, so <c>Set</c> is bound at run time using the value's
        /// run-time type. A null value carries no type to infer the generic argument from, so it
        /// is written explicitly with <c>Set&lt;object&gt;</c>.
        /// </remarks>
        /// <param name="input">Wrapper for the stream containing the Avro container.</param>
        /// <param name="output">Reusable row builder whose schema lists the fields to read.</param>
        /// <returns>A sequence of rows, one per Avro record.</returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            // NOTE(review): the serializer is never used below; it is kept because creating it
            // presumably validates avroSchema — confirm before removing.
            var serializer = AvroSerializer.CreateGeneric(avroSchema);

            using (var genericReader = AvroContainer.CreateGenericReader(input.BaseStream))
            using (var reader = new SequentialReader<dynamic>(genericReader))
            {
                foreach (var obj in reader.Objects)
                {
                    foreach (var column in output.Schema)
                    {
                        object value = obj[column.Name];

                        if (value == null)
                        {
                            // A dynamically bound generic call cannot infer T from null.
                            output.Set<object>(column.Name, null);
                        }
                        else
                        {
                            // Keep run-time binding so T is the value's actual type, as before.
                            output.Set(column.Name, (dynamic)value);
                        }
                    }

                    yield return output.AsReadOnly();
                }
            }
        }
Beispiel #36
0
    /// <summary>
    /// Loads the whole input as an XML document and emits one row per node selected by
    /// <c>m_XPath</c>, converting the inner XML of the child node named after each column
    /// to the column's type.
    /// </summary>
    /// <remarks>
    /// The output row is reused between rows, so a column with no matching child node is reset
    /// to the column's default value instead of keeping the value of the previous row.
    /// Nullable column types are converted through their underlying type because
    /// <c>Convert.ChangeType</c> cannot target <c>Nullable&lt;T&gt;</c>.
    /// </remarks>
    /// <param name="input">Wrapper for the stream containing the XML document.</param>
    /// <param name="output">Reusable row builder whose schema lists the child nodes to read.</param>
    /// <returns>A sequence of rows, one per selected node.</returns>
    public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
    {
        XmlDocument xmlDocument = new XmlDocument();
        xmlDocument.Load(input.BaseStream);
        foreach (XmlNode xmlNode in xmlDocument.DocumentElement.SelectNodes(this.m_XPath))
        {
            foreach (IColumn col in output.Schema)
            {
                XmlNode xml = xmlNode.SelectSingleNode(col.Name);
                if (xml == null)
                {
                    // Do not let the previous row's value leak into this row.
                    output.Set(col.Name, col.DefaultValue);
                    continue;
                }

                // Convert.ChangeType throws InvalidCastException for Nullable<T> targets.
                Type targetType = Nullable.GetUnderlyingType(col.Type) ?? col.Type;
                object val = Convert.ChangeType(xml.InnerXml, targetType);
                output.Set(col.Name, val);
            }

            yield return output.AsReadOnly();
        }
    }
        /// <summary>
        /// Slices a fixed-width line into the columns of <paramref name="row"/> using
        /// <c>_fieldMap</c>, whose entries are used as (start offset, length) pairs; the n-th
        /// entry feeds the n-th column.
        /// </summary>
        /// <remarks>
        /// A field that extends past the end of the line is skipped and its column is left
        /// untouched. Non-string columns are converted with the <c>TypeConverter</c> of the
        /// column type when it can convert from string; otherwise the column is left untouched.
        /// NOTE(review): entries beyond <c>row.Schema.Count</c> still reach <c>row.Set</c> with
        /// an index outside the schema — confirm how <c>Set</c> treats that.
        /// </remarks>
        /// <param name="line">The text line to slice.</param>
        /// <param name="row">The row to populate by column index.</param>
        protected virtual void LineToRow(string line, IUpdatableRow row)
        {
            // Incremented at the top of each iteration, so the first field gets ordinal 0.
            int ordinal = -1;

            foreach (var field in _fieldMap)
            {
                ordinal++;

                var start = field.Key;
                var length = field.Value;

                // The line is too short to contain this field.
                if (line.Length < start + length)
                {
                    continue;
                }

                bool needsConversion = ordinal < row.Schema.Count
                                       && row.Schema[ordinal].Type != typeof(string);

                if (!needsConversion)
                {
                    row.Set(ordinal, line.Substring(start, length));
                    continue;
                }

                var converter = TypeDescriptor.GetConverter(row.Schema[ordinal].Type);
                if (converter != null && converter.CanConvertFrom(typeof(string)))
                {
                    row.Set(ordinal, converter.ConvertFromString(line.Substring(start, length)));
                }
            }
        }
Beispiel #38
0
        // IRow Process(IRow input, IUpdatableRow output)
        //
        // Actual implementation of the user-defined processor. Overrides the Process method of IProcessor.
        //
        // Works in three passes:
        //   1. Collect the names of the input columns recognised as string-keyed maps.
        //   2. Fill every non-map output column, either by copying the input column of the same name
        //      or by pulling the value out of a map column that has a key equal to the column name.
        //   3. Copy the map columns to the output, minus the keys that were pulled out in pass 2.
        public override IRow Process(IRow input, IUpdatableRow output)
        {
            // Pass 1: names of input columns whose type is generic, whose generic type definition
            // equals typeof(SqlMap) and whose first type argument is string.
            // NOTE(review): typeof(SqlMap) names a non-generic type; matching a generic type
            // definition would normally use typeof(SqlMap<,>) — confirm this is intended.
            List<string> list = new List<string>();
            foreach (var current in input.Schema)
            {
                if (current.Type.IsGenericType && current.Type.GetGenericTypeDefinition() == typeof(SqlMap) && current.Type.GetGenericArguments()[0] == typeof(string))
                {
                    list.Add(current.Name);
                }
            }

            // Map column name -> keys that were promoted to their own output column.
            Dictionary<string, ArrayList> maps_to_be_changed = new Dictionary<string, ArrayList>();

            // Pass 2: populate the non-map output columns.
            foreach (var current2 in output.Schema)
            {
                // flag: this output column is one of the map columns found in pass 1.
                bool flag = list.Contains(current2.Name);
                if (-1 < input.Schema.IndexOf(current2.Name) && !flag)
                {
                    // The column exists in the input and is not a map: copy it as-is.
                    output.Set<object>(current2.Name, input.Get<object>(current2.Name));
                }
                else if (!flag)
                {
                    // The column is not in the input: look for a map that has it as a key.
                    // NOTE(review): when no map columns were found this loop does not run and the
                    // output column is not assigned here.
                    foreach (string current3 in list)
                    {
                        SqlMap<string, string> sqlMap = input.Get<SqlMap<string, string>>(current3);
                        SqlArray<string> sqlArray = null;
                        List<string> list2 = null;
                        if (sqlMap != null)
                        {
                            sqlArray = sqlMap.Keys;
                            if (sqlMap.Values != null)
                            {
                                list2 = sqlMap.Values.ToList<string>();
                            }
                        }
                        // Position of the column name among the map's keys, or -1 when absent.
                        // The value is then taken from the same position in Values —
                        // assumes Keys and Values enumerate in matching order; TODO confirm.
                        int num = (sqlArray == null) ? -1 : sqlArray.ToList<string>().IndexOf(current2.Name);
                        if (num != -1)
                        {
                            output.Set<string>(current2.Name, list2[num]);
                            // Remember the promoted key so pass 3 can drop it from this map.
                            if (maps_to_be_changed.Keys.Contains(current3))
                            {
                                maps_to_be_changed[current3].Add(current2.Name);
                            }
                            else
                            {
                                maps_to_be_changed.Add(current3, new ArrayList
                                {
                                    current2.Name
                                });
                            }
                            // Found: stop searching the remaining maps.
                            break;
                        }
                        // Not in this map: write the type's default (a new instance for value types,
                        // null otherwise). A later map in the loop may still overwrite it.
                        output.Set<object>(current2.Name, current2.Type.IsValueType ? Activator.CreateInstance(current2.Type) : null);
                    }
                }
            }

            // Pass 3: copy the map columns, removing the keys promoted in pass 2.
            using (IEnumerator<IColumn> enumerator = output.Schema.GetEnumerator())
            {
                while (enumerator.MoveNext())
                {
                    IColumn out_col = enumerator.Current;
                    bool flag = list.Contains(out_col.Name);
                    if (flag)
                    {
                        SqlMap<string, string> sqlMap = input.Get<SqlMap<string, string>>(out_col.Name);
                        // maps_to_be_changed is always non-null here; the check is redundant but harmless.
                        if (maps_to_be_changed != null && maps_to_be_changed.Keys.Contains(out_col.Name))
                        {
                            // Rebuild the map without the promoted keys.
                            sqlMap = new SqlMap<string, string>(
                                from kvp in sqlMap
                                where !maps_to_be_changed[out_col.Name].Contains(kvp.Key)
                                select kvp);
                        }
                        output.Set<SqlMap<string, string>>(out_col.Name, sqlMap);
                    }
                }
            }
            return output.AsReadOnly();
        }
Beispiel #39
0
 // void OutputValueAtCol_I(string c, int i, IUpdatableRow outputrow)
 //
 // Writes the textual value c into the column at position i of outputrow, converting it to the
 // column's declared type first.
 //
 // Supported types: SqlMap<string, string>, SqlArray<int>, int, int?, long, long?, DateTime,
 // DateTime? and string. Optional quotes are removed for maps, arrays and strings. For the
 // nullable types an empty string becomes null. Any other column type receives c unchanged
 // as a string.
 private void OutputValueAtCol_I(string c, int i, IUpdatableRow outputrow)
 {
     var columnType = outputrow.Schema[i].Type;

     if (columnType == typeof(SqlMap<string, string>))
     {
         string mapText = DriverFunctions.RemoveOptionalQuotes(c);
         SqlMap<string, string> parsedMap = null;
         if (!String.IsNullOrEmpty(mapText))
         {
             parsedMap = DriverFunctions.ReadStringMap(mapText, this._map_item_delim, this._map_kv_delim);
         }
         outputrow.Set<SqlMap<string, string>>(i, parsedMap);
         return;
     }

     if (columnType == typeof(SqlArray<int>))
     {
         string arrayText = DriverFunctions.RemoveOptionalQuotes(c);
         SqlArray<int> parsedArray = null;
         if (!String.IsNullOrEmpty(arrayText))
         {
             parsedArray = DriverFunctions.ReadIntArray(arrayText, this._array_item_delim);
         }
         outputrow.Set<SqlArray<int>>(i, parsedArray);
         return;
     }

     if (columnType == typeof(int))
     {
         outputrow.Set<int>(i, Convert.ToInt32(c));
         return;
     }

     if (columnType == typeof(int?))
     {
         // Only the empty string maps to null; everything else goes through Convert.
         int? nullableInt = null;
         if (c != "")
         {
             nullableInt = Convert.ToInt32(c);
         }
         outputrow.Set<int?>(i, nullableInt);
         return;
     }

     if (columnType == typeof(long))
     {
         outputrow.Set<long>(i, Convert.ToInt64(c));
         return;
     }

     if (columnType == typeof(long?))
     {
         long? nullableLong = null;
         if (c != "")
         {
             nullableLong = Convert.ToInt64(c);
         }
         outputrow.Set<long?>(i, nullableLong);
         return;
     }

     if (columnType == typeof(DateTime))
     {
         outputrow.Set<DateTime>(i, Convert.ToDateTime(c));
         return;
     }

     if (columnType == typeof(DateTime?))
     {
         DateTime? nullableDate = null;
         if (c != "")
         {
             nullableDate = Convert.ToDateTime(c);
         }
         outputrow.Set<DateTime?>(i, nullableDate);
         return;
     }

     if (columnType == typeof(string))
     {
         outputrow.Set<string>(i, DriverFunctions.RemoveOptionalQuotes(c));
         return;
     }

     // Unknown column type: hand the raw text over unchanged.
     outputrow.Set<string>(i, c);
 }
Beispiel #40
0
        /// <summary>Extract is called at least once per instance.
        /// Streams the XML with an XmlReader and a three-state parser (Row, Column, Data),
        /// emitting one row each time the end tag of a row element is reached.</summary>
        /// <param name="input">Wrapper for a Stream</param>
        /// <param name="output">IUpdatableRow uses a mutable builder pattern -- 
        /// set individual fields with IUpdatableRow.Set, then build an immutable IRow by
        /// calling IUpdatableRow.AsReadOnly. All columns must be of type string.</param>
        /// <returns>IEnumerable of IRow, one IRow per SQLIP row.</returns>
        /// <exception cref="ArgumentException">A requested column is not of type string, or the
        /// document ends while a row is still open.</exception>
		public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
		{
            // Make sure that all requested columns are of type string
            IColumn column = output.Schema.FirstOrDefault(col => col.Type != typeof(string));
            if (column != null)
            {
                throw new ArgumentException(string.Format("Column '{0}' must be of type 'string', not '{1}'", column.Name, column.Type.Name));
            }

            // Start in the Row state: nothing is collected until a row element is found.
			var state = new ParseState();
			state.ClearAndJump(ParseLocation.Row);
			using (var reader = XmlReader.Create(input.BaseStream))
			{
                // One XML node is consumed per iteration; the current state decides what it means.
				while (reader.Read())
				{
					switch (state.Location)
					{
                        case ParseLocation.Row:
                            // when looking for a new row, we are only interested in elements
                            // whose name matches the requested row element
                            if (reader.NodeType == XmlNodeType.Element && reader.Name == this.rowPath)
                            {
                                // when found, clear the IUpdatableRow's memory
                                // (there is no provided Clear method, so every column is set to null)
                                for (int i = 0; i < output.Schema.Count; i++)
                                {
                                    output.Set<string>(i, null);
                                }

                                state.ClearAndJump(ParseLocation.Column);
                            }

                            break;
                        case ParseLocation.Column:
                            // When looking for a new column, we are interested in elements
                            // whose name is a key in the columnPaths map or
                            // whose name is in the requested output schema.
                            // This indicates a column whose value needs to be read, 
                            // so prepare for reading it by clearing elementValue.
                            if (reader.NodeType == XmlNodeType.Element
                                && (this.columnPaths.ContainsKey(reader.Name)
                                    || output.Schema.Select(c => c.Name).Contains(reader.Name)))
                            {
                                if (reader.IsEmptyElement)
                                {
                                    // For an empty element, write the current element value
                                    // (presumably empty, since it was cleared on entering this
                                    // state — confirm against ParseState)
                                    // and immediately jump to looking for the next column.
                                    // NOTE(review): the indexer is used even when the name is only in
                                    // the schema; this relies on columnPaths returning null for a
                                    // missing key — confirm for its type.
                                    output.Set(this.columnPaths[reader.Name] ?? reader.Name, state.ReadElementValue());
                                    state.ClearAndJump(ParseLocation.Column);
                                }
                                else
                                {
                                    // Remember which element is being read so its end tag can be recognised.
                                    state.Location = ParseLocation.Data;
                                    state.ElementName = reader.Name;
                                    state.ClearElementValue();
                                }
                            }
                            else if (reader.NodeType == XmlNodeType.EndElement && reader.Name == this.rowPath)
                            {
                                // The other interesting case is an end element whose name matches 
                                // the current row element. This indicates the end of a row, 
                                // so yield the now-complete row and jump to looking for 
                                // another row.
                                yield return output.AsReadOnly();
                                state.ClearAndJump(ParseLocation.Row);
                            }

                            break;
                        case ParseLocation.Data:
                            // Most of the code for reading the value of a column
                            // deals with re-creating the inner XML from discrete elements.
                            // The only jump occurs when the reader hits an end element
                            // whose name matches the current column. In this case, we
                            // need to write the accumulated value to the appropriate 
                            // column in the output row.
                            switch (reader.NodeType)
                            {
                                case XmlNodeType.EndElement:
                                    if (reader.Name == state.ElementName)
                                    {
                                        // End of the column element: store the accumulated inner XML.
                                        // NOTE(review): same reliance on the columnPaths indexer
                                        // returning null for a missing key as above.
                                        output.Set(this.columnPaths[state.ElementName] ?? state.ElementName, state.ReadElementValue());
                                        state.ClearAndJump(ParseLocation.Column);
                                    }
                                    else
                                    {
                                        // End of a nested element inside the column value.
                                        state.ElementWriter.WriteEndElement();
                                    }

                                    break;
                                case XmlNodeType.Element:
                                    // Nested element: replay its start tag and attributes into the writer.
                                    state.ElementWriter.WriteStartElement(reader.Name);
                                    state.ElementWriter.WriteAttributes(reader, false);
                                    if (reader.IsEmptyElement)
                                    {
                                        // An empty element produces no separate EndElement node,
                                        // so close it right away.
                                        state.ElementWriter.WriteEndElement();
                                    }

                                    break;
                                case XmlNodeType.CDATA:
                                    state.ElementWriter.WriteCData(reader.Value);
                                    break;
                                case XmlNodeType.Comment:
                                    state.ElementWriter.WriteComment(reader.Value);
                                    break;
                                case XmlNodeType.ProcessingInstruction:
                                    state.ElementWriter.WriteProcessingInstruction(reader.Name, reader.Value);
                                    break;
                                default:
                                    // Every other node type is written as plain text.
                                    state.ElementWriter.WriteString(reader.Value);
                                    break;
                            }

                            break;
                        default:
                            throw new NotImplementedException("StreamFromXml has not implemented a new member of the ParseLocation enum");
                    }
				}

                // Ending anywhere but the Row state means a row or column element was never closed.
                if (state.Location != ParseLocation.Row)
				{
					throw new ArgumentException("XML document ended without proper closing tags");
				}
			}
		}
Beispiel #41
0
        /// <summary>
        /// Fills every column of <paramref name="row"/> from the matching property of a JSON object.
        /// </summary>
        /// <param name="o">JSON object to read property values from.</param>
        /// <param name="row">Row to update; each column in its schema is set exactly once.</param>
        /// <remarks>
        /// The JSON payload may contain more or fewer properties than the schema has columns.
        /// Columns are matched by name, in any order; a column with no matching property
        /// receives its default value.
        /// </remarks>
        protected virtual void JObjectToRow(JObject o, IUpdatableRow row)
        {
            foreach (var column in row.Schema)
            {
                JToken property;

                // No usable property for this column: fall back to the column default.
                if (!o.TryGetValue(column.Name, out property) || property == null)
                {
                    row.Set<object>(column.Name, column.DefaultValue);
                    continue;
                }

                // All data conversion is delegated to JsonFunctions.ConvertToken. For anything
                // it cannot convert, project explicitly in the query instead,
                //      ie: SELECT DateTime.Parse(datetime) AS datetime, ...
                // A null conversion result (the original author notes this happens even for
                // some non-nullable types such as sbyte) is replaced by the column default so
                // the value fits into the row.
                object converted = JsonFunctions.ConvertToken(property, column.Type) ?? column.DefaultValue;
                row.Set<object>(column.Name, converted);
            }
        }
Beispiel #42
0
 /// <summary>
 /// Emits a single row whose first column holds the byte array that
 /// ImageOps.GetByteArrayforImage produces from the input stream.
 /// </summary>
 /// <param name="input">Unstructured input; its BaseStream is handed to ImageOps.</param>
 /// <param name="output">Updatable row; column 0 is set to the resulting byte array.</param>
 /// <returns>Exactly one read-only row.</returns>
 public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
 {
     // The column is addressed by ordinal (0), not by name.
     output.Set<byte[]>(0, ImageOps.GetByteArrayforImage(input.BaseStream));

     yield return output.AsReadOnly();
 }
Beispiel #43
0
 // IRow Process(IRow input, IUpdatableRow output)
 //
 // Actual implementation of the user-defined processor. Overrides the Process method of IProcessor.
 //
 // Reads the "country" column of the input row, replaces the value with its entry in
 // EnglishCountryNames.CountryTranslation when one exists, and writes the result to the
 // "country" column of the output row. Values without an entry (including null) are
 // passed through unchanged.
 public override IRow Process(IRow input, IUpdatableRow output)
 {
     string country = input.Get<string>("country");

     // One TryGetValue call replaces the Keys.Contains + indexer pair (two lookups).
     // The null check keeps a missing country from being used as a lookup key,
     // which dictionary lookups reject.
     string translated;
     if (country != null && EnglishCountryNames.CountryTranslation.TryGetValue(country, out translated))
     {
         country = translated;
     }

     output.Set<string>("country", country);
     return output.AsReadOnly();
 }
Beispiel #44
0
        /// <summary>Apply is called at least once per instance</summary>
        /// <param name="input">A SQLIP row; the XML text is read from the column named by xmlColumnName.</param>
        /// <param name="output">A SQLIP updatable row. All of its columns must be of type string.</param>
        /// <returns>IEnumerable of IRow, one IRow per XML node matched by rowPath.</returns>
        /// <exception cref="ArgumentException">An output column is not of type string.</exception>
        /// <remarks>Because applier constructor arguments cannot depend on
        /// column references, the name of the column to parse is given as a string. Then
        /// the actual column value is obtained by calling IRow.Get. The rest of the code
        /// is the same as XmlDomExtractor.</remarks>
        public override IEnumerable<IRow> Apply(IRow input, IUpdatableRow output)
        {
            // Make sure that all requested columns are of type string
            IColumn column = output.Schema.FirstOrDefault(col => col.Type != typeof(string));
            if (column != null)
            {
                throw new ArgumentException(string.Format("Column '{0}' must be of type 'string', not '{1}'", column.Name, column.Type.Name));
            }

            XmlDocument xmlDocument = new XmlDocument();
            xmlDocument.LoadXml(input.Get<string>(this.xmlColumnName));

            // The XPath used for a column does not depend on the row node, so resolve it
            // once per column here rather than once per column for every row node.
            // A column with an explicit entry in columnPaths (entry value == column name)
            // uses that entry's key as its XPath; otherwise the column name itself is the XPath.
            var columnSelectors = output.Schema
                .Select(col => new
                {
                    ColumnName = col.Name,
                    XPath = this.columnPaths.FirstOrDefault(columnPath => columnPath.Value == col.Name).Key ?? col.Name
                })
                .ToArray();

            foreach (XmlNode xmlNode in xmlDocument.DocumentElement.SelectNodes(this.rowPath))
            {
                // IUpdatableRow implements a builder pattern to save memory allocations, 
                // so call output.Set in a loop
                foreach (var selector in columnSelectors)
                {
                    // A column whose XPath matches nothing under this row node is set to null.
                    XmlNode xml = xmlNode.SelectSingleNode(selector.XPath);
                    output.Set(selector.ColumnName, xml == null ? null : xml.InnerXml);
                }

                // then call output.AsReadOnly to build an immutable IRow.
                yield return output.AsReadOnly();
            }
        }
        /// <summary>
        /// Streams JSON from the input and yields a row each time an outermost JSON object closes.
        /// </summary>
        /// <param name="input">Unstructured input whose BaseStream holds the JSON text.</param>
        /// <param name="output">Updatable row; properties whose names match a schema column are written to it.</param>
        /// <returns>One read-only row per completed outermost JSON object.</returns>
        /// <exception cref="NotImplementedException">The reader produced a token type this method does not handle.</exception>
        /// <remarks>
        /// Scalar values are converted with the column type's TypeConverter when it accepts the
        /// token's value type; otherwise the raw reader value is stored. When a column of type
        /// string is given a nested object or array, that value is re-serialized to JSON text and
        /// stored as the column value. Properties that match no schema column are skipped.
        /// Enumeration stops when the outermost array closes or the reader runs out of tokens.
        /// </remarks>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            if (input.Length == 0)
            {
                yield break;
            }

            using (var reader = new JsonTextReader(new StreamReader(input.BaseStream)))
            {
                // Column the next value token belongs to; null while no matched property is pending.
                IColumn currentColumn = null;

                // Both are non-null only while a nested object/array is being re-serialized
                // into text for a string column.
                StringBuilder valueBuilder = null;
                JsonTextWriter writer = null;

                // "Global" counters track nesting depth in the document as read here;
                // "local" counters track nesting depth inside the value currently being captured.
                var startedGlobalObjects = 0;
                var startedLocalObjects = 0;
                var startedGlobalArrays = 0;
                var startedLocalArrays = 0;

                // NOTE(review): nothing in this method resets columns between rows, so a column
                // whose property is absent from an object is simply not written for that row.
                // Whether it then keeps the previous row's value depends on IUpdatableRow - confirm.
                while (reader.Read())
                {
                    switch (reader.TokenType)
                    {
                        case JsonToken.StartArray:
                            startedGlobalArrays++;
                            if (currentColumn != null && currentColumn.Type == typeof(string))
                            {
                                // First nested container for this column: start capturing.
                                if (writer == null)
                                {
                                    valueBuilder = new StringBuilder();
                                    writer = new JsonTextWriter(new StringWriter(valueBuilder));
                                }
                                startedLocalArrays++;
                                writer.WriteStartArray();
                            }
                            break;
                        case JsonToken.EndArray:
                            startedGlobalArrays--;
                            if (writer != null)
                            {
                                startedLocalArrays--;
                                writer.WriteEndArray();
                            }
                            // The captured value is complete once every nested container is closed.
                            if (currentColumn != null && valueBuilder != null
                                && startedLocalArrays == 0 && startedLocalObjects == 0)
                            {
                                output.Set(currentColumn.Name, valueBuilder.ToString());
                                writer = null;
                                valueBuilder = null;
                                currentColumn = null;
                            }
                            // Closing the outermost array ends the extraction.
                            if (startedGlobalArrays == 0)
                            {
                                yield break;
                            }
                            break;

                        case JsonToken.StartObject:
                            startedGlobalObjects++;
                            if (currentColumn != null && currentColumn.Type == typeof(string))
                            {
                                // First nested container for this column: start capturing.
                                if (writer == null)
                                {
                                    valueBuilder = new StringBuilder();
                                    writer = new JsonTextWriter(new StringWriter(valueBuilder));
                                }
                                startedLocalObjects++;
                                writer.WriteStartObject();
                            }
                            break;
                        case JsonToken.EndObject:
                            startedGlobalObjects--;
                            if (writer != null)
                            {
                                startedLocalObjects--;
                                writer.WriteEndObject();
                            }
                            // The captured value is complete once every nested container is closed.
                            if (currentColumn != null && valueBuilder != null
                                && startedLocalArrays == 0 && startedLocalObjects == 0)
                            {
                                output.Set(currentColumn.Name, valueBuilder.ToString());
                                writer = null;
                                valueBuilder = null;
                                currentColumn = null;
                            }
                            // An outermost object just closed: emit the row built so far.
                            if (startedGlobalObjects == 0)
                            {
                                yield return output.AsReadOnly();
                            }
                            break;

                        case JsonToken.PropertyName:
                            if (writer != null)
                            {
                                // Inside a captured value: property names are copied through verbatim.
                                writer.WritePropertyName(reader.Value.ToString());
                            }
                            else
                            {
                                var currentPropertyName = reader.Value.ToString();
                                currentColumn = output.Schema
                                    .FirstOrDefault(s => s.Name == currentPropertyName);
                                // No column with this name: skip the property's value entirely.
                                if (currentColumn == null)
                                {
                                    reader.Skip();
                                }
                            }
                            break;

                        case JsonToken.String:
                        case JsonToken.Boolean:
                        case JsonToken.Bytes:
                        case JsonToken.Date:
                        case JsonToken.Integer:
                        case JsonToken.Float:
                            if (writer != null)
                            {
                                writer.WriteValue(reader.Value);
                            }
                            else if (currentColumn != null)
                            {
                                // Prefer the column type's converter; fall back to the raw value
                                // when the converter cannot take the token's value type.
                                var typeConverter = TypeDescriptor.GetConverter(currentColumn.Type);
                                if (typeConverter != null && typeConverter.CanConvertFrom(reader.ValueType))
                                {
                                    output.Set(currentColumn.Name, typeConverter.ConvertFrom(reader.Value));
                                }
                                else
                                {
                                    output.Set(currentColumn.Name, reader.Value);
                                }
                                currentColumn = null;
                            }
                            break;
                        case JsonToken.Null:
                            if (writer != null)
                            {
                                writer.WriteNull();
                            }
                            else if (currentColumn != null)
                            {
                                // A JSON null for a matched column stores the column's default value.
                                output.Set(currentColumn.Name, currentColumn.DefaultValue);
                                currentColumn = null;
                            }
                            break;

                        // The remaining token kinds only matter while capturing a nested value;
                        // outside of a capture they are dropped.
                        case JsonToken.StartConstructor:
                            writer?.WriteStartConstructor(reader.Value.ToString());
                            break;
                        case JsonToken.EndConstructor:
                            writer?.WriteEndConstructor();
                            break;
                        case JsonToken.Comment:
                            writer?.WriteComment(reader.Value.ToString());
                            break;
                        case JsonToken.Raw:
                            writer?.WriteRaw(reader.Value.ToString());
                            break;
                        case JsonToken.None:
                        case JsonToken.Undefined:
                            // ignore
                            break;
                        default:
                            throw new NotImplementedException();
                    }
                }
            }
        }