Exemple #1
0
        /// <summary>
        /// Collapses the rows of one reduce group into non-overlapping [begin, end] ranges.
        /// </summary>
        /// <remarks>
        /// Requires that the reducer is PRESORTED on "begin" and READONLY on the reduce key.
        /// An "end" that is earlier than its "begin" is treated as lying on the next day.
        /// Nothing is emitted when the rowset contains no rows.
        /// </remarks>
        /// <param name="input">Rows exposing DateTime columns "begin" and "end".</param>
        /// <param name="output">Reusable row that receives the merged "begin" and "end" values.</param>
        /// <returns>One row per merged range, in the order the ranges are closed.</returns>
        public override IEnumerable <IRow> Reduce(IRowset input, IUpdatableRow output)
        {
            // Tracks whether begin/end hold a real interval yet; the initial values are placeholders only.
            bool hasInterval = false;
            var  begin       = DateTime.MaxValue;
            var  end         = DateTime.MinValue;

            foreach (var row in input.Rows)
            {
                var b = row.Get <DateTime>("begin");
                var e = row.Get <DateTime>("end");

                // If the end is just a time and not a date, it can be earlier than the begin,
                // indicating it is on the next day. Fix up the end to the next day in that case.
                if (e < b)
                {
                    e = e.AddDays(1);
                }

                if (!hasInterval)
                {
                    // The first row seeds the current interval.
                    hasInterval = true;
                    begin       = b;
                    end         = e;
                }
                else if (b <= end)
                {
                    // The row starts inside the current interval: extend it if the row ends later.
                    if (e > end)
                    {
                        end = e;
                    }
                }
                else
                {
                    // Gap found: emit the finished interval and start a new one from this row.
                    output.Set <DateTime>("begin", begin);
                    output.Set <DateTime>("end", end);
                    yield return(output.AsReadOnly());

                    begin = b;
                    end   = e;
                }
            }

            // Emit the last open interval, but never the placeholder values of an empty rowset.
            if (hasInterval)
            {
                output.Set <DateTime>("begin", begin);
                output.Set <DateTime>("end", end);
                yield return(output.AsReadOnly());
            }
        } // Reduce
Exemple #2
0
        /// <summary>
        /// Merges overlapping time ranges of a reduce group and emits each merged range once.
        /// </summary>
        /// <remarks>
        /// Requires that the reducer is PRESORTED on "begin" and READONLY on the reduce key.
        /// When "end" is earlier than "begin" it is moved to the following day before merging.
        /// An empty rowset produces no output rows.
        /// </remarks>
        /// <param name="input">Rows exposing DateTime columns "begin" and "end".</param>
        /// <param name="output">Reusable row that receives the merged "begin" and "end" values.</param>
        /// <returns>One row per merged range.</returns>
        public override IEnumerable<IRow> Reduce(IRowset input, IUpdatableRow output)
        {
            // Becomes true once the first row has seeded the current range.
            bool first_row_processed = false;
            var begin = DateTime.MaxValue; // Placeholder until the first row is seen
            var end = DateTime.MinValue; // Placeholder until the first row is seen

            foreach (var row in input.Rows)
            {
                var rowBegin = row.Get<DateTime>("begin");
                var rowEnd = row.Get<DateTime>("end");

                // An end that is only a time of day can be earlier than the begin; treat it as next-day.
                if (rowEnd < rowBegin) { rowEnd = rowEnd.AddDays(1); }

                if (!first_row_processed)
                {
                    // Seed the current range from the first row.
                    first_row_processed = true;
                    begin = rowBegin;
                    end = rowEnd;
                    continue;
                }

                if (rowBegin <= end)
                {
                    // Row overlaps the current range: keep the later of the two ends.
                    if (rowEnd > end) { end = rowEnd; }
                    continue;
                }

                // Row starts after the current range: output the range and start a new one.
                output.Set<DateTime>("begin", begin);
                output.Set<DateTime>("end", end);
                yield return output.AsReadOnly();
                begin = rowBegin;
                end = rowEnd;
            }

            // Output the last range, unless no row was ever seen (placeholders must not be emitted).
            if (first_row_processed)
            {
                output.Set<DateTime>("begin", begin);
                output.Set<DateTime>("end", end);
                yield return output.AsReadOnly();
            }
        }
        /// <summary>
        /// Computes the highest number of simultaneously open "start" operations in the rowset.
        /// </summary>
        /// <param name="input">Rows exposing a DateTime "timestamp" column and a string "op" column.</param>
        /// <param name="output">Row that receives "cohort" (always "FOO") and "max".</param>
        /// <returns>Exactly one row holding the peak counter value.</returns>
        public override IEnumerable <IRow> Reduce(IRowset input, IUpdatableRow output)
        {
            int open = 0; // running count of unmatched "start" operations
            int peak = 0; // highest value the running count has reached

            foreach (var row in input.Rows)
            {
                // The timestamp value itself is not used; the read is kept so the column is still accessed.
                var timestamp = row.Get <DateTime>("timestamp");
                var op        = row.Get <string>("op");

                // "start" opens one; anything else closes one, never dropping below zero.
                open = (op == "start") ? open + 1 : System.Math.Max(0, open - 1);

                if (open > peak)
                {
                    peak = open;
                }
            }

            output.Set <string>("cohort", "FOO");
            output.Set <int>("max", peak);

            yield return(output.AsReadOnly());
        }
        /// <summary>
        /// Emits the first record (after sorting) of the group if it has no recovery date.
        /// </summary>
        /// <param name="input">Rows exposing string columns "VehicleRegistration", "DateStolen" and "DateRecovered".</param>
        /// <param name="output">Row that receives "VehicleRegistration" and "DateStolen".</param>
        /// <returns>Zero or one row.</returns>
        public override IEnumerable <IRow> Reduce(IRowset input, IUpdatableRow output)
        {
            // Materialize every input row as a StolenVehicleRecord.
            var stolenVehicleRecords = input.Rows
                                       .Select(row => new StolenVehicleRecord(
                                                   row.Get <string>("VehicleRegistration"),
                                                   row.Get <string>("DateStolen"),
                                                   row.Get <string>("DateRecovered")))
                                       .ToList();

            // Nothing to report for an empty group.
            if (stolenVehicleRecords.Count == 0)
            {
                yield break;
            }

            // Ordering comes from StolenVehicleRecord's own comparison.
            // NOTE(review): presumably most recent DateStolen first — confirm against StolenVehicleRecord.
            stolenVehicleRecords.Sort();
            var latest = stolenVehicleRecords[0];

            // A record without a recovery date is output; otherwise nothing is emitted.
            if (latest.DateRecovered == null)
            {
                output.Set <string>("VehicleRegistration", latest.VehicleRegistration);
                output.Set <DateTime>("DateStolen", latest.DateStolen);
                yield return(output.AsReadOnly());
            }
        }
        /// <summary>
        /// Reads "\r\n"-delimited lines, splits each by the column delimiter and writes the parts to
        /// consecutive output columns. The second input column is split on spaces into two output columns.
        /// </summary>
        /// <param name="input">Unstructured input that is split into one stream per line.</param>
        /// <param name="output">Row whose string columns are filled by position.</param>
        /// <returns>One row per input line.</returns>
        public override IEnumerable <IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            // Read the input line by line
            foreach (Stream current in input.Split(_encoding.GetBytes("\r\n")))
            {
                using (StreamReader streamReader = new StreamReader(current, this._encoding))
                {
                    string line = streamReader.ReadToEnd().Trim();

                    // Split the input by the column delimiter
                    string[] parts = line.Split(this._col_delim);
                    int      count = 0;
                    foreach (string part in parts)
                    {
                        if (count == 1)
                        {
                            // The second column carries a full name: first token goes to this column,
                            // second token to the next one.
                            string[] name = part.Trim().Split(' ');
                            output.Set <string>(count, name[0]);
                            count += 1;

                            // A single-token name has no second part; write null instead of
                            // indexing past the end of the array.
                            output.Set <string>(count, name.Length > 1 ? name[1] : null);
                        }
                        else
                        {
                            output.Set <string>(count, part);
                        }
                        count += 1;
                    }
                }
                yield return(output.AsReadOnly());
            }
        }
Exemple #6
0
        /// <summary>
        /// Allocates buffers of <c>increment</c> bytes until <c>no_buff</c> buffers exist or an
        /// exception occurs, and reports the observed memory figures in a single row.
        /// </summary>
        /// <param name="input">Not read by this extractor.</param>
        /// <param name="outputrow">Row that receives the memory statistics and failure details.</param>
        /// <returns>Exactly one row.</returns>
        public override IEnumerable <IRow> Extract(IUnstructuredReader input, IUpdatableRow outputrow)
        {
            outputrow.Set <long>("GC_TotalMem_Start", GC.GetTotalMemory(true));
            outputrow.Set <long>("MaxUDOMemory", MyLimits.MaxUdoMemory);

            var allocated   = 0;     // number of buffers successfully allocated
            var hitError    = false; // set when any exception interrupts the allocation loop
            var lastTotalMem = GC.GetTotalMemory(true);

            try
            {
                while (allocated < no_buff)
                {
                    var buffer = new byte[increment];
                    alloc_mem[allocated] = buffer;
                    buffer[0] = 1; // touch the buffer to avoid it being optimized away
                    allocated++;
                    lastTotalMem = GC.GetTotalMemory(true);
                }
            }
            catch (Exception e)
            {
                hitError = true;
                outputrow.Set <string>("error", e.Message);
            }

            outputrow.Set <long>("GC_TotalMem_End", lastTotalMem);
            outputrow.Set <bool>("failed", hitError);
            outputrow.Set <long>("alloc_sz", allocated * increment);

            yield return(outputrow.AsReadOnly());
        }
Exemple #7
0
        /// <summary>
        /// Parses a single JSON object from the stream and emits one row per child object
        /// selected by <c>rowpath</c>.
        /// </summary>
        /// <param name="inputStream">Stream containing the JSON text.</param>
        /// <param name="output">Row that is filled from each selected object.</param>
        /// <returns>One row per selected object; nothing if the stream is empty or does not start with an object.</returns>
        protected virtual IEnumerable <IRow> Extract(Stream inputStream, IUpdatableRow output)
        {
            using (var json = new JsonTextReader(new StreamReader(inputStream)))
            {
                // Stop immediately unless the very first token opens a JSON object.
                bool startsWithObject = json.Read() && json.TokenType == JsonToken.StartObject;
                if (!startsWithObject)
                {
                    yield break;
                }

                var root = JToken.Load(json);

                // Every selected object becomes a row; its fields become columns.
                foreach (JObject child in SelectChildren(root, this.rowpath))
                {
                    this.JObjectToRow(child, output);
                    yield return(output.AsReadOnly());
                }
            }
        }
Exemple #8
0
        /// <summary>
        /// Reads an Avro container file and emits one row per Avro record, copying the field
        /// named after each output column into that column.
        /// </summary>
        /// <param name="input">Unstructured input that is first copied into a seekable memory stream.</param>
        /// <param name="output">Row whose schema decides which record fields are read.</param>
        /// <returns>One fully populated row per Avro record.</returns>
        public override IEnumerable <IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            var avschema = Avro.Schema.Parse(avroSchema);

            using (var ms = new MemoryStream())
            {
                CreateSeekableStream(input, ms);
                ms.Position = 0;

                // The file reader is disposable; release it when enumeration ends.
                using (var fileReader = DataFileReader <GenericRecord> .OpenReader(ms, avschema))
                {
                    while (fileReader.HasNext())
                    {
                        var avroRecord = fileReader.Next();

                        foreach (var column in output.Schema)
                        {
                            var value = avroRecord[column.Name];
                            if (value != null)
                            {
                                output.Set(column.Name, value);
                            }
                            else
                            {
                                output.Set <object>(column.Name, null);
                            }
                        }

                        // Emit once per record, after every column has been set
                        // (emitting inside the column loop produced one partial row per column).
                        yield return(output.AsReadOnly());
                    }
                }
            }
        }
        /// <summary>
        /// Scans the input for JSON objects and emits one row per child object selected by
        /// <c>rowpath</c> within each of them.
        /// </summary>
        /// <param name="input">Unstructured input whose base stream holds the JSON text.</param>
        /// <param name="output">Row that is filled from each selected object.</param>
        /// <returns>One row per selected object.</returns>
        public override IEnumerable <IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            using (var json = new JsonTextReader(new StreamReader(input.BaseStream)))
            {
                // Walk the token stream; only object starts are of interest.
                while (json.Read())
                {
                    if (json.TokenType != JsonToken.StartObject)
                    {
                        continue;
                    }

                    // Load the whole object that begins at the current token.
                    var root = JToken.Load(json);

                    // Every selected object becomes a row; its fields become columns.
                    foreach (JObject child in SelectChildren(root, this.rowpath))
                    {
                        this.JObjectToRow(child, output);
                        yield return(output.AsReadOnly());
                    }
                }
            }
        }
 /// <summary>
 /// Copies "DepID" and "DepName" from the input row and adds the <c>hw</c> value as "HelloWorld".
 /// </summary>
 /// <param name="input">Row exposing an int "DepID" and a string "DepName".</param>
 /// <param name="output">Row that receives the three output columns.</param>
 /// <returns>The populated row as a read-only row.</returns>
 public override IRow Process(IRow input, IUpdatableRow output)
 {
     int departmentId = input.Get <int>("DepID");
     output.Set <int>("DepID", departmentId);

     string departmentName = input.Get <string>("DepName");
     output.Set <string>("DepName", departmentName);

     output.Set <string>("HelloWorld", hw);

     IRow result = output.AsReadOnly();
     return result;
 }
Exemple #11
0
        /// <summary>
        /// Splits the input into binary blocks of at most <c>_blocksz</c> bytes and emits one row per block.
        /// </summary>
        /// <param name="input">Unstructured input; its Length, Start and BaseStream are used.</param>
        /// <param name="outputrow">Row receiving, by position: global block position, block size, block bytes.</param>
        /// <returns>One row per block read.</returns>
        public override IEnumerable <IRow> Extract(IUnstructuredReader input, IUpdatableRow outputrow)
        {
            long   remaining   = input.Length;
            long   extentStart = input.Start;
            Stream stream      = input.BaseStream;

            byte[] block = new byte[this._blocksz];

            while (remaining > 0)
            {
                long blockPosition = extentStart + stream.Position;

                // Read a full block or just the remainder, so that we never read into the 4MB overscan
                // area of the next extent that is provided for row-oriented processing.
                // The cast is safe because the minimum can never exceed _blocksz.
                int wanted = (int)Math.Min(this._blocksz, remaining);

                // Bring the buffer to the requested size (it may have shrunk after a short read).
                Array.Resize <byte>(ref block, wanted);

                int  got       = stream.Read(block, 0, wanted);
                bool plausible = got > 0 && got <= wanted;
                if (!plausible)
                {
                    throw new Exception(string.Format("Unexpected amount of {2} bytes was read starting at global stream position {1}. Expected to read {0} bytes.",
                                                      wanted, blockPosition, got));
                }

                // Trim the buffer to what was actually read and account for it.
                Array.Resize <byte>(ref block, got);
                remaining -= got;

                outputrow.Set <long>(0, blockPosition); // global position of the block
                outputrow.Set <long>(1, got);           // block size
                outputrow.Set <byte[]>(2, block);       // block data
                yield return(outputrow.AsReadOnly());
            }
        }
Exemple #12
0
        /// <summary>
        /// Reads Azure diagnostics log records from the input and emits one row per record.
        /// </summary>
        /// <param name="input">Unstructured input whose base stream is read as text.</param>
        /// <param name="output_row">Row that receives the record fields and the "ADLA_*" job properties.</param>
        /// <returns>One row per record returned by <c>GetLogADLARecords</c>.</returns>
        public override IEnumerable <IRow> Extract(IUnstructuredReader input, IUpdatableRow output_row)
        {
            // Dispose the reader once all records have been enumerated.
            using (var s = new System.IO.StreamReader(input.BaseStream))
            {
                var rows = AzureDiagnostics.AzureDiagnosticsUtil.GetLogADLARecords(s);

                foreach (var row in rows)
                {
                    output_row.Set <System.DateTime>("Time", row.Time.DateTime);
                    output_row.Set <string>("ResourceId", row.ResourceId);
                    output_row.Set <string>("Category", row.Category);
                    output_row.Set <string>("OperationName", row.OperationName);
                    output_row.Set <string>("ResultType", row.ResultType);
                    // NOTE(review): "ResultSignature" is filled from ResultType; this looks like a
                    // copy/paste slip — confirm whether the record exposes a ResultSignature value.
                    output_row.Set <string>("ResultSignature", row.ResultType);
                    output_row.Set <string>("CorrelationId", row.CorrelationId);
                    output_row.Set <string>("Identity", row.Identity);

                    var props = row.Properties;
                    output_row.Set <string>("ADLA_JobId", props.JobId);
                    output_row.Set <string>("ADLA_JobName", props.JobName);
                    output_row.Set <string>("ADLA_JobRuntimeName", props.JobRuntimeName);

                    output_row.Set <System.DateTime?>("ADLA_StartTime", props.StartTime.ToDateTimeNullable());
                    output_row.Set <System.DateTime?>("ADLA_SubmitTime", props.SubmitTime.ToDateTimeNullable());
                    output_row.Set <System.DateTime?>("ADLA_EndTime", props.EndTime.ToDateTimeNullable());

                    yield return(output_row.AsReadOnly());
                }
            }
        }
        /// <summary>
        /// Reads one JSON-encoded tweet per line and emits a row with its text, ids, hashtags and mentions.
        /// </summary>
        /// <remarks>Reading stops at end of input or at the first empty line.</remarks>
        /// <param name="input">Unstructured input whose base stream holds one JSON object per line.</param>
        /// <param name="output">Row that receives the tweet columns.</param>
        /// <returns>One row per line read.</returns>
        public override IEnumerable <IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            using (var lines = new StreamReader(input.BaseStream))
            {
                // Assumes each line is an independent json object.
                for (var json = lines.ReadLine(); !string.IsNullOrEmpty(json); json = lines.ReadLine())
                {
                    Tweet tweet = Newtonsoft.Json.JsonConvert.DeserializeObject <Tweet>(json);

                    output.Set <string>("tweetText", tweet.text);
                    output.Set <string>("tweetId", tweet.id_str);
                    output.Set <string>("timestampMs", tweet.timestamp_ms);
                    // NOTE(review): "language" is filled from id_str, same as "tweetId" — looks
                    // like a copy/paste slip; confirm which Tweet field was intended.
                    output.Set <string>("language", tweet.id_str);

                    var tagTexts     = tweet.entities.hashtags.Select(t => t.text);
                    var mentionNames = tweet.entities.user_mentions.Select(t => t.screen_name);

                    output.Set <SqlArray <string> >("hashTags", new SqlArray <string>(tagTexts));
                    output.Set <SqlArray <string> >("userMentions", new SqlArray <string>(mentionNames));

                    yield return(output.AsReadOnly());
                }
            }
        }
        /// <summary>
        /// Reads a tab-separated partition descriptor (id, from, to) from the first input line,
        /// then emits one row per row returned by the provider for that range.
        /// </summary>
        /// <param name="input">Unstructured input whose first line is "id\tfrom\tto".</param>
        /// <param name="output">Row receiving "extractor_id", "partition_id", "partition", "value1" and "value2".</param>
        /// <returns>One row per provider row.</returns>
        /// <exception cref="InvalidDataException">The descriptor line is missing or has fewer than three fields.</exception>
        public override IEnumerable <IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            string id;
            string from;
            string to;

            // 1. Collect partition information.
            using (var reader = new StreamReader(input.BaseStream))
            {
                string line = reader.ReadLine();
                if (line == null)
                {
                    // Empty input: fail with a clear message instead of a NullReferenceException.
                    throw new InvalidDataException("Partition descriptor is missing: the input is empty.");
                }

                var parts = line.Split('\t');
                if (parts.Length < 3)
                {
                    // Short line: fail with a clear message instead of an IndexOutOfRangeException.
                    throw new InvalidDataException("Partition descriptor must contain three tab-separated fields (id, from, to).");
                }

                id   = parts[0];
                from = parts[1];
                to   = parts[2];
            }

            // 2. Read data source using partition information.
            using (var reader = ProviderFactory.CreateInstance(_cnxString, from, to))
            {
                foreach (var row in reader.Rows)
                {
                    output.Set("extractor_id", _id);
                    output.Set("partition_id", id);
                    output.Set("partition", row[0]);
                    output.Set("value1", row[1]);
                    output.Set("value2", row[2]);

                    yield return(output.AsReadOnly());
                }
            }

            // Add some latency to data read.
            Thread.Sleep(10000);
        }
        /// <summary>
        /// Reads Avro objects sequentially from the input and emits one row per object, copying
        /// the member named after each output column into that column.
        /// </summary>
        /// <param name="input">Unstructured input; nothing is emitted when its Length is 0.</param>
        /// <param name="output">Row whose schema decides which members are read.</param>
        /// <returns>One row per Avro object.</returns>
        public override IEnumerable <IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            // An empty input has no container header to read.
            if (input.Length == 0)
            {
                yield break;
            }

            // The serializer is not used below; its creation is kept as in the original.
            var serializer = AvroSerializer.CreateGeneric(avroSchema);

            using (var container = AvroContainer.CreateGenericReader(input.BaseStream))
            using (var sequential = new SequentialReader <dynamic>(container))
            {
                foreach (var record in sequential.Objects)
                {
                    // Copy each requested column from the record by name.
                    foreach (var column in output.Schema)
                    {
                        output.Set(column.Name, record[column.Name]);
                    }

                    yield return(output.AsReadOnly());
                }
            }
        }
        /// <summary>
        /// Joins the left and right rowsets on "employee_id" and emits one row per matching pair.
        /// </summary>
        /// <param name="left">Rows exposing an int "employee_id" and a string "department_name".</param>
        /// <param name="right">Rows exposing an int "employee_id" and a string "employee_name".</param>
        /// <param name="output">Row receiving "employee_id", "employee_name" and "department_name".</param>
        /// <returns>
        /// Matches in left-row order; for each left row, matching right rows appear in right-row order.
        /// </returns>
        public override IEnumerable <IRow> Combine(IRowset left, IRowset right, IUpdatableRow output)
        {
            // Index the right side by employee id so each left row is matched with one lookup
            // instead of a scan over every buffered right row.
            var namesById = new Dictionary <int, List <string> >();

            foreach (var row2 in right.Rows)
            {
                int          rightId = row2.Get <int>("employee_id");
                List <string> names;
                if (!namesById.TryGetValue(rightId, out names))
                {
                    names = new List <string>();
                    namesById.Add(rightId, names);
                }

                // Appending keeps the right-side order within one id.
                names.Add(row2.Get <string>("employee_name"));
            }

            foreach (var row in left.Rows)
            {
                int           employeeId = row.Get <int>("employee_id");
                List <string> matches;
                if (!namesById.TryGetValue(employeeId, out matches))
                {
                    continue; // no right row shares this id
                }

                foreach (var employeeName in matches)
                {
                    output.Set("employee_id", employeeId);
                    output.Set("employee_name", employeeName);
                    output.Set("department_name", row.Get <string>("department_name"));
                    yield return(output.AsReadOnly());
                }
            }
        }
        /// <summary>
        /// Folds all rows of a group into one integer vector indexed like <c>colNames</c> and
        /// emits the entries from <c>ColNames.lf_1</c> onward as string columns.
        /// </summary>
        /// <param name="input">Rows exposing string columns "id", "loc", "fs", "tr", "st", "et", "vol", "lf" and "rt".</param>
        /// <param name="output">Row whose columns named in <c>colNames</c> receive the values.</param>
        /// <returns>Exactly one row.</returns>
        public override IEnumerable<IRow> Reduce(IRowset input, IUpdatableRow output)
        {
            int[] vector = new int[colNames.Length];
            bool isFirstRow = true;

            foreach (IRow row in input.Rows)
            {
                // The per-group values are taken from the first row only.
                if (isFirstRow)
                {
                    vector[(int)ColNames.id] = int.Parse(row.Get<string>("id").ToString());
                    vector[(int)ColNames.loc] = location.GetValue(row.Get<string>("loc").ToString());
                    vector[(int)ColNames.fs] = int.Parse(row.Get<string>("fs").ToString());
                    vector[(int)ColNames.tr] = int.Parse(row.Get<string>("tr").ToString());
                    vector[(int)ColNames.st] = sevType.GetValue(row.Get<string>("st").ToString());
                    isFirstRow = false;
                }

                // Event type: mark the mapped slot.
                var eventSlot = eventType.GetValue(row.Get<string>("et").ToString());
                vector[eventSlot] = 1;

                // Log feature: store the row's volume in the mapped slot.
                int volume = int.Parse(row.Get<string>("vol").ToString());
                var featureSlot = logFeature.GetValue(row.Get<string>("lf").ToString());
                vector[featureSlot] = volume;

                // Resource type: mark the mapped slot.
                var resourceSlot = resType.GetValue(row.Get<string>("rt").ToString());
                vector[resourceSlot] = 1;
            }

            // Write output, starting at the lf_1 position of the vector.
            int slot = (int)ColNames.lf_1;
            while (slot < vector.Length)
            {
                output.Set(colNames[slot], vector[slot].ToString());
                slot++;
            }

            yield return output.AsReadOnly();
        }
Exemple #18
0
        /// <summary>
        /// Reads an ORC file stripe by stripe and emits one row per stripe row, filling the
        /// projected output columns from the extracted column data.
        /// </summary>
        /// <param name="output">Row whose schema is intersected with the file's columns.</param>
        /// <param name="input">Seekable stream containing the ORC file.</param>
        /// <returns>One row per row of every stripe.</returns>
        /// <exception cref="ArgumentOutOfRangeException">The stream is not seekable.</exception>
        private static IEnumerable <IRow> ExtractInternal(IUpdatableRow output, Stream input)
        {
            if (!input.CanSeek)
            {
                throw new ArgumentOutOfRangeException(nameof(input), "Input stream must be seekable for ORC reader. Enable the hack to copy to a Memory Stream or to a non-Persisted Memory Mapped file. The hack is the default setting.");
            }

            using (var tail = new FileTail(input))
            {
                var allStripes       = tail.GetStripeCollection();
                var projectedColumns = GetIntersectedColumnMetadata(output.Schema, tail).ToArray();

                foreach (var stripe in allStripes)
                {
                    // Decode the projected columns of this stripe once, then replay them row by row.
                    var stripeColumns = ReadStripe(stripe, projectedColumns).ToArray();
                    int rowsInStripe  = (int)stripe.NumRows;

                    for (int rowIndex = 0; rowIndex < rowsInStripe; rowIndex++)
                    {
                        foreach (var entry in stripeColumns)
                        {
                            // Fall back to the column's default when there is no reader or no value.
                            var cell = entry.Item2?.GetValue(rowIndex) ?? entry.Item1.USqlProjectionColumn.DefaultValue;
                            output.Set(entry.Item1.USqlProjectionColumnIndex, cell);
                        }

                        yield return(output.AsReadOnly());
                    }
                }
            }
        }
        /// <summary>
        /// Loads the input as one XML document and emits a row for every node matched by
        /// <c>rowPath</c>, filling each string column from a child node.
        /// </summary>
        /// <param name="input">Wrapper for a Stream; its base stream must contain the XML document.</param>
        /// <param name="output">IUpdatableRow uses a mutable builder pattern --
        /// set individual fields with IUpdatableRow.Set, then build an immutable IRow by
        /// calling IUpdatableRow.AsReadOnly.</param>
        /// <returns>A sequence of IRows, one per matched node.</returns>
        /// <exception cref="ArgumentException">A requested column is not of type string.</exception>
        public override IEnumerable <IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            // Reject the first requested column that is not a string column.
            foreach (IColumn candidate in output.Schema)
            {
                if (candidate.Type != typeof(string))
                {
                    throw new ArgumentException(string.Format("Column '{0}' must be of type 'string', not '{1}'", candidate.Name, candidate.Type.Name));
                }
            }

            XmlDocument document = new XmlDocument();
            document.Load(input.BaseStream);

            foreach (XmlNode rowNode in document.DocumentElement.SelectNodes(this.rowPath))
            {
                // The same output row is reused: set every column, then snapshot it.
                foreach (IColumn col in output.Schema)
                {
                    // Use an explicit path mapping for this column when one exists; otherwise the
                    // column name itself serves as the path.
                    var     mapping    = this.columnPaths.FirstOrDefault(columnPath => columnPath.Value == col.Name);
                    string  nodePath   = mapping.Key ?? col.Name;
                    string  targetName = mapping.Value ?? col.Name;
                    XmlNode cellNode   = rowNode.SelectSingleNode(nodePath);

                    output.Set(targetName, cellNode == null ? null : cellNode.InnerXml);
                }

                yield return(output.AsReadOnly());
            }
        }
        /// <summary>
        /// Copies the eight user columns to the output by position, replacing "Country" with its
        /// entry in <c>CountryTranslation</c> when one exists.
        /// </summary>
        /// <param name="input">Row exposing the string columns "UserID" through "Phone".</param>
        /// <param name="output">Row whose columns 0..7 receive the values.</param>
        /// <returns>The populated row as a read-only row.</returns>
        public override IRow Process(IRow input, IUpdatableRow output)
        {
            string userId     = input.Get <string>("UserID");
            string name       = input.Get <string>("Name");
            string address    = input.Get <string>("Address");
            string city       = input.Get <string>("City");
            string state      = input.Get <string>("State");
            string postalCode = input.Get <string>("PostalCode");
            string country    = input.Get <string>("Country");
            string phone      = input.Get <string>("Phone");

            // Translate the country only when a translation is known; otherwise pass it through.
            bool hasTranslation = CountryTranslation.Keys.Contains(country);
            string outputCountry = hasTranslation ? CountryTranslation[country] : country;

            output.Set <string>(0, userId);
            output.Set <string>(1, name);
            output.Set <string>(2, address);
            output.Set <string>(3, city);
            output.Set <string>(4, state);
            output.Set <string>(5, postalCode);
            output.Set <string>(6, outputCountry);
            output.Set <string>(7, phone);

            return(output.AsReadOnly());
        }
Exemple #21
0
            /// <summary>
            /// Splits the time range of the input row into one slice per calendar day.
            /// </summary>
            /// <remarks>
            /// The first slice starts at the original start time and the last slice ends at the
            /// original end time; intermediate boundaries are midnight and 23:59:59 of each day.
            /// </remarks>
            /// <param name="input">Row exposing the DateTime columns named by <c>startColumn</c> and <c>endColumn</c>.</param>
            /// <param name="output">Row receiving "startTimeOfDay" and "endTimeOfDay".</param>
            /// <returns>One row per calendar day touched by the range.</returns>
            public override IEnumerable <IRow> Apply(IRow input, IUpdatableRow output)
            {
                DateTime rangeStart = input.Get <DateTime>(startColumn);
                DateTime rangeEnd   = input.Get <DateTime>(endColumn);
                DateTime firstDay   = rangeStart.Date;
                DateTime lastDay    = rangeEnd.Date;

                var lastSecond = new TimeSpan(23, 59, 59);

                DateTime day = firstDay;
                while (day <= lastDay)
                {
                    // The first day keeps the real start time; later days start at midnight.
                    DateTime sliceStart = (day == firstDay) ? rangeStart : day;

                    // The last day keeps the real end time; earlier days end at 23:59:59.
                    DateTime sliceEnd = (day == lastDay) ? rangeEnd : day + lastSecond;

                    output.Set <DateTime>("startTimeOfDay", sliceStart);
                    output.Set <DateTime>("endTimeOfDay", sliceEnd);
                    yield return(output.AsReadOnly());

                    day = day.AddDays(1);
                }
            }
Exemple #22
0
        /// <summary>
        /// Assigns a category to the row: the key of the first <c>categoryMapper</c> entry that
        /// has a prefix the "Tag" value starts with, or "other" when none matches.
        /// </summary>
        /// <param name="input">Row exposing string columns "Tag" and "Category".</param>
        /// <param name="output">Row whose "Category" column receives the result.</param>
        /// <returns>The populated row as a read-only row.</returns>
        public override IRow Process(IRow input, IUpdatableRow output)
        {
            var tag = input.Get<string>("Tag");

            // The incoming "Category" value is read but not used; the result always starts as "other".
            input.Get<string>("Category");
            string resolved = "other";

            bool matched = false;
            foreach (var mapping in categoryMapper)
            {
                foreach (var prefix in mapping.Value)
                {
                    if (!tag.StartsWith(prefix))
                    {
                        continue;
                    }

                    // First matching prefix wins.
                    resolved = mapping.Key;
                    matched = true;
                    break;
                }

                if (matched)
                {
                    break;
                }
            }

            output.Set("Category", resolved);
            return output.AsReadOnly();
        }
Exemple #23
0
        /// <summary>
        /// Reads one JSON object per UTF-8 line and emits a row per line, filling string and
        /// DateTime columns from the property named after each column.
        /// </summary>
        /// <remarks>Columns of any other type are not set by this extractor.</remarks>
        /// <param name="input">Unstructured input whose base stream holds one JSON object per line.</param>
        /// <param name="output">Row whose schema decides which properties are read.</param>
        /// <returns>One row per input line.</returns>
        public override IEnumerable <IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            using (var lines = new StreamReader(input.BaseStream, Encoding.UTF8))
            {
                for (string json = lines.ReadLine(); json != null; json = lines.ReadLine())
                {
                    var record = JsonConvert.DeserializeObject <JObject>(json);

                    foreach (var column in output.Schema)
                    {
                        if (column.Type == typeof(string))
                        {
                            // String columns take the property's text as is.
                            string text = record[column.Name].ToString();
                            output.Set(column.Name, text);
                        }
                        else if (column.Type == typeof(DateTime))
                        {
                            // DateTime columns parse the property's text.
                            DateTime moment = DateTime.Parse(record[column.Name].ToString());
                            output.Set(column.Name, moment);
                        }
                    }

                    yield return(output.AsReadOnly());
                }
            }
        }
        /// <summary>
        /// Writes the result of <c>Reverse</c> applied to the "name" column into "reversed".
        /// </summary>
        /// <param name="input">Row exposing a string "name" column.</param>
        /// <param name="output">Row whose "reversed" column receives the result.</param>
        /// <returns>The populated row as a read-only row.</returns>
        public override IRow Process(IRow input, IUpdatableRow output)
        {
            string reversedName = Reverse(input.Get <string>("name"));
            output.Set <string>("reversed", reversedName);

            IRow result = output.AsReadOnly();
            return result;
        }
        /// <summary>
        /// Splits the input on the row delimiter, parses each row with <c>LogRowParser</c> and
        /// writes the twelve parsed fields to output columns 0..11.
        /// </summary>
        /// <param name="input">Unstructured input that is split into one stream per row.</param>
        /// <param name="output">Row whose first twelve string columns receive the parsed fields.</param>
        /// <returns>One row per input row.</returns>
        public override IEnumerable <IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            // Read the input row by row
            foreach (Stream rowStream in input.Split(_row_delim))
            {
                using (StreamReader rowReader = new StreamReader(rowStream, this._encoding))
                {
                    string text = rowReader.ReadToEnd().Trim();

                    LogRowParser parser = new LogRowParser();

                    // NOTE(review): this instance is replaced immediately by the parse result;
                    // kept as in the original.
                    LogRowElements fields = new LogRowElements();
                    fields = parser.ParseElements(text);

                    // Column order of the output row, by position.
                    string[] values =
                    {
                        fields.IP,
                        fields.Identity,
                        fields.UserId,
                        fields.Timestamp,
                        fields.Offset,
                        fields.RequestMessage,
                        fields.StatusCode,
                        fields.Size,
                        fields.Referer,
                        fields.URL,
                        fields.UserAgent,
                        fields.Forwarded
                    };

                    for (int column = 0; column < values.Length; column++)
                    {
                        output.Set <string>(column, values[column]);
                    }

                    yield return(output.AsReadOnly());
                }
            }
        }
Exemple #26
0
        /// <summary>
        /// Looks up the country and the US state for the latitude/longitude stored in the
        /// configured input columns and writes their names to "country" and "USstates".
        /// </summary>
        /// <param name="input">Row supplying the <c>latColumn</c> and <c>lonColumn</c> doubles.</param>
        /// <param name="output">Row builder that receives "country" and "USstates".</param>
        /// <returns>A read-only snapshot of <paramref name="output"/>.</returns>
        public override IRow Process(IRow input, IUpdatableRow output)
        {
            double latitude  = input.Get <double>(latColumn);
            double longitude = input.Get <double>(lonColumn);

            GeoLocation point = new GeoLocation {
                Longitude = longitude, Latitude = latitude
            };
            var matchedCountry = _service.FindCountry(point);
            var matchedState   = _service.FindUsaState(point);

            // A missing match, or a match without a name, is written as an empty string.
            string countryName = (matchedCountry == null || matchedCountry.Name == null) ? "" : matchedCountry.Name;
            output.Set <string>("country", countryName);

            string stateName = (matchedState == null || matchedState.Name == null) ? "" : matchedState.Name;
            output.Set <string>("USstates", stateName);

            return(output.AsReadOnly());
        }
Exemple #27
0
        /// https://docs.microsoft.com/en-us/azure/data-lake-analytics/data-lake-analytics-u-sql-programmability-guide#use-user-defined-extractors
        /// <summary>
        /// Streams the input as XML and emits one row for every element named
        /// <c>elementName</c>; column 0 receives that element's outer XML on a single line.
        /// </summary>
        /// <param name="input">Wrapper whose <c>BaseStream</c> carries the XML document.</param>
        /// <param name="output">Row builder: column 0 is set with IUpdatableRow.Set and an
        /// immutable IRow is produced with IUpdatableRow.AsReadOnly.</param>
        /// <returns>A sequence of IRows, one per matching element.</returns>
        public override IEnumerable <IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            // Stream through an XmlReader so the whole document is never held in memory
            using (XmlReader reader = XmlReader.Create(input.BaseStream))
            {
                reader.MoveToContent();

                // Advance to each following element with the requested name
                while (reader.ReadToFollowing(this.elementName))
                {
                    // Read the element through its own subtree reader so that ReadOuterXml()
                    // does not move the outer reader past the next matching element.
                    using (XmlReader subtreeReader = reader.ReadSubtree())
                    {
                        subtreeReader.MoveToContent();

                        string outerXml   = subtreeReader.ReadOuterXml();
                        string singleLine = XElement.Parse(outerXml).ToString(SaveOptions.DisableFormatting);

                        // Turn CRLF, LF and CR into spaces so the value occupies a single line
                        singleLine = singleLine.Replace("\r\n", " ");
                        singleLine = singleLine.Replace('\n', ' ');
                        singleLine = singleLine.Replace('\r', ' ');

                        output.Set <string>(0, singleLine);

                        yield return(output.AsReadOnly());
                    }
                }
            }
        }
Exemple #28
0
        /// <summary>
        /// Splits the input on the row delimiter, deserializes every non-empty segment as a
        /// <c>GitRepository</c> JSON document and emits one row per document.
        /// </summary>
        /// <param name="input">Unstructured input that is split on <c>_rowDelim</c>.</param>
        /// <param name="output">Row builder populated from the deserialized repository.</param>
        /// <returns>One read-only row per non-empty segment.</returns>
        /// <remarks>
        /// Segments that are empty after trimming are skipped. The row is yielded only after
        /// its columns have been populated, so a blank segment can no longer emit a row that
        /// still carries whatever was last written to <paramref name="output"/>.
        /// </remarks>
        public override IEnumerable <IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            foreach (Stream current in input.Split(_rowDelim))
            {
                using (StreamReader streamReader = new StreamReader(current, _encoding))
                {
                    string line = streamReader.ReadToEnd().Trim();
                    if (string.IsNullOrEmpty(line))
                    {
                        // Nothing to deserialize: emit no row for this segment
                        continue;
                    }

                    GitRepository repo = JsonConvert.DeserializeObject <GitRepository>(line);
                    output.Set("openIssuesCount", repo.OpenIssuesCount);
                    output.Set("pushedAt", repo.PushedAt == null ? (DateTime?)null : repo.PushedAt.Value.UtcDateTime);
                    output.Set("createdAt", repo.CreatedAt.UtcDateTime);
                    output.Set("updatedAt", repo.UpdatedAt.UtcDateTime);
                    output.Set("permissions_admin", repo.Permissions == null ? (bool?)null : repo.Permissions.Admin);
                    output.Set("permissions_push", repo.Permissions == null ? (bool?)null : repo.Permissions.Push);
                    output.Set("permissions_pull", repo.Permissions == null ? (bool?)null : repo.Permissions.Pull);
                    output.Set("parentRepoId", repo.Parent == null ? (long?)null : repo.Parent.Id);
                    output.Set("sourceRepoId", repo.Source == null ? (long?)null : repo.Source.Id);
                    output.Set("hasIssues", repo.HasIssues);
                    output.Set("defaultBranch", repo.DefaultBranch);
                    output.Set("hasWiki", repo.HasWiki);
                    output.Set("hasDownload", repo.HasDownloads);
                    output.Set("allowRebaseMerge", repo.AllowRebaseMerge);
                    output.Set("allowSquashMerge", repo.AllowSquashMerge);
                    output.Set("allowMergeCommit", repo.AllowMergeCommit);
                    output.Set("hasPages", repo.HasPages);
                    output.Set("licenseMetadata_Key", repo.License == null ? null : repo.License.Key);
                    output.Set("licenseMetadata_Name", repo.License == null ? null : repo.License.Name);
                    output.Set("licenseMetadata_SpdxId", repo.License == null ? null : repo.License.SpdxId);
                    output.Set("licenseMetadata_Url", repo.License == null ? null : repo.License.Url);
                    output.Set("licenseMetadata_Featured", repo.License == null ? (bool?)null : repo.License.Featured);
                    output.Set("stargazersCount", repo.StargazersCount);
                    output.Set("forksCount", repo.ForksCount);
                    output.Set("fork", repo.Fork);
                    output.Set("url", repo.Url);
                    output.Set("htmlUrl", repo.HtmlUrl);
                    output.Set("cloneUrl", repo.CloneUrl);
                    output.Set("gitUrl", repo.GitUrl);
                    output.Set("sshUrl", repo.SshUrl);
                    output.Set("svnUrl", repo.SvnUrl);
                    output.Set("mirrorUrl", repo.MirrorUrl);
                    output.Set("id", repo.Id);
                    output.Set("ownerId", repo.Owner == null?(int?)null:repo.Owner.Id);
                    output.Set("name", repo.Name);
                    output.Set("fullName", repo.FullName);
                    output.Set("description", repo.Description);
                    output.Set("homepage", repo.Homepage);
                    output.Set("language", repo.Language);
                    output.Set("private", repo.Private);
                    output.Set("subscribersCount", repo.SubscribersCount);
                    output.Set("size", repo.Size);

                    yield return(output.AsReadOnly());
                }
            }
        }
        /// <summary>
        /// Splits the input on the row delimiter, deserializes every non-empty segment as a
        /// <c>GithubCommits</c> JSON document and emits one row per document.
        /// </summary>
        /// <param name="input">Unstructured input that is split on <c>_rowDelim</c>.</param>
        /// <param name="output">Row builder populated from the deserialized commit.</param>
        /// <returns>One read-only row per non-empty segment.</returns>
        /// <remarks>
        /// Segments that are empty after trimming are skipped. The row is yielded only after
        /// its columns have been populated, so a blank segment can no longer emit a row that
        /// still carries whatever was last written to <paramref name="output"/>.
        /// </remarks>
        public override IEnumerable <IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            foreach (Stream current in input.Split(_rowDelim))
            {
                using (StreamReader streamReader = new StreamReader(current, _encoding))
                {
                    string line = streamReader.ReadToEnd().Trim();
                    if (string.IsNullOrEmpty(line))
                    {
                        // Nothing to deserialize: emit no row for this segment
                        continue;
                    }

                    GithubCommits commit = JsonConvert.DeserializeObject <GithubCommits>(line);
                    output.Set("authorId", commit.Author == null ? (int?)null : commit.Author.Id);
                    output.Set("commentsUrl", commit.CommentsUrl);
                    output.Set("commit_AuthorMessage", commit.Commit == null ? null : commit.Commit.Message);
                    output.Set("commit_AuthorEmail", commit.Commit == null ? null : commit.Commit.Author == null ? null : commit.Commit.Author.Email);
                    output.Set("commit_AuthorDate", commit.Commit == null ? (DateTime?)null : commit.Commit.Author == null ? (DateTime?)null : commit.Commit.Author.Date.UtcDateTime);
                    output.Set("commit_CommitterEmail", commit.Commit == null ? null : commit.Commit.Committer == null ? null : commit.Commit.Committer.Email);
                    output.Set("commit_CommitterDate", commit.Commit == null ? (DateTime?)null : commit.Commit.Committer == null ? (DateTime?)null : commit.Commit.Committer.Date.UtcDateTime);
                    output.Set("commit_Tree", commit.Commit == null ? null : commit.Commit.Tree == null ? null : JsonConvert.SerializeObject(commit.Commit.Tree));
                    output.Set("commit_Parents", commit.Commit == null ? null : commit.Commit.Parents == null ? null : JsonConvert.SerializeObject(commit.Commit.Parents));
                    output.Set("commit_CommentCount", commit.Commit == null ? (int?)null : commit.Commit.CommentCount);
                    output.Set("commit_Verification", commit.Commit == null ? null : commit.Commit.Verification == null ? null : JsonConvert.SerializeObject(commit.Commit.Verification));
                    output.Set("committerId", commit.Committer == null ? (int?)null : commit.Committer.Id);
                    output.Set("htmlUrl", commit.HtmlUrl);
                    output.Set("stats_Additions", commit.Stats == null ? (int?)null : commit.Stats.Additions);
                    output.Set("stats_Deletions", commit.Stats == null ? (int?)null : commit.Stats.Deletions);
                    output.Set("stats_Total", commit.Stats == null ? (int?)null : commit.Stats.Total);
                    output.Set("parents", commit.Parents == null ? null : JsonConvert.SerializeObject(commit.Parents));
                    output.Set("url", commit.Url);
                    output.Set("label", commit.Label);
                    output.Set("ref", commit.Ref);
                    output.Set("sha", commit.Sha);
                    output.Set("userId", commit.User == null?(int?)null:commit.User.Id);
                    output.Set("repositoryId", commit.Repository == null?(long?)null:commit.Repository.Id);

                    // "files" stores a reduced projection of each changed file (status, raw URL,
                    // previous file name) serialized as JSON, or null when the commit lists no files.
                    if (commit.Files != null)
                    {
                        List <GitProcessedFiles> processedFiles = new List <GitProcessedFiles>();
                        foreach (var f in commit.Files)
                        {
                            processedFiles.Add(new GitProcessedFiles()
                            {
                                Status           = f.Status,
                                RawUrl           = f.RawUrl,
                                PreviousFileName = f.PreviousFileName
                            });
                        }
                        output.Set("files", JsonConvert.SerializeObject(processedFiles));
                    }
                    else
                    {
                        output.Set("files", (string)null);
                    }

                    yield return(output.AsReadOnly());
                }
            }
        }
Exemple #30
0
        /// <summary>
        /// Merges consecutive rows into runs and emits one row per run. A row joins the current
        /// run when the gap between its begin and the previous row's end is at most
        /// <c>_maxDuration</c> seconds; its value is then added to the run's total. Otherwise
        /// the current run is emitted and a new run starts with that row.
        /// </summary>
        /// <param name="input">Rows of one reduce group; must be PRESORTED on begin and
        /// READONLY on the reduce key.</param>
        /// <param name="output">Row builder receiving the begin, end and value columns.</param>
        /// <returns>One row per run; the last run is emitted after the input is exhausted.</returns>
        public override IEnumerable <IRow> Reduce(IRowset input, IUpdatableRow output)
        {
            // State of the run currently being accumulated
            bool     runStarted = false;
            DateTime runBegin   = DateTime.MinValue;
            DateTime runEnd     = DateTime.MinValue;
            double   runTotal   = 0.0;

            foreach (var row in input.Rows)
            {
                DateTime rowBegin = row.Get <DateTime>(BeginColName);
                DateTime rowEnd   = row.Get <DateTime>(EndColName);
                double   rowValue = row.Get <double>(ValueColName);

                if (!runStarted)
                {
                    // The very first row opens the first run
                    runStarted = true;
                    runBegin   = rowBegin;
                    runTotal   = rowValue;
                }
                else if ((rowBegin - runEnd).TotalSeconds <= _maxDuration)
                {
                    // Close enough to the previous row: fold it into the current run
                    runTotal += rowValue;
                }
                else
                {
                    // Gap too large: flush the finished run, then open a new one
                    output.Set <double>(ValueColName, runTotal);
                    output.Set <DateTime>(BeginColName, runBegin);
                    output.Set <DateTime>(EndColName, runEnd);

                    yield return(output.AsReadOnly());

                    runTotal = rowValue;
                    runBegin = rowBegin;
                }

                // The run always ends where the most recent row ends
                runEnd = rowEnd;
            }

            // Flush the run that was still open when the input ended
            output.Set <DateTime>(BeginColName, runBegin);
            output.Set <DateTime>(EndColName, runEnd);
            output.Set <double>(ValueColName, runTotal);
            yield return(output.AsReadOnly());
        }
 /// <summary>
 /// Emits one row for every integer from <c>Start</c> to <c>End</c> inclusive, writing the
 /// integer to the "Value" column.
 /// </summary>
 /// <param name="input">Input row; not read by this applier.</param>
 /// <param name="output">Row builder that receives the "Value" column.</param>
 /// <returns>One read-only row per integer in the range.</returns>
 public override IEnumerable <IRow> Apply(IRow input, IUpdatableRow output)
 {
     int value = this.Start;

     while (value <= this.End)
     {
         output.Set <int>("Value", value);
         yield return(output.AsReadOnly());
         value++;
     }
 }
Exemple #32
0
        /// <summary>
        /// Parses the XML held in the column named <c>xmlColumnName</c> and emits one row per
        /// node selected by <c>rowPath</c>.
        /// </summary>
        /// <param name="input">A SQLIP row carrying the XML string.</param>
        /// <param name="output">A SQLIP updatable row; every column must be of type string.</param>
        /// <returns>IEnumerable of IRow, one IRow per selected node.</returns>
        /// <exception cref="ArgumentException">An output column is not of type string.</exception>
        /// <remarks>Because applier constructor arguments cannot depend on column references,
        /// the column to parse is identified by name and its value is fetched with IRow.Get.
        /// The columns "ElementName", "ChildName" and "GrandChildName" receive node names; every
        /// other column receives the inner XML of an XPath lookup relative to the row node.</remarks>
        public override IEnumerable <IRow> Apply(IRow input, IUpdatableRow output)
        {
            // All requested columns have to be strings
            IColumn offending = output.Schema.FirstOrDefault(candidate => candidate.Type != typeof(string));

            if (offending != null)
            {
                throw new ArgumentException(string.Format("Column '{0}' must be of type 'string', not '{1}'", offending.Name, offending.Type.Name));
            }

            XmlDocument document = new XmlDocument();

            document.LoadXml(input.Get <string>(this.xmlColumnName));
            XmlNamespaceManager namespaces = new XmlNamespaceManager(document.NameTable);

            // Register any namespace declarations that were supplied
            if (this.namespaceDecls != null)
            {
                foreach (var declaration in this.namespaceDecls)
                {
                    namespaces.AddNamespace(declaration.Key, declaration.Value);
                }
            }

            foreach (XmlNode rowNode in document.DocumentElement.SelectNodes(this.rowPath, namespaces))
            {
                // The same builder is reused for every row, so each column is set again here
                foreach (IColumn col in output.Schema)
                {
                    if (col.Name == "ElementName")
                    {
                        // Hierarchy column: name of the row node itself
                        output.Set <string>("ElementName", rowNode.Name);
                    }
                    else if (col.Name == "ChildName")
                    {
                        // Hierarchy column: name of the first child, if any
                        output.Set <string>("ChildName", rowNode.FirstChild?.Name);
                    }
                    else if (col.Name == "GrandChildName")
                    {
                        // Hierarchy column: name of the first grandchild, if any
                        output.Set <string>("GrandChildName", rowNode.FirstChild?.FirstChild?.Name);
                    }
                    else
                    {
                        // Mapped column: use the explicit XPath when one maps to this column,
                        // otherwise use the column name itself as the XPath.
                        var mapping   = this.columnPaths.FirstOrDefault(pair => pair.Value == col.Name);
                        var valueNode = rowNode.SelectSingleNode(mapping.Key ?? col.Name, namespaces);
                        output.Set(mapping.Value ?? col.Name, valueNode?.InnerXml);
                    }
                }

                // Build the immutable IRow for this node
                yield return(output.AsReadOnly());
            }
        }
 /// <summary>
 /// Reads the input stream line by line and emits one row per line, delegating the
 /// population of the row to <c>LineToRow</c>.
 /// </summary>
 /// <param name="input">Wrapper whose <c>BaseStream</c> is read as text.</param>
 /// <param name="output">Row builder filled by <c>LineToRow</c>.</param>
 /// <returns>One read-only row per input line.</returns>
 public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
 {
     using(var lineReader = new StreamReader(input.BaseStream))
     {
         // ReadLine returns null once the stream is exhausted
         for (string text = lineReader.ReadLine(); text != null; text = lineReader.ReadLine())
         {
             LineToRow(text, output);
             yield return output.AsReadOnly();
         }
     }
 }
        /// <summary>
        /// Reads a stream of JSON values and emits one row per value, storing a
        /// <c>SqlMap&lt;string, byte[]&gt;</c> in the column named <c>_outputColumnName</c>.
        /// </summary>
        /// <param name="input">Wrapper whose <c>BaseStream</c> is read as UTF-8 JSON.</param>
        /// <param name="output">Row builder that receives the map column.</param>
        /// <returns>One read-only row per JSON value.</returns>
        /// <remarks>
        /// The map holds the flattened JSON when the size reported by <c>FlattenJson</c> is below
        /// <c>_dataLakeMaxRowSize</c>. Otherwise it holds the gzipped compact JSON under
        /// "!CompressedRow" when that is below the limit, or an error message under
        /// "!RowExtractorError" when it is not.
        /// </remarks>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            using (var reader = new JsonTextReader(new StreamReader(input.BaseStream, Encoding.UTF8)))
            {
                // Accept several top-level JSON values in the same stream
                reader.SupportMultipleContent = true;

                while (reader.Read())
                {
                    var token = JToken.ReadFrom(reader);

                    var size = 0;
                    var flattened = GHInsights.USql.Utility.FlattenJson(token, ref size);

                    if (size < (_dataLakeMaxRowSize))
                    {
                        // Common case: the flattened form is stored directly
                        output.Set(_outputColumnName, new SqlMap<string, byte[]>(flattened));
                        yield return output.AsReadOnly();
                        continue;
                    }

                    // Too large when flattened: try the gzipped compact JSON instead
                    var gzipped = GHInsights.USql.Utility.GzipByteArray(Encoding.UTF8.GetBytes(token.ToString(Formatting.None)));
                    var fallback = new Dictionary<string, byte[]>();

                    if (gzipped.Length < (_dataLakeMaxRowSize))
                    {
                        fallback.Add("!CompressedRow", gzipped);
                    }
                    else
                    {
                        // Still too large: report the problem in-band instead of throwing
                        fallback.Add(
                            "!RowExtractorError",
                            Encoding.UTF8.GetBytes($"Resulting SqlMap is too large: OriginalSize:{size} CompressedSize: {gzipped.Length} - {token.ToString(Formatting.None).Substring(0, 100)}"));
                    }

                    output.Set(_outputColumnName, new SqlMap<string, byte[]>(fallback));
                    yield return output.AsReadOnly();
                }
            }
        }
Exemple #35
0
        /// <summary>
        /// Loads the image bytes from the "image_data" column and writes four image properties
        /// plus a 150x150 scaled copy to the output columns that exist.
        /// </summary>
        /// <param name="input">Row supplying the "image_data" byte array.</param>
        /// <param name="output">Row builder receiving "equipment_make", "equipment_model",
        /// "description", "copyright" and "thumbnail" through <c>SetColumnIfExists</c>.</param>
        /// <returns>A read-only snapshot of <paramref name="output"/>.</returns>
        public override IRow Process(IRow input, IUpdatableRow output)
        {
            byte[] imageBytes = input.Get<byte[]>("image_data");

            // Build the image once per row and read every property from that one instance
            using (StreamImage image = new StreamImage(imageBytes))
            {
                output.SetColumnIfExists("equipment_make", image.getStreamImageProperty(ImageProperties.equipment_make));
                output.SetColumnIfExists("equipment_model", image.getStreamImageProperty(ImageProperties.equipment_model));
                output.SetColumnIfExists("description", image.getStreamImageProperty(ImageProperties.description));
                output.SetColumnIfExists("copyright", image.getStreamImageProperty(ImageProperties.copyright));
                output.SetColumnIfExists("thumbnail", image.scaleStreamImageTo(150, 150));
            }

            return output.AsReadOnly();
        }
        /// <summary>
        /// Streams the input as XML and emits one row for every element whose local name is
        /// "row". Each output column is filled from the attribute that has the column's name.
        /// </summary>
        /// <param name="input">Wrapper whose <c>BaseStream</c> carries the XML document.</param>
        /// <param name="output">Row builder populated from the element's attributes.</param>
        /// <returns>One read-only row per "row" element.</returns>
        /// <remarks>
        /// A missing attribute yields the column's default value. String values are passed
        /// through <c>Simplify</c> and shortened when their UTF-8 size exceeds
        /// <c>Constants.Limits.StringSizeInBytes</c>. Other values are converted with the column
        /// type's <see cref="TypeConverter"/> using the invariant culture, so parsing numbers and
        /// dates from the file does not depend on the culture of the executing thread.
        /// </remarks>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            using (XmlReader reader = XmlReader.Create(input.BaseStream))
            {
                while (reader.Read())
                {
                    // Only <row> elements carry data; skip every other node
                    if (reader.NodeType != XmlNodeType.Element || reader.LocalName != "row")
                    {
                        continue;
                    }

                    foreach (IColumn column in output.Schema)
                    {
                        string rawValue = reader.GetAttribute(column.Name);

                        if (rawValue == null)
                        {
                            // Attribute absent: fall back to the column default
                            output.Set(column.Name, column.DefaultValue);
                        }
                        else if (column.Type == typeof(string))
                        {
                            string simplifiedValue = Simplify(rawValue);

                            int byteCount = Encoding.UTF8.GetByteCount(simplifiedValue);

                            if (byteCount > Constants.Limits.StringSizeInBytes) // 128kB per the original author's note
                            {
                                simplifiedValue = ShortenWithinBoundries(simplifiedValue);
                            }

                            output.Set(column.Name, simplifiedValue);
                        }
                        else
                        {
                            // Machine-readable data: parse culture-independently
                            var typeConverter = TypeDescriptor.GetConverter(column.Type);
                            var castedValue = typeConverter.ConvertFromInvariantString(rawValue);

                            output.Set(column.Name, castedValue);
                        }
                    }

                    yield return output.AsReadOnly();
                }
            }
        }
Exemple #37
0
 // IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow outputrow)
 //
 // Implementation of DriverExtractor that overrides the Extract method of IExtractor.
 // The input is split on the row delimiter; each row is split on the column delimiter and
 // every resulting cell is handed to OutputValueAtCol_I together with its zero-based index.
 // One row is emitted per input row.
 public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow outputrow)
 {
     foreach (Stream rowStream in input.Split(this._row_delim))
     {
         using (StreamReader rowReader = new StreamReader(rowStream, this._encoding))
         {
             string[] cells = rowReader.ReadToEnd().Split(new string[]{this._col_delim}, StringSplitOptions.None);
             int columnIndex = 0;

             foreach (string cell in cells)
             {
                 this.OutputValueAtCol_I(cell, columnIndex, outputrow);
                 columnIndex++;
             }
         }

         // Emitted after the reader for this row has been disposed
         yield return outputrow.AsReadOnly();
     }
 }
Exemple #38
0
        /// <summary>
        /// Reads Avro objects from the input stream and emits one row per object, copying the
        /// member named after each output column into that column.
        /// </summary>
        /// <param name="input">Wrapper whose <c>BaseStream</c> carries the Avro container.</param>
        /// <param name="output">Row builder populated from each Avro object.</param>
        /// <returns>One read-only row per Avro object.</returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            // NOTE(review): the serializer is not used below; it is kept because creating it
            // may validate avroSchema as a side effect - confirm before removing.
            var serializer = AvroSerializer.CreateGeneric(avroSchema);

            using (var container = AvroContainer.CreateGenericReader(input.BaseStream))
            using (var sequence = new SequentialReader<dynamic>(container))
            {
                foreach (var record in sequence.Objects)
                {
                    // Look every output column up by name on the Avro object
                    foreach (var field in output.Schema)
                    {
                        output.Set(field.Name, record[field.Name]);
                    }

                    yield return output.AsReadOnly();
                }
            }
        }
Exemple #39
0
        /// <summary>
        /// Parses the whole input as one JSON document and emits one row for every object that
        /// <c>SelectChildren</c> returns for <c>rowpath</c>.
        /// </summary>
        /// <param name="input">Wrapper whose <c>BaseStream</c> carries the JSON document.</param>
        /// <param name="output">Row builder populated by <c>JObjectToRow</c>.</param>
        /// <returns>One read-only row per selected JSON object.</returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            using(var jsonReader = new JsonTextReader(new StreamReader(input.BaseStream)))
            {
                // Load the complete document
                //  TODO: Json.Net fails with empty input files
                var document = JToken.ReadFrom(jsonReader);

                // Every selected object becomes a row; its fields become the columns
                foreach(JObject rowObject in SelectChildren(document, this.rowpath))
                {
                    this.JObjectToRow(rowObject, output);

                    yield return output.AsReadOnly();
                }
            }
        }
Exemple #40
0
    /// <summary>
    /// Loads the input as an XML document and emits one row per node selected by
    /// <c>m_XPath</c>. Each output column is filled from the child node selected by the
    /// column's name, converted to the column's type.
    /// </summary>
    /// <param name="input">Wrapper whose <c>BaseStream</c> carries the XML document.</param>
    /// <param name="output">Row builder populated from each selected node.</param>
    /// <returns>One read-only row per selected node.</returns>
    /// <remarks>
    /// A column whose child node is missing is reset to the column's default value, so a value
    /// written for an earlier node cannot carry over into a later row. Conversion uses the
    /// invariant culture so the result does not depend on the culture of the executing thread.
    /// </remarks>
    public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
    {
        XmlDocument xmlDocument = new XmlDocument();
        xmlDocument.Load(input.BaseStream);

        foreach (XmlNode xmlNode in xmlDocument.DocumentElement.SelectNodes(this.m_XPath))
        {
            foreach (IColumn col in output.Schema)
            {
                XmlNode xml = xmlNode.SelectSingleNode(col.Name);
                if (xml != null)
                {
                    object val = Convert.ChangeType(xml.InnerXml, col.Type, System.Globalization.CultureInfo.InvariantCulture);
                    output.Set(col.Name, val);
                }
                else
                {
                    // The builder is reused across rows: clear whatever the previous row left here
                    output.Set(col.Name, col.DefaultValue);
                }
            }

            yield return output.AsReadOnly();
        }
    }
Exemple #41
0
 /// <summary>
 /// Emits a single row whose column 0 holds the byte array that
 /// <c>ImageOps.GetByteArrayforImage</c> produces from the input stream.
 /// </summary>
 /// <param name="input">Wrapper whose <c>BaseStream</c> carries the image.</param>
 /// <param name="output">Row builder that receives the bytes in column 0.</param>
 /// <returns>Exactly one read-only row.</returns>
 public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
 {
     output.Set<byte[]>(0, ImageOps.GetByteArrayforImage(input.BaseStream));
     yield return output.AsReadOnly();
 }
Exemple #42
0
 // IRow Process(IRow input, IUpdatableRow output)
 //
 // Implementation of the user-defined processor; overrides the Process method of IProcessor.
 // Replaces the "country" value with its entry in EnglishCountryNames.CountryTranslation
 // when the value is one of that table's keys, and passes it through unchanged otherwise.
 public override IRow Process(IRow input, IUpdatableRow output)
 {
     string country = input.Get<string>("country");
     bool hasTranslation = EnglishCountryNames.CountryTranslation.Keys.Contains(country);

     output.Set<string>("country", hasTranslation ? EnglishCountryNames.CountryTranslation[country] : country);
     return output.AsReadOnly();
 }
Exemple #43
0
        /// <summary>
        /// Parses the XML held in the column named <c>xmlColumnName</c> and emits one row per
        /// node selected by <c>rowPath</c>.
        /// </summary>
        /// <param name="input">A SQLIP row carrying the XML string.</param>
        /// <param name="output">A SQLIP updatable row; every column must be of type string.</param>
        /// <returns>IEnumerable of IRow, one IRow per selected node.</returns>
        /// <exception cref="ArgumentException">An output column is not of type string.</exception>
        /// <remarks>Because applier constructor arguments cannot depend on column references,
        /// the column to parse is identified by name and its value is fetched with IRow.Get.
        /// Each column receives the inner XML of an XPath lookup relative to the row node, or
        /// null when the lookup finds nothing.</remarks>
        public override IEnumerable<IRow> Apply(IRow input, IUpdatableRow output)
        {
            // All requested columns have to be strings
            IColumn offending = output.Schema.FirstOrDefault(candidate => candidate.Type != typeof(string));
            if (offending != null)
            {
                throw new ArgumentException(string.Format("Column '{0}' must be of type 'string', not '{1}'", offending.Name, offending.Type.Name));
            }

            XmlDocument document = new XmlDocument();
            document.LoadXml(input.Get<string>(this.xmlColumnName));

            foreach (XmlNode rowNode in document.DocumentElement.SelectNodes(this.rowPath))
            {
                // The same builder is reused for every row, so each column is set again here
                foreach (IColumn col in output.Schema)
                {
                    // Use the explicit XPath when one maps to this column,
                    // otherwise use the column name itself as the XPath.
                    var mapping = this.columnPaths.FirstOrDefault(pair => pair.Value == col.Name);
                    XmlNode valueNode = rowNode.SelectSingleNode(mapping.Key ?? col.Name);

                    string innerXml = null;
                    if (valueNode != null)
                    {
                        innerXml = valueNode.InnerXml;
                    }

                    output.Set(mapping.Value ?? col.Name, innerXml);
                }

                // Build the immutable IRow for this node
                yield return output.AsReadOnly();
            }
        }
Exemple #44
0
        // IRow Process(IRow input, IUpdatableRow output)
        //
        // Implementation of the user-defined processor; overrides the Process method of IProcessor.
        //
        // Works in three passes:
        //   1. Collect the names of the input columns of type SqlMap<string, string>.
        //   2. For every output column that is not one of those maps: copy it from the input
        //      column of the same name when one exists; otherwise look its name up as a key in
        //      the map columns and promote the first match to the output column.
        //   3. Copy every map column that is also an output column, leaving out the keys that
        //      were promoted in pass 2.
        //
        // The map detection compares against the open generic type SqlMap<,>. Comparing the
        // generic type definition with the non-generic typeof(SqlMap) can never succeed, which
        // left the list of map columns permanently empty. Both type arguments are required to be
        // string because the maps are read as SqlMap<string, string> below.
        public override IRow Process(IRow input, IUpdatableRow output)
        {
            // Pass 1: names of the SqlMap<string, string> input columns
            List<string> list = new List<string>();
            foreach (var current in input.Schema)
            {
                if (current.Type.IsGenericType && current.Type.GetGenericTypeDefinition() == typeof(SqlMap<,>))
                {
                    Type[] typeArguments = current.Type.GetGenericArguments();
                    if (typeArguments[0] == typeof(string) && typeArguments[1] == typeof(string))
                    {
                        list.Add(current.Name);
                    }
                }
            }

            // Map column name -> keys of that map which were promoted to their own column
            Dictionary<string, ArrayList> maps_to_be_changed = new Dictionary<string, ArrayList>();

            // Pass 2: fill every output column that is not itself a map column
            foreach (var current2 in output.Schema)
            {
                bool flag = list.Contains(current2.Name);
                if (-1 < input.Schema.IndexOf(current2.Name) && !flag)
                {
                    // Same-named input column exists: plain copy
                    output.Set<object>(current2.Name, input.Get<object>(current2.Name));
                }
                else if (!flag)
                {
                    // No same-named input column: search the map columns for a matching key
                    foreach (string current3 in list)
                    {
                        SqlMap<string, string> sqlMap = input.Get<SqlMap<string, string>>(current3);
                        SqlArray<string> sqlArray = null;
                        List<string> list2 = null;
                        if (sqlMap != null)
                        {
                            sqlArray = sqlMap.Keys;
                            if (sqlMap.Values != null)
                            {
                                list2 = sqlMap.Values.ToList<string>();
                            }
                        }
                        int num = (sqlArray == null) ? -1 : sqlArray.ToList<string>().IndexOf(current2.Name);
                        if (num != -1)
                        {
                            // Found: promote the value and remember the key for pass 3
                            output.Set<string>(current2.Name, list2[num]);
                            if (maps_to_be_changed.ContainsKey(current3))
                            {
                                maps_to_be_changed[current3].Add(current2.Name);
                            }
                            else
                            {
                                maps_to_be_changed.Add(current3, new ArrayList
                                {
                                    current2.Name
                                });
                            }
                            break;
                        }

                        // Not in this map: write the type's default until a later map matches
                        output.Set<object>(current2.Name, current2.Type.IsValueType ? Activator.CreateInstance(current2.Type) : null);
                    }
                }
            }

            // Pass 3: copy the map columns, dropping the keys promoted in pass 2
            foreach (IColumn out_col in output.Schema)
            {
                if (!list.Contains(out_col.Name))
                {
                    continue;
                }

                SqlMap<string, string> sqlMap = input.Get<SqlMap<string, string>>(out_col.Name);
                if (maps_to_be_changed.ContainsKey(out_col.Name))
                {
                    sqlMap = new SqlMap<string, string>(
                        from kvp in sqlMap
                        where !maps_to_be_changed[out_col.Name].Contains(kvp.Key)
                        select kvp);
                }
                output.Set<SqlMap<string, string>>(out_col.Name, sqlMap);
            }

            return output.AsReadOnly();
        }
Exemple #45
0
        /// <summary>Extract is called at least once per instance</summary>
        /// <remarks>
        /// Streams the XML input with a forward-only reader and a three-state machine:
        /// look for a row element, look for a column element inside the row, and
        /// accumulate the inner XML of the current column. Every element named
        /// <c>rowPath</c> produces one output row.
        /// </remarks>
        /// <param name="input">Wrapper for a Stream</param>
        /// <param name="output">IUpdatableRow uses a mutable builder pattern -- 
        /// set individual fields with IUpdatableRow.Set, then build an immutable IRow by
        /// calling IUpdatableRow.AsReadOnly.</param>
        /// <returns>IEnumerable of IRow, one IRow per SQLIP row.</returns>
        /// <exception cref="ArgumentException">
        /// Raised during enumeration when a requested column is not of type string, or
        /// when the document ends while a row is still open.
        /// </exception>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            // Make sure that all requested columns are of type string
            IColumn column = output.Schema.FirstOrDefault(col => col.Type != typeof(string));
            if (column != null)
            {
                throw new ArgumentException(string.Format("Column '{0}' must be of type 'string', not '{1}'", column.Name, column.Type.Name));
            }

            // Build the set of requested column names once, instead of re-projecting
            // and linearly scanning the schema for every XML node that is read.
            // NOTE(review): assumes the output schema does not change during extraction.
            var schemaColumnNames = new HashSet<string>(output.Schema.Select(c => c.Name));

            // Number of currently open descendants that share the name of the column
            // element being read. Without it, the first matching end tag of such a
            // descendant would be mistaken for the end of the column itself.
            int sameNameDepth = 0;

            var state = new ParseState();
            state.ClearAndJump(ParseLocation.Row);
            using (var reader = XmlReader.Create(input.BaseStream))
            {
                while (reader.Read())
                {
                    switch (state.Location)
                    {
                        case ParseLocation.Row:
                            // when looking for a new row, we are only interested in elements
                            // whose name matches the requested row element
                            if (reader.NodeType == XmlNodeType.Element && reader.Name == this.rowPath)
                            {
                                // when found, clear the IUpdatableRow's memory
                                // (there is no provided Clear method)
                                for (int i = 0; i < output.Schema.Count; i++)
                                {
                                    output.Set<string>(i, null);
                                }

                                state.ClearAndJump(ParseLocation.Column);
                            }

                            break;
                        case ParseLocation.Column:
                            // When looking for a new column, we are interested in elements
                            // whose name is a key in the columnPaths map or
                            // whose name is in the requested output schema.
                            // This indicates a column whose value needs to be read, 
                            // so prepare for reading it by clearing elementValue.
                            if (reader.NodeType == XmlNodeType.Element
                                && (this.columnPaths.ContainsKey(reader.Name)
                                    || schemaColumnNames.Contains(reader.Name)))
                            {
                                if (reader.IsEmptyElement)
                                {
                                    // For an empty element, set an empty string 
                                    // and immediately jump to looking for the next column
                                    output.Set(this.columnPaths[reader.Name] ?? reader.Name, state.ReadElementValue());
                                    state.ClearAndJump(ParseLocation.Column);
                                }
                                else
                                {
                                    state.Location = ParseLocation.Data;
                                    state.ElementName = reader.Name;
                                    state.ClearElementValue();
                                    sameNameDepth = 0;
                                }
                            }
                            else if (reader.NodeType == XmlNodeType.EndElement && reader.Name == this.rowPath)
                            {
                                // The other interesting case is an end element whose name matches 
                                // the current row element. This indicates the end of a row, 
                                // so yield the now-complete row and jump to looking for 
                                // another row.
                                yield return output.AsReadOnly();
                                state.ClearAndJump(ParseLocation.Row);
                            }

                            break;
                        case ParseLocation.Data:
                            // Most of the code for reading the value of a column
                            // deals with re-creating the inner XML from discrete elements.
                            // The only jump occurs when the reader hits the end element
                            // that closes the current column. In this case, we
                            // need to write the accumulated value to the appropriate 
                            // column in the output row.
                            switch (reader.NodeType)
                            {
                                case XmlNodeType.EndElement:
                                    if (reader.Name == state.ElementName && sameNameDepth == 0)
                                    {
                                        output.Set(this.columnPaths[state.ElementName] ?? state.ElementName, state.ReadElementValue());
                                        state.ClearAndJump(ParseLocation.Column);
                                    }
                                    else
                                    {
                                        if (reader.Name == state.ElementName)
                                        {
                                            // Closes a nested element that merely shares the
                                            // column's name; the column itself is still open.
                                            sameNameDepth--;
                                        }

                                        state.ElementWriter.WriteEndElement();
                                    }

                                    break;
                                case XmlNodeType.Element:
                                {
                                    // Capture the name before the attributes are copied.
                                    string nestedName = reader.Name;
                                    state.ElementWriter.WriteStartElement(nestedName);
                                    state.ElementWriter.WriteAttributes(reader, false);
                                    if (reader.IsEmptyElement)
                                    {
                                        state.ElementWriter.WriteEndElement();
                                    }
                                    else if (nestedName == state.ElementName)
                                    {
                                        // A non-empty descendant with the column's own name:
                                        // remember that its end tag must not end the column.
                                        sameNameDepth++;
                                    }

                                    break;
                                }
                                case XmlNodeType.CDATA:
                                    state.ElementWriter.WriteCData(reader.Value);
                                    break;
                                case XmlNodeType.Comment:
                                    state.ElementWriter.WriteComment(reader.Value);
                                    break;
                                case XmlNodeType.ProcessingInstruction:
                                    state.ElementWriter.WriteProcessingInstruction(reader.Name, reader.Value);
                                    break;
                                default:
                                    state.ElementWriter.WriteString(reader.Value);
                                    break;
                            }

                            break;
                        default:
                            throw new NotImplementedException("StreamFromXml has not implemented a new member of the ParseLocation enum");
                    }
                }

                // Anything other than "looking for a row" at end of input means a row
                // (or a column inside it) was never closed.
                if (state.Location != ParseLocation.Row)
                {
                    throw new ArgumentException("XML document ended without proper closing tags");
                }
            }
        }
        /// <summary>
        /// Reads JSON from the input stream token by token and yields one row each
        /// time an outermost JSON object is closed.
        /// </summary>
        /// <remarks>
        /// Property names are matched against the output schema by exact name;
        /// properties without a matching column are skipped together with their value.
        /// Scalar values are converted with the column type's TypeConverter when it
        /// accepts the token's value type and are stored as-is otherwise. A JSON null
        /// stores the column's DefaultValue. For string columns whose value is a JSON
        /// object or array, the nested JSON is re-serialized and stored as text.
        /// NOTE(review): the output row is not reset between rows, so a property that
        /// is missing from one object presumably keeps the value set by a previous
        /// object -- confirm whether that is intended.
        /// </remarks>
        /// <param name="input">Unstructured reader wrapping the JSON stream.</param>
        /// <param name="output">Mutable row that is filled and snapshotted with AsReadOnly.</param>
        /// <returns>One IRow per outermost JSON object.</returns>
        /// <exception cref="NotImplementedException">
        /// Raised during enumeration for a JSON token type this method does not handle.
        /// </exception>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            if (input.Length == 0)
            {
                yield break;
            }

            using (var reader = new JsonTextReader(new StreamReader(input.BaseStream)))
            {
                // Column that the next value belongs to; null while no value is expected.
                IColumn currentColumn = null;

                // Non-null only while a nested object/array is being re-serialized
                // into a string column.
                StringBuilder valueBuilder = null;
                JsonTextWriter writer = null;

                // "Global" counters track nesting of the whole document, "local"
                // counters track nesting inside the value being re-serialized.
                var startedGlobalObjects = 0;
                var startedLocalObjects = 0;
                var startedGlobalArrays = 0;
                var startedLocalArrays = 0;

                while (reader.Read())
                {
                    switch (reader.TokenType)
                    {
                        case JsonToken.StartArray:
                            startedGlobalArrays++;
                            if (currentColumn != null && currentColumn.Type == typeof(string))
                            {
                                if (writer == null)
                                {
                                    valueBuilder = new StringBuilder();
                                    writer = new JsonTextWriter(new StringWriter(valueBuilder));
                                }
                                startedLocalArrays++;
                                writer.WriteStartArray();
                            }
                            break;
                        case JsonToken.EndArray:
                            startedGlobalArrays--;
                            if (writer != null)
                            {
                                startedLocalArrays--;
                                writer.WriteEndArray();
                            }
                            if (currentColumn != null && valueBuilder != null
                                && startedLocalArrays == 0 && startedLocalObjects == 0)
                            {
                                // The nested value is complete: store its JSON text.
                                output.Set(currentColumn.Name, valueBuilder.ToString());
                                writer = null;
                                valueBuilder = null;
                                currentColumn = null;
                            }
                            // Stop only when the array that just closed is the document
                            // root. An array that is a property value of a root-level
                            // object also brings the array counter back to zero, and must
                            // not end extraction before that object's row is yielded.
                            if (startedGlobalArrays == 0 && startedGlobalObjects == 0)
                            {
                                yield break;
                            }
                            break;

                        case JsonToken.StartObject:
                            startedGlobalObjects++;
                            if (currentColumn != null && currentColumn.Type == typeof(string))
                            {
                                if (writer == null)
                                {
                                    valueBuilder = new StringBuilder();
                                    writer = new JsonTextWriter(new StringWriter(valueBuilder));
                                }
                                startedLocalObjects++;
                                writer.WriteStartObject();
                            }
                            break;
                        case JsonToken.EndObject:
                            startedGlobalObjects--;
                            if (writer != null)
                            {
                                startedLocalObjects--;
                                writer.WriteEndObject();
                            }
                            if (currentColumn != null && valueBuilder != null
                                && startedLocalArrays == 0 && startedLocalObjects == 0)
                            {
                                // The nested value is complete: store its JSON text.
                                output.Set(currentColumn.Name, valueBuilder.ToString());
                                writer = null;
                                valueBuilder = null;
                                currentColumn = null;
                            }
                            // An outermost object has been closed: emit it as a row.
                            if (startedGlobalObjects == 0)
                            {
                                yield return output.AsReadOnly();
                            }
                            break;

                        case JsonToken.PropertyName:
                            if (writer != null)
                            {
                                // Inside a nested value: the name is part of the text.
                                writer.WritePropertyName(reader.Value.ToString());
                            }
                            else
                            {
                                var currentPropertyName = reader.Value.ToString();
                                currentColumn = output.Schema
                                    .FirstOrDefault(s => s.Name == currentPropertyName);
                                if (currentColumn == null)
                                {
                                    // Not a requested column: skip the value entirely.
                                    reader.Skip();
                                }
                            }
                            break;

                        case JsonToken.String:
                        case JsonToken.Boolean:
                        case JsonToken.Bytes:
                        case JsonToken.Date:
                        case JsonToken.Integer:
                        case JsonToken.Float:
                            if (writer != null)
                            {
                                writer.WriteValue(reader.Value);
                            }
                            else if (currentColumn != null)
                            {
                                var typeConverter = TypeDescriptor.GetConverter(currentColumn.Type);
                                if (typeConverter != null && typeConverter.CanConvertFrom(reader.ValueType))
                                {
                                    output.Set(currentColumn.Name, typeConverter.ConvertFrom(reader.Value));
                                }
                                else
                                {
                                    output.Set(currentColumn.Name, reader.Value);
                                }
                                currentColumn = null;
                            }
                            break;
                        case JsonToken.Null:
                            if (writer != null)
                            {
                                writer.WriteNull();
                            }
                            else if (currentColumn != null)
                            {
                                output.Set(currentColumn.Name, currentColumn.DefaultValue);
                                currentColumn = null;
                            }
                            break;

                        case JsonToken.StartConstructor:
                            writer?.WriteStartConstructor(reader.Value.ToString());
                            break;
                        case JsonToken.EndConstructor:
                            writer?.WriteEndConstructor();
                            break;
                        case JsonToken.Comment:
                            writer?.WriteComment(reader.Value.ToString());
                            break;
                        case JsonToken.Raw:
                            writer?.WriteRaw(reader.Value.ToString());
                            break;
                        case JsonToken.None:
                        case JsonToken.Undefined:
                            // ignore
                            break;
                        default:
                            throw new NotImplementedException();
                    }
                }
            }
        }