/// <summary>
/// Merges overlapping [begin, end] intervals per reduce key into disjoint intervals.
/// Requires the reducer to be PRESORTED on begin and READONLY on the reduce key.
/// </summary>
/// <param name="input">Rows for one reduce key, sorted ascending by "begin".</param>
/// <param name="output">Builder row with "begin" and "end" DateTime columns.</param>
/// <returns>One row per merged, disjoint interval.</returns>
public override IEnumerable<IRow> Reduce(IRowset input, IUpdatableRow output)
{
    // CONSISTENCY FIX: the original used an int counter (`i`) purely as a
    // first-row flag; use a bool, matching the sibling implementation.
    var firstRowProcessed = false;
    var begin = DateTime.MaxValue; // dummy value to satisfy definite assignment
    var end = DateTime.MinValue;   // dummy value to satisfy definite assignment

    foreach (var row in input.Rows)
    {
        if (!firstRowProcessed)
        {
            // Seed the current interval from the first row.
            firstRowProcessed = true;
            begin = row.Get<DateTime>("begin");
            end = row.Get<DateTime>("end");
            // If "end" is just a time of day it can precede "begin", meaning it
            // falls on the next day; shift it forward in that case.
            if (end < begin)
            {
                end = end.AddDays(1);
            }
        }
        else
        {
            var b = row.Get<DateTime>("begin");
            var e = row.Get<DateTime>("end");
            // Same next-day fix-up as above.
            if (e < b)
            {
                e = e.AddDays(1);
            }

            if (b <= end)
            {
                // Row overlaps the current interval: extend it if it ends later.
                if (e > end)
                {
                    end = e;
                }
            }
            else
            {
                // Gap found: emit the accumulated interval and start a new one.
                output.Set<DateTime>("begin", begin);
                output.Set<DateTime>("end", end);
                yield return output.AsReadOnly();
                begin = b;
                end = e;
            }
        }
    }

    // Emit the final interval (dummy values if the rowset was empty — behavior
    // preserved from the original; U-SQL reducers normally see >= 1 row per key).
    output.Set<DateTime>("begin", begin);
    output.Set<DateTime>("end", end);
    yield return output.AsReadOnly();
}
/// <summary>
/// Coalesces overlapping [begin, end] intervals for one reduce key into a sequence
/// of disjoint intervals. Requires the reducer to be PRESORTED on begin and
/// READONLY on the reduce key.
/// </summary>
/// <param name="input">Rows for one reduce key, sorted ascending by "begin".</param>
/// <param name="output">Builder row with "begin" and "end" DateTime columns.</param>
/// <returns>One row per merged interval.</returns>
public override IEnumerable<IRow> Reduce(IRowset input, IUpdatableRow output)
{
    var haveInterval = false;
    var intervalBegin = DateTime.MaxValue; // dummy values to satisfy definite assignment
    var intervalEnd = DateTime.MinValue;

    foreach (var row in input.Rows)
    {
        var rowBegin = row.Get<DateTime>("begin");
        var rowEnd = row.Get<DateTime>("end");
        // When "end" carries only a time of day it can precede "begin", which
        // means it falls on the next day; shift it forward accordingly.
        if (rowEnd < rowBegin)
        {
            rowEnd = rowEnd.AddDays(1);
        }

        if (!haveInterval)
        {
            // The first row seeds the current interval.
            haveInterval = true;
            intervalBegin = rowBegin;
            intervalEnd = rowEnd;
        }
        else if (rowBegin <= intervalEnd)
        {
            // Overlap: extend the current interval if this row ends later.
            if (rowEnd > intervalEnd)
            {
                intervalEnd = rowEnd;
            }
        }
        else
        {
            // Disjoint: emit the finished interval and start a new one.
            output.Set<DateTime>("begin", intervalBegin);
            output.Set<DateTime>("end", intervalEnd);
            yield return output.AsReadOnly();
            intervalBegin = rowBegin;
            intervalEnd = rowEnd;
        }
    }

    // Emit the last interval (dummy values when the rowset was empty, as before).
    output.Set<DateTime>("begin", intervalBegin);
    output.Set<DateTime>("end", intervalEnd);
    yield return output.AsReadOnly();
}
/// <summary>
/// Computes the peak number of concurrently open operations for one reduce key:
/// "start" ops increment a counter, any other op decrements it (clamped at zero),
/// and the maximum counter value observed is emitted.
/// </summary>
/// <param name="input">Rows carrying an "op" string column (order matters).</param>
/// <param name="output">Builder row with "cohort" (string) and "max" (int) columns.</param>
/// <returns>Exactly one row with the peak concurrency.</returns>
public override IEnumerable<IRow> Reduce(IRowset input, IUpdatableRow output)
{
    // FIX: removed an unused local that read the "timestamp" column and discarded it.
    int active = 0; // currently open "start" operations
    int max = 0;    // peak value of `active` seen so far

    foreach (var row in input.Rows)
    {
        var op = row.Get<string>("op");
        if (op == "start")
        {
            active++;
        }
        else
        {
            // Any non-"start" op counts as an end; clamp at zero so unmatched
            // ends cannot drive the counter negative.
            active--;
            if (active < 0)
            {
                active = 0;
            }
        }
        max = System.Math.Max(max, active);
    }

    output.Set<string>("cohort", "FOO"); // hard-coded cohort label, preserved as-is
    output.Set<int>("max", max);
    yield return output.AsReadOnly();
}
/// <summary>
/// For a single vehicle registration (the reduce key), emits the vehicle only if its
/// most recent theft record has no recovery date (i.e. it is still stolen).
/// </summary>
/// <param name="input">Theft records for one vehicle registration.</param>
/// <param name="output">Builder row with "VehicleRegistration" and "DateStolen" columns.</param>
/// <returns>Zero or one rows.</returns>
public override IEnumerable<IRow> Reduce(IRowset input, IUpdatableRow output)
{
    // Materialize this key's records into StolenVehicleRecord instances.
    var records = input.Rows
        .Select(r => new StolenVehicleRecord(
            r.Get<string>("VehicleRegistration"),
            r.Get<string>("DateStolen"),
            r.Get<string>("DateRecovered")))
        .ToList();

    // No records at all means this vehicle was never reported stolen.
    if (records.Count == 0)
    {
        yield break;
    }

    // Sort by DateStolen descending (per the record type's comparer) so the most
    // recent theft record comes first.
    records.Sort();
    var latest = records[0];

    // Only a record without a recovery date means the vehicle is still stolen.
    if (latest.DateRecovered == null)
    {
        output.Set<string>("VehicleRegistration", latest.VehicleRegistration);
        output.Set<DateTime>("DateStolen", latest.DateStolen);
        yield return output.AsReadOnly();
    }
}
/// <summary>
/// Splits the input into CRLF-delimited rows, then splits each row by the column
/// delimiter. The second input column is treated as a full name and is split into
/// separate first-name and last-name output columns.
/// </summary>
/// <param name="input">Unstructured input stream.</param>
/// <param name="output">Builder row; all columns are set positionally as strings.</param>
/// <returns>One row per input line.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    foreach (Stream current in input.Split(_encoding.GetBytes("\r\n")))
    {
        using (var streamReader = new StreamReader(current, this._encoding))
        {
            string line = streamReader.ReadToEnd().Trim();
            string[] parts = line.Split(this._col_delim);
            int count = 0;
            foreach (string part in parts)
            {
                if (count == 1)
                {
                    // Second column is "First Last": write two output columns.
                    string[] name = part.Trim().Split(' ');
                    output.Set<string>(count, name[0]);
                    count += 1;
                    // ROBUSTNESS FIX: a single-token name no longer throws
                    // IndexOutOfRangeException; the last name becomes "".
                    output.Set<string>(count, name.Length > 1 ? name[1] : string.Empty);
                }
                else
                {
                    output.Set<string>(count, part);
                }
                count += 1;
            }
        }
        yield return output.AsReadOnly();
    }
}
/// <summary>
/// Memory-pressure probe: repeatedly allocates byte buffers until either all
/// buffers are allocated or an allocation fails, recording GC totals before and
/// after plus the total bytes allocated.
/// </summary>
/// <param name="input">Unused beyond triggering the vertex.</param>
/// <param name="outputrow">Builder row receiving the measurement columns.</param>
/// <returns>Exactly one row of measurements.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow outputrow)
{
    // Record memory state before allocating so the output shows the delta.
    outputrow.Set<long>("GC_TotalMem_Start", GC.GetTotalMemory(true));
    outputrow.Set<long>("MaxUDOMemory", MyLimits.MaxUdoMemory);

    var buffersAllocated = 0;
    var failed = false;
    var lastGcTotal = GC.GetTotalMemory(true);
    try
    {
        while (buffersAllocated < no_buff)
        {
            alloc_mem[buffersAllocated] = new byte[increment];
            alloc_mem[buffersAllocated][0] = 1; // touch the buffer so it is not optimized away
            buffersAllocated++;
            lastGcTotal = GC.GetTotalMemory(true);
        }
    }
    catch (Exception e)
    {
        // An allocation failure is the expected terminal condition of the probe.
        failed = true;
        outputrow.Set<string>("error", e.Message);
    }

    outputrow.Set<long>("GC_TotalMem_End", lastGcTotal);
    outputrow.Set<bool>("failed", failed);
    outputrow.Set<long>("alloc_sz", buffersAllocated * increment);
    yield return outputrow.AsReadOnly();
}
/// <summary>
/// Parses the stream as JSON (Json.NET), expecting a single root object, and emits
/// one row per object selected by <c>rowpath</c>; object fields become columns.
/// </summary>
/// <param name="inputStream">Raw JSON stream.</param>
/// <param name="output">Builder row populated by JObjectToRow.</param>
/// <returns>Zero or more rows; none if the stream is empty or not an object.</returns>
protected virtual IEnumerable<IRow> Extract(Stream inputStream, IUpdatableRow output)
{
    using (var reader = new JsonTextReader(new StreamReader(inputStream)))
    {
        // An empty stream, or one that does not begin with an object, yields nothing.
        if (!reader.Read() || reader.TokenType != JsonToken.StartObject)
        {
            yield break;
        }

        var token = JToken.Load(reader);

        // Each selected object becomes one row.
        foreach (JObject obj in SelectChildren(token, this.rowpath))
        {
            this.JObjectToRow(obj, output);
            yield return output.AsReadOnly();
        }
    }
}
/// <summary>
/// Reads an Avro container from the input and emits one row per Avro record,
/// copying every output-schema column from the record (null when absent).
/// </summary>
/// <param name="input">Unstructured input containing Avro container data.</param>
/// <param name="output">Builder row whose schema names the columns to project.</param>
/// <returns>One row per Avro record.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    var avschema = Avro.Schema.Parse(avroSchema);
    // FIX: removed an unused GenericDatumReader local that was never referenced.
    using (var ms = new MemoryStream())
    {
        // The Avro file reader needs a seekable stream, so buffer the input first.
        CreateSeekableStream(input, ms);
        ms.Position = 0;
        var fileReader = DataFileReader<GenericRecord>.OpenReader(ms, avschema);
        while (fileReader.HasNext())
        {
            var avroRecord = fileReader.Next();
            foreach (var column in output.Schema)
            {
                if (avroRecord[column.Name] != null)
                {
                    output.Set(column.Name, avroRecord[column.Name]);
                }
                else
                {
                    output.Set<object>(column.Name, null);
                }
            }
            // BUG FIX: the original yielded INSIDE the column loop, emitting one
            // partially-populated row per column instead of one row per record.
            yield return output.AsReadOnly();
        }
    }
}
/// <summary>
/// Streams JSON from the input with Json.NET and emits one row per object matched
/// by <c>rowpath</c>; object fields become columns via JObjectToRow.
/// </summary>
/// <param name="input">Unstructured input whose BaseStream contains JSON.</param>
/// <param name="output">Builder row populated by JObjectToRow.</param>
/// <returns>Zero or more rows.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    using (var jsonReader = new JsonTextReader(new StreamReader(input.BaseStream)))
    {
        // Walk the token stream; each StartObject is a candidate row source.
        while (jsonReader.Read())
        {
            if (jsonReader.TokenType != JsonToken.StartObject)
            {
                continue;
            }

            var token = JToken.Load(jsonReader);
            foreach (JObject obj in SelectChildren(token, this.rowpath))
            {
                this.JObjectToRow(obj, output);
                yield return output.AsReadOnly();
            }
        }
    }
}
/// <summary>
/// Copies DepID and DepName through unchanged and appends the processor's
/// hello-world string (<c>hw</c>) as a third column.
/// </summary>
/// <param name="input">Row with "DepID" (int) and "DepName" (string) columns.</param>
/// <param name="output">Builder row with the same columns plus "HelloWorld".</param>
/// <returns>The populated read-only row.</returns>
public override IRow Process(IRow input, IUpdatableRow output)
{
    var depId = input.Get<int>("DepID");
    var depName = input.Get<string>("DepName");

    output.Set<int>("DepID", depId);
    output.Set<string>("DepName", depName);
    output.Set<string>("HelloWorld", hw);
    return output.AsReadOnly();
}
/// <summary>
/// Emits the extent's data as fixed-size binary blocks: one row per block with its
/// global stream position, actual size, and bytes. Reads never cross into the 4MB
/// overscan area that follows the extent (bounded by input.Length).
/// </summary>
/// <param name="input">Unstructured input (Start/Length describe this extent).</param>
/// <param name="outputrow">Builder row: col 0 = position, col 1 = size, col 2 = bytes.</param>
/// <returns>One row per block read.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow outputrow)
{
    long length = input.Length;
    long start = input.Start;
    Stream baseStream = input.BaseStream;
    byte[] readBuffer = new byte[this._blocksz];

    while (length > 0)
    {
        var globalPosition = start + baseStream.Position;
        // Read a full block or only the final remainder — never past the extent.
        // Cast is safe: Min never exceeds (int)_blocksz.
        var readsize = (int)Math.Min(this._blocksz, length);
        Array.Resize<byte>(ref readBuffer, readsize); // ensure buffer matches request size
        var bytesRead = baseStream.Read(readBuffer, 0, readsize);
        if (bytesRead <= 0 || bytesRead > readsize)
        {
            // FIX: throw the specific IOException instead of the base Exception type
            // (callers catching Exception still catch this).
            throw new IOException(string.Format(
                "Unexpected amount of {2} bytes was read starting at global stream position {1}. Expected to read {0} bytes.",
                readsize, globalPosition, bytesRead));
        }
        Array.Resize<byte>(ref readBuffer, bytesRead); // trim to bytes actually read
        length -= bytesRead;

        outputrow.Set<long>(0, globalPosition); // global position of the block
        outputrow.Set<long>(1, bytesRead);      // block size
        outputrow.Set<byte[]>(2, readBuffer);   // block data
        yield return outputrow.AsReadOnly();
    }
}
/// <summary>
/// Parses ADLA diagnostic log records from the input stream and maps each record's
/// fields (plus its nested job properties) to output columns.
/// </summary>
/// <param name="input">Unstructured input whose BaseStream holds the log data.</param>
/// <param name="output_row">Builder row with the diagnostic columns.</param>
/// <returns>One row per log record.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output_row)
{
    var s = new System.IO.StreamReader(input.BaseStream);
    var rows = AzureDiagnostics.AzureDiagnosticsUtil.GetLogADLARecords(s);
    foreach (var row in rows)
    {
        output_row.Set<System.DateTime>("Time", row.Time.DateTime);
        output_row.Set<string>("ResourceId", row.ResourceId);
        output_row.Set<string>("Category", row.Category);
        output_row.Set<string>("OperationName", row.OperationName);
        output_row.Set<string>("ResultType", row.ResultType);
        // BUG FIX: "ResultSignature" was populated from row.ResultType (copy/paste
        // error). NOTE(review): assumes the record type exposes a ResultSignature
        // member — confirm against AzureDiagnosticsUtil's record definition.
        output_row.Set<string>("ResultSignature", row.ResultSignature);
        output_row.Set<string>("CorrelationId", row.CorrelationId);
        output_row.Set<string>("Identity", row.Identity);

        var props = row.Properties;
        output_row.Set<string>("ADLA_JobId", props.JobId);
        output_row.Set<string>("ADLA_JobName", props.JobName);
        output_row.Set<string>("ADLA_JobRuntimeName", props.JobRuntimeName);
        output_row.Set<System.DateTime?>("ADLA_StartTime", props.StartTime.ToDateTimeNullable());
        output_row.Set<System.DateTime?>("ADLA_SubmitTime", props.SubmitTime.ToDateTimeNullable());
        output_row.Set<System.DateTime?>("ADLA_EndTime", props.EndTime.ToDateTimeNullable());
        yield return output_row.AsReadOnly();
    }
}
/// <summary>
/// Parses newline-delimited tweet JSON (one tweet object per line) and maps tweet
/// fields, hashtags, and user mentions to output columns.
/// </summary>
/// <param name="input">Unstructured input; each line is one JSON tweet.</param>
/// <param name="output">Builder row with tweet columns and two SqlArray columns.</param>
/// <returns>One row per tweet line.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    using (var streamReader = new StreamReader(input.BaseStream))
    {
        // Assumes each line is an independent JSON object.
        var recordLine = streamReader.ReadLine();
        while (!string.IsNullOrEmpty(recordLine))
        {
            Tweet tweet = Newtonsoft.Json.JsonConvert.DeserializeObject<Tweet>(recordLine);
            output.Set<string>("tweetText", tweet.text);
            output.Set<string>("tweetId", tweet.id_str);
            output.Set<string>("timestampMs", tweet.timestamp_ms);
            // BUG FIX: "language" was populated from tweet.id_str (copy/paste error).
            // NOTE(review): assumes the Tweet class exposes the Twitter API's "lang"
            // field — confirm against the Tweet class definition before merging.
            output.Set<string>("language", tweet.lang);

            SqlArray<string> hashtags = new SqlArray<string>(tweet.entities.hashtags.Select(t => t.text));
            SqlArray<string> usermentions = new SqlArray<string>(tweet.entities.user_mentions.Select(t => t.screen_name));
            output.Set<SqlArray<string>>("hashTags", hashtags);
            output.Set<SqlArray<string>>("userMentions", usermentions);

            yield return output.AsReadOnly();
            recordLine = streamReader.ReadLine();
        }
    }
}
/// <summary>
/// Reads partition metadata from the first (tab-separated) line of the input, then
/// streams rows for that partition from the data-source provider. A fixed 10s sleep
/// at the end deliberately simulates read latency.
/// </summary>
/// <param name="input">Input whose first line is "id\tfrom\tto".</param>
/// <param name="output">Builder row with extractor/partition/value columns.</param>
/// <returns>One row per provider row.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    string partitionId;
    string rangeFrom;
    string rangeTo;

    // 1. Collect partition information from the first line.
    using (var reader = new StreamReader(input.BaseStream))
    {
        var parts = reader.ReadLine().Split('\t');
        partitionId = parts[0];
        rangeFrom = parts[1];
        rangeTo = parts[2];
    }

    // 2. Read the data source using that partition information.
    using (var provider = ProviderFactory.CreateInstance(_cnxString, rangeFrom, rangeTo))
    {
        foreach (var row in provider.Rows)
        {
            output.Set("extractor_id", _id);
            output.Set("partition_id", partitionId);
            output.Set("partition", row[0]);
            output.Set("value1", row[1]);
            output.Set("value2", row[2]);
            yield return output.AsReadOnly();
        }
    }

    // Add some latency to the data read (intentional).
    Thread.Sleep(10000);
}
/// <summary>
/// Reads a generic Avro container from the input and emits one row per object,
/// projecting every column named in the output schema from the Avro record.
/// </summary>
/// <param name="input">Unstructured input; empty splits produce no rows.</param>
/// <param name="output">Builder row whose schema names the columns to project.</param>
/// <returns>Zero or more rows.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    // Nothing to do for an empty input split.
    if (input.Length == 0)
    {
        yield break;
    }

    // FIX: removed an unused AvroSerializer.CreateGeneric(avroSchema) local that
    // was created but never referenced.
    using (var genericReader = AvroContainer.CreateGenericReader(input.BaseStream))
    using (var reader = new SequentialReader<dynamic>(genericReader))
    {
        foreach (var obj in reader.Objects)
        {
            foreach (var column in output.Schema)
            {
                output.Set(column.Name, obj[column.Name]);
            }
            yield return output.AsReadOnly();
        }
    }
}
/// <summary>
/// Inner-joins the right rowset (employee_id, employee_name) to the left rowset
/// (employee_id, department_name) on employee_id.
/// </summary>
/// <param name="left">Department rows.</param>
/// <param name="right">Employee rows (buffered in memory).</param>
/// <param name="output">Builder row with the three joined columns.</param>
/// <returns>One row per matching (left, right) pair.</returns>
public override IEnumerable<IRow> Combine(IRowset left, IRowset right, IUpdatableRow output)
{
    // Buffer the right side, then index it by employee_id.
    // PERF FIX: the original probed the buffer with a nested loop, O(n * m);
    // ToLookup makes the join O(n + m) while preserving insertion order within
    // each key, so duplicate ids still produce the same output rows in order.
    var buffer = new List<Tuple<int, string>>();
    foreach (var row2 in right.Rows)
    {
        buffer.Add(Tuple.Create(row2.Get<int>("employee_id"), row2.Get<string>("employee_name")));
    }
    var employeesById = buffer.ToLookup(t => t.Item1);

    foreach (var row in left.Rows)
    {
        var departmentName = row.Get<string>("department_name");
        foreach (var tuple in employeesById[row.Get<int>("employee_id")])
        {
            output.Set("employee_id", tuple.Item1);
            output.Set("employee_name", tuple.Item2);
            output.Set("department_name", departmentName);
            yield return output.AsReadOnly();
        }
    }
}
/// <summary>
/// Flattens all rows for one reduce key into a single wide output row: scalar
/// columns are taken from the first row, and the remaining columns are filled by
/// index positions returned by the eventType / logFeature / resType lookup helpers.
/// </summary>
/// <param name="input">Rows for one reduce key; all source columns are strings.</param>
/// <param name="output">Builder row; columns named by <c>colNames</c> from lf_1 onward.</param>
/// <returns>Exactly one wide row per reduce key.</returns>
public override IEnumerable<IRow> Reduce(IRowset input, IUpdatableRow output)
{
    int count = 0;
    int[] colValues = new int[colNames.Length];
    foreach (IRow row in input.Rows)
    {
        if (count == 0)
        {
            // First row only: capture the per-key scalar columns once.
            colValues[(int)ColNames.id] = int.Parse(row.Get<string>("id").ToString());
            colValues[(int)ColNames.loc] = location.GetValue(row.Get<string>("loc").ToString());
            colValues[(int)ColNames.fs] = int.Parse(row.Get<string>("fs").ToString());
            colValues[(int)ColNames.tr] = int.Parse(row.Get<string>("tr").ToString());
            colValues[(int)ColNames.st] = sevType.GetValue(row.Get<string>("st").ToString());
        }
        // Every row: set a 1 at the index the eventType lookup maps "et" to,
        // store the "vol" value at the logFeature index for "lf", and set a 1
        // at the resType index for "rt". NOTE(review): later rows overwrite
        // (not accumulate) earlier values at the same index — presumably
        // intentional one-hot/last-wins semantics; confirm.
        colValues[eventType.GetValue(row.Get<string>("et").ToString())] = 1;
        int vol = int.Parse(row.Get<string>("vol").ToString());
        colValues[logFeature.GetValue(row.Get<string>("lf").ToString())] = vol;
        colValues[resType.GetValue(row.Get<string>("rt").ToString())] = 1;
        count++;
    }
    // Write output: every column from lf_1 onward, serialized as a string.
    for (int n = (int)ColNames.lf_1; n < colValues.Length; n++)
    {
        string colName = colNames[n];
        output.Set(colName, colValues[n].ToString());
    }
    yield return output.AsReadOnly();
}
/// <summary>
/// Reads ORC stripes from a seekable stream and emits one row per record,
/// projecting only the columns present in both the file and the output schema.
/// </summary>
/// <param name="output">Builder row whose schema drives the column projection.</param>
/// <param name="input">Seekable stream containing the ORC file.</param>
/// <returns>One row per record across all stripes.</returns>
/// <exception cref="ArgumentOutOfRangeException">If the stream is not seekable.</exception>
private static IEnumerable<IRow> ExtractInternal(IUpdatableRow output, Stream input)
{
    if (!input.CanSeek)
    {
        throw new ArgumentOutOfRangeException(nameof(input), "Input stream must be seekable for ORC reader. Enable the hack to copy to a Memory Stream or to a non-Persisted Memory Mapped file. The hack is the default setting.");
    }
    using (var fileTail = new FileTail(input))
    {
        var stripes = fileTail.GetStripeCollection();
        // Intersect the requested output schema with the columns the file contains.
        var columnsToRead = GetIntersectedColumnMetadata(output.Schema, fileTail).ToArray();
        foreach (var stripe in stripes)
        {
            var extractedColumns = ReadStripe(stripe, columnsToRead).ToArray();
            for (int i = 0; i < (int)stripe.NumRows; i++)
            {
                foreach (var col in extractedColumns)
                {
                    var outputColumn = col.Item1.USqlProjectionColumnIndex;
                    // Fall back to the projection column's default value when the
                    // stripe carries no data for this column.
                    var value = col.Item2?.GetValue(i) ?? col.Item1.USqlProjectionColumn.DefaultValue;
                    output.Set(outputColumn, value);
                }
                yield return (output.AsReadOnly());
            }
        }
    }
}
/// <summary>Extract is called at least once per vertex. DOM-loads the whole XML
/// document and emits one row per node matched by <c>rowPath</c>.</summary>
/// <param name="input">Wrapper for a Stream containing the XML document.</param>
/// <param name="output">IUpdatableRow uses a mutable builder pattern --
/// set individual fields with IUpdatableRow.Set, then build an immutable IRow by
/// calling IUpdatableRow.AsReadOnly. All columns must be of type string.</param>
/// <returns>A sequence of IRows, one per matched XML node.</returns>
/// <exception cref="ArgumentException">If any output column is not a string.</exception>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    // Make sure that all requested columns are of type string.
    IColumn column = output.Schema.FirstOrDefault(col => col.Type != typeof(string));
    if (column != null)
    {
        throw new ArgumentException(string.Format("Column '{0}' must be of type 'string', not '{1}'", column.Name, column.Type.Name));
    }
    XmlDocument xmlDocument = new XmlDocument();
    xmlDocument.Load(input.BaseStream);
    foreach (XmlNode xmlNode in xmlDocument.DocumentElement.SelectNodes(this.rowPath))
    {
        // IUpdatableRow implements a builder pattern to save memory allocations,
        // so call output.Set in a loop.
        foreach (IColumn col in output.Schema)
        {
            // columnPaths maps an XPath (Key) to an output column name (Value).
            // When no mapping matches, FirstOrDefault yields the default
            // KeyValuePair (null Key/Value) and the column name itself is used
            // as the XPath via the ?? fallbacks below.
            var explicitColumnMapping = this.columnPaths.FirstOrDefault(columnPath => columnPath.Value == col.Name);
            XmlNode xml = xmlNode.SelectSingleNode(explicitColumnMapping.Key ?? col.Name);
            // Missing nodes produce null column values.
            output.Set(explicitColumnMapping.Value ?? col.Name, xml == null ? null : xml.InnerXml);
        }
        // then call output.AsReadOnly to build an immutable IRow.
        yield return (output.AsReadOnly());
    }
}
/// <summary>
/// Copies the user address fields through unchanged, translating the Country value
/// via the CountryTranslation map when a translation exists.
/// </summary>
/// <param name="input">Row with the eight user address columns.</param>
/// <param name="output">Builder row; columns are written positionally (0-7).</param>
/// <returns>The populated read-only row.</returns>
public override IRow Process(IRow input, IUpdatableRow output)
{
    string userId = input.Get<string>("UserID");
    string name = input.Get<string>("Name");
    string address = input.Get<string>("Address");
    string city = input.Get<string>("City");
    string state = input.Get<string>("State");
    string postalCode = input.Get<string>("PostalCode");
    string country = input.Get<string>("Country");
    string phone = input.Get<string>("Phone");

    // FIX: single TryGetValue lookup instead of Keys.Contains + indexer
    // (double dictionary lookup in the original).
    string translated;
    if (CountryTranslation.TryGetValue(country, out translated))
    {
        country = translated;
    }

    output.Set<string>(0, userId);
    output.Set<string>(1, name);
    output.Set<string>(2, address);
    output.Set<string>(3, city);
    output.Set<string>(4, state);
    output.Set<string>(5, postalCode);
    output.Set<string>(6, country);
    output.Set<string>(7, phone);
    return output.AsReadOnly();
}
/// <summary>
/// Splits a [start, end] timestamp range into one row per calendar day. The first
/// day starts at the actual start time and the last day ends at the actual end
/// time; intermediate days span midnight to 23:59:59.
/// </summary>
/// <param name="input">Row with DateTime columns named by startColumn/endColumn.</param>
/// <param name="output">Builder row with "startTimeOfDay" and "endTimeOfDay".</param>
/// <returns>One row per day touched by the range.</returns>
public override IEnumerable<IRow> Apply(IRow input, IUpdatableRow output)
{
    var startTime = input.Get<DateTime>(startColumn);
    var endTime = input.Get<DateTime>(endColumn);
    var endOfDayOffset = new TimeSpan(23, 59, 59);

    for (var day = startTime.Date; day <= endTime.Date; day = day.AddDays(1))
    {
        // First day starts at the actual start time; later days start at midnight.
        var dayStart = (day == startTime.Date) ? startTime : day;
        // Last day ends at the actual end time; earlier days end at 23:59:59.
        var dayEnd = (day == endTime.Date) ? endTime : day + endOfDayOffset;

        output.Set<DateTime>("startTimeOfDay", dayStart);
        output.Set<DateTime>("endTimeOfDay", dayEnd);
        yield return output.AsReadOnly();
    }
}
/// <summary>
/// Assigns a category to each row: the first entry in <c>categoryMapper</c> whose
/// prefix list matches the row's Tag wins; otherwise the category is "other".
/// </summary>
/// <param name="input">Row with a "Tag" string column.</param>
/// <param name="output">Builder row whose "Category" column is set.</param>
/// <returns>The populated read-only row.</returns>
public override IRow Process(IRow input, IUpdatableRow output)
{
    var tag = input.Get<string>("Tag");
    // FIX: removed a dead read of the input "Category" column — its value was
    // immediately overwritten with "other" and never used.
    var category = "other";

    // First mapper entry with a matching prefix wins (same order as the original
    // nested loops with the `found` flag).
    foreach (var cat in categoryMapper)
    {
        if (cat.Value.Any(prefix => tag.StartsWith(prefix)))
        {
            category = cat.Key;
            break;
        }
    }

    output.Set("Category", category);
    return output.AsReadOnly();
}
/// <summary>
/// Reads newline-delimited JSON objects and projects each one onto the output
/// schema. Only string and DateTime columns are populated; other column types are
/// left untouched (preserved behavior).
/// </summary>
/// <param name="input">UTF-8 input; one JSON object per line.</param>
/// <param name="output">Builder row whose schema names the fields to extract.</param>
/// <returns>One row per JSON line.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    string line;
    using (var streamReader = new StreamReader(input.BaseStream, Encoding.UTF8))
    {
        while ((line = streamReader.ReadLine()) != null)
        {
            var jObject = JsonConvert.DeserializeObject<JObject>(line);
            foreach (var column in output.Schema)
            {
                // ROBUSTNESS FIX: skip fields missing from this record instead of
                // throwing NullReferenceException on jObject[name].ToString().
                var token = jObject[column.Name];
                if (token == null)
                {
                    continue;
                }
                if (column.Type == typeof(string))
                {
                    output.Set(column.Name, token.ToString());
                }
                if (column.Type == typeof(DateTime))
                {
                    output.Set(column.Name, DateTime.Parse(token.ToString()));
                }
            }
            yield return output.AsReadOnly();
        }
    }
}
/// <summary>Emits the "name" column reversed into the "reversed" column.</summary>
/// <param name="input">Row with a "name" string column.</param>
/// <param name="output">Builder row with a "reversed" string column.</param>
/// <returns>The populated read-only row.</returns>
public override IRow Process(IRow input, IUpdatableRow output)
{
    var name = input.Get<string>("name");
    var reversedName = Reverse(name);
    output.Set<string>("reversed", reversedName);
    return output.AsReadOnly();
}
/// <summary>
/// Splits the input into rows by the configured row delimiter, parses each row as
/// an access-log line, and maps the parsed elements to twelve positional columns.
/// </summary>
/// <param name="input">Unstructured input.</param>
/// <param name="output">Builder row; columns 0-11 are set as strings.</param>
/// <returns>One row per input line.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    foreach (Stream current in input.Split(_row_delim))
    {
        using (var streamReader = new StreamReader(current, this._encoding))
        {
            var line = streamReader.ReadToEnd().Trim();
            // FIX: removed a dead `new LogRowElements()` allocation that was
            // immediately overwritten by ParseElements' return value.
            var splitter = new LogRowParser();
            LogRowElements parts = splitter.ParseElements(line);

            // Map the parsed log elements onto the output columns by position.
            output.Set<string>(0, parts.IP);
            output.Set<string>(1, parts.Identity);
            output.Set<string>(2, parts.UserId);
            output.Set<string>(3, parts.Timestamp);
            output.Set<string>(4, parts.Offset);
            output.Set<string>(5, parts.RequestMessage);
            output.Set<string>(6, parts.StatusCode);
            output.Set<string>(7, parts.Size);
            output.Set<string>(8, parts.Referer);
            output.Set<string>(9, parts.URL);
            output.Set<string>(10, parts.UserAgent);
            output.Set<string>(11, parts.Forwarded);
            yield return output.AsReadOnly();
        }
    }
}
/// <summary>
/// Reverse-geocodes a latitude/longitude pair into a country name and (if
/// applicable) a US state name; emits the empty string when no match is found.
/// </summary>
/// <param name="input">Row with double columns named by latColumn/lonColumn.</param>
/// <param name="output">Builder row with "country" and "USstates" columns.</param>
/// <returns>The populated read-only row.</returns>
public override IRow Process(IRow input, IUpdatableRow output)
{
    var latitude = input.Get<double>(latColumn);
    var longitude = input.Get<double>(lonColumn);
    var location = new GeoLocation { Longitude = longitude, Latitude = latitude };

    var country = _service.FindCountry(location);
    var usState = _service.FindUsaState(location);

    // Fall back to "" when the lookup fails or the result has no name.
    output.Set<string>("country", (country != null && country.Name != null) ? country.Name : "");
    output.Set<string>("USstates", (usState != null && usState.Name != null) ? usState.Name : "");
    return output.AsReadOnly();
}
/// https://docs.microsoft.com/en-us/azure/data-lake-analytics/data-lake-analytics-u-sql-programmability-guide#use-user-defined-extractors
/// <summary>Extract is called at least once per vertex. Streams the XML and emits
/// one row per element named <c>elementName</c>, each row holding the element's
/// outer XML with newlines replaced by spaces.</summary>
/// <param name="input">Wrapper for a Stream containing the XML.</param>
/// <param name="output">IUpdatableRow uses a mutable builder pattern --
/// set individual fields with IUpdatableRow.Set,
/// then build an immutable IRow by calling IUpdatableRow.AsReadOnly.</param>
/// <returns>A sequence of IRows, one per matched element.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    // Use XmlReader for streaming the XML to keep memory usage to a minimum.
    using (XmlReader reader = XmlReader.Create(input.BaseStream))
    {
        reader.MoveToContent();
        // Forward the reader to the next available element with the target name.
        while (reader.ReadToFollowing(this.elementName))
        {
            // Decouple from the reader position with a new subtree reader; this
            // prevents reader.ReadToFollowing() from skipping rows, as the outer
            // reader is not forwarded by ReadOuterXml().
            using (XmlReader subtreeReader = reader.ReadSubtree())
            {
                subtreeReader.MoveToContent();
                // Replace CRLF / CR / LF characters by spaces within the XML to
                // ensure the string fits in one row.
                output.Set<string>(0, XElement.Parse(subtreeReader.ReadOuterXml()).
                                      ToString(SaveOptions.DisableFormatting).
                                      Replace("\r\n", " ").Replace('\n', ' ').Replace('\r', ' '));
                // then call output.AsReadOnly to build an immutable IRow.
                yield return (output.AsReadOnly());
            }
        }
    }
}
/// <summary>
/// Extracts one GitHub repository JSON object per row-delimited chunk and maps its
/// fields to the output columns. Nullable wrappers are used wherever a nested
/// object (Permissions, Parent, Source, License, Owner) may be absent, and all
/// DateTimeOffset values are normalized to UTC DateTime.
/// </summary>
/// <param name="input">Unstructured input split on <c>_rowDelim</c>.</param>
/// <param name="output">Builder row with the repository columns.</param>
/// <returns>One row per input split.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    string line = string.Empty;
    foreach (Stream current in input.Split(_rowDelim))
    {
        using (StreamReader streamReader = new StreamReader(current, _encoding))
        {
            line = streamReader.ReadToEnd().Trim();
            if (!string.IsNullOrEmpty(line))
            {
                GitRepository repo = JsonConvert.DeserializeObject<GitRepository>(line);
                output.Set("openIssuesCount", repo.OpenIssuesCount);
                // Nullable dates: absent nested values map to (DateTime?)null.
                output.Set("pushedAt", repo.PushedAt == null ? (DateTime?)null : repo.PushedAt.Value.UtcDateTime);
                output.Set("createdAt", repo.CreatedAt.UtcDateTime);
                output.Set("updatedAt", repo.UpdatedAt.UtcDateTime);
                output.Set("permissions_admin", repo.Permissions == null ? (bool?)null : repo.Permissions.Admin);
                output.Set("permissions_push", repo.Permissions == null ? (bool?)null : repo.Permissions.Push);
                output.Set("permissions_pull", repo.Permissions == null ? (bool?)null : repo.Permissions.Pull);
                output.Set("parentRepoId", repo.Parent == null ? (long?)null : repo.Parent.Id);
                output.Set("sourceRepoId", repo.Source == null ? (long?)null : repo.Source.Id);
                output.Set("hasIssues", repo.HasIssues);
                output.Set("defaultBranch", repo.DefaultBranch);
                output.Set("hasWiki", repo.HasWiki);
                output.Set("hasDownload", repo.HasDownloads);
                output.Set("allowRebaseMerge", repo.AllowRebaseMerge);
                output.Set("allowSquashMerge", repo.AllowSquashMerge);
                output.Set("allowMergeCommit", repo.AllowMergeCommit);
                output.Set("hasPages", repo.HasPages);
                // License metadata: all null when no license object is present.
                output.Set("licenseMetadata_Key", repo.License == null ? null : repo.License.Key);
                output.Set("licenseMetadata_Name", repo.License == null ? null : repo.License.Name);
                output.Set("licenseMetadata_SpdxId", repo.License == null ? null : repo.License.SpdxId);
                output.Set("licenseMetadata_Url", repo.License == null ? null : repo.License.Url);
                output.Set("licenseMetadata_Featured", repo.License == null ? (bool?)null : repo.License.Featured);
                output.Set("stargazersCount", repo.StargazersCount);
                output.Set("forksCount", repo.ForksCount);
                output.Set("fork", repo.Fork);
                output.Set("url", repo.Url);
                output.Set("htmlUrl", repo.HtmlUrl);
                output.Set("cloneUrl", repo.CloneUrl);
                output.Set("gitUrl", repo.GitUrl);
                output.Set("sshUrl", repo.SshUrl);
                output.Set("svnUrl", repo.SvnUrl);
                output.Set("mirrorUrl", repo.MirrorUrl);
                output.Set("id", repo.Id);
                output.Set("ownerId", repo.Owner == null ? (int?)null : repo.Owner.Id);
                output.Set("name", repo.Name);
                output.Set("fullName", repo.FullName);
                output.Set("description", repo.Description);
                output.Set("homepage", repo.Homepage);
                output.Set("language", repo.Language);
                output.Set("private", repo.Private);
                output.Set("subscribersCount", repo.SubscribersCount);
                output.Set("size", repo.Size);
            }
        }
        // NOTE(review): the row is yielded once per split, OUTSIDE the
        // string.IsNullOrEmpty guard — an empty chunk re-emits the previous
        // row's values. Possibly a latent bug; confirm intent before changing.
        yield return (output.AsReadOnly());
    }
}
/// <summary>
/// Extracts one GitHub commit JSON object per row-delimited chunk and maps its
/// fields to the output columns. Deeply nested optional objects (Commit, Author,
/// Committer, Stats, …) are guarded with chained null-conditional ternaries, and
/// complex sub-objects are re-serialized to JSON strings.
/// </summary>
/// <param name="input">Unstructured input split on <c>_rowDelim</c>.</param>
/// <param name="output">Builder row with the commit columns.</param>
/// <returns>One row per input split.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    string line = string.Empty;
    foreach (Stream current in input.Split(_rowDelim))
    {
        using (StreamReader streamReader = new StreamReader(current, _encoding))
        {
            line = streamReader.ReadToEnd().Trim();
            if (!string.IsNullOrEmpty(line))
            {
                GithubCommits commit = JsonConvert.DeserializeObject<GithubCommits>(line);
                output.Set("authorId", commit.Author == null ? (int?)null : commit.Author.Id);
                output.Set("commentsUrl", commit.CommentsUrl);
                output.Set("commit_AuthorMessage", commit.Commit == null ? null : commit.Commit.Message);
                output.Set("commit_AuthorEmail", commit.Commit == null ? null : commit.Commit.Author == null ? null : commit.Commit.Author.Email);
                // DateTimeOffset values are normalized to UTC DateTime.
                output.Set("commit_AuthorDate", commit.Commit == null ? (DateTime?)null : commit.Commit.Author == null ? (DateTime?)null : commit.Commit.Author.Date.UtcDateTime);
                output.Set("commit_CommitterEmail", commit.Commit == null ? null : commit.Commit.Committer == null ? null : commit.Commit.Committer.Email);
                output.Set("commit_CommitterDate", commit.Commit == null ? (DateTime?)null : commit.Commit.Committer == null ? (DateTime?)null : commit.Commit.Committer.Date.UtcDateTime);
                // Sub-objects stored as serialized JSON strings.
                output.Set("commit_Tree", commit.Commit == null ? null : commit.Commit.Tree == null ? null : JsonConvert.SerializeObject(commit.Commit.Tree));
                output.Set("commit_Parents", commit.Commit == null ? null : commit.Commit.Parents == null ? null : JsonConvert.SerializeObject(commit.Commit.Parents));
                output.Set("commit_CommentCount", commit.Commit == null ? (int?)null : commit.Commit.CommentCount);
                output.Set("commit_Verification", commit.Commit == null ? null : commit.Commit.Verification == null ? null : JsonConvert.SerializeObject(commit.Commit.Verification));
                output.Set("committerId", commit.Committer == null ? (int?)null : commit.Committer.Id);
                output.Set("htmlUrl", commit.HtmlUrl);
                output.Set("stats_Additions", commit.Stats == null ? (int?)null : commit.Stats.Additions);
                output.Set("stats_Deletions", commit.Stats == null ? (int?)null : commit.Stats.Deletions);
                output.Set("stats_Total", commit.Stats == null ? (int?)null : commit.Stats.Total);
                output.Set("parents", commit.Parents == null ? null : JsonConvert.SerializeObject(commit.Parents));
                //output.Set("files", commit.Files == null ? null : Encoding.UTF8.GetBytes(JsonConvert.SerializeObject(commit.Files)));
                output.Set("url", commit.Url);
                output.Set("label", commit.Label);
                output.Set("ref", commit.Ref);
                output.Set("sha", commit.Sha);
                output.Set("userId", commit.User == null ? (int?)null : commit.User.Id);
                output.Set("repositoryId", commit.Repository == null ? (long?)null : commit.Repository.Id);
                // Project the file list down to a slim record type before
                // serializing, dropping the bulky patch/content fields.
                List<GitProcessedFiles> processedFiles = new List<GitProcessedFiles>();
                if (commit.Files != null)
                {
                    foreach (var f in commit.Files)
                    {
                        processedFiles.Add(new GitProcessedFiles() { Status = f.Status, RawUrl = f.RawUrl, PreviousFileName = f.PreviousFileName });
                    }
                    output.Set("files", JsonConvert.SerializeObject(processedFiles));
                }
                else
                {
                    output.Set("files", (string)null);
                }
            }
        }
        // NOTE(review): yielded once per split, OUTSIDE the empty-line guard —
        // an empty chunk re-emits the previous row's values; confirm intent.
        yield return (output.AsReadOnly());
    }
}
/// <summary>
/// Sessionizes presorted (begin, end, value) rows: rows whose gap from the previous
/// row's end is at most _maxDuration seconds are summed into one session row.
/// Requires the reducer to be PRESORTED on begin and READONLY on the reduce key.
/// </summary>
/// <param name="input">Rows for one reduce key, sorted ascending by begin.</param>
/// <param name="output">Builder row with begin/end/value columns.</param>
/// <returns>One row per session.</returns>
public override IEnumerable<IRow> Reduce(IRowset input, IUpdatableRow output)
{
    var started = false;
    var sessionBegin = DateTime.MinValue;
    var sessionEnd = DateTime.MinValue;
    var sessionValue = 0.0;

    foreach (var row in input.Rows)
    {
        var b = row.Get<DateTime>(BeginColName);
        var e = row.Get<DateTime>(EndColName);
        var v = row.Get<double>(ValueColName);

        if (!started)
        {
            // First row seeds the session.
            started = true;
            sessionBegin = b;
            sessionValue = v;
        }
        else if ((b - sessionEnd).TotalSeconds <= _maxDuration)
        {
            // Within the allowed gap: fold this row's value into the session.
            sessionValue += v;
        }
        else
        {
            // Gap too large: emit the finished session and start a new one.
            output.Set<double>(ValueColName, sessionValue);
            output.Set<DateTime>(BeginColName, sessionBegin);
            output.Set<DateTime>(EndColName, sessionEnd);
            yield return output.AsReadOnly();
            sessionValue = v;
            sessionBegin = b;
        }
        // The session end always advances to the current row's end, matching the
        // original (even when a row's end precedes the previous end).
        sessionEnd = e;
    }

    // Emit the final session (MinValue dummies if the rowset was empty, as before).
    output.Set<DateTime>(BeginColName, sessionBegin);
    output.Set<DateTime>(EndColName, sessionEnd);
    output.Set<double>(ValueColName, sessionValue);
    yield return output.AsReadOnly();
}
/// <summary>Generates one row per integer from Start to End, inclusive.</summary>
/// <param name="input">Unused; each applied row produces the same range.</param>
/// <param name="output">Builder row with an int "Value" column.</param>
/// <returns>End - Start + 1 rows (none when Start &gt; End).</returns>
public override IEnumerable<IRow> Apply(IRow input, IUpdatableRow output)
{
    for (var value = this.Start; value <= this.End; value++)
    {
        output.Set<int>("Value", value);
        yield return output.AsReadOnly();
    }
}
/// <summary>Apply is called at least once per instance</summary>
/// <param name="input">A SQLIP row</param>
/// <param name="output">A SQLIP updatable row.</param>
/// <returns>IEnumerable of IRow, one IRow per SQLIP row.</returns>
/// <remarks>Because applier constructor arguments cannot depend on
/// column references, the name of the column to parse is given as a string. Then
/// the actual column value is obtained by calling IRow.Get. The rest of the code
/// is the same as XmlDomExtractor.</remarks>
public override IEnumerable <IRow> Apply(IRow input, IUpdatableRow output)
{
    // Make sure that all requested columns are of type string
    IColumn column = output.Schema.FirstOrDefault(col => col.Type != typeof(string));
    if (column != null)
    {
        throw new ArgumentException(string.Format("Column '{0}' must be of type 'string', not '{1}'", column.Name, column.Type.Name));
    }

    // Parse the XML held in the configured input column into a DOM.
    XmlDocument xmlDocument = new XmlDocument();
    xmlDocument.LoadXml(input.Get <string>(this.xmlColumnName));
    XmlNamespaceManager nsmanager = new XmlNamespaceManager(xmlDocument.NameTable);

    // If namespace declarations have been provided, add them to the namespace manager
    // so both rowPath and the per-column XPaths can use the declared prefixes.
    if (this.namespaceDecls != null)
    {
        foreach (var namespaceDecl in this.namespaceDecls)
        {
            nsmanager.AddNamespace(namespaceDecl.Key, namespaceDecl.Value);
        }
    }

    // Each node matched by rowPath becomes one output row.
    foreach (XmlNode xmlNode in xmlDocument.DocumentElement.SelectNodes(this.rowPath, nsmanager))
    {
        // IUpdatableRow implements a builder pattern to save memory allocations,
        // so call output.Set in a loop
        foreach (IColumn col in output.Schema)
        {
            switch (col.Name)
            {
                // populate hierarchy columns with their XmlNode.Name
                case "ElementName":
                    output.Set <string>("ElementName", xmlNode.Name);
                    break;
                case "ChildName":
                    output.Set <string>("ChildName", xmlNode.FirstChild?.Name);
                    break;
                case "GrandChildName":
                    output.Set <string>("GrandChildName", xmlNode.FirstChild?.FirstChild?.Name);
                    break;
                // populate mapped columns with their XPath result from XmlNode.InnerXml
                default:
                    // Look for an explicit XPath→column mapping; when none exists the
                    // KeyValuePair default leaves Key/Value null and the column name is used.
                    var explicitColumnMapping = this.columnPaths.FirstOrDefault(columnPath => columnPath.Value == col.Name);
                    var xml = xmlNode.SelectSingleNode(explicitColumnMapping.Key ?? col.Name, nsmanager);
                    output.Set(explicitColumnMapping.Value ?? col.Name, xml?.InnerXml);
                    break;
            }
        }

        // then call output.AsReadOnly to build an immutable IRow.
        yield return output.AsReadOnly();
    }
}
/// <summary>Reads the input stream line by line and emits one row per line.</summary>
/// <param name="input">Wrapper for the raw input stream.</param>
/// <param name="output">Builder row populated by LineToRow for each line.</param>
/// <returns>One IRow per text line.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    using (var lineReader = new StreamReader(input.BaseStream))
    {
        for (var line = lineReader.ReadLine(); line != null; line = lineReader.ReadLine())
        {
            // LineToRow parses the raw text line into the output columns.
            LineToRow(line, output);
            yield return output.AsReadOnly();
        }
    }
}
/// <summary>
/// Reads one or more concatenated JSON documents from the input and emits one row per
/// document, storing a flattened key→bytes map in the configured output column.
/// Rows exceeding the Data Lake row-size limit are gzip-compressed; rows still too
/// large after compression are replaced by an error marker entry.
/// </summary>
/// <param name="input">Wrapper for the raw input stream (UTF-8 JSON).</param>
/// <param name="output">Builder row whose single map column is set per document.</param>
/// <returns>One IRow per top-level JSON document.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    using (var reader = new JsonTextReader(new StreamReader(input.BaseStream, Encoding.UTF8)))
    {
        // Allow several JSON documents back-to-back in one stream.
        reader.SupportMultipleContent = true;
        while (reader.Read())
        {
            var row = JToken.ReadFrom(reader);
            var size = 0;
            // FlattenJson also reports the accumulated byte size via ref.
            var flattendData = GHInsights.USql.Utility.FlattenJson(row, ref size);
            if (size < (_dataLakeMaxRowSize))
            {
                // Fits as-is: emit the flattened map directly.
                output.Set(_outputColumnName, new SqlMap<string, byte[]>(flattendData));
            }
            else
            {
                // Too large: fall back to gzip-compressing the raw JSON text.
                var compressedData = GHInsights.USql.Utility.GzipByteArray(Encoding.UTF8.GetBytes(row.ToString(Formatting.None)));
                if (compressedData.Length < (_dataLakeMaxRowSize))
                {
                    // Sentinel key marks the single-entry map as a compressed row.
                    var compressedRow = new Dictionary<string, byte[]> { { "!CompressedRow", compressedData } };
                    output.Set(_outputColumnName, new SqlMap<string, byte[]>(compressedRow));
                }
                else
                {
                    //throw new ArgumentOutOfRangeException($"Resulting SqlMap is too large: {size} - {row.ToString(Formatting.None).Substring(0,100)}");
                    // Even compression did not help: record a diagnostic marker instead
                    // of failing the whole extract (deliberate best-effort behavior).
                    var error = new Dictionary<string, byte[]> { { "!RowExtractorError", Encoding.UTF8.GetBytes($"Resulting SqlMap is too large: OriginalSize:{size} CompressedSize: {compressedData.Length} - {row.ToString(Formatting.None).Substring(0, 100)}") } };
                    output.Set(_outputColumnName, new SqlMap<string, byte[]>(error));
                }
            }
            yield return output.AsReadOnly();
        }
    }
}
/// <summary>
/// Extracts image metadata properties and a 150x150 thumbnail from the
/// "image_data" column into the matching output columns (when present).
/// </summary>
/// <param name="input">Row carrying the raw image bytes in "image_data".</param>
/// <param name="output">Builder row receiving the extracted properties.</param>
/// <returns>The populated read-only row.</returns>
public override IRow Process(IRow input, IUpdatableRow output)
{
    var imageBytes = input.Get<byte[]>("image_data");

    // Decode the image once and reuse the instance for every extracted property.
    using (var decodedImage = new StreamImage(imageBytes))
    {
        output.SetColumnIfExists("equipment_make", decodedImage.getStreamImageProperty(ImageProperties.equipment_make));
        output.SetColumnIfExists("equipment_model", decodedImage.getStreamImageProperty(ImageProperties.equipment_model));
        output.SetColumnIfExists("description", decodedImage.getStreamImageProperty(ImageProperties.description));
        output.SetColumnIfExists("copyright", decodedImage.getStreamImageProperty(ImageProperties.copyright));
        output.SetColumnIfExists("thumbnail", decodedImage.scaleStreamImageTo(150, 150));
    }

    return output.AsReadOnly();
}
/// <summary>
/// Streams XML and emits one row per &lt;row&gt; element, reading each output column
/// from the attribute of the same name. String values are simplified and trimmed
/// to the row-size limit; other types are converted via their TypeConverter.
/// </summary>
/// <param name="input">Wrapper for the raw XML input stream.</param>
/// <param name="output">Builder row populated from each row element's attributes.</param>
/// <returns>One IRow per "row" element.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    using (XmlReader reader = XmlReader.Create(input.BaseStream))
    {
        while (reader.Read())
        {
            // Only <row> elements produce output rows; skip every other node.
            if (reader.NodeType != XmlNodeType.Element || reader.LocalName != "row")
            {
                continue;
            }

            foreach (IColumn column in output.Schema)
            {
                string rawValue = reader.GetAttribute(column.Name);
                if (rawValue == null)
                {
                    // Attribute absent: fall back to the column's default value.
                    output.Set(column.Name, column.DefaultValue);
                    continue;
                }

                if (column.Type != typeof(string))
                {
                    // Non-string columns go through the standard type converter.
                    var typeConverter = TypeDescriptor.GetConverter(column.Type);
                    output.Set(column.Name, typeConverter.ConvertFromString(rawValue));
                    continue;
                }

                string simplifiedValue = Simplify(rawValue);
                if (Encoding.UTF8.GetByteCount(simplifiedValue) > Constants.Limits.StringSizeInBytes) // 128kB
                {
                    simplifiedValue = ShortenWithinBoundries(simplifiedValue);
                }
                output.Set(column.Name, simplifiedValue);
            }

            yield return output.AsReadOnly();
        }
    }
}
// IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow outputrow)
//
// Actual implementation of DriverExtractor that overwrites the Extract method of IExtractor.
/// <summary>
/// Splits the input into row streams on the configured row delimiter, splits each
/// row's text on the column delimiter, and writes each field to its column ordinal.
/// </summary>
/// <param name="input">Wrapper for the raw input stream.</param>
/// <param name="outputrow">Builder row populated one field at a time.</param>
/// <returns>One IRow per row delimiter segment.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow outputrow)
{
    foreach (Stream current in input.Split(this._row_delim))
    {
        using (StreamReader streamReader = new StreamReader(current, this._encoding))
        {
            // Read the whole row segment and split it into its column values.
            string[] fields = streamReader.ReadToEnd().Split(new string[] { this._col_delim }, StringSplitOptions.None);

            // Fix: the original kept a second counter (num) that always mirrored
            // the loop index, plus a pointless temp for the field — the index IS
            // the target column ordinal.
            for (int i = 0; i < fields.Length; i++)
            {
                this.OutputValueAtCol_I(fields[i], i, outputrow);
            }
        }

        yield return outputrow.AsReadOnly();
    }
    // (Removed dead "yield break;" — falling off the end of an iterator ends it.)
}
/// <summary>
/// Reads records from an Avro container and maps each record field to the
/// same-named output column, emitting one row per record.
/// </summary>
/// <param name="input">Wrapper for the Avro container stream.</param>
/// <param name="output">Builder row whose columns are filled from each record.</param>
/// <returns>One IRow per Avro record.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    // Fix: the original bound this result to an unused local ("serializer").
    // The call itself is kept in case CreateGeneric validates avroSchema as a
    // side effect — TODO confirm and remove entirely if it does not.
    AvroSerializer.CreateGeneric(avroSchema);

    using (var genericReader = AvroContainer.CreateGenericReader(input.BaseStream))
    using (var reader = new SequentialReader<dynamic>(genericReader))
    {
        foreach (var record in reader.Objects)
        {
            foreach (var column in output.Schema)
            {
                output.Set(column.Name, record[column.Name]);
            }

            yield return output.AsReadOnly();
        }
    }
}
/// <summary>
/// Parses the whole input as a single JSON document (Json.NET) and emits one row
/// per object selected by the configured rowpath.
/// </summary>
/// <param name="input">Wrapper for the raw JSON input stream.</param>
/// <param name="output">Builder row populated by JObjectToRow per selected object.</param>
/// <returns>One IRow per selected JSON object.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    using (var jsonReader = new JsonTextReader(new StreamReader(input.BaseStream)))
    {
        // Parse the entire document up front.
        // TODO: Json.Net fails with empty input files
        var documentRoot = JToken.ReadFrom(jsonReader);

        // Every object selected by rowpath becomes a row; its fields become columns.
        foreach (JObject selected in SelectChildren(documentRoot, this.rowpath))
        {
            this.JObjectToRow(selected, output);
            yield return output.AsReadOnly();
        }
    }
}
/// <summary>
/// Loads the entire input as an XML document and emits one row per node matched by
/// m_XPath, converting each same-named child node's inner XML to the column's type.
/// </summary>
/// <param name="input">Wrapper for the raw XML input stream.</param>
/// <param name="output">Builder row populated per matched node.</param>
/// <returns>One IRow per node matched by m_XPath.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    // Fix: removed an unused List<IRow> that was allocated and never used.
    XmlDocument xmlDocument = new XmlDocument();
    xmlDocument.Load(input.BaseStream);

    foreach (XmlNode xmlNode in xmlDocument.DocumentElement.SelectNodes(this.m_XPath))
    {
        foreach (IColumn col in output.Schema)
        {
            XmlNode xml = xmlNode.SelectSingleNode(col.Name);
            if (xml != null)
            {
                // Convert the node's inner XML text to the declared column type.
                output.Set(col.Name, Convert.ChangeType(xml.InnerXml, col.Type));
            }
            // NOTE(review): when a node lacks a column's child element, the previous
            // row's value is carried over (output is never cleared) — confirm intended.
        }

        yield return output.AsReadOnly();
    }
}
/// <summary>Reads the whole input stream as one image and emits it as a single byte[] row.</summary>
/// <param name="input">Wrapper for the raw image stream.</param>
/// <param name="output">Builder row whose first column receives the image bytes.</param>
/// <returns>Exactly one IRow containing the image bytes in column 0.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    var imageBytes = ImageOps.GetByteArrayforImage(input.BaseStream);
    output.Set<byte[]>(0, imageBytes);
    yield return output.AsReadOnly();
}
// IRow Process(IRow input, IUpdatableRow output)
//
// Actual implementation of the user-defined processor. Overwrites the Process method of IProcessor.
/// <summary>
/// Normalizes the "country" column: replaces the value with its English translation
/// when one exists in EnglishCountryNames.CountryTranslation, otherwise passes it through.
/// </summary>
/// <param name="input">Row carrying the original country name.</param>
/// <param name="output">Builder row receiving the (possibly translated) name.</param>
/// <returns>The populated read-only row.</returns>
public override IRow Process(IRow input, IUpdatableRow output)
{
    string country = input.Get<string>("country");

    // Translate only when a mapping is known; unknown names pass through unchanged.
    if (EnglishCountryNames.CountryTranslation.Keys.Contains(country))
    {
        country = EnglishCountryNames.CountryTranslation[country];
    }

    output.Set<string>("country", country);
    return output.AsReadOnly();
}
/// <summary>Apply is called at least once per instance.</summary>
/// <param name="input">A SQLIP row holding the XML document in the configured column.</param>
/// <param name="output">A SQLIP updatable row.</param>
/// <returns>IEnumerable of IRow, one IRow per node matched by rowPath.</returns>
/// <remarks>Because applier constructor arguments cannot depend on
/// column references, the name of the column to parse is given as a string. Then
/// the actual column value is obtained by calling IRow.Get. The rest of the code
/// is the same as XmlDomExtractor.</remarks>
public override IEnumerable<IRow> Apply(IRow input, IUpdatableRow output)
{
    // This applier only supports string output columns; fail fast on anything else.
    IColumn nonStringColumn = output.Schema.FirstOrDefault(col => col.Type != typeof(string));
    if (nonStringColumn != null)
    {
        throw new ArgumentException(string.Format("Column '{0}' must be of type 'string', not '{1}'", nonStringColumn.Name, nonStringColumn.Type.Name));
    }

    var document = new XmlDocument();
    document.LoadXml(input.Get<string>(this.xmlColumnName));

    foreach (XmlNode rowNode in document.DocumentElement.SelectNodes(this.rowPath))
    {
        // IUpdatableRow implements a builder pattern to save allocations,
        // so populate every column of the same row in place.
        foreach (IColumn col in output.Schema)
        {
            // Prefer an explicit XPath mapping for this column; when none exists the
            // KeyValuePair default leaves Key/Value null and the column name is used.
            var mapping = this.columnPaths.FirstOrDefault(columnPath => columnPath.Value == col.Name);
            XmlNode valueNode = rowNode.SelectSingleNode(mapping.Key ?? col.Name);
            output.Set(mapping.Value ?? col.Name, valueNode?.InnerXml);
        }

        // Build the immutable IRow only after all columns are set.
        yield return output.AsReadOnly();
    }
}
// IRow Process(IRow input, IUpdatableRow output)
//
// Actual implementatoin of the user-defined processor. Overwrites the Process method of IProcessor.
/// <summary>
/// Promotes entries of SqlMap&lt;string,string&gt; input columns into same-named scalar
/// output columns, then re-emits each map with the promoted keys removed.
/// Output columns that exist in the input are copied through unchanged.
/// </summary>
/// <param name="input">Input row, possibly containing SqlMap&lt;string,string&gt; columns.</param>
/// <param name="output">Builder row for the promoted/copied columns.</param>
/// <returns>The populated read-only row.</returns>
public override IRow Process(IRow input, IUpdatableRow output)
{
    // Collect the names of input columns whose type is SqlMap with a string key.
    List<string> list = new List<string>();
    foreach (var current in input.Schema)
    {
        if (current.Type.IsGenericType && current.Type.GetGenericTypeDefinition() == typeof(SqlMap) && current.Type.GetGenericArguments()[0] == typeof(string))
        {
            list.Add(current.Name);
        }
    }

    // Tracks, per map column, which keys were promoted to scalar output columns
    // so they can be removed from the re-emitted map below.
    Dictionary<string, ArrayList> maps_to_be_changed = new Dictionary<string, ArrayList>();

    foreach (var current2 in output.Schema)
    {
        bool flag = list.Contains(current2.Name);
        if (-1 < input.Schema.IndexOf(current2.Name) && !flag)
        {
            // Output column exists in the input and is not a map: straight copy.
            output.Set<object>(current2.Name, input.Get<object>(current2.Name));
        }
        else if (!flag)
        {
            // Output column not present in the input: search each map column
            // for a key matching the output column name.
            foreach (string current3 in list)
            {
                SqlMap<string, string> sqlMap = input.Get<SqlMap<string, string>>(current3);
                SqlArray<string> sqlArray = null;
                List<string> list2 = null;
                if (sqlMap != null)
                {
                    sqlArray = sqlMap.Keys;
                    if (sqlMap.Values != null)
                    {
                        list2 = sqlMap.Values.ToList<string>();
                    }
                }

                // Index of the output column name among the map's keys (-1 if absent).
                int num = (sqlArray == null) ? -1 : sqlArray.ToList<string>().IndexOf(current2.Name);
                if (num != -1)
                {
                    // Found: promote the map value into the scalar output column
                    // and remember the key for later removal from the map.
                    output.Set<string>(current2.Name, list2[num]);
                    if (maps_to_be_changed.Keys.Contains(current3))
                    {
                        maps_to_be_changed[current3].Add(current2.Name);
                    }
                    else
                    {
                        maps_to_be_changed.Add(current3, new ArrayList { current2.Name });
                    }
                    break;
                }

                // Not found in this map: fall back to the type's default value.
                // NOTE(review): this runs on every non-matching map before the loop
                // either breaks or ends, repeatedly overwriting the same column —
                // harmless but redundant; confirm before restructuring.
                output.Set<object>(current2.Name, current2.Type.IsValueType ?
                    Activator.CreateInstance(current2.Type) : null);
            }
        }
    }

    // Second pass: re-emit each map output column, filtering out promoted keys.
    using (IEnumerator<IColumn> enumerator = output.Schema.GetEnumerator())
    {
        while (enumerator.MoveNext())
        {
            IColumn out_col = enumerator.Current;
            bool flag = list.Contains(out_col.Name);
            if (flag)
            {
                SqlMap<string, string> sqlMap = input.Get<SqlMap<string, string>>(out_col.Name);
                if (maps_to_be_changed != null && maps_to_be_changed.Keys.Contains(out_col.Name))
                {
                    // Rebuild the map without the keys that were promoted above.
                    sqlMap = new SqlMap<string, string>(
                        from kvp in sqlMap
                        where !maps_to_be_changed[out_col.Name].Contains(kvp.Key)
                        select kvp);
                }
                output.Set<SqlMap<string, string>>(out_col.Name, sqlMap);
            }
        }
    }

    return output.AsReadOnly();
}
/// <summary>Extract is called at least once per instance</summary>
/// <param name="input">Wrapper for a Stream</param>
/// <param name="output">IUpdatableRow uses a mutable builder pattern --
/// set individual fields with IUpdatableRow.Set, then build an immutable IRow by
/// calling IUpdatableRow.AsReadOnly.</param>
/// <returns>IEnumerable of IRow, one IRow per SQLIP row.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    // Make sure that all requested columns are of type string
    IColumn column = output.Schema.FirstOrDefault(col => col.Type != typeof(string));
    if (column != null)
    {
        throw new ArgumentException(string.Format("Column '{0}' must be of type 'string', not '{1}'", column.Name, column.Type.Name));
    }

    // Streaming state machine: Row → Column → Data and back, driven by XmlReader.
    var state = new ParseState();
    state.ClearAndJump(ParseLocation.Row);
    using (var reader = XmlReader.Create(input.BaseStream))
    {
        while (reader.Read())
        {
            switch (state.Location)
            {
                case ParseLocation.Row:
                    // when looking for a new row, we are only interested in elements
                    // whose name matches the requested row element
                    if (reader.NodeType == XmlNodeType.Element && reader.Name == this.rowPath)
                    {
                        // when found, clear the IUpdatableRow's memory
                        // (there is no provided Clear method)
                        for (int i = 0; i < output.Schema.Count; i++)
                        {
                            output.Set<string>(i, null);
                        }
                        state.ClearAndJump(ParseLocation.Column);
                    }
                    break;

                case ParseLocation.Column:
                    // When looking for a new column, we are interested in elements
                    // whose name is a key in the columnPaths map or
                    // whose name is in the requested output schema.
                    // This indicates a column whose value needs to be read,
                    // so prepare for reading it by clearing elementValue.
                    if (reader.NodeType == XmlNodeType.Element && (this.columnPaths.ContainsKey(reader.Name) || output.Schema.Select(c => c.Name).Contains(reader.Name)))
                    {
                        if (reader.IsEmptyElement)
                        {
                            // For an empty element, set an empty string
                            // and immediately jump to looking for the next column.
                            // NOTE(review): if columnPaths is a standard Dictionary, this
                            // indexer throws for names admitted only via the schema branch
                            // of the condition above — confirm columnPaths' lookup semantics.
                            output.Set(this.columnPaths[reader.Name] ?? reader.Name, state.ReadElementValue());
                            state.ClearAndJump(ParseLocation.Column);
                        }
                        else
                        {
                            state.Location = ParseLocation.Data;
                            state.ElementName = reader.Name;
                            state.ClearElementValue();
                        }
                    }
                    else if (reader.NodeType == XmlNodeType.EndElement && reader.Name == this.rowPath)
                    {
                        // The other interesting case is an end element whose name matches
                        // the current row element. This indicates the end of a row,
                        // so yield the now-complete row and jump to looking for
                        // another row.
                        yield return output.AsReadOnly();
                        state.ClearAndJump(ParseLocation.Row);
                    }
                    break;

                case ParseLocation.Data:
                    // Most of the code for reading the value of a column
                    // deals with re-creating the inner XML from discrete elements.
                    // The only jump occurs when the reader hits an end element
                    // whose name matches the current column. In this case, we
                    // need to write the accumulated value to the appropriate
                    // column in the output row.
                    switch (reader.NodeType)
                    {
                        case XmlNodeType.EndElement:
                            if (reader.Name == state.ElementName)
                            {
                                // NOTE(review): same columnPaths indexer concern as above
                                // applies to state.ElementName here.
                                output.Set(this.columnPaths[state.ElementName] ?? state.ElementName, state.ReadElementValue());
                                state.ClearAndJump(ParseLocation.Column);
                            }
                            else
                            {
                                state.ElementWriter.WriteEndElement();
                            }
                            break;
                        case XmlNodeType.Element:
                            state.ElementWriter.WriteStartElement(reader.Name);
                            state.ElementWriter.WriteAttributes(reader, false);
                            if (reader.IsEmptyElement)
                            {
                                state.ElementWriter.WriteEndElement();
                            }
                            break;
                        case XmlNodeType.CDATA:
                            state.ElementWriter.WriteCData(reader.Value);
                            break;
                        case XmlNodeType.Comment:
                            state.ElementWriter.WriteComment(reader.Value);
                            break;
                        case XmlNodeType.ProcessingInstruction:
                            state.ElementWriter.WriteProcessingInstruction(reader.Name, reader.Value);
                            break;
                        default:
                            // Text and any other node kinds are appended verbatim.
                            state.ElementWriter.WriteString(reader.Value);
                            break;
                    }
                    break;

                default:
                    throw new NotImplementedException("StreamFromXml has not implemented a new member of the ParseLocation enum");
            }
        }

        // Ending anywhere other than the Row state means the document was truncated.
        if (state.Location != ParseLocation.Row)
        {
            throw new ArgumentException("XML document ended without proper closing tags");
        }
    }
}
/// <summary>
/// Streams JSON token-by-token and emits one row per top-level object. Scalar
/// properties matching an output column are converted to the column type; object-
/// and array-valued properties targeting string columns are re-serialized to a
/// JSON string via a nested JsonTextWriter.
/// </summary>
/// <param name="input">Wrapper for the raw JSON input stream.</param>
/// <param name="output">Builder row populated per top-level object.</param>
/// <returns>One IRow per completed top-level JSON object.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    // Nothing to parse in an empty file.
    if (input.Length == 0)
        yield break;

    using (var reader = new JsonTextReader(new StreamReader(input.BaseStream)))
    {
        // Column currently being captured; null when between properties.
        IColumn currentColumn = null;
        // Buffer + writer used to re-serialize nested object/array values.
        StringBuilder valueBuilder = null;
        JsonTextWriter writer = null;
        // Global counters track document nesting; local counters track nesting
        // inside the value currently being captured into `writer`.
        var startedGlobalObjects = 0;
        var startedLocalObjects = 0;
        var startedGlobalArrays = 0;
        var startedLocalArrays = 0;
        while (reader.Read())
        {
            switch (reader.TokenType)
            {
                case JsonToken.StartArray:
                    startedGlobalArrays++;
                    if (currentColumn != null && currentColumn.Type == typeof(string))
                    {
                        if (writer == null)
                        {
                            valueBuilder = new StringBuilder();
                            writer = new JsonTextWriter(new StringWriter(valueBuilder));
                        }
                        startedLocalArrays++;
                        writer.WriteStartArray();
                    }
                    break;
                case JsonToken.EndArray:
                    startedGlobalArrays--;
                    if (writer != null)
                    {
                        startedLocalArrays--;
                        writer.WriteEndArray();
                    }
                    if (currentColumn != null && valueBuilder != null && startedLocalArrays == 0 && startedLocalObjects == 0)
                    {
                        // Captured value is complete: store its JSON text and reset.
                        output.Set(currentColumn.Name, valueBuilder.ToString());
                        writer = null;
                        valueBuilder = null;
                        currentColumn = null;
                    }
                    if (startedGlobalArrays == 0)
                    {
                        // NOTE(review): this terminates the extractor when the first
                        // top-level array closes — confirm multiple top-level arrays
                        // are not expected in the input.
                        yield break;
                    }
                    break;
                case JsonToken.StartObject:
                    startedGlobalObjects++;
                    if (currentColumn != null && currentColumn.Type == typeof(string))
                    {
                        if (writer == null)
                        {
                            valueBuilder = new StringBuilder();
                            writer = new JsonTextWriter(new StringWriter(valueBuilder));
                        }
                        startedLocalObjects++;
                        writer.WriteStartObject();
                    }
                    break;
                case JsonToken.EndObject:
                    startedGlobalObjects--;
                    if (writer != null)
                    {
                        startedLocalObjects--;
                        writer.WriteEndObject();
                    }
                    if (currentColumn != null && valueBuilder != null && startedLocalArrays == 0 && startedLocalObjects == 0)
                    {
                        // Captured nested object complete: store its JSON text and reset.
                        output.Set(currentColumn.Name, valueBuilder.ToString());
                        writer = null;
                        valueBuilder = null;
                        currentColumn = null;
                    }
                    if (startedGlobalObjects == 0)
                        // A top-level object closed: emit the accumulated row.
                        yield return output.AsReadOnly();
                    break;
                case JsonToken.PropertyName:
                    if (writer != null)
                    {
                        // Inside a captured value: pass the property through verbatim.
                        writer.WritePropertyName(reader.Value.ToString());
                    }
                    else
                    {
                        var currentPropertyName = reader.Value.ToString();
                        currentColumn = output.Schema
                            .FirstOrDefault(s => s.Name == currentPropertyName);
                        if (currentColumn == null)
                            // Property not in the schema: skip its entire value.
                            reader.Skip();
                    }
                    break;
                case JsonToken.String:
                case JsonToken.Boolean:
                case JsonToken.Bytes:
                case JsonToken.Date:
                case JsonToken.Integer:
                case JsonToken.Float:
                    if (writer != null)
                    {
                        writer.WriteValue(reader.Value);
                    }
                    else if (currentColumn != null)
                    {
                        // Scalar directly targeting a column: convert if possible.
                        var typeConverter = TypeDescriptor.GetConverter(currentColumn.Type);
                        if (typeConverter != null && typeConverter.CanConvertFrom(reader.ValueType))
                        {
                            output.Set(currentColumn.Name, typeConverter.ConvertFrom(reader.Value));
                        }
                        else
                            output.Set(currentColumn.Name, reader.Value);
                        currentColumn = null;
                    }
                    break;
                case JsonToken.Null:
                    if (writer != null)
                    {
                        writer.WriteNull();
                    }
                    else if (currentColumn != null)
                    {
                        // JSON null maps to the column's default value.
                        output.Set(currentColumn.Name, currentColumn.DefaultValue);
                        currentColumn = null;
                    }
                    break;
                case JsonToken.StartConstructor:
                    writer?.WriteStartConstructor(reader.Value.ToString());
                    break;
                case JsonToken.EndConstructor:
                    writer?.WriteEndConstructor();
                    break;
                case JsonToken.Comment:
                    writer?.WriteComment(reader.Value.ToString());
                    break;
                case JsonToken.Raw:
                    writer?.WriteRaw(reader.Value.ToString());
                    break;
                case JsonToken.None:
                case JsonToken.Undefined:
                    // ignore
                    break;
                default:
                    throw new NotImplementedException();
            }
        }
        // NOTE(review): empty-body while below is dead code in practice (after Read()
        // returns false the token type is None); it would spin forever otherwise —
        // confirm it can be deleted.
        while (reader.TokenType != JsonToken.None);
    }
}