/// <summary>
/// Builds the caching pipeline on top of <paramref name="input"/>. The shape of the
/// pipeline is driven by the fields <c>_inDataFrame</c>, <c>_async</c>,
/// <c>_numThreads</c>, <c>_cacheFile</c>, <c>_reuse</c> and <c>_saverSettings</c>.
/// </summary>
/// <param name="env">environment handed to every component created here</param>
/// <param name="input">data view to cache</param>
/// <returns>the transform placed at the end of the caching pipeline</returns>
protected IDataTransform CreatePipeline(IHostEnvironment env, IDataView input)
{
    if (_inDataFrame)
    {
        if (!_async)
        {
            // Synchronous case: a SortInDataFrameTransform without any sort column.
            var sortArgs = new SortInDataFrameTransform.Arguments()
            {
                numThreads = _numThreads,
                sortColumn = null
            };
            return new SortInDataFrameTransform(env, sortArgs, input);
        }

        // Asynchronous case: a CacheDataView wrapped into a pass-through transform.
        var cachedView = new CacheDataView(env, input, null);
        return new PassThroughTransform(env, new PassThroughTransform.Arguments(), cachedView);
    }

    // File based cache: the thread option is only emitted for a positive thread count.
    string threadOption = string.Empty;
    if (_numThreads > 0)
    {
        threadOption = string.Format("{{t={0}}}", _numThreads);
    }

    using (var channel = Host.Start("Caching data..."))
    {
        bool cacheIsReusable = _reuse && File.Exists(_cacheFile);
        if (!cacheIsReusable)
        {
            channel.Info(MessageSensitivity.UserData, "Building cache '{0}'", _cacheFile);
            var saver = ComponentCreation.CreateSaver(env, _saverSettings);
            using (var output = Host.CreateOutputFile(_cacheFile))
            {
                DataSaverUtils.SaveDataView(channel, saver, input, output, true);
            }
        }
        else
        {
            channel.Info(MessageSensitivity.UserData, "Reusing cache '{0}'", _cacheFile);
        }
    }

    // NOTE(review): when _numThreads > 0 the settings below read "binary{{t=N}}"
    // (the braces end up nested twice) - confirm the loader accepts that form.
    string loaderSettings = string.Format("binary{{{0}}}", threadOption);
    var loader = ComponentCreation.CreateLoader(env, loaderSettings, new MultiFileSource(_cacheFile));

    // The schema read back from the cache file is checked against the input schema.
    SchemaHelper.CheckSchema(Host, input.Schema, loader.Schema);

    return ComponentCreation.CreateTransform(env, "skip{s=0}", loader);
}
/// <summary>
/// Builds a schema on top of <paramref name="inputSchema"/> in which the columns listed
/// in <paramref name="names"/> are associated with the types given in <paramref name="types"/>.
/// </summary>
/// <param name="inputSchema">existing schema</param>
/// <param name="names">names of the columns of the existing schema to retype, cannot be null or empty</param>
/// <param name="types">new types, one per entry of <paramref name="names"/></param>
public TypeReplacementSchema(DataViewSchema inputSchema, string[] names, DataViewType[] types)
{
    _schemaInput = new ExtendedSchema(inputSchema);

    bool hasNames = names != null && names.Length > 0;
    if (!hasNames)
    {
        throw Contracts.ExceptEmpty("The extended schema must contain new names.");
    }

    bool lengthsAgree = types != null && types.Length == names.Length;
    if (!lengthsAgree)
    {
        throw Contracts.Except("names and types must have the same length.");
    }

    _types = new Dictionary<int, DataViewType>();
    _mappedColumns = new Dictionary<int, int>();
    Contracts.Assert(types.Length == names.Length);

    int position = 0;
    foreach (string name in names)
    {
        // Index of the retyped column inside the input schema.
        int sourceIndex = SchemaHelper.GetColumnIndex(inputSchema, name);
        _types[sourceIndex] = types[position];
        // Indices past the end of the input schema are mapped back to the original column.
        _mappedColumns[inputSchema.Count + position] = sourceIndex;
        ++position;
    }
}
/// <summary>
/// Asks the cursor for a getter on column <paramref name="col"/>, trying the types
/// <typeparamref name="T1"/>, <typeparamref name="T2"/> and <typeparamref name="T3"/>
/// in that order. The first non-null getter is returned.
/// </summary>
/// <param name="cur">cursor providing the getters</param>
/// <param name="col">column to read</param>
/// <returns>the first getter the cursor was able to provide</returns>
/// <remarks>An exception built by <c>Contracts.ExceptNotImpl</c> is thrown when none of the three types works.</remarks>
public static Delegate GetGetterChoice<T1, T2, T3>(DataViewRowCursor cur, DataViewSchema.Column col)
{
    Delegate getter = Probe<T1>() ?? Probe<T2>() ?? Probe<T3>();
    if (getter != null)
    {
        return getter;
    }

    throw Contracts.ExceptNotImpl($"Unable to get a getter for column {col} of type {typeof(T1)} or {typeof(T2)} or {typeof(T3)} from schema\n{SchemaHelper.ToString(cur.Schema)}.");

    // One attempt for one candidate type: any exception raised by the cursor
    // is deliberately ignored and reported as "no getter" (null).
    Delegate Probe<T>()
    {
        try
        {
            return cur.GetGetter<T>(col);
        }
        catch (Exception)
        {
            return null;
        }
    }
}
/// <summary>
/// Returns one getter per visible column of the cursor schema, in schema order.
/// Hidden columns are skipped.
/// </summary>
/// <param name="cur">cursor to read from</param>
/// <returns>the getters of every column which is not hidden</returns>
/// <remarks>An exception is thrown as soon as the getter of a visible column is null.</remarks>
public static Delegate[] GetAllGetters(DataViewRowCursor cur)
{
    var schema = cur.Schema;
    var getters = new List<Delegate>();

    for (int column = 0; column < schema.Count; ++column)
    {
        if (!schema[column].IsHidden)
        {
            Delegate current = GetColumnGetter(cur, SchemaHelper._dc(column, cur), schema);
            if (current == null)
            {
                throw Contracts.Except($"Unable to get getter for column {column} from schema\n{SchemaHelper.ToString(schema)}.");
            }

            getters.Add(current);
        }
    }

    return getters.ToArray();
}
/// <summary>
/// Parses a textual schema and creates an <see cref="ExtendedSchema"/> for it.
/// </summary>
/// <param name="schema">
/// Column definitions separated by spaces or semicolons. A leading <c>col=</c> is dropped.
/// A definition <c>name:Vec&lt;type,count&gt;:first</c> is rewritten into
/// <c>name:type:first-(first+count-1)</c> and a definition <c>name:Key&lt;type,range&gt;:index</c>
/// into <c>name:type[range]:index</c> before being handed to <c>TextLoader.Column.Parse</c>.
/// </param>
/// <param name="ch">optional channel used to build the exceptions, <c>Contracts</c> is used when it is null</param>
/// <returns>a schema with one column per parsed definition</returns>
/// <exception cref="ArgumentNullException"><paramref name="schema"/> is null</exception>
public static ExtendedSchema Parse(string schema, IChannel ch = null)
{
    if (schema == null)
    {
        throw new ArgumentNullException(nameof(schema));
    }

    var cols = schema.Split(new char[] { ' ', ';' }, StringSplitOptions.RemoveEmptyEntries);
    var tlcols = new List<TextLoader.Column>();

    for (int i = 0; i < cols.Length; ++i)
    {
        string column = cols[i];

        // The schema is machine-readable text: every comparison is ordinal.
        if (column.StartsWith("col=", StringComparison.Ordinal))
        {
            column = column.Substring(4);
        }

        if (column.Contains(":Vec<"))
        {
            var parts = column.Split(':');
            if (parts.Length != 3)
            {
                throw Fail("Expects 3 parts in '{0}'", column);
            }

            var items = SplitTypeArguments(parts[1], "Vec<", column);

            // parts[2] is the first index, items[1] the number of slots.
            int first, count;
            if (!int.TryParse(parts[2], System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out first)
                || !int.TryParse(items[1], System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out count))
            {
                throw Fail("Unable to parse '{0}'", column);
            }

            column = string.Format(System.Globalization.CultureInfo.InvariantCulture,
                                   "{0}:{1}:{2}-{3}", parts[0], items[0], first, first + count - 1);
        }
        else if (column.Contains(":Key<"))
        {
            var parts = column.Split(':');
            if (parts.Length == 4)
            {
                // GroupId:Key<U4,Min:0>:8
                // The colon inside the key type produced one extra part: glue it back.
                parts = new[] { parts[0], parts[1] + ":" + parts[2], parts[3] };
                if (!parts[1].Contains(",Min:0>"))
                {
                    throw Fail("Expects 3 parts in '{0}'", column);
                }

                parts[1] = parts[1].Replace(",Min:0>", ",0-*>");
            }

            if (parts.Length != 3)
            {
                throw Fail("Expects 3 parts in '{0}'", column);
            }

            var items = SplitTypeArguments(parts[1], "Key<", column);
            column = string.Format("{0}:{1}[{2}]:{3}", parts[0], items[0], items[1], parts[2]);
        }

        // First attempt with the definition as is, second attempt with the
        // position of the definition appended as the column index.
        var parsed = TextLoader.Column.Parse(column);
        if (parsed == null)
        {
            string indexed = string.Format("{0}:{1}", column, i);
            parsed = TextLoader.Column.Parse(indexed);
            if (parsed == null)
            {
                throw Fail("Unable to parse '{0}' or '{1}'", column, indexed);
            }
        }

        tlcols.Add(parsed);
    }

    return new ExtendedSchema(null, tlcols.Select(c => c.Name).ToArray(), tlcols.Select(c => SchemaHelper.Convert(c, ch)).ToArray());

    // Builds the exception with the channel when there is one, with Contracts otherwise.
    Exception Fail(string format, params object[] args)
    {
        return ch != null ? ch.Except(format, args) : Contracts.Except(format, args);
    }

    // Returns the two comma separated items found between 'prefix' and the closing '>'.
    string[] SplitTypeArguments(string typeText, string prefix, string definition)
    {
        if (!typeText.StartsWith(prefix, StringComparison.Ordinal) || !typeText.EndsWith(">", StringComparison.Ordinal))
        {
            throw Fail("Unable to parse '{0}'", definition);
        }

        string inner = typeText.Substring(prefix.Length, typeText.Length - prefix.Length - 1);
        var items = inner.Split(',');
        if (items.Length != 2)
        {
            throw Fail("Unable to parse '{0}'", definition);
        }

        return items;
    }
}
/// <summary>
/// Records the kind of <c>TLabel</c> and the destination kind, resets every converter
/// delegate to null, then creates the pair of converters (TLabel to destination and
/// destination to TLabel) matching <paramref name="destKind"/>. Only one pair is
/// non-null once the method returns.
/// </summary>
/// <param name="destKind">
/// Destination kind: Boolean, Byte (SByte is accepted as a synonym), UInt16, UInt32,
/// Int32 or Single. Any other kind raises the exception built by <c>Contracts.ExceptNotSupp</c>.
/// </param>
void Init(DataKind destKind)
{
    _kind = SchemaHelper.GetColumnType<TLabel>().RawKind();
    _destKind = destKind;

    mapperBL = null;
    mapperU1 = null;
    mapperU2 = null;
    mapperU4 = null;
    mapperI4 = null;
    mapperR4 = null;
    mapperFromBL = null;
    mapperFromU1 = null;
    mapperFromU2 = null;
    mapperFromU4 = null;
    mapperFromI4 = null;
    mapperFromR4 = null;

    // Every GetConverter call overwrites 'identity': it ends up holding
    // the value reported by the last call of the selected case.
    switch (destKind)
    {
        case DataKind.Boolean:
            mapperBL = SchemaHelper.GetConverter<TLabel, bool>(out identity);
            mapperFromBL = SchemaHelper.GetConverter<bool, TLabel>(out identity);
            break;

        // The U1 converters work on System.Byte, which is DataKind.Byte.
        // DataKind.SByte used to be the only accepted label for them and
        // is kept so that existing callers still work.
        case DataKind.Byte:
        case DataKind.SByte:
            mapperU1 = SchemaHelper.GetConverter<TLabel, byte>(out identity);
            mapperFromU1 = SchemaHelper.GetConverter<byte, TLabel>(out identity);
            break;

        case DataKind.UInt16:
            mapperU2 = SchemaHelper.GetConverter<TLabel, ushort>(out identity);
            mapperFromU2 = SchemaHelper.GetConverter<ushort, TLabel>(out identity);
            break;

        case DataKind.UInt32:
            mapperU4 = SchemaHelper.GetConverter<TLabel, uint>(out identity);
            mapperFromU4 = SchemaHelper.GetConverter<uint, TLabel>(out identity);
            break;

        case DataKind.Int32:
            // Int32 goes through the float converters and a cast.
            // NOTE(review): a float cannot hold every int above 2^24 exactly - confirm
            // the labels handled here stay in the safe range.
            var toFloat = SchemaHelper.GetConverter<TLabel, float>(out identity);
            mapperI4 = (in TLabel src, ref int dst) =>
            {
                float v = 0f;
                toFloat(in src, ref v);
                dst = (int)v;
            };
            var fromFloat = SchemaHelper.GetConverter<float, TLabel>(out identity);
            mapperFromI4 = (in int src, ref TLabel dst) =>
            {
                float v = (float)src;
                fromFloat(in v, ref dst);
            };
            break;

        case DataKind.Single:
            mapperR4 = SchemaHelper.GetConverter<TLabel, float>(out identity);
            mapperFromR4 = SchemaHelper.GetConverter<float, TLabel>(out identity);
            break;

        default:
            throw Contracts.ExceptNotSupp("Unsupported kinds {0} --> {1}", _kind, _destKind);
    }
}
/// <summary>
/// Default constructor: the destination kind is the kind of <c>TLabel</c> itself.
/// </summary>
public TypedConverters()
{
    var labelKind = SchemaHelper.GetColumnType<TLabel>().RawKind();
    Init(labelKind);
}
/// <summary>
/// Builds a schema on top of <paramref name="inputSchema"/> in which the columns listed
/// in <paramref name="names"/> are associated with the types given in <paramref name="types"/>.
/// </summary>
/// <param name="inputSchema">existing schema</param>
/// <param name="names">names of the columns of the existing schema to retype, cannot be null or empty</param>
/// <param name="types">new types, one per entry of <paramref name="names"/></param>
public TypeReplacementSchema(ISchema inputSchema, string[] names, DataViewType[] types)
{
    _schemaInput = inputSchema;

    bool hasNames = names != null && names.Length > 0;
    if (!hasNames)
    {
        throw Contracts.ExceptEmpty("The extended schema must contain new names.");
    }

    bool lengthsAgree = types != null && types.Length == names.Length;
    if (!lengthsAgree)
    {
        throw Contracts.Except("names and types must have the same length.");
    }

    _types = new Dictionary<int, DataViewType>();
    _mappedColumns = new Dictionary<int, int>();
    Contracts.Assert(types.Length == names.Length);

    int position = 0;
    foreach (string name in names)
    {
        // Every name must designate a column of the input schema.
        int sourceIndex;
        bool found = inputSchema.TryGetColumnIndex(name, out sourceIndex);
        if (!found)
        {
            throw Contracts.Except("Unable to find column '{0}' in '{1}'", name, SchemaHelper.ToString(inputSchema));
        }

        _types[sourceIndex] = types[position];
        // Indices past the end of the input schema are mapped back to the original column.
        _mappedColumns[inputSchema.ColumnCount + position] = sourceIndex;
        ++position;
    }
}