/// <summary>
 /// Builds the caching pipeline on top of <paramref name="input"/> according to
 /// the fields _inDataFrame, _async, _numThreads, _cacheFile, _reuse and _saverSettings.
 /// </summary>
 /// <param name="env">environment handed over to every created component</param>
 /// <param name="input">data view to cache</param>
 /// <returns>
 /// A pass-through transform over a <c>CacheDataView</c> (_inDataFrame and _async),
 /// a <c>SortInDataFrameTransform</c> without sort column (_inDataFrame only),
 /// or a <c>skip{s=0}</c> transform over a binary loader reading _cacheFile (otherwise).
 /// </returns>
 protected IDataTransform CreatePipeline(IHostEnvironment env, IDataView input)
 {
     if (_inDataFrame)
     {
         if (_async)
         {
             var cached = new CacheDataView(env, input, null);
             return new PassThroughTransform(env, new PassThroughTransform.Arguments(), cached);
         }

         var sortArgs = new SortInDataFrameTransform.Arguments();
         sortArgs.numThreads = _numThreads;
         sortArgs.sortColumn = null;
         return new SortInDataFrameTransform(env, sortArgs, input);
     }

     // Cache on disk: the thread option is only emitted for a positive thread count.
     // NOTE(review): when _numThreads > 0 the loader settings below read "binary{{t=N}}"
     // (two pairs of braces) -- confirm this is the syntax the loader expects.
     string threadOption = string.Empty;
     if (_numThreads > 0)
     {
         threadOption = string.Format("{{t={0}}}", _numThreads);
     }

     using (var channel = Host.Start("Caching data..."))
     {
         if (!_reuse || !File.Exists(_cacheFile))
         {
             channel.Info(MessageSensitivity.UserData, "Building cache '{0}'", _cacheFile);
             var saver = ComponentCreation.CreateSaver(env, _saverSettings);
             using (var output = Host.CreateOutputFile(_cacheFile))
             {
                 DataSaverUtils.SaveDataView(channel, saver, input, output, true);
             }
         }
         else
         {
             channel.Info(MessageSensitivity.UserData, "Reusing cache '{0}'", _cacheFile);
         }
     }

     // Read the cache file back and verify its schema against the input's schema.
     var loaderSettings = string.Format("binary{{{0}}}", threadOption);
     var loader         = ComponentCreation.CreateLoader(env, loaderSettings, new MultiFileSource(_cacheFile));
     SchemaHelper.CheckSchema(Host, input.Schema, loader.Schema);
     return ComponentCreation.CreateTransform(env, "skip{s=0}", loader);
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Constructor. Wraps <paramref name="inputSchema"/> and records, for every entry of
        /// <paramref name="names"/>, the index of the matching input column and its new type.
        /// </summary>
        /// <param name="inputSchema">existing schema</param>
        /// <param name="names">names of the columns to look up in the existing schema, must not be null or empty</param>
        /// <param name="types">corresponding types, same length as <paramref name="names"/></param>
        public TypeReplacementSchema(DataViewSchema inputSchema, string[] names, DataViewType[] types)
        {
            _schemaInput = new ExtendedSchema(inputSchema);

            bool noNames = names == null || names.Length == 0;
            if (noNames)
            {
                throw Contracts.ExceptEmpty("The extended schema must contain new names.");
            }

            bool lengthMismatch = types == null || types.Length != names.Length;
            if (lengthMismatch)
            {
                throw Contracts.Except("names and types must have the same length.");
            }

            _types         = new Dictionary<int, DataViewType>();
            _mappedColumns = new Dictionary<int, int>();
            Contracts.Assert(types.Length == names.Length);

            for (int position = 0; position < names.Length; ++position)
            {
                // Index of the existing column whose type is overridden.
                int sourceColumn = SchemaHelper.GetColumnIndex(inputSchema, names[position]);
                _types[sourceColumn] = types[position];
                // Indices past the end of the input schema point back to the source column.
                _mappedColumns[inputSchema.Count + position] = sourceColumn;
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Returns a getter for column <paramref name="col"/>, trying the types
        /// <typeparamref name="T1"/>, <typeparamref name="T2"/> and <typeparamref name="T3"/> in that order.
        /// </summary>
        /// <typeparam name="T1">first type to try</typeparam>
        /// <typeparam name="T2">second type to try</typeparam>
        /// <typeparam name="T3">third type to try</typeparam>
        /// <param name="cur">cursor asked for the getter</param>
        /// <param name="col">column to read</param>
        /// <returns>the first non-null getter obtained from the cursor</returns>
        /// <remarks>
        /// When none of the three types yields a getter, the exception built by
        /// <c>Contracts.ExceptNotImpl</c> is thrown.
        /// </remarks>
        public static Delegate GetGetterChoice <T1, T2, T3>(DataViewRowCursor cur, DataViewSchema.Column col)
        {
            // Probing helper: an exception raised for one candidate type only means the
            // next candidate must be tried, so it is deliberately turned into null.
            Delegate TryGetGetter<T>()
            {
                try
                {
                    return cur.GetGetter<T>(col);
                }
                catch (Exception)
                {
                    return null;
                }
            }

            Delegate getter = TryGetGetter<T1>() ?? TryGetGetter<T2>() ?? TryGetGetter<T3>();
            if (getter != null)
            {
                return getter;
            }
            throw Contracts.ExceptNotImpl($"Unable to get a getter for column {col} of type {typeof(T1)} or {typeof(T2)} or {typeof(T3)} from schema\n{SchemaHelper.ToString(cur.Schema)}.");
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Returns the getters of every column of the cursor's schema which is not hidden,
        /// in schema order.
        /// </summary>
        /// <param name="cur">cursor providing the schema and the getters</param>
        /// <returns>one getter per non-hidden column</returns>
        public static Delegate[] GetAllGetters(DataViewRowCursor cur)
        {
            var schema  = cur.Schema;
            var getters = new List<Delegate>();

            for (int column = 0; column < schema.Count; ++column)
            {
                if (!schema[column].IsHidden)
                {
                    var getter = GetColumnGetter(cur, SchemaHelper._dc(column, cur), schema);
                    if (getter == null)
                    {
                        throw Contracts.Except($"Unable to get getter for column {column} from schema\n{SchemaHelper.ToString(schema)}.");
                    }
                    getters.Add(getter);
                }
            }
            return getters.ToArray();
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Parses a schema and creates an ExtendedSchema for it.
        /// </summary>
        /// <param name="schema">column descriptions separated by spaces or semicolons;
        /// a leading <c>col=</c> on a description is dropped</param>
        /// <param name="ch">channel used to build exceptions; when null, <c>Contracts</c> is used instead</param>
        /// <returns>schema built from the names and the converted types of the parsed columns</returns>
        /// <remarks>
        /// Two notations are rewritten before calling <c>TextLoader.Column.Parse</c>:
        /// <c>name:Vec&lt;type,count&gt;:first</c> becomes <c>name:type:first-(first+count-1)</c> and
        /// <c>name:Key&lt;type,range&gt;:position</c> becomes <c>name:type[range]:position</c>.
        /// If a description cannot be parsed as is, a second attempt is made with the
        /// description's rank appended after a colon.
        /// </remarks>
        public static ExtendedSchema Parse(string schema, IChannel ch = null)
        {
            // Exceptions go through the channel when there is one, through Contracts otherwise.
            Exception ThreePartsExpected(string column)
            {
                return ch != null
                    ? ch.Except("Expects 3 parts in '{0}'", column)
                    : Contracts.Except("Expects 3 parts in '{0}'", column);
            }

            Exception Unparsable(string column)
            {
                return ch != null
                    ? ch.Except("Unable to parse '{0}'", column)
                    : Contracts.Except("Unable to parse '{0}'", column);
            }

            // Checks that typeSpec looks like prefix + "a,b" + ">" and returns { a, b }.
            // Comparisons are ordinal because the offsets used by Substring assume an exact match.
            string[] SplitTypeArguments(string typeSpec, string prefix, string column)
            {
                if (!typeSpec.StartsWith(prefix, StringComparison.Ordinal) ||
                    !typeSpec.EndsWith(">", StringComparison.Ordinal))
                {
                    throw Unparsable(column);
                }
                var inner = typeSpec.Substring(prefix.Length, typeSpec.Length - prefix.Length - 1);
                var parts = inner.Split(',');
                if (parts.Length != 2)
                {
                    throw Unparsable(column);
                }
                return parts;
            }

            var cols   = schema.Split(new char[] { ' ', ';' }, StringSplitOptions.RemoveEmptyEntries);
            var tlcols = new List<TextLoader.Column>();

            for (int i = 0; i < cols.Length; ++i)
            {
                var col = cols[i];
                if (col.StartsWith("col=", StringComparison.Ordinal))
                {
                    col = col.Substring(4);
                }

                if (col.Contains(":Vec<"))
                {
                    var dot = col.Split(':');
                    if (dot.Length != 3)
                    {
                        throw ThreePartsExpected(col);
                    }
                    var typeArgs = SplitTypeArguments(dot[1], "Vec<", col);
                    int first, count;
                    // TryParse rejects exactly the inputs int.Parse would throw on.
                    if (!int.TryParse(dot[2], out first) || !int.TryParse(typeArgs[1], out count))
                    {
                        throw Unparsable(col);
                    }
                    col = string.Format("{0}:{1}:{2}-{3}", dot[0], typeArgs[0], first, first + count - 1);
                }
                else if (col.Contains(":Key<"))
                {
                    var dot = col.Split(':');
                    if (dot.Length == 4)
                    {
                        // GroupId:Key<U4, Min:0>:8
                        // The type itself holds a colon: glue both halves back together.
                        var keyType = dot[1] + ":" + dot[2];
                        if (!keyType.Contains(",Min:0>"))
                        {
                            throw ThreePartsExpected(col);
                        }
                        dot = new[] { dot[0], keyType.Replace(",Min:0>", ",0-*>"), dot[3] };
                    }
                    if (dot.Length != 3)
                    {
                        throw ThreePartsExpected(col);
                    }
                    var typeArgs = SplitTypeArguments(dot[1], "Key<", col);
                    col = string.Format("{0}:{1}[{2}]:{3}", dot[0], typeArgs[0], typeArgs[1], dot[2]);
                }

                var t = TextLoader.Column.Parse(col);
                if (t == null)
                {
                    // Second attempt: append the description's rank after a colon.
                    var withIndex = string.Format("{0}:{1}", col, i);
                    t = TextLoader.Column.Parse(withIndex);
                    if (t == null)
                    {
                        throw ch != null
                            ? ch.Except("Unable to parse '{0}' or '{1}'", col, withIndex)
                            : Contracts.Except("Unable to parse '{0}' or '{1}'", col, withIndex);
                    }
                }
                tlcols.Add(t);
            }
            return new ExtendedSchema(null, tlcols.Select(c => c.Name).ToArray(),
                                      tlcols.Select(c => SchemaHelper.Convert(c, ch)).ToArray());
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Stores the kind of TLabel and <paramref name="destKind"/>, resets every mapper,
        /// then creates the pair of mappers (to and from TLabel) matching <paramref name="destKind"/>.
        /// Each converter request writes the field <c>identity</c>, so the value left in it
        /// comes from the last request of the selected branch.
        /// </summary>
        /// <param name="destKind">destination kind; an unsupported kind raises the exception
        /// built by <c>Contracts.ExceptNotSupp</c></param>
        void Init(DataKind destKind)
        {
            _kind     = SchemaHelper.GetColumnType <TLabel>().RawKind();
            _destKind = destKind;

            // Only the pair selected below is left non-null.
            mapperBL = null; mapperFromBL = null;
            mapperU1 = null; mapperFromU1 = null;
            mapperU2 = null; mapperFromU2 = null;
            mapperU4 = null; mapperFromU4 = null;
            mapperI4 = null; mapperFromI4 = null;
            mapperR4 = null; mapperFromR4 = null;

            if (destKind == DataKind.Boolean)
            {
                mapperBL     = SchemaHelper.GetConverter <TLabel, bool>(out identity);
                mapperFromBL = SchemaHelper.GetConverter <bool, TLabel>(out identity);
            }
            else if (destKind == DataKind.SByte)
            {
                // NOTE(review): the SByte kind is paired with byte converters -- confirm this is intended.
                mapperU1     = SchemaHelper.GetConverter <TLabel, byte>(out identity);
                mapperFromU1 = SchemaHelper.GetConverter <byte, TLabel>(out identity);
            }
            else if (destKind == DataKind.UInt16)
            {
                mapperU2     = SchemaHelper.GetConverter <TLabel, ushort>(out identity);
                mapperFromU2 = SchemaHelper.GetConverter <ushort, TLabel>(out identity);
            }
            else if (destKind == DataKind.UInt32)
            {
                mapperU4     = SchemaHelper.GetConverter <TLabel, uint>(out identity);
                mapperFromU4 = SchemaHelper.GetConverter <uint, TLabel>(out identity);
            }
            else if (destKind == DataKind.Int32)
            {
                // Int32 conversions are routed through the float converters.
                var toFloat = SchemaHelper.GetConverter <TLabel, float>(out identity);
                mapperI4 = (in TLabel src, ref int dst) =>
                {
                    float asFloat = 0f;
                    toFloat(in src, ref asFloat);
                    dst = (int)asFloat;
                };
                var fromFloat = SchemaHelper.GetConverter <float, TLabel>(out identity);
                mapperFromI4 = (in int src, ref TLabel dst) =>
                {
                    float asFloat = src;
                    fromFloat(in asFloat, ref dst);
                };
            }
            else if (destKind == DataKind.Single)
            {
                mapperR4     = SchemaHelper.GetConverter <TLabel, float>(out identity);
                mapperFromR4 = SchemaHelper.GetConverter <float, TLabel>(out identity);
            }
            else
            {
                throw Contracts.ExceptNotSupp("Unsupported kinds {0} --> {1}", _kind, _destKind);
            }
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Default constructor: initializes the converters with the kind of TLabel
 /// itself as destination kind.
 /// </summary>
 public TypedConverters()
 {
     var labelKind = SchemaHelper.GetColumnType <TLabel>().RawKind();
     Init(labelKind);
 }
Ejemplo n.º 8
0
        /// <summary>
        /// Constructor. Keeps <paramref name="inputSchema"/> and records, for every entry of
        /// <paramref name="names"/>, the index of the matching input column and its new type.
        /// </summary>
        /// <param name="inputSchema">existing schema</param>
        /// <param name="names">names of the columns to look up in the existing schema, must not be null or empty</param>
        /// <param name="types">corresponding types, same length as <paramref name="names"/></param>
        public TypeReplacementSchema(ISchema inputSchema, string[] names, DataViewType[] types)
        {
            _schemaInput = inputSchema;

            bool noNames = names == null || names.Length == 0;
            if (noNames)
            {
                throw Contracts.ExceptEmpty("The extended schema must contain new names.");
            }

            bool lengthMismatch = types == null || types.Length != names.Length;
            if (lengthMismatch)
            {
                throw Contracts.Except("names and types must have the same length.");
            }

            _types         = new Dictionary<int, DataViewType>();
            _mappedColumns = new Dictionary<int, int>();
            Contracts.Assert(types.Length == names.Length);

            for (int position = 0; position < names.Length; ++position)
            {
                // A name which is not part of the input schema is an error.
                if (!inputSchema.TryGetColumnIndex(names[position], out int sourceColumn))
                {
                    throw Contracts.Except("Unable to find column '{0}' in '{1}'", names[position], SchemaHelper.ToString(inputSchema));
                }
                _types[sourceColumn] = types[position];
                // Indices past the end of the input schema point back to the source column.
                _mappedColumns[inputSchema.ColumnCount + position] = sourceColumn;
            }
        }