Esempio n. 1
0
        /// <summary>
        /// Wraps the label column of <paramref name="data"/> in a lambda column mapper named "Label mapper"
        /// that writes a <c>Float</c> label into the column <paramref name="dstName"/>.
        /// A source label accepted by <paramref name="equalsTarget"/> maps to 1. Any other label maps to
        /// <c>default(Float)</c>, except that when <c>Args.ImputeMissingLabelsAsNegative</c> is not set and
        /// an NA predicate is available for <paramref name="type"/>, labels accepted by that NA predicate
        /// map to <c>Float.NaN</c>.
        /// </summary>
        /// <param name="type">Column type of the source label; its raw type must be <typeparamref name="T"/>.</param>
        /// <param name="equalsTarget">Predicate identifying the positive (target) label value.</param>
        /// <param name="data">Role-mapped data whose schema has a label column.</param>
        /// <param name="dstName">Name of the output column.</param>
        /// <returns>The data view produced by <c>LambdaColumnMapper.Create</c>.</returns>
        protected IDataView MapLabelsCore <T>(ColumnType type, RefPredicate <T> equalsTarget, RoleMappedData data, string dstName)
        {
            Host.AssertValue(type);
            Host.Assert(type.RawType == typeof(T));
            Host.AssertValue(equalsTarget);
            Host.AssertValue(data);
            Host.AssertValue(data.Schema.Label);
            Host.AssertNonWhiteSpace(dstName);

            var labelColumn = data.Schema.Label;
            RefPredicate<T> isNA;

            if (Args.ImputeMissingLabelsAsNegative || !Conversions.Instance.TryGetIsNAPredicate(type, out isNA))
            {
                // Two-valued mapping: everything that is not the target becomes the default value.
                return LambdaColumnMapper.Create(
                    Host, "Label mapper", data.Data,
                    labelColumn.Name, dstName, type, NumberType.Float,
                    (ref T src, ref Float dst) =>
                    {
                        if (equalsTarget(ref src))
                        {
                            dst = 1;
                        }
                        else
                        {
                            dst = default(Float);
                        }
                    });
            }

            // Three-valued mapping: the target check comes first, then missing labels are kept as NaN.
            return LambdaColumnMapper.Create(
                Host, "Label mapper", data.Data,
                labelColumn.Name, dstName, type, NumberType.Float,
                (ref T src, ref Float dst) =>
                {
                    if (equalsTarget(ref src))
                    {
                        dst = 1;
                    }
                    else if (isNA(ref src))
                    {
                        dst = Float.NaN;
                    }
                    else
                    {
                        dst = default(Float);
                    }
                });
        }
Esempio n. 2
0
 /// <summary>
 /// Initializes the typed value holder for <paramref name="cursor"/>, keeping the two delegates
 /// it is given for later use.
 /// </summary>
 /// <param name="cursor">Cursor forwarded to the base constructor.</param>
 /// <param name="getSrc">Getter for the source value; must not be null.</param>
 /// <param name="hasBad">Predicate over the source value; must not be null.</param>
 protected TypedValue(RowCursor cursor, ValueGetter <T> getSrc, RefPredicate <T> hasBad)
     : base(cursor)
 {
     // Validate and store each delegate in turn.
     Contracts.AssertValue(getSrc);
     _getSrc = getSrc;

     Contracts.AssertValue(hasBad);
     _hasBad = hasBad;
 }
Esempio n. 3
0
        /// <summary>
        /// Creates a data view over <paramref name="input"/> that evaluates <paramref name="predicate"/>
        /// on the values of column <paramref name="src"/>. If the column's actual type differs from
        /// <paramref name="typeSrc"/>, a standard conversion to <paramref name="typeSrc"/> is inserted
        /// in front of the predicate. The result is wrapped in an <c>OpaqueDataView</c>.
        /// </summary>
        /// <param name="env">Host environment; must not be null.</param>
        /// <param name="name">Name passed to the implementation; must be non-empty.</param>
        /// <param name="input">Input data; must not be null.</param>
        /// <param name="src">Name of the column the predicate is applied to.</param>
        /// <param name="typeSrc">Column type the predicate expects; its raw type must be <typeparamref name="TSrc"/>.</param>
        /// <param name="predicate">Predicate evaluated on the (possibly converted) column value.</param>
        /// <returns>The wrapped data view.</returns>
        /// <remarks>
        /// Throws through <c>env.ExceptParam</c> when <paramref name="typeSrc"/> does not match
        /// <typeparamref name="TSrc"/>, when the column is absent, or when no standard conversion
        /// from the column's type to <paramref name="typeSrc"/> exists.
        /// </remarks>
        public static IDataView Create <TSrc>(IHostEnvironment env, string name, IDataView input,
                                              string src, ColumnType typeSrc, RefPredicate <TSrc> predicate)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckNonEmpty(name, nameof(name));
            env.CheckValue(input, nameof(input));
            env.CheckNonEmpty(src, nameof(src));
            env.CheckValue(typeSrc, nameof(typeSrc));
            env.CheckValue(predicate, nameof(predicate));

            if (typeSrc.RawType != typeof(TSrc))
            {
                throw env.ExceptParam(
                    nameof(predicate),
                    "The source column type '{0}' doesn't match the input type of the predicate",
                    typeSrc);
            }

            int colSrc;
            if (!input.Schema.TryGetColumnIndex(src, out colSrc))
            {
                throw env.ExceptParam(nameof(src), "The input data doesn't have a column named '{0}'", src);
            }

            var columnType = input.Schema.GetColumnType(colSrc);

            // REVIEW: Ideally this should support vector-type conversion. It currently doesn't.
            // When the column already has the expected type no converter is needed; otherwise ask for
            // a standard conversion, which may itself report that the conversion is an identity.
            Delegate converter = null;
            bool isIdentity = columnType.SameSizeAndItemType(typeSrc);
            if (!isIdentity &&
                !Conversions.Instance.TryGetStandardConversion(columnType, typeSrc, out converter, out isIdentity))
            {
                throw env.ExceptParam(
                    nameof(predicate),
                    "The type of column '{0}', '{1}', cannot be converted to the input type of the predicate '{2}'",
                    src, columnType, typeSrc);
            }

            IDataView view;
            if (isIdentity)
            {
                view = new Impl<TSrc, TSrc>(env, name, input, colSrc, predicate);
                return new OpaqueDataView(view);
            }

            // The column's raw type is only known at run time, so bind CreateImpl<T1, T2> to it via
            // reflection. The <int, int> instantiation merely provides a handle to the generic definition.
            Func<IHostEnvironment, string, IDataView, int,
                 RefPredicate<int>, ValueMapper<int, int>, Impl<int, int>> template = CreateImpl<int, int>;
            var factory = template.GetMethodInfo()
                                  .GetGenericMethodDefinition()
                                  .MakeGenericMethod(columnType.RawType, typeof(TSrc));
            view = (IDataView)factory.Invoke(null, new object[] { env, name, input, colSrc, predicate, converter });

            return new OpaqueDataView(view);
        }
Esempio n. 4
0
            /// <summary>
            /// Initializes the implementation over <paramref name="input"/>, remembering which column to
            /// read, the predicate to evaluate, and the optional converter from
            /// <typeparamref name="T1"/> to <typeparamref name="T2"/>.
            /// </summary>
            /// <param name="env">Host environment forwarded to the base constructor.</param>
            /// <param name="name">Name forwarded to the base constructor.</param>
            /// <param name="input">Input data forwarded to the base constructor.</param>
            /// <param name="colSrc">Index of the source column; must be a valid column index of the source schema.</param>
            /// <param name="pred">Predicate over <typeparamref name="T2"/> values; must not be null.</param>
            /// <param name="conv">Converter from <typeparamref name="T1"/> to <typeparamref name="T2"/>;
            /// may be null only when both type parameters are the same type.</param>
            public Impl(IHostEnvironment env, string name, IDataView input,
                        int colSrc, RefPredicate <T2> pred, ValueMapper <T1, T2> conv = null)
                : base(env, name, input)
            {
                Host.AssertValue(pred);
                // Without a converter the predicate must be directly applicable to the column's values.
                Host.Assert(conv != null || typeof(T1) == typeof(T2));
                Host.Assert(colSrc >= 0 && colSrc < Source.Schema.ColumnCount);

                _pred   = pred;
                _conv   = conv;
                _colSrc = colSrc;
            }
Esempio n. 5
0
        /// <summary>
        /// Builds a bit mask with one bit per element of <paramref name="values"/>. A bit is set exactly
        /// when the is-default predicate for the item type of <paramref name="type"/> accepts the
        /// corresponding value.
        /// </summary>
        /// <param name="type">Vector column type whose size must equal the length of <paramref name="values"/>.</param>
        /// <param name="values">One value per slot.</param>
        /// <returns>The mask of slots holding a default value.</returns>
        private BitArray ComputeDefaultSlots <T>(ColumnType type, T[] values)
        {
            Host.Assert(values.Length == type.VectorSize);

            var mask = new BitArray(values.Length);
            var isDefault = Conversions.Instance.GetIsDefaultPredicate<T>(type.ItemType);

            // A new BitArray starts out all false, so storing the predicate result directly is
            // equivalent to setting only the bits for which it holds.
            for (int i = 0; i < values.Length; i++)
            {
                mask[i] = isDefault(ref values[i]);
            }

            return mask;
        }
        /// <summary>
        /// Collects slot indices of <paramref name="src"/> into <paramref name="indices"/>, which is
        /// cleared first. On return <paramref name="sense"/> says what was collected: <c>true</c> means
        /// the indices of NA values, <c>false</c> means the indices of explicitly stored values that are
        /// not NA. The <c>false</c> form is produced only when <paramref name="src"/> is sparse and
        /// <paramref name="defaultIsNA"/> is set.
        /// </summary>
        /// <param name="src">Vector to scan.</param>
        /// <param name="isNA">Predicate recognizing NA values; must not be null.</param>
        /// <param name="defaultIsNA">Whether the default value counts as NA.</param>
        /// <param name="indices">Receives the collected indices; must not be null.</param>
        /// <param name="sense">Whether <paramref name="indices"/> lists NAs (<c>true</c>) or non-NAs (<c>false</c>).</param>
        private void FindNAs <T>(ref VBuffer <T> src, RefPredicate <T> isNA, bool defaultIsNA, List <int> indices, out bool sense)
        {
            Host.AssertValue(isNA);
            Host.AssertValue(indices);

            indices.Clear();
            var values = src.Values;
            int count  = src.Count;

            if (src.IsDense)
            {
                // Dense: the position in the values array is the slot index.
                for (int slot = 0; slot < count; slot++)
                {
                    if (isNA(ref values[slot]))
                    {
                        indices.Add(slot);
                    }
                }
                sense = true;
                return;
            }

            // Sparse: when the default is not NA, list the stored NAs. When the default is NA, list
            // the stored non-NAs instead, which the caller learns from sense being false.
            bool collectNAs = !defaultIsNA;
            var slots = src.Indices;
            for (int ii = 0; ii < count; ii++)
            {
                if (isNA(ref values[ii]) == collectNAs)
                {
                    indices.Add(slots[ii]);
                }
            }
            sense = collectNAs;
        }
Esempio n. 7
0
        /// <summary>
        /// Parses <paramref name="srcStr"/> into a value of type <typeparamref name="T"/> using the
        /// standard text conversion for the item type of <paramref name="dstType"/>.
        /// </summary>
        /// <param name="srcStr">Text to convert; null or empty yields <c>default(T)</c>.</param>
        /// <param name="dstType">Destination column type; its item type selects the conversion.</param>
        /// <param name="isNA">Predicate used to detect a failed conversion (a result that is NA).</param>
        /// <returns>The converted value, boxed as <see cref="object"/>.</returns>
        /// <remarks>Throws through <c>Contracts.Except</c> when the converted value is NA.</remarks>
        private object GetSpecifiedValue <T>(string srcStr, ColumnType dstType, RefPredicate <T> isNA)
        {
            T result = default(T);
            if (string.IsNullOrEmpty(srcStr))
            {
                return result;
            }

            // Handles converting input strings to correct types.
            var text = new DvText(srcStr);
            bool isIdentity;
            var parse = Conversions.Instance.GetStandardConversion<DvText, T>(TextType.Instance, dstType.ItemType, out isIdentity);
            parse(ref text, ref result);

            // An NA result means the text had no legitimate representation in the destination type.
            if (isNA(ref result))
            {
                throw Contracts.Except("No conversion of '{0}' to '{1}'", srcStr, dstType.ItemType);
            }

            return result;
        }
Esempio n. 8
0
        /// <summary>
        /// Densifies <paramref name="src"/>, records in <c>_repIsDefault[iinfo]</c> which of its slots
        /// hold a default value for the item type of <paramref name="srcType"/>, and returns the values
        /// as an array whose length is the vector size of <paramref name="srcType"/>.
        /// </summary>
        /// <param name="src">Vector of values; its length must equal the vector size of <paramref name="srcType"/>.</param>
        /// <param name="srcType">Vector column type describing <paramref name="src"/>.</param>
        /// <param name="iinfo">Index into <c>_repIsDefault</c> at which the default-slot mask is stored.</param>
        /// <returns>The dense values array, resized to the vector size.</returns>
        private T[] GetValuesArray <T>(VBuffer <T> src, ColumnType srcType, int iinfo)
        {
            Host.Assert(srcType.IsVector);
            Host.Assert(srcType.VectorSize == src.Length);

            VBufferUtils.Densify<T>(ref src);
            var isDefault = Conversions.Instance.GetIsDefaultPredicate<T>(srcType.ItemType);

            // Publish the mask first, then fill it in through the local reference.
            var defaultMask = new BitArray(srcType.VectorSize);
            _repIsDefault[iinfo] = defaultMask;
            for (int i = 0; i < src.Length; i++)
            {
                if (isDefault(ref src.Values[i]))
                {
                    defaultMask.Set(i, true);
                }
            }

            // Resize so the returned array's length equals the vector size.
            T[] result = src.Values;
            Array.Resize<T>(ref result, srcType.VectorSize);
            Host.Assert(result.Length == src.Length);
            return result;
        }
Esempio n. 9
0
 /// <summary>
 /// Creates a cursor over <paramref name="input"/> that reads the parent's source column and
 /// prepares the predicate to evaluate on its values. When the parent has a converter, the
 /// predicate is wrapped so that each value is converted from <typeparamref name="T1"/> to
 /// <typeparamref name="T2"/> before the parent's predicate is applied.
 /// </summary>
 /// <param name="parent">Owning implementation supplying the host, schema, column index, predicate and converter.</param>
 /// <param name="input">Input cursor forwarded to the base constructor.</param>
 /// <param name="active">Active-column flags forwarded to the base constructor.</param>
 public RowCursor(Impl <T1, T2> parent, IRowCursor input, bool[] active)
     : base(parent.Host, input, parent.Schema, active)
 {
     _getSrc = Input.GetGetter <T1>(parent._colSrc);
     if (parent._conv == null)
     {
         // No converter: T1 and T2 are the same type, so the parent's predicate can be
         // reinterpreted as a predicate over T1 by going through Delegate.
         Ch.Assert(typeof(T1) == typeof(T2));
         _pred = (RefPredicate <T1>)(Delegate) parent._pred;
     }
     else
     {
         // Scratch slot for the converted value, shared by every invocation of the wrapper.
         T2  val  = default(T2);
         var pred = parent._pred;
         var conv = parent._conv;
         _pred =
             (ref T1 src) =>
         {
             // Convert the value handed to the predicate rather than cursor state, so the
             // delegate gives the right answer no matter which storage the caller passes in.
             conv(ref src, ref val);
             return(pred(ref val));
         };
     }
 }
Esempio n. 10
0
 /// <summary>
 /// Initializes the vector-valued variant: the cursor, the source getter and the predicate are
 /// all handed to the base constructor unchanged.
 /// </summary>
 /// <param name="cursor">Cursor forwarded to the base constructor.</param>
 /// <param name="getSrc">Getter for the source vector, forwarded to the base constructor.</param>
 /// <param name="hasBad">Predicate over the source vector, forwarded to the base constructor.</param>
 public ValueVec(RowCursor cursor, ValueGetter <VBuffer <T> > getSrc, RefPredicate <VBuffer <T> > hasBad)
     : base(cursor, getSrc, hasBad)
 {
     // Presumably GetValue is an instance method, so this caches a delegate to it once
     // per instance -- TODO confirm against the declaration of _getter.
     _getter = GetValue;
 }
Esempio n. 11
0
 /// <summary>
 /// Initializes the single-valued variant: the cursor, the source getter and the predicate are
 /// all handed to the base constructor unchanged.
 /// </summary>
 /// <param name="cursor">Cursor forwarded to the base constructor.</param>
 /// <param name="getSrc">Getter for the source value, forwarded to the base constructor.</param>
 /// <param name="hasBad">Predicate over the source value, forwarded to the base constructor.</param>
 public ValueOne(RowCursor cursor, ValueGetter <T> getSrc, RefPredicate <T> hasBad)
     : base(cursor, getSrc, hasBad)
 {
     // Presumably GetValue is an instance method, so this caches a delegate to it once
     // per instance -- TODO confirm against the declaration of _getter.
     _getter = GetValue;
 }
Esempio n. 12
0
        /// <summary>
        /// Fills values for vectors where there are slot-wise replacement values: every NA in
        /// <paramref name="src"/> is replaced by the entry of <paramref name="rep"/> for its slot and
        /// the result is written to <paramref name="dst"/>, reusing the buffers of
        /// <paramref name="dst"/> when they are large enough.
        /// </summary>
        /// <param name="src">Input vector.</param>
        /// <param name="dst">Output vector; has the same length as <paramref name="src"/>.</param>
        /// <param name="isNA">Predicate recognizing NA values; must not be null.</param>
        /// <param name="rep">Replacement value per slot; its length must equal the length of <paramref name="src"/>.</param>
        /// <param name="repIsDefault">Per slot, whether the replacement is the default value. For a
        /// sparse <paramref name="src"/>, an NA whose replacement is default is omitted from the output.</param>
        private void FillValues <T>(ref VBuffer <T> src, ref VBuffer <T> dst, RefPredicate <T> isNA, T[] rep, BitArray repIsDefault)
        {
            Host.AssertValue(rep);
            Host.Assert(rep.Length == src.Length);
            Host.AssertValue(repIsDefault);
            Host.Assert(repIsDefault.Length == src.Length);
            Host.AssertValue(isNA);

            int length    = src.Length;
            int count     = src.Count;
            var inValues  = src.Values;
            var inIndices = src.Indices;
            Host.Assert(Utils.Size(inValues) >= count);

            var outValues  = dst.Values;
            var outIndices = dst.Indices;

            // If the values array is not large enough, allocate sufficient space.
            Utils.EnsureSize(ref outValues, count, length, keepOld: false);

            // Number of entries written to the output.
            int written = 0;

            if (!src.IsDense)
            {
                // The source vector is sparse.
                Host.Assert(Utils.Size(inIndices) >= count);
                Host.Assert(count < length);

                // Allocate more space if necessary.
                // REVIEW: One thing that changing the code to simply ensure that there are count indices in the arrays
                // does is over-allocate space if the replacement value is the default value in a dataset with a
                // significant amount of NA values -- is it worth handling allocation of memory for this case?
                Utils.EnsureSize(ref outIndices, count, length, keepOld: false);

                // Note: previousSlot is only used for asserts.
                int previousSlot = -1;
                for (int ii = 0; ii < count; ii++)
                {
                    Host.Assert(written <= ii);
                    var value = inValues[ii];
                    int slot  = inIndices[ii];
                    Host.Assert(previousSlot < slot && slot < length);
                    previousSlot = slot;

                    if (isNA(ref value))
                    {
                        if (repIsDefault[slot])
                        {
                            // Allow for further sparsification: a default replacement need not be stored.
                            continue;
                        }
                        value = rep[slot];
                    }

                    outValues[written]  = value;
                    outIndices[written] = slot;
                    written++;
                }
                Host.Assert(written <= count);
            }
            else
            {
                // The source vector is dense.
                Host.Assert(length == count);

                // The output for dense inputs is always dense.
                // Note: Theoretically, one could imagine a dataset with NA values that one wished to replace with
                // the default value, resulting in more than half of the indices being the default value.
                // In this case, changing the dst vector to be sparse would be more memory efficient -- the current
                // decision is it is not worth handling this case at the expense of running checks that will almost
                // always not be triggered.
                for (int slot = 0; slot < count; slot++)
                {
                    var value  = inValues[slot];
                    T   filled = isNA(ref value) ? rep[slot] : value;
                    outValues[slot] = filled;
                }
                written = count;
            }

            Host.Assert(0 <= written);
            dst = new VBuffer<T>(length, written, outValues, outIndices);
        }
Esempio n. 13
0
        /// <summary>
        /// Removes the NA entries of <paramref name="src"/> and writes the shortened vector to
        /// <paramref name="dst"/>. Each removed entry reduces the vector length by one, and for sparse
        /// input the indices of the surviving entries are shifted down by the number of entries
        /// removed before them.
        /// </summary>
        /// <param name="src">Input vector. When it contains no NA it is swapped with <paramref name="dst"/>
        /// rather than copied, so on return it holds what <paramref name="dst"/> held before.</param>
        /// <param name="dst">Output vector; its buffers are reused when large enough.</param>
        /// <param name="isNA">Predicate recognizing NA values; must not be null.</param>
        private void DropNAs <TDst>(ref VBuffer <TDst> src, ref VBuffer <TDst> dst, RefPredicate <TDst> isNA)
        {
            Host.AssertValue(isNA);

            // First pass: count the explicitly stored entries that survive.
            int kept = 0;
            for (int i = 0; i < src.Count; i++)
            {
                if (isNA(ref src.Values[i]))
                {
                    continue;
                }
                kept++;
            }
            Host.Assert(kept <= src.Count);

            if (kept == 0)
            {
                // Every stored entry is dropped; only the slots that were not stored remain.
                dst = new VBuffer<TDst>(src.Length - src.Count, 0, dst.Values, dst.Indices);
                return;
            }

            if (kept == src.Count)
            {
                // Nothing to drop: hand the source buffer over as the result.
                Utils.Swap(ref src, ref dst);
                return;
            }

            var outValues = dst.Values;
            if (Utils.Size(outValues) < kept)
            {
                outValues = new TDst[kept];
            }

            int written = 0;

            if (src.IsDense)
            {
                for (int i = 0; i < src.Count; i++)
                {
                    if (isNA(ref src.Values[i]))
                    {
                        continue;
                    }
                    outValues[written++] = src.Values[i];
                }
                Host.Assert(written == kept);
                dst = new VBuffer<TDst>(kept, outValues, dst.Indices);
                return;
            }

            var outIndices = dst.Indices;
            if (Utils.Size(outIndices) < kept)
            {
                outIndices = new int[kept];
            }

            // Number of NA entries seen so far; each one shifts every later index down by one.
            int dropped = 0;
            for (int i = 0; i < src.Count; i++)
            {
                if (isNA(ref src.Values[i]))
                {
                    dropped++;
                    continue;
                }
                outValues[written]  = src.Values[i];
                outIndices[written] = src.Indices[i] - dropped;
                written++;
            }
            Host.Assert(written == kept);
            Host.Assert(dropped == src.Count - kept);
            dst = new VBuffer<TDst>(src.Length - dropped, kept, outValues, outIndices);
        }
Esempio n. 14
0
        /// <summary>
        /// Writes to <paramref name="dst"/> a vector holding only the explicitly stored entries of
        /// <paramref name="src"/> that are not NA, packed contiguously. Slots that a sparse
        /// <paramref name="src"/> does not store are dropped as well, since here the default value
        /// equals NA.
        /// </summary>
        /// <param name="src">Input vector. When none of its stored entries is NA it is swapped with
        /// <paramref name="dst"/> rather than copied.</param>
        /// <param name="dst">Output vector; its buffers are reused when large enough.</param>
        /// <param name="isNA">Predicate recognizing NA values; must not be null.</param>
        private void DropNAsAndDefaults <TDst>(ref VBuffer <TDst> src, ref VBuffer <TDst> dst, RefPredicate <TDst> isNA)
        {
            Host.AssertValue(isNA);

            // First pass: count the explicitly stored entries that survive.
            int kept = 0;
            for (int i = 0; i < src.Count; i++)
            {
                if (isNA(ref src.Values[i]))
                {
                    continue;
                }
                kept++;
            }
            Host.Assert(kept <= src.Count);

            if (kept == 0)
            {
                dst = new VBuffer<TDst>(0, dst.Values, dst.Indices);
                return;
            }

            if (kept == src.Count)
            {
                // No stored entry is NA: take over the source buffer, and if it was sparse
                // reinterpret its stored values as a vector of exactly that many slots.
                Utils.Swap(ref src, ref dst);
                if (!dst.IsDense)
                {
                    Host.Assert(dst.Count == kept);
                    dst = new VBuffer<TDst>(dst.Count, dst.Values, dst.Indices);
                }
                return;
            }

            var outValues = dst.Values;
            if (Utils.Size(outValues) < kept)
            {
                outValues = new TDst[kept];
            }

            // Densifying sparse vectors since default value equals NA and hence should be dropped.
            int written = 0;
            for (int i = 0; i < src.Count; i++)
            {
                if (isNA(ref src.Values[i]))
                {
                    continue;
                }
                outValues[written] = src.Values[i];
                written++;
            }
            Host.Assert(written == kept);

            dst = new VBuffer<TDst>(kept, outValues, dst.Indices);
        }
Esempio n. 15
0
 /// <summary>
 /// Factory that forwards all of its arguments to the <c>Impl&lt;T1, T2&gt;</c> constructor.
 /// Presumably it exists so the implementation can be instantiated for type arguments known only
 /// at run time (by binding this generic method via reflection) -- TODO confirm against callers.
 /// </summary>
 private static Impl <T1, T2> CreateImpl <T1, T2>(
     IHostEnvironment env, string name, IDataView input, int colSrc,
     RefPredicate <T2> pred, ValueMapper <T1, T2> conv)
     => new Impl <T1, T2>(env, name, input, colSrc, pred, conv);