/// <summary>
/// Wraps the data view of <paramref name="data"/> with a lambda column mapper that reads the label
/// column and writes a Float column named <paramref name="dstName"/>. The output is 1 when
/// <paramref name="equalsTarget"/> holds for the label. Otherwise it is NaN for labels matched by the
/// type's NA predicate (only when such a predicate exists and
/// <c>Args.ImputeMissingLabelsAsNegative</c> is false), and <c>default(Float)</c> in all other cases.
/// </summary>
/// <param name="type">Type of the label column; its raw type is asserted to be <typeparamref name="T"/>.</param>
/// <param name="equalsTarget">Predicate that decides whether a label value is the target.</param>
/// <param name="data">Role-mapped data; its schema is asserted to have a label column.</param>
/// <param name="dstName">Name of the output column.</param>
/// <returns>The view produced by <c>LambdaColumnMapper.Create</c>.</returns>
protected IDataView MapLabelsCore<T>(ColumnType type, RefPredicate<T> equalsTarget, RoleMappedData data, string dstName)
{
    Host.AssertValue(type);
    Host.Assert(type.RawType == typeof(T));
    Host.AssertValue(equalsTarget);
    Host.AssertValue(data);
    Host.AssertValue(data.Schema.Label);
    Host.AssertNonWhiteSpace(dstName);

    var labelColumn = data.Schema.Label;

    RefPredicate<T> isMissing;
    if (Args.ImputeMissingLabelsAsNegative || !Conversions.Instance.TryGetIsNAPredicate(type, out isMissing))
    {
        // Either missing labels are imputed as negative, or the type has no NA predicate:
        // everything that is not the target maps to the default value.
        return LambdaColumnMapper.Create(Host, "Label mapper", data.Data, labelColumn.Name, dstName, type, NumberType.Float,
            (ref T src, ref Float dst) =>
            {
                if (equalsTarget(ref src))
                {
                    dst = 1;
                }
                else
                {
                    dst = default(Float);
                }
            });
    }

    // Missing labels are propagated as NaN; the target check still takes precedence.
    return LambdaColumnMapper.Create(Host, "Label mapper", data.Data, labelColumn.Name, dstName, type, NumberType.Float,
        (ref T src, ref Float dst) =>
        {
            if (equalsTarget(ref src))
            {
                dst = 1;
            }
            else if (isMissing(ref src))
            {
                dst = Float.NaN;
            }
            else
            {
                dst = default(Float);
            }
        });
}
/// <summary>
/// Stores the source getter and the bad-value predicate for this value, after asserting that
/// both delegates are non-null. The cursor is forwarded to the base constructor.
/// </summary>
/// <param name="cursor">Cursor passed through to the base class.</param>
/// <param name="getSrc">Getter for the source value.</param>
/// <param name="hasBad">Predicate kept in <c>_hasBad</c>.</param>
protected TypedValue(RowCursor cursor, ValueGetter<T> getSrc, RefPredicate<T> hasBad)
    : base(cursor)
{
    Contracts.AssertValue(getSrc);
    Contracts.AssertValue(hasBad);

    _hasBad = hasBad;
    _getSrc = getSrc;
}
/// <summary>
/// Creates a view over <paramref name="input"/> driven by <paramref name="predicate"/> applied to
/// column <paramref name="src"/>. When the column's actual type differs from
/// <paramref name="typeSrc"/>, a standard conversion is looked up and the implementation is
/// instantiated through reflection with the column's raw type as its first type argument.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="name">Registration name; must be non-empty.</param>
/// <param name="input">Input data view; must not be null.</param>
/// <param name="src">Name of the source column; must exist in the input schema.</param>
/// <param name="typeSrc">Input type of the predicate; its raw type must be <typeparamref name="TSrc"/>.</param>
/// <param name="predicate">Predicate over the (possibly converted) column value.</param>
/// <returns>The implementation wrapped in an <c>OpaqueDataView</c>.</returns>
public static IDataView Create<TSrc>(IHostEnvironment env, string name, IDataView input,
    string src, ColumnType typeSrc, RefPredicate<TSrc> predicate)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckNonEmpty(name, nameof(name));
    env.CheckValue(input, nameof(input));
    env.CheckNonEmpty(src, nameof(src));
    env.CheckValue(typeSrc, nameof(typeSrc));
    env.CheckValue(predicate, nameof(predicate));

    if (typeSrc.RawType != typeof(TSrc))
    {
        throw env.ExceptParam(nameof(predicate),
            "The source column type '{0}' doesn't match the input type of the predicate", typeSrc);
    }

    int colSrc;
    if (!input.Schema.TryGetColumnIndex(src, out colSrc))
    {
        throw env.ExceptParam(nameof(src), "The input data doesn't have a column named '{0}'", src);
    }

    var typeOrig = input.Schema.GetColumnType(colSrc);

    // REVIEW: Ideally this should support vector-type conversion. It currently doesn't.
    // Start from the "identity, no converter" state; it is only overwritten when the column
    // type differs from the predicate's type and a standard conversion has to be looked up.
    bool ident = true;
    Delegate conv = null;
    if (!typeOrig.SameSizeAndItemType(typeSrc) &&
        !Conversions.Instance.TryGetStandardConversion(typeOrig, typeSrc, out conv, out ident))
    {
        throw env.ExceptParam(nameof(predicate),
            "The type of column '{0}', '{1}', cannot be converted to the input type of the predicate '{2}'",
            src, typeOrig, typeSrc);
    }

    if (ident)
    {
        return new OpaqueDataView(new Impl<TSrc, TSrc>(env, name, input, colSrc, predicate));
    }

    // The column's raw type is only known at run time, so take the generic definition of
    // CreateImpl (via an arbitrary <int, int> instantiation) and close it over the real types.
    Func<IHostEnvironment, string, IDataView, int, RefPredicate<int>, ValueMapper<int, int>, Impl<int, int>> del =
        CreateImpl<int, int>;
    var method = del.GetMethodInfo()
        .GetGenericMethodDefinition()
        .MakeGenericMethod(typeOrig.RawType, typeof(TSrc));
    var converted = (IDataView)method.Invoke(null, new object[] { env, name, input, colSrc, predicate, conv });
    return new OpaqueDataView(converted);
}
/// <summary>
/// Records the source column index, the predicate and the optional converter.
/// </summary>
/// <param name="env">Host environment, forwarded to the base constructor.</param>
/// <param name="name">Name, forwarded to the base constructor.</param>
/// <param name="input">Input data view, forwarded to the base constructor.</param>
/// <param name="colSrc">Index of the source column; asserted to be within the source schema.</param>
/// <param name="pred">Predicate over values of type <typeparamref name="T2"/>.</param>
/// <param name="conv">
/// Converter from <typeparamref name="T1"/> to <typeparamref name="T2"/>. It may be null only
/// when the two types are the same (asserted).
/// </param>
public Impl(IHostEnvironment env, string name, IDataView input, int colSrc,
    RefPredicate<T2> pred, ValueMapper<T1, T2> conv = null)
    : base(env, name, input)
{
    Host.AssertValue(pred);
    Host.Assert(conv != null | typeof(T1) == typeof(T2));
    Host.Assert(0 <= colSrc);
    Host.Assert(colSrc < Source.Schema.ColumnCount);

    _conv = conv;
    _pred = pred;
    _colSrc = colSrc;
}
/// <summary>
/// Builds a bit array with one bit per entry of <paramref name="values"/>. A bit is set when the
/// is-default predicate for the item type of <paramref name="type"/> holds for that entry.
/// </summary>
/// <param name="type">Column type; its vector size is asserted to equal <c>values.Length</c>.</param>
/// <param name="values">Values to test.</param>
/// <returns>The bit array of default flags.</returns>
private BitArray ComputeDefaultSlots<T>(ColumnType type, T[] values)
{
    Host.Assert(values.Length == type.VectorSize);

    var flags = new BitArray(values.Length);
    var isDefault = Conversions.Instance.GetIsDefaultPredicate<T>(type.ItemType);

    for (int i = 0; i < values.Length; i++)
    {
        // Bits start out false, so storing the predicate result directly is equivalent to
        // setting only the true ones.
        flags[i] = isDefault(ref values[i]);
    }

    return flags;
}
/// <summary>
/// Clears <paramref name="indices"/> and fills it with the logical indices of either the NA
/// entries or the non-NA entries of <paramref name="src"/>. Which of the two was collected is
/// reported through <paramref name="sense"/>: true means NAs, false means non-NAs.
/// </summary>
/// <param name="src">Vector to scan; only its explicitly stored values are visited.</param>
/// <param name="isNA">Predicate identifying NA values.</param>
/// <param name="defaultIsNA">Whether the default value counts as NA; only consulted for sparse input.</param>
/// <param name="indices">Output list; cleared on entry.</param>
/// <param name="sense">True if NA indices were added, false if non-NA indices were added.</param>
private void FindNAs<T>(ref VBuffer<T> src, RefPredicate<T> isNA, bool defaultIsNA, List<int> indices, out bool sense)
{
    Host.AssertValue(isNA);
    Host.AssertValue(indices);

    indices.Clear();
    var values = src.Values;
    int count = src.Count;

    if (src.IsDense)
    {
        // Dense: the position in the values array is the logical index.
        for (int i = 0; i < count; i++)
        {
            if (isNA(ref values[i]))
            {
                indices.Add(i);
            }
        }
        sense = true;
        return;
    }

    // Sparse: when the default is not NA, collect the NAs (sense = true). When the default is
    // NA, collect the explicitly stored non-NAs instead (sense = false).
    bool collectNAs = !defaultIsNA;
    var slotIndices = src.Indices;
    for (int ii = 0; ii < count; ii++)
    {
        if (isNA(ref values[ii]) == collectNAs)
        {
            indices.Add(slotIndices[ii]);
        }
    }
    sense = collectNAs;
}
/// <summary>
/// Converts <paramref name="srcStr"/> to a value of type <typeparamref name="T"/> with the standard
/// text conversion for the item type of <paramref name="dstType"/>.
/// </summary>
/// <param name="srcStr">Text to convert; null or empty yields <c>default(T)</c>.</param>
/// <param name="dstType">Destination column type; its item type selects the conversion.</param>
/// <param name="isNA">Predicate used to detect a failed conversion.</param>
/// <returns>The converted value, boxed.</returns>
/// <remarks>Throws when the converted value satisfies <paramref name="isNA"/>.</remarks>
private object GetSpecifiedValue<T>(string srcStr, ColumnType dstType, RefPredicate<T> isNA)
{
    var result = default(T);
    if (string.IsNullOrEmpty(srcStr))
    {
        return result;
    }

    // Run the text through the standard text-to-T conversion.
    bool identity;
    var fromText = Conversions.Instance.GetStandardConversion<DvText, T>(TextType.Instance, dstType.ItemType, out identity);
    var text = new DvText(srcStr);
    fromText(ref text, ref result);

    // An NA result is treated as a failed conversion of the supplied string.
    if (isNA(ref result))
    {
        throw Contracts.Except("No conversion of '{0}' to '{1}'", srcStr, dstType.ItemType);
    }

    return result;
}
/// <summary>
/// Densifies <paramref name="src"/>, stores in <c>_repIsDefault[iinfo]</c> which slots hold the
/// default value, and returns the values as an array of exactly <c>srcType.VectorSize</c> elements.
/// </summary>
/// <param name="src">Vector of values (passed by value; the local copy is densified).</param>
/// <param name="srcType">Vector type of <paramref name="src"/>; its size is asserted to match.</param>
/// <param name="iinfo">Index into <c>_repIsDefault</c> at which the default flags are stored.</param>
/// <returns>The value array, resized to the vector size.</returns>
private T[] GetValuesArray<T>(VBuffer<T> src, ColumnType srcType, int iinfo)
{
    Host.Assert(srcType.IsVector);
    Host.Assert(srcType.VectorSize == src.Length);

    VBufferUtils.Densify<T>(ref src);

    var isDefault = Conversions.Instance.GetIsDefaultPredicate<T>(srcType.ItemType);
    var defaultFlags = new BitArray(srcType.VectorSize);
    _repIsDefault[iinfo] = defaultFlags;

    T[] values = src.Values;
    int length = src.Length;
    for (int slot = 0; slot < length; slot++)
    {
        if (isDefault(ref values[slot]))
        {
            defaultFlags[slot] = true;
        }
    }

    // Resize so the returned array has exactly the vector's size.
    Array.Resize<T>(ref values, srcType.VectorSize);
    Host.Assert(values.Length == src.Length);
    return values;
}
/// <summary>
/// Creates a cursor over <paramref name="input"/> for <paramref name="parent"/>. It obtains the
/// getter for the parent's source column and builds the predicate that is evaluated on raw
/// source values of type <typeparamref name="T1"/>.
/// </summary>
/// <param name="parent">Owning implementation; supplies host, schema, column index, predicate and converter.</param>
/// <param name="input">Underlying row cursor.</param>
/// <param name="active">Active-column flags, forwarded to the base constructor.</param>
public RowCursor(Impl<T1, T2> parent, IRowCursor input, bool[] active)
    : base(parent.Host, input, parent.Schema, active)
{
    _getSrc = Input.GetGetter<T1>(parent._colSrc);

    if (parent._conv == null)
    {
        // No converter means T1 and T2 are the same type, so the parent's predicate can be
        // reinterpreted directly.
        Ch.Assert(typeof(T1) == typeof(T2));
        _pred = (RefPredicate<T1>)(Delegate)parent._pred;
    }
    else
    {
        // Scratch buffer for the converted value, shared across invocations of the lambda.
        T2 val = default(T2);
        var pred = parent._pred;
        var conv = parent._conv;
        _pred =
            (ref T1 src) =>
            {
                // Convert the value that was passed in. The previous code read the _src field
                // here and ignored the lambda's own parameter, which gives the same result only
                // when the caller passes ref _src.
                conv(ref src, ref val);
                return pred(ref val);
            };
    }
}
/// <summary>
/// Creates the vector-valued variant: forwards the cursor, source getter and bad-value
/// predicate to the base class and points <c>_getter</c> at this instance's <c>GetValue</c>.
/// </summary>
/// <param name="cursor">Cursor forwarded to the base constructor.</param>
/// <param name="getSrc">Getter for the source vector.</param>
/// <param name="hasBad">Predicate over the source vector, forwarded to the base constructor.</param>
public ValueVec(RowCursor cursor, ValueGetter<VBuffer<T>> getSrc, RefPredicate<VBuffer<T>> hasBad)
    : base(cursor, getSrc, hasBad)
{
    _getter = this.GetValue;
}
/// <summary>
/// Creates the single-valued variant: forwards the cursor, source getter and bad-value
/// predicate to the base class and points <c>_getter</c> at this instance's <c>GetValue</c>.
/// </summary>
/// <param name="cursor">Cursor forwarded to the base constructor.</param>
/// <param name="getSrc">Getter for the source value.</param>
/// <param name="hasBad">Predicate over the source value, forwarded to the base constructor.</param>
public ValueOne(RowCursor cursor, ValueGetter<T> getSrc, RefPredicate<T> hasBad)
    : base(cursor, getSrc, hasBad)
{
    _getter = this.GetValue;
}
/// <summary>
/// Fills values for vectors where there are slot-wise replacement values: every NA entry of
/// <paramref name="src"/> is replaced by the entry of <paramref name="rep"/> for the same slot,
/// and the result is written to <paramref name="dst"/>.
/// </summary>
/// <param name="src">Source vector.</param>
/// <param name="dst">Destination vector; its value and index arrays are reused when large enough.</param>
/// <param name="isNA">Predicate identifying NA values.</param>
/// <param name="rep">Per-slot replacement values; length is asserted to equal the vector length.</param>
/// <param name="repIsDefault">Per-slot flags marking replacements that are the default value.</param>
private void FillValues<T>(ref VBuffer<T> src, ref VBuffer<T> dst, RefPredicate<T> isNA, T[] rep, BitArray repIsDefault)
{
    Host.AssertValue(rep);
    Host.Assert(rep.Length == src.Length);
    Host.AssertValue(repIsDefault);
    Host.Assert(repIsDefault.Length == src.Length);
    Host.AssertValue(isNA);

    int length = src.Length;
    int count = src.Count;
    var srcValues = src.Values;
    Host.Assert(Utils.Size(srcValues) >= count);
    var srcIndices = src.Indices;

    var dstValues = dst.Values;
    var dstIndices = dst.Indices;

    // Make sure the destination values array can hold every explicit source entry.
    Utils.EnsureSize(ref dstValues, count, length, keepOld: false);
    Host.Assert(Utils.Size(srcValues) >= count);

    int dstCount = 0;
    if (src.IsDense)
    {
        Host.Assert(length == count);

        // Dense input always yields dense output. Replacing NAs with the default value could in
        // theory leave most slots at the default, where a sparse result would be smaller, but
        // checking for that on every vector is not considered worth the cost.
        for (int slot = 0; slot < count; slot++)
        {
            var value = srcValues[slot];
            if (isNA(ref value))
            {
                dstValues[slot] = rep[slot];
            }
            else
            {
                dstValues[slot] = value;
            }
        }
        dstCount = count;
    }
    else
    {
        Host.Assert(Utils.Size(srcIndices) >= count);
        Host.Assert(count < length);

        // REVIEW: Reserving room for 'count' indices over-allocates when the replacement is the
        // default value and the data has a significant number of NAs -- is that case worth handling?
        Utils.EnsureSize(ref dstIndices, count, length, keepOld: false);

        // previousIndex exists only to assert that source indices are strictly increasing.
        int previousIndex = -1;
        for (int ii = 0; ii < count; ii++)
        {
            Host.Assert(dstCount <= ii);
            var value = srcValues[ii];
            int index = srcIndices[ii];
            Host.Assert(previousIndex < index & index < length);
            previousIndex = index;

            T output;
            if (!isNA(ref value))
            {
                output = value;
            }
            else if (!repIsDefault[index])
            {
                output = rep[index];
            }
            else
            {
                // NA whose replacement is the default value: leave the slot implicit, which
                // sparsifies the output further.
                continue;
            }

            dstValues[dstCount] = output;
            dstIndices[dstCount] = index;
            dstCount++;
        }
        Host.Assert(dstCount <= count);
    }

    Host.Assert(0 <= dstCount);
    dst = new VBuffer<T>(length, dstCount, dstValues, dstIndices);
}
/// <summary>
/// Writes to <paramref name="dst"/> a copy of <paramref name="src"/> with every explicitly stored
/// NA entry removed. The logical length shrinks by the number of removed entries, and the indices
/// of surviving sparse entries are shifted down accordingly.
/// </summary>
/// <param name="src">Source vector; swapped with <paramref name="dst"/> when it contains no NAs.</param>
/// <param name="dst">Destination vector; its arrays are reused when large enough.</param>
/// <param name="isNA">Predicate identifying NA values.</param>
private void DropNAs<TDst>(ref VBuffer<TDst> src, ref VBuffer<TDst> dst, RefPredicate<TDst> isNA)
{
    Host.AssertValue(isNA);

    var srcValues = src.Values;
    int srcCount = src.Count;

    // First pass: count the survivors so the trivial cases can be handled without copying.
    int kept = 0;
    for (int i = 0; i < srcCount; i++)
    {
        if (!isNA(ref srcValues[i]))
        {
            kept++;
        }
    }
    Host.Assert(kept <= srcCount);

    if (kept == 0)
    {
        // Every explicit entry is NA: only the implicit slots remain.
        dst = new VBuffer<TDst>(src.Length - srcCount, 0, dst.Values, dst.Indices);
        return;
    }

    if (kept == srcCount)
    {
        // Nothing to drop: hand the source buffer over as the result.
        Utils.Swap(ref src, ref dst);
        return;
    }

    var values = dst.Values;
    if (Utils.Size(values) < kept)
    {
        values = new TDst[kept];
    }

    int written = 0;
    if (src.IsDense)
    {
        for (int i = 0; i < srcCount; i++)
        {
            if (!isNA(ref srcValues[i]))
            {
                values[written] = srcValues[i];
                written++;
            }
        }
        Host.Assert(written == kept);
        dst = new VBuffer<TDst>(kept, values, dst.Indices);
        return;
    }

    var srcIndices = src.Indices;
    var indices = dst.Indices;
    if (Utils.Size(indices) < kept)
    {
        indices = new int[kept];
    }

    // 'dropped' is the number of NAs seen so far; each surviving index shifts down by it.
    int dropped = 0;
    for (int i = 0; i < srcCount; i++)
    {
        if (isNA(ref srcValues[i]))
        {
            dropped++;
            continue;
        }

        values[written] = srcValues[i];
        indices[written] = srcIndices[i] - dropped;
        written++;
    }
    Host.Assert(written == kept);
    Host.Assert(dropped == srcCount - kept);
    dst = new VBuffer<TDst>(src.Length - dropped, kept, values, indices);
}
/// <summary>
/// Writes to <paramref name="dst"/> a dense vector holding only the explicitly stored non-NA
/// entries of <paramref name="src"/>. Implicit (default) slots of a sparse source are not
/// carried over: the result length equals the number of surviving entries.
/// </summary>
/// <param name="src">Source vector; swapped with <paramref name="dst"/> when it contains no NAs.</param>
/// <param name="dst">Destination vector; its arrays are reused when large enough.</param>
/// <param name="isNA">Predicate identifying NA values.</param>
private void DropNAsAndDefaults<TDst>(ref VBuffer<TDst> src, ref VBuffer<TDst> dst, RefPredicate<TDst> isNA)
{
    Host.AssertValue(isNA);

    var srcValues = src.Values;
    int srcCount = src.Count;

    int kept = 0;
    for (int i = 0; i < srcCount; i++)
    {
        if (!isNA(ref srcValues[i]))
        {
            kept++;
        }
    }
    Host.Assert(kept <= srcCount);

    if (kept == 0)
    {
        dst = new VBuffer<TDst>(0, dst.Values, dst.Indices);
        return;
    }

    if (kept == srcCount)
    {
        // No explicit entry is NA: reuse the source buffer. If it was sparse, re-wrap it as a
        // dense vector of its explicit values so the implicit slots disappear.
        Utils.Swap(ref src, ref dst);
        if (!dst.IsDense)
        {
            Host.Assert(dst.Count == kept);
            dst = new VBuffer<TDst>(dst.Count, dst.Values, dst.Indices);
        }
        return;
    }

    var values = dst.Values;
    if (Utils.Size(values) < kept)
    {
        values = new TDst[kept];
    }

    // Densifying sparse vectors since default value equals NA and hence should be dropped.
    int written = 0;
    for (int i = 0; i < srcCount; i++)
    {
        if (isNA(ref srcValues[i]))
        {
            continue;
        }

        values[written] = srcValues[i];
        written++;
    }
    Host.Assert(written == kept);
    dst = new VBuffer<TDst>(kept, values, dst.Indices);
}
/// <summary>
/// Constructs an <c>Impl&lt;T1, T2&gt;</c> from the given arguments, passing each one straight
/// through to the constructor.
/// </summary>
private static Impl<T1, T2> CreateImpl<T1, T2>(
    IHostEnvironment env,
    string name,
    IDataView input,
    int colSrc,
    RefPredicate<T2> pred,
    ValueMapper<T1, T2> conv)
    => new Impl<T1, T2>(env, name, input, colSrc, pred, conv);