// Deserialization constructor: rebuilds the transform from a serialized model.
// The reads below must exactly mirror the writes in the corresponding save path.
private ImageResizerTransform(IHost host, ModelLoadContext ctx, IDataView input)
    : base(host, ctx, input, t => t is ImageType ? null : "Expected Image type")
{
    Host.AssertValue(ctx);

    // *** Binary format ***
    // <prefix handled in static Create method>
    // <base>
    // for each added column
    //   int: width
    //   int: height
    //   byte: scaling kind
    //   byte: anchor  (NOTE(review): read below but absent from the original format comment)
    Host.AssertNonEmpty(Infos);
    _exes = new ColInfoEx[Infos.Length];
    for (int i = 0; i < _exes.Length; i++)
    {
        // Each decoded value is validated so a corrupted model fails cleanly.
        int width = ctx.Reader.ReadInt32();
        Host.CheckDecode(width > 0);
        int height = ctx.Reader.ReadInt32();
        Host.CheckDecode(height > 0);
        var scale = (ResizingKind)ctx.Reader.ReadByte();
        Host.CheckDecode(Enum.IsDefined(typeof(ResizingKind), scale));
        var anchor = (Anchor)ctx.Reader.ReadByte();
        Host.CheckDecode(Enum.IsDefined(typeof(Anchor), anchor));
        _exes[i] = new ColInfoEx(width, height, scale, anchor);
    }
    Metadata.Seal();
}
/// <summary>
/// Public constructor corresponding to SignatureDataTransform.
/// Builds per-column configuration, then trains the whitening models eagerly.
/// </summary>
public WhiteningTransform(IHostEnvironment env, Arguments args, IDataView input)
    : base(env, RegistrationName, Contracts.CheckRef(args, nameof(args)).Column, input, TestColumn)
{
    Host.AssertNonEmpty(Infos);
    Host.Assert(Infos.Length == Utils.Size(args.Column));

    // One extra-info entry per mapped column, combining per-column and global arguments.
    var exes = new ColInfoEx[Infos.Length];
    for (int iinfo = 0; iinfo < exes.Length; iinfo++)
        exes[iinfo] = new ColInfoEx(args.Column[iinfo], args, Infos[iinfo]);
    _exes = exes;

    using (var ch = Host.Start("Training"))
    {
        // The training process will load all data into memory and perform the
        // whitening computation for each resulting column separately.
        _models = new Float[Infos.Length][];
        InvModels = new Float[Infos.Length][];
        int[] rowCounts;
        var columnData = LoadDataAsDense(ch, out rowCounts);
        TrainModels(columnData, rowCounts, ch);
    }
    Metadata.Seal();
}
// Deserialization constructor: rebuilds the transform (including the trained
// whitening matrices) from a serialized model. Read order must match the writer.
private WhiteningTransform(IHost host, ModelLoadContext ctx, IDataView input)
    : base(host, ctx, input, TestColumn)
{
    Host.AssertValue(ctx);

    // *** Binary format ***
    // <prefix handled in static Create method>
    // <base>
    // foreach added column
    //   ColInfoEx
    // foreach model
    //   whitening matrix
    //   recovery matrix
    Host.AssertNonEmpty(Infos);
    _exes = new ColInfoEx[Infos.Length];
    for (int i = 0; i < _exes.Length; i++)
        _exes[i] = new ColInfoEx(ctx, Infos[i]);
    _models = new Float[Infos.Length][];
    InvModels = new Float[Infos.Length][];
    for (int i = 0; i < Infos.Length; i++)
    {
        _models[i] = ctx.Reader.ReadFloatArray();
        ValidateModel(Host, _models[i], Infos[i].TypeSrc);
        // The inverse (recovery) matrix is only present when the column opted to save it.
        if (_exes[i].SaveInv)
        {
            InvModels[i] = ctx.Reader.ReadFloatArray();
            ValidateModel(Host, InvModels[i], Infos[i].TypeSrc);
        }
    }
    Metadata.Seal();
}
// Constructor for the ordered-vector invert-hash implementation: caches the typed
// source getter and the destination hash getter supplied by the caller.
public ImplVecOrdered(IRow row, OneToOneTransformBase.ColInfo info, ColInfoEx ex, int invertHashMaxCount, Delegate dstGetter)
    : base(row, info, ex, invertHashMaxCount)
{
    // The destination getter must produce a vector of hashed (uint) values.
    _dstGetter = dstGetter as ValueGetter<VBuffer<uint>>;
    Contracts.AssertValue(_dstGetter);
    _srcGetter = Row.GetGetter<VBuffer<T>>(_info.Source);
}
// Deserialization constructor: rebuilds per-column configuration, the trained
// ngram pools and (for newer model versions) inverse document frequencies.
private NgramCountingTransformer(IHost host, ModelLoadContext ctx, IDataView input)
    : base(host, ctx, input, TestType)
{
    Host.AssertValue(ctx);

    // *** Binary format ***
    // <prefix handled in static Create method>
    // <base>
    // for each column
    //   ColInfoEx
    //   the ngram SequencePool
    //   the ngram inverse document frequencies
    _exes = new ColInfoEx[Infos.Length];
    _ngramMaps = new SequencePool[Infos.Length];
    _invDocFreqs = new double[Infos.Length][];
    for (int i = 0; i < Infos.Length; i++)
    {
        // Older models (pre-TF-IDF) have no inverse-document-frequency payload.
        _exes[i] = new ColInfoEx(ctx, ctx.Header.ModelVerWritten >= VerTfIdfSupported);
        _ngramMaps[i] = new SequencePool(ctx.Reader);
        if (ctx.Header.ModelVerWritten >= VerTfIdfSupported)
        {
            _invDocFreqs[i] = ctx.Reader.ReadDoubleArray();
            // IDF values can never be negative; reject corrupted models.
            for (int j = 0; j < Utils.Size(_invDocFreqs[i]); j++)
                Host.CheckDecode(_invDocFreqs[i][j] >= 0);
        }
    }
    InitColumnTypeAndMetadata(out _types, out _slotNamesTypes);
}
// Deserialization constructor: reads the min/max slot ranges for each column and
// recomputes the derived drop information and output types.
private DropSlotsTransform(IHost host, ModelLoadContext ctx, IDataView input)
    : base(host, ctx, input, null)
{
    Host.AssertValue(ctx);

    // *** Binary format ***
    // <base>
    // for each added column
    //   int[]: slotsMin
    //   int[]: slotsMax (no count)
    Host.AssertNonEmpty(Infos);
    var size = Infos.Length;
    _exes = new ColInfoEx[size];
    for (int i = 0; i < size; i++)
    {
        int[] slotsMin = ctx.Reader.ReadIntArray();
        Host.CheckDecode(Utils.Size(slotsMin) > 0);
        // slotsMax is stored without a leading count; it must pair 1:1 with slotsMin.
        int[] slotsMax = ctx.Reader.ReadIntArray(slotsMin.Length);
        bool suppressed;
        ColumnType typeDst;
        SlotDropper slotDropper = new SlotDropper(Infos[i].TypeSrc.ValueCount, slotsMin, slotsMax);
        int[] categoricalRanges;
        // Derive the output type, whether the column is fully suppressed, and any
        // surviving categorical ranges from the slot ranges just read.
        ComputeType(input.Schema, slotsMin, slotsMax, i, slotDropper, out suppressed, out typeDst, out categoricalRanges);
        _exes[i] = new ColInfoEx(slotDropper, suppressed, typeDst, categoricalRanges);
        Host.CheckDecode(AreRangesValid(i));
    }
    Metadata.Seal();
}
// Public constructor corresponding to SignatureDataTransform. Resolves per-column
// language settings (falling back to transform-wide defaults) and verifies that
// the required stopword resources are available.
public StopWordsRemoverTransform(IHostEnvironment env, Arguments args, IDataView input)
    : base(env, RegistrationName, Contracts.CheckRef(args, nameof(args)).Column, input, TestIsTextVector)
{
    Host.AssertNonEmpty(Infos);
    Host.Assert(Infos.Length == Utils.Size(args.Column));

    using (var ch = Host.Start("construction"))
    {
        _exes = new ColInfoEx[Infos.Length];
        for (int i = 0; i < Infos.Length; i++)
        {
            var col = args.Column[i];
            // A per-column languages column overrides the transform-wide default.
            var languagesCol = string.IsNullOrWhiteSpace(col.LanguagesColumn)
                ? args.LanguagesColumn
                : col.LanguagesColumn;
            _exes[i] = new ColInfoEx(input.Schema, col.Language ?? args.Language, languagesCol);
        }
        _resourcesExist = new bool?[StopWords.Length];
        CheckResources(ch);
        ch.Done();
    }
    Metadata.Seal();
}
// Base constructor for invert-hash implementations: validates inputs and sets up
// the collector that accumulates value representations per hash bucket.
protected Impl(IRow row, ColInfo info, ColInfoEx ex, int invertHashMaxCount)
    : base(row, info, ex)
{
    Contracts.AssertValue(row);
    Contracts.AssertValue(info);
    Contracts.AssertValue(ex);

    // The collector is sized to the full hash space (2^HashBits buckets), keeping
    // at most invertHashMaxCount distinct values per bucket.
    var bucketCount = 1 << ex.HashBits;
    Collector = new InvertHashCollector<T>(bucketCount, invertHashMaxCount, GetTextMap(), GetComparer());
}
/// <summary>
/// Constructs an <see cref="InvertHashHelper"/> instance to accumulate hash/value pairs
/// from a single column as parameterized by this transform, with values fetched from
/// the row.
/// </summary>
/// <param name="row">The input source row, from which the hashed values can be fetched</param>
/// <param name="info">The column info, describing the source</param>
/// <param name="ex">The extra column info</param>
/// <param name="invertHashMaxCount">The number of input hashed values to accumulate per output hash value</param>
/// <param name="dstGetter">A hash getter, built on top of <paramref name="row"/>.</param>
public static InvertHashHelper Create(IRow row, ColInfo info, ColInfoEx ex, int invertHashMaxCount, Delegate dstGetter)
{
    // Pick the implementation: scalar source, unordered vector, or ordered vector.
    ColumnType typeSrc = info.TypeSrc;
    Type implType;
    if (!typeSrc.IsVector)
        implType = typeof(ImplOne<>);
    else if (ex.Ordered)
        implType = typeof(ImplVecOrdered<>);
    else
        implType = typeof(ImplVec<>);

    // Close the generic over the source item type and invoke the matching constructor.
    var closed = implType.MakeGenericType(typeSrc.ItemType.RawType);
    var ctor = closed.GetConstructor(new Type[]
    {
        typeof(IRow),
        typeof(OneToOneTransformBase.ColInfo),
        typeof(ColInfoEx),
        typeof(int),
        typeof(Delegate)
    });
    var instance = ctor.Invoke(new object[] { row, info, ex, invertHashMaxCount, dstGetter });
    return (InvertHashHelper)instance;
}
// Private constructor: captures the row and column descriptors shared by all
// invert-hash implementations.
private InvertHashHelper(IRow row, ColInfo info, ColInfoEx ex)
{
    Contracts.AssertValue(row);
    Contracts.AssertValue(info);

    Row = row;
    _info = info;
    _ex = ex;
    // For an ordered vector source, the slot index is part of the value's
    // representation, so it must be tracked alongside the value itself.
    _includeSlot = _ex.Ordered && _info.TypeSrc.IsVector;
}
// Public constructor corresponding to SignatureDataTransform (Lp-norm arguments).
public LpNormNormalizerTransform(IHostEnvironment env, Arguments args, IDataView input)
    : base(env, RegistrationName, env.CheckRef(args, nameof(args)).Column, input, TestIsFloatVector)
{
    Host.AssertNonEmpty(Infos);
    Host.Assert(Infos.Length == Utils.Size(args.Column));

    // Build per-column configuration from the corresponding argument entries.
    var exes = new ColInfoEx[Infos.Length];
    for (int iinfo = 0; iinfo < exes.Length; iinfo++)
        exes[iinfo] = new ColInfoEx(args.Column[iinfo], args);
    _exes = exes;
    SetMetadata();
}
// Public constructor corresponding to SignatureDataTransform.
public ImagePixelExtractorTransform(IHostEnvironment env, Arguments args, IDataView input)
    : base(env, RegistrationName, Contracts.CheckRef(args, nameof(args)).Column, input,
        t => t is ImageType ? null : "Expected Image type")
{
    Host.AssertNonEmpty(Infos);
    Host.Assert(Infos.Length == Utils.Size(args.Column));

    // One extra-info entry per column, merging per-column and global arguments.
    _exes = new ColInfoEx[Infos.Length];
    for (int iinfo = 0; iinfo < _exes.Length; iinfo++)
        _exes[iinfo] = new ColInfoEx(args.Column[iinfo], args);

    _types = ConstructTypes(true);
}
/// <summary>
/// Public constructor corresponding to SignatureDataTransform.
/// Configures each column and immediately trains the ngram maps from the input data.
/// </summary>
public NgramCountingTransformer(IHostEnvironment env, Arguments args, IDataView input)
    : base(env, RegistrationName, Contracts.CheckRef(args, nameof(args)).Column, input, TestType)
{
    Host.AssertNonEmpty(Infos);
    Host.Assert(Utils.Size(Infos) == Utils.Size(args.Column));

    var exes = new ColInfoEx[Infos.Length];
    for (int i = 0; i < exes.Length; i++)
        exes[i] = new ColInfoEx(args.Column[i], args);
    _exes = exes;

    // Training scans the data to build the ngram pools and (optionally) the
    // inverse document frequencies.
    _ngramMaps = Train(args, input, out _invDocFreqs);
    InitColumnTypeAndMetadata(out _types, out _slotNamesTypes);
}
// Public constructor corresponding to SignatureDataTransform. Each output column's
// image type is derived from the configured height and width.
public VectorToImageTransform(IHostEnvironment env, Arguments args, IDataView input)
    : base(env, RegistrationName, Contracts.CheckRef(args, nameof(args)).Column, input,
        t => t is VectorType ? null : "Expected VectorType type")
{
    Host.AssertNonEmpty(Infos);
    Host.Assert(Infos.Length == Utils.Size(args.Column));

    _exes = new ColInfoEx[Infos.Length];
    _types = new ImageType[Infos.Length];
    for (int iinfo = 0; iinfo < _exes.Length; iinfo++)
    {
        var ex = new ColInfoEx(args.Column[iinfo], args);
        _exes[iinfo] = ex;
        _types[iinfo] = new ImageType(ex.Height, ex.Width);
    }
    Metadata.Seal();
}
/// <summary>
/// Public constructor corresponding to SignatureDataTransform.
/// </summary>
public DelimitedTokenizeTransform(IHostEnvironment env, Arguments args, IDataView input)
    : base(env, RegistrationName, Contracts.CheckRef(args, nameof(args)).Column, input, TestIsTextItem)
{
    // REVIEW: Need to decide whether to inject an NA token between slots in ReadOnlyMemory inputs.
    Host.AssertNonEmpty(Infos);
    Host.Assert(Infos.Length == Utils.Size(args.Column));

    var exes = new ColInfoEx[Infos.Length];
    for (int i = 0; i < Infos.Length; i++)
        exes[i] = new ColInfoEx(args, i);
    _exes = exes;

    // Every output column is a variable-length vector of text tokens.
    _columnType = new VectorType(TextType.Instance);
    Metadata.Seal();
}
/// <summary>
/// Public constructor corresponding to SignatureDataTransform.
/// </summary>
public NgramHashingTransformer(IHostEnvironment env, Arguments args, IDataView input)
    : base(env, RegistrationName, input)
{
    Host.CheckValue(args, nameof(args));
    Host.CheckUserArg(Utils.Size(args.Column) > 0, nameof(args.Column));
    _bindings = new Bindings(args, Source.Schema, this);
    _exes = new ColInfoEx[args.Column.Length];

    // Track which columns requested invert hashing (a reverse map from hash
    // values back to the original text) and the per-column accumulation limit.
    List<int> invertIinfos = null;
    int[] invertHashMaxCounts = new int[args.Column.Length];
    for (int iinfo = 0; iinfo < _exes.Length; iinfo++)
    {
        _exes[iinfo] = new ColInfoEx(args.Column[iinfo], args);
        var invertHashMaxCount = GetAndVerifyInvertHashMaxCount(args, args.Column[iinfo], _exes[iinfo]);
        if (invertHashMaxCount > 0)
        {
            Utils.Add(ref invertIinfos, iinfo);
            invertHashMaxCounts[iinfo] = invertHashMaxCount;
        }
    }
    InitColumnTypes();
    if (Utils.Size(invertIinfos) > 0)
    {
        // Build the invert hashes if we actually had any.
        var dstSrcs = new HashSet<int>(invertIinfos.Select(i => _bindings.MapIinfoToCol(i)));
        var inputPred = _bindings.GetDependencies(dstSrcs.Contains);
        var active = _bindings.GetActive(dstSrcs.Contains);
        string[][] friendlyNames = args.Column.Select(c => c.FriendlyNames).ToArray();
        var helper = new InvertHashHelper(this, friendlyNames, inputPred, invertHashMaxCounts);
        // Drive a full pass over the data: the decorated getters feed every hashed
        // value into the helper as a side effect of being called.
        using (IRowCursor srcCursor = input.GetRowCursor(inputPred))
        using (var dstCursor = new RowCursor(this, srcCursor, active, helper.Decorate))
        {
            var allGetters = InvertHashHelper.CallAllGetters(dstCursor);
            while (dstCursor.MoveNext())
                allGetters();
        }
        _slotNames = helper.SlotNamesMetadata(out _slotNamesTypes);
    }
}
// Public constructor corresponding to SignatureDataTransform. Per-column settings
// take precedence over the transform-wide defaults.
public ImageResizerTransform(IHostEnvironment env, Arguments args, IDataView input)
    : base(env, RegistrationName, env.CheckRef(args, nameof(args)).Column, input,
        t => t is ImageType ? null : "Expected Image type")
{
    Host.AssertNonEmpty(Infos);
    Host.Assert(Infos.Length == Utils.Size(args.Column));

    _exes = new ColInfoEx[Infos.Length];
    for (int iinfo = 0; iinfo < _exes.Length; iinfo++)
    {
        var col = args.Column[iinfo];
        // Fall back to the global argument whenever the column leaves a value unset.
        var width = col.ImageWidth ?? args.ImageWidth;
        var height = col.ImageHeight ?? args.ImageHeight;
        var resizing = col.Resizing ?? args.Resizing;
        var anchor = col.CropAnchor ?? args.CropAnchor;
        _exes[iinfo] = new ColInfoEx(width, height, resizing, anchor);
    }
    Metadata.Seal();
}
/// <summary>
/// Public constructor corresponding to SignatureTokenizeTransform. It accepts arguments of type ArgumentsBase,
/// and a separate array of columns (constructed from the caller -WordBag/WordHashBag- arguments).
/// </summary>
public DelimitedTokenizeTransform(IHostEnvironment env, TokenizeArguments args, IDataView input, OneToOneColumn[] columns)
    : base(env, RegistrationName, columns, input, TestIsTextItem)
{
    Host.CheckValue(args, nameof(args));
    Host.CheckUserArg(Utils.Size(columns) > 0, nameof(Arguments.Column));

    // REVIEW: Need to decide whether to inject an NA token between slots in VBuffer<DvText> inputs.
    Host.AssertNonEmpty(Infos);
    Host.Assert(Infos.Length == Utils.Size(columns));

    // All columns share the same tokenization settings in this overload.
    var exes = new ColInfoEx[Infos.Length];
    for (int i = 0; i < Infos.Length; i++)
        exes[i] = new ColInfoEx(args);
    _exes = exes;

    _columnType = new VectorType(TextType.Instance);
    Metadata.Seal();
}
// Deserialization constructor: rebuilds per-column configuration from the model.
private ImagePixelExtractorTransform(IHost host, ModelLoadContext ctx, IDataView input)
    : base(host, ctx, input, t => t is ImageType ? null : "Expected Image type")
{
    Host.AssertValue(ctx);

    // *** Binary format ***
    // <prefix handled in static Create method>
    // <base>
    // foreach added column
    //   ColInfoEx
    Host.AssertNonEmpty(Infos);
    _exes = new ColInfoEx[Infos.Length];
    for (int i = 0; i < _exes.Length; i++)
        _exes[i] = new ColInfoEx(ctx);
    _types = ConstructTypes(false);
}
// Deserialization constructor: rebuilds per-column configuration. Older models
// (before vector-normalizer support) use a reduced ColInfoEx layout.
private LpNormNormalizerTransform(IHost host, ModelLoadContext ctx, IDataView input)
    : base(host, ctx, input, TestIsFloatItem)
{
    Host.AssertValue(ctx);

    // *** Binary format ***
    // <prefix handled in static Create method>
    // <base>
    // foreach added column
    //   ColInfoEx
    Host.AssertNonEmpty(Infos);
    _exes = new ColInfoEx[Infos.Length];
    for (int i = 0; i < _exes.Length; i++)
        _exes[i] = new ColInfoEx(ctx, ctx.Header.ModelVerWritten >= VerVectorNormalizerSupported);
    SetMetadata();
}
// Deserialization constructor: rebuilds per-column tokenization settings.
private DelimitedTokenizeTransform(IHost host, ModelLoadContext ctx, IDataView input)
    : base(host, ctx, input, TestIsTextItem)
{
    Host.AssertValue(ctx);

    // *** Binary format ***
    // <base>
    // for each added column
    //   ColInfoEx
    Host.AssertNonEmpty(Infos);
    _exes = new ColInfoEx[Infos.Length];
    for (int iinfo = 0; iinfo < _exes.Length; iinfo++)
        _exes[iinfo] = new ColInfoEx(ctx);
    // Every output column is a variable-length vector of text tokens.
    _columnType = new VectorType(TextType.Instance);
    Metadata.Seal();
}
// Deserialization constructor: rebuilds per-column configuration and derives the
// output image type from each column's stored height and width.
private VectorToImageTransform(IHost host, ModelLoadContext ctx, IDataView input)
    : base(host, ctx, input, t => t is VectorType ? null : "Expected VectorType type")
{
    Host.AssertValue(ctx);

    // *** Binary format ***
    // <prefix handled in static Create method>
    // <base>
    // foreach added column
    //   ColInfoEx
    Host.AssertNonEmpty(Infos);
    _exes = new ColInfoEx[Infos.Length];
    _types = new ImageType[Infos.Length];
    for (int i = 0; i < _exes.Length; i++)
    {
        _exes[i] = new ColInfoEx(ctx);
        _types[i] = new ImageType(_exes[i].Height, _exes[i].Width);
    }
    Metadata.Seal();
}
// Deserialization constructor: rebuilds per-column hash configuration and the
// optional key-value (text) metadata saved alongside it.
private HashTransform(IHost host, ModelLoadContext ctx, IDataView input)
    : base(host, ctx, input, TestType)
{
    Host.AssertValue(ctx);

    // *** Binary format ***
    // <base>
    // Exes
    Host.AssertNonEmpty(Infos);
    _exes = new ColInfoEx[Infos.Length];
    for (int iinfo = 0; iinfo < Infos.Length; iinfo++)
        _exes[iinfo] = new ColInfoEx(ctx);
    _types = InitColumnTypes();
    // Key-value metadata (one entry per column) is stored after the ColInfoEx block.
    TextModelHelper.LoadAll(Host, ctx, Infos.Length, out _keyValues, out _kvTypes);
    SetMetadata();
}
// Constructor taking pre-built column mappings. Each column must be of the
// concrete Column type so its language settings can be read.
public StopWordsRemovingTransformer(IHostEnvironment env, IDataView input, OneToOneColumn[] column)
    : base(env, RegistrationName, column, input, TestIsTextVector)
{
    Host.AssertNonEmpty(Infos);
    Host.Assert(Infos.Length == Utils.Size(column));
    Host.Assert(column is Column[]);

    using (var ch = Host.Start("construction"))
    {
        _exes = new ColInfoEx[Infos.Length];
        for (int i = 0; i < Infos.Length; i++)
        {
            var col = (Column)column[i];
            // Columns without an explicit language fall back to the default language.
            var language = col.Language ?? DefaultLanguage;
            _exes[i] = new ColInfoEx(input.Schema, language, col.LanguagesColumn);
        }
        _resourcesExist = new bool?[StopWords.Length];
        CheckResources(ch);
    }
    Metadata.Seal();
}
// Public constructor corresponding to SignatureDataTransform. Configures each
// column, then trains one LDA model per column over the input data.
public LdaTransform(IHostEnvironment env, Arguments args, IDataView input)
    : base(env, RegistrationName, Contracts.CheckRef(args, nameof(args)).Column, input, TestType)
{
    // FIX: args was previously dereferenced (args.Column) in the base-ctor call
    // before Host.CheckValue ran, so a null args produced a NullReferenceException
    // instead of a proper argument error. Checking via Contracts.CheckRef in the
    // base call matches every sibling transform constructor in this file.
    Host.CheckUserArg(args.NumTopic > 0, nameof(args.NumTopic), "Must be positive.");
    Host.CheckValue(input, nameof(input));
    Host.CheckUserArg(Utils.Size(args.Column) > 0, nameof(args.Column));

    _exes = new ColInfoEx[Infos.Length];
    _types = new ColumnType[Infos.Length];
    _ldas = new LdaState[Infos.Length];
    _saveText = args.OutputTopicWordSummary;
    for (int i = 0; i < Infos.Length; i++)
    {
        var ex = new ColInfoEx(Host, args.Column[i], args);
        _exes[i] = ex;
        // Each output column is a float vector with one slot per topic.
        _types[i] = new VectorType(NumberType.Float, ex.NumTopic);
    }
    using (var ch = Host.Start("Train"))
    {
        Train(ch, input, _ldas);
    }
    Metadata.Seal();
}
/// <summary>
/// Public constructor corresponding to SignatureDataTransform (GCN arguments).
/// </summary>
public LpNormNormalizerTransform(IHostEnvironment env, GcnArguments args, IDataView input)
    : base(env, RegistrationName, env.CheckRef(args, nameof(args)).Column, input, TestIsFloatVector)
{
    Host.AssertNonEmpty(Infos);
    Host.Assert(Infos.Length == Utils.Size(args.Column));

    var exes = new ColInfoEx[Infos.Length];
    for (int iinfo = 0; iinfo < exes.Length; iinfo++)
        exes[iinfo] = new ColInfoEx(args.Column[iinfo], args);
    _exes = exes;

    // REVIEW: for now check only global (default) values. Move to Bindings/ColInfoEx?
    // Warn about the likely-unintended combination of useStd without subMean.
    if (args.UseStdDev && !args.SubMean)
    {
        using (var ch = Host.Start("Argument validation"))
        {
            ch.Warning("subMean parameter is false while useStd is true. It is advisable to set subMean to true in case useStd is set to true.");
        }
    }
    SetMetadata();
}
// Constructs an LDA training state for one column, wiring all hyperparameters
// from the column configuration into the native LDA trainer.
public LdaState(IExceptionContext ectx, ColInfoEx ex, int numVocab)
    : this()
{
    Contracts.AssertValue(ectx);
    ectx.AssertValue(ex, "ex");
    ectx.Assert(numVocab >= 0);

    _numVocab = numVocab;
    InfoEx = ex;
    _ldaTrainer = new LdaSingleBox(
        ex.NumTopic,
        numVocab, /* Need to set number of vocabulary here */
        ex.AlphaSum,
        ex.Beta,
        ex.NumIter,
        ex.LikelihoodInterval,
        ex.NumThread,
        ex.MHStep,
        ex.NumSummaryTermPerTopic,
        false,
        ex.NumMaxDocToken);
}
/// <summary>
/// Public constructor corresponding to SignatureDataTransform.
/// Derives the slot ranges to drop and the resulting output type for each column.
/// </summary>
public DropSlotsTransform(IHostEnvironment env, Arguments args, IDataView input)
    : base(Contracts.CheckRef(env, nameof(env)), RegistrationName, env.CheckRef(args, nameof(args)).Column, input, null)
{
    Host.CheckNonEmpty(args.Column, nameof(args.Column));

    _exes = new ColInfoEx[Infos.Length];
    for (int iinfo = 0; iinfo < _exes.Length; iinfo++)
    {
        // Resolve the requested slot ranges for this column.
        int[] slotsMin;
        int[] slotsMax;
        GetSlotsMinMax(args.Column[iinfo], out slotsMin, out slotsMax);
        var slotDropper = new SlotDropper(Infos[iinfo].TypeSrc.ValueCount, slotsMin, slotsMax);

        // Compute the output type, whether the column is entirely suppressed,
        // and any categorical ranges that survive the drop.
        bool suppressed;
        ColumnType typeDst;
        int[] categoricalRanges;
        ComputeType(Source.Schema, slotsMin, slotsMax, iinfo, slotDropper, out suppressed, out typeDst, out categoricalRanges);

        _exes[iinfo] = new ColInfoEx(slotDropper, suppressed, typeDst, categoricalRanges);
    }
    Metadata.Seal();
}
// Deserialization constructor: rebuilds per-column language configuration and
// verifies the required stopword resources are available.
private StopWordsRemovingTransformer(IHost host, ModelLoadContext ctx, IDataView input)
    : base(host, ctx, input, TestIsTextVector)
{
    Host.AssertValue(ctx);
    using (var ch = Host.Start("Deserialization"))
    {
        // *** Binary format ***
        // <base>
        // for each added column
        //   ColInfoEx
        ch.AssertNonEmpty(Infos);
        _exes = new ColInfoEx[Infos.Length];
        for (int iinfo = 0; iinfo < _exes.Length; iinfo++)
            _exes[iinfo] = new ColInfoEx(ctx, input.Schema);
        _resourcesExist = new bool?[StopWords.Length];
        CheckResources(ch);
    }
    Metadata.Seal();
}
// Deserialization constructor: validates the float width sentinel, then rebuilds
// the bindings, per-column configuration, and slot-name metadata.
private NgramHashingTransformer(IHost host, ModelLoadContext ctx, IDataView input)
    : base(host, input)
{
    Host.AssertValue(ctx);

    // *** Binary format ***
    // int: sizeof(Float)
    // bindings
    // for each added column
    //   ColInfoEx
    // The leading sizeof(Float) acts as a sanity check on the serialized model.
    int cbFloat = ctx.Reader.ReadInt32();
    Host.CheckDecode(cbFloat == sizeof(Float));
    _bindings = new Bindings(ctx, Source.Schema, this);
    _exes = new ColInfoEx[_bindings.Infos.Length];
    for (int iinfo = 0; iinfo < _bindings.Infos.Length; iinfo++)
        _exes[iinfo] = new ColInfoEx(ctx);
    InitColumnTypes();
    // Slot names (one entry per column) follow the ColInfoEx block.
    TextModelHelper.LoadAll(Host, ctx, _exes.Length, out _slotNames, out _slotNamesTypes);
}