/// <summary>
/// Tries to load per-column text key values from the "Vocabulary_NNN" sub-models of
/// <paramref name="ctx"/>, for column indices 0 to <paramref name="infoLim"/> - 1.
/// </summary>
/// <param name="host">Host used to start the channel and to construct the codec factory.</param>
/// <param name="ctx">Model load context whose sub-models are probed.</param>
/// <param name="infoLim">Exclusive upper bound of the column indices to probe.</param>
/// <param name="keyValues">
/// Receives an array of length <paramref name="infoLim"/> holding the loaded key values,
/// or null if the sub-model callback never ran.
/// </param>
/// <param name="kvTypes">
/// Receives the matching array of text vector types, or null if the sub-model callback never ran.
/// </param>
public static void LoadAll(IHost host, ModelLoadContext ctx, int infoLim, out VBuffer<ReadOnlyMemory<char>>[] keyValues, out ColumnType[] kvTypes)
{
    Contracts.AssertValue(host);
    host.AssertValue(ctx);

    using (var ch = host.Start("LoadTextValues"))
    {
        // These stay null until the first sub-model callback runs.
        VBuffer<ReadOnlyMemory<char>>[] loadedValues = null;
        ColumnType[] loadedTypes = null;
        CodecFactory codecs = null;

        const string dirFormat = "Vocabulary_{0:000}";
        for (int col = 0; col < infoLim; col++)
        {
            string subDir = string.Format(dirFormat, col);
            ctx.TryProcessSubModel(subDir, sub =>
            {
                // Allocate the shared structures on first use only.
                if (loadedValues == null)
                {
                    loadedValues = new VBuffer<ReadOnlyMemory<char>>[infoLim];
                    loadedTypes = new ColumnType[infoLim];
                    codecs = new CodecFactory(host);
                }

                Load(ch, sub, codecs, ref loadedValues[col]);

                // The vector type's size is the number of key values just loaded.
                int valueCount = loadedValues[col].Length;
                loadedTypes[col] = new VectorType(TextType.Instance, valueCount);
            });
        }

        keyValues = loadedValues;
        kvTypes = loadedTypes;
        ch.Done();
    }
}
/// <summary>
/// Deserializing constructor: after the base class has loaded its state, reads the
/// stop word pool from the "Stopwords" sub-model and stores it in <c>_stopWordsMap</c>.
/// Throws a decode exception if the sub-model is not processed.
/// </summary>
/// <param name="host">Host passed through to the base constructor.</param>
/// <param name="ctx">Model load context to read from.</param>
/// <param name="input">Input data view passed through to the base constructor.</param>
private CustomStopWordsRemoverTransform(IHost host, ModelLoadContext ctx, IDataView input)
    : base(host, ctx, input, TestIsTextVector)
{
    Host.AssertValue(ctx);

    using (var ch = Host.Start("Deserialization"))
    {
        // *** Binary format ***
        // <base>
        ch.AssertNonEmpty(Infos);

        const string dir = "Stopwords";
        NormStr.Pool pool = null;

        if (!ctx.TryProcessSubModel(dir, c =>
            {
                Host.CheckValue(c, nameof(ctx));
                c.CheckAtModel(GetStopwrodsManagerVersionInfo());

                // *** Binary format ***
                // int: number of stopwords
                // int[]: stopwords string ids

                // NOTE(review): the count and the strings below are read through the outer
                // ctx rather than the sub-model context c. Kept exactly as-is; confirm
                // against the corresponding save code before changing.
                int count = ctx.Reader.ReadInt32();
                Host.CheckDecode(count > 0);

                pool = new NormStr.Pool();
                int index = 0;
                while (index < count)
                {
                    // Ids must be handed out in insertion order: 0, 1, 2, ...
                    var added = pool.Add(ctx.LoadString());
                    Host.CheckDecode(added.Id == index);
                    index++;
                }

                // All stopwords are distinct.
                Host.CheckDecode(pool.Count == count);
                // The deserialized pool should not have the empty string.
                Host.CheckDecode(pool.Get("") == null);
            }))
        {
            throw Host.ExceptDecode();
        }

        _stopWordsMap = pool;
        ch.Done();
    }

    Metadata.Seal();
}
/// <summary>
/// Deserializing constructor: after the base class has loaded its state, reads the
/// per-column text-metadata flags and the term maps stored in the "Vocabulary"
/// sub-model, binds each map to its column, and computes the output types.
/// Throws a decode exception if the "Vocabulary" sub-model is not processed.
/// </summary>
/// <param name="host">Host passed to the base constructor and to the term map loaders.</param>
/// <param name="ctx">Model load context to read from.</param>
/// <param name="input">Input data view passed through to the base constructor.</param>
private TermTransform(IHost host, ModelLoadContext ctx, IDataView input)
    : base(host, ctx, input, TestIsKnownDataKind)
{
    Host.AssertValue(ctx);

    // *** Binary format ***
    // for each term map:
    //   bool(byte): whether this column should present key value metadata as text

    int columnCount = Infos.Length;
    Host.Assert(columnCount > 0);

    // Older models carry no flags: the array is left at its defaults.
    // No need to set in this case. They're all text.
    _textMetadata = ctx.Header.ModelVerWritten >= VerNonTextTypesSupported
        ? ctx.Reader.ReadBoolArray(columnCount)
        : new bool[columnCount];

    const string dir = "Vocabulary";
    TermMap[] maps = new TermMap[columnCount];
    bool loaded = ctx.TryProcessSubModel(dir, c =>
    {
        // *** Binary format ***
        // int: number of term maps (should equal number of columns)
        // for each term map:
        //   byte: code identifying the term map type (0 text, 1 codec)
        //   <data>: type specific format, see TermMap save/load methods

        Host.CheckValue(c, nameof(ctx));
        c.CheckAtModel(GetTermManagerVersionInfo());

        int mapCount = c.Reader.ReadInt32();
        Host.CheckDecode(mapCount == columnCount);

        // The sub-model's written version selects the general loader or the text-only loader.
        bool supportsNonText = c.Header.ModelVerWritten >= VerManagerNonTextTypesSupported;
        for (int col = 0; col < columnCount; ++col)
        {
            if (supportsNonText)
            {
                maps[col] = TermMap.Load(c, host, this);
            }
            else
            {
                maps[col] = TermMap.TextImpl.Create(c, host);
            }
        }
    });

#pragma warning disable TLC_NoMessagesForLoadContext // Vaguely useful.
    if (!loaded)
    {
        throw Host.ExceptDecode("Missing {0} model", dir);
    }
#pragma warning restore TLC_NoMessagesForLoadContext

    // Bind every loaded map to its column index.
    _termMap = new BoundTermMap[columnCount];
    for (int col = 0; col < columnCount; ++col)
    {
        _termMap[col] = maps[col].Bind(this, col);
    }

    _types = ComputeTypesAndMetadata();
}