/// <summary>
/// Attempts to load one text vocabulary per column from the sub-models named
/// "Vocabulary_000", "Vocabulary_001", ... up to (but excluding) <paramref name="infoLim"/>.
/// </summary>
/// <param name="host">Host used to open the progress channel and to create the codec factory.</param>
/// <param name="ctx">Load context whose sub-models are probed.</param>
/// <param name="infoLim">Exclusive upper bound on the column index; also the length of the output arrays.</param>
/// <param name="keyValues">
/// Receives the loaded vocabularies, or null if the loading callback never ran.
/// Slots whose callback did not run keep their default value.
/// </param>
/// <param name="kvTypes">
/// Receives a text vector type per loaded vocabulary (sized to that vocabulary's length),
/// or null if the loading callback never ran.
/// </param>
public static void LoadAll(IHost host, ModelLoadContext ctx, int infoLim, out VBuffer<ReadOnlyMemory<char>>[] keyValues, out ColumnType[] kvTypes)
        {
            Contracts.AssertValue(host);
            host.AssertValue(ctx);

            using (var ch = host.Start("LoadTextValues"))
            {
                const string subModelNameFormat = "Vocabulary_{0:000}";

                // These stay null until the first sub-model callback runs, so callers
                // can tell "nothing was stored" apart from "stored but empty".
                VBuffer<ReadOnlyMemory<char>>[] vocabularies = null;
                ColumnType[] vocabularyTypes = null;
                CodecFactory codecs = null;

                for (int iinfo = 0; iinfo < infoLim; iinfo++)
                {
                    string subModelName = string.Format(subModelNameFormat, iinfo);

                    // The return value is deliberately ignored: a missing vocabulary
                    // simply leaves the corresponding slot untouched.
                    ctx.TryProcessSubModel(subModelName, c =>
                    {
                        if (vocabularies == null)
                        {
                            // First vocabulary encountered: allocate everything at once.
                            vocabularies    = new VBuffer<ReadOnlyMemory<char>>[infoLim];
                            vocabularyTypes = new ColumnType[infoLim];
                            codecs          = new CodecFactory(host);
                        }

                        Load(ch, c, codecs, ref vocabularies[iinfo]);

                        int termCount = vocabularies[iinfo].Length;
                        vocabularyTypes[iinfo] = new VectorType(TextType.Instance, termCount);
                    });
                }

                keyValues = vocabularies;
                kvTypes   = vocabularyTypes;
                ch.Done();
            }
        }
Exemple #2
0
        /// <summary>
        /// Deserialization constructor. After the base class has consumed its part of
        /// <paramref name="ctx"/>, this reads the stop word pool from the "Stopwords"
        /// sub-model and seals the metadata.
        /// </summary>
        /// <param name="host">Host forwarded to the base constructor.</param>
        /// <param name="ctx">Load context positioned at this transform's model.</param>
        /// <param name="input">Input data view forwarded to the base constructor.</param>
        /// <remarks>
        /// Throws a decode exception when the "Stopwords" sub-model is not processed
        /// or when any of the format checks below fails.
        /// </remarks>
        private CustomStopWordsRemoverTransform(IHost host, ModelLoadContext ctx, IDataView input)
            : base(host, ctx, input, TestIsTextVector)
        {
            Host.AssertValue(ctx);

            using (var ch = Host.Start("Deserialization"))
            {
                // *** Binary format ***
                // <base>
                ch.AssertNonEmpty(Infos);

                const string stopwordsDir = "Stopwords";
                NormStr.Pool loadedPool = null;

                bool subModelFound = ctx.TryProcessSubModel(stopwordsDir, c =>
                {
                    Host.CheckValue(c, nameof(ctx));
                    c.CheckAtModel(GetStopwrodsManagerVersionInfo());

                    // *** Binary format ***
                    // int: number of stopwords
                    // int[]: stopwords string ids

                    // NOTE(review): the count and the strings are read through the outer
                    // 'ctx', not the sub-model context 'c'. That is kept exactly as found;
                    // whether it is intentional (e.g. mirroring how the data was written)
                    // cannot be determined from this block - confirm against the matching
                    // save code before changing it.
                    int stopwordCount = ctx.Reader.ReadInt32();
                    Host.CheckDecode(stopwordCount > 0);

                    loadedPool = new NormStr.Pool();
                    for (int expectedId = 0; expectedId < stopwordCount; expectedId++)
                    {
                        // Ids must come out sequentially, in the order the strings are read.
                        var entry = loadedPool.Add(ctx.LoadString());
                        Host.CheckDecode(entry.Id == expectedId);
                    }

                    // Every stop word must be distinct...
                    Host.CheckDecode(loadedPool.Count == stopwordCount);
                    // ...and the empty string must not be among them.
                    Host.CheckDecode(loadedPool.Get("") == null);
                });

                if (!subModelFound)
                {
                    throw Host.ExceptDecode();
                }

                _stopWordsMap = loadedPool;
                ch.Done();
            }

            Metadata.Seal();
        }
        /// <summary>
        /// Deserialization constructor. Reads the per-column text-metadata flags (when the
        /// model version stores them), loads one term map per column from the "Vocabulary"
        /// sub-model, binds each map to its column, and computes the output types.
        /// </summary>
        /// <param name="host">Host forwarded to the base constructor and to the term map loaders.</param>
        /// <param name="ctx">Load context positioned at this transform's model.</param>
        /// <param name="input">Input data view forwarded to the base constructor.</param>
        /// <remarks>
        /// Throws a decode exception when the "Vocabulary" sub-model is not processed or
        /// when the stored number of term maps differs from the number of columns.
        /// </remarks>
        private TermTransform(IHost host, ModelLoadContext ctx, IDataView input)
            : base(host, ctx, input, TestIsKnownDataKind)
        {
            Host.AssertValue(ctx);

            // *** Binary format ***
            // for each term map:
            //   bool(byte): whether this column should present key value metadata as text

            int columnCount = Infos.Length;
            Host.Assert(columnCount > 0);

            if (ctx.Header.ModelVerWritten < VerNonTextTypesSupported)
            {
                // Models written before this version carry no flags. No need to set
                // anything in this case: they're all text.
                _textMetadata = new bool[columnCount];
            }
            else
            {
                _textMetadata = ctx.Reader.ReadBoolArray(columnCount);
            }

            const string dir = "Vocabulary";

            TermMap[] loadedMaps = new TermMap[columnCount];
            bool subModelFound = ctx.TryProcessSubModel(dir, c =>
            {
                // *** Binary format ***
                // int: number of term maps (should equal number of columns)
                // for each term map:
                //   byte: code identifying the term map type (0 text, 1 codec)
                //   <data>: type specific format, see TermMap save/load methods

                Host.CheckValue(c, nameof(ctx));
                c.CheckAtModel(GetTermManagerVersionInfo());

                int storedMapCount = c.Reader.ReadInt32();
                Host.CheckDecode(storedMapCount == columnCount);

                // The loader is chosen once from the sub-model's written version;
                // below that version every map goes through the text-only loader.
                bool useGeneralLoader = c.Header.ModelVerWritten >= VerManagerNonTextTypesSupported;
                for (int col = 0; col < columnCount; ++col)
                {
                    if (useGeneralLoader)
                    {
                        loadedMaps[col] = TermMap.Load(c, host, this);
                    }
                    else
                    {
                        loadedMaps[col] = TermMap.TextImpl.Create(c, host);
                    }
                }
            });

#pragma warning disable TLC_NoMessagesForLoadContext // Vaguely useful.
            if (!subModelFound)
            {
                throw Host.ExceptDecode("Missing {0} model", dir);
            }
#pragma warning restore TLC_NoMessagesForLoadContext

            // Bind each loaded map to its column index.
            _termMap = new BoundTermMap[columnCount];
            for (int col = 0; col < columnCount; ++col)
            {
                _termMap[col] = loadedMaps[col].Bind(this, col);
            }

            _types = ComputeTypesAndMetadata();
        }