Beispiel #1
0
        /// <summary>
        /// Deserialization constructor: after the base class has consumed its part of
        /// <paramref name="ctx"/>, reads the per-column state in the order listed below.
        /// </summary>
        private NgramCountingTransformer(IHost host, ModelLoadContext ctx, IDataView input)
            : base(host, ctx, input, TestType)
        {
            Host.AssertValue(ctx);

            // *** Binary format ***
            // <prefix handled in static Create method>
            // <base>
            // for each column
            //   ColInfoEx
            //   the ngram SequencePool
            //   the ngram inverse document frequencies

            int columnCount = Infos.Length;

            // The inverse document frequencies are only read for models written at
            // VerTfIdfSupported or later; the same flag is forwarded to ColInfoEx.
            bool hasTfIdf = ctx.Header.ModelVerWritten >= VerTfIdfSupported;

            _exes = new ColInfoEx[columnCount];
            _ngramMaps = new SequencePool[columnCount];
            _invDocFreqs = new double[columnCount][];

            for (int col = 0; col < columnCount; col++)
            {
                _exes[col] = new ColInfoEx(ctx, hasTfIdf);
                _ngramMaps[col] = new SequencePool(ctx.Reader);

                if (!hasTfIdf)
                {
                    continue;
                }

                double[] idf = ctx.Reader.ReadDoubleArray();
                _invDocFreqs[col] = idf;

                // Utils.Size is used for the length, so a null array is simply skipped.
                // Every stored frequency must be non-negative.
                for (int k = 0, n = Utils.Size(idf); k < n; k++)
                {
                    Host.CheckDecode(idf[k] >= 0);
                }
            }

            InitColumnTypeAndMetadata(out _types, out _slotNamesTypes);
        }
Beispiel #2
0
 /// <summary>
 /// Selects the pool kind identified by <paramref name="t"/>, creating a new pool
 /// instance when the kind actually changes.
 /// </summary>
 /// <param name="t">The requested pool kind.</param>
 public void WithPool(PoolType t)
 {
     // Requesting the kind that is already active keeps the current pool instance.
     if (t == poolType)
     {
         return;
     }

     poolType = t;

     switch (t)
     {
         case PoolType.Sequence:
             pool = new SequencePool();
             break;

         case PoolType.Concurrent:
             pool = new ConcurrentPool();
             break;

         default:
             // Any other kind only updates poolType; the existing pool is left as is.
             break;
     }
 }
Beispiel #3
0
        /// <summary>
        /// Consistency check for the per-level n-gram bookkeeping: every entry of
        /// <paramref name="counts"/> must lie in [0, limit] for its level, and the entries
        /// together must add up to the number of items in <paramref name="pool"/>.
        /// </summary>
        /// <param name="counts">Number of n-grams recorded so far, one entry per level.</param>
        /// <param name="lims">Maximum number of n-grams allowed, one entry per level.</param>
        /// <param name="pool">The pool whose <c>Count</c> is compared against the summed counts.</param>
        private void AssertValid(int[] counts, int[] lims, SequencePool pool)
        {
            // The original also tallied how many levels had reached their limit, but the
            // tally was never read; it has been removed as dead code.
            int count = 0;

            for (int i = 0; i < lims.Length; i++)
            {
                Host.Assert(counts[i] >= 0);
                Host.Assert(counts[i] <= lims[i]);
                count += counts[i];
            }

            Host.Assert(count == pool.Count);
        }
Beispiel #4
0
    /// <summary>
    /// Creates the instance and assigns the initial values of its fields.
    /// </summary>
    public AI5042()
    {
        // Scalar settings.
        calldelay2 = 10000;
        prev_scale = 1.0f;
        delay = 0.25f;

        // Initial call ids.
        callids = new List<int> { 3107, 3109, 3111 };

        // Two independent sequence pools.
        mSeqPool = new SequencePool();
        mSeqPool2 = new SequencePool();

        // Working collections, all starting out empty.
        poslist = new List<Vector3>();
        prevs = new List<GameObject>();
        mLines = new List<BulletRedLineCtrl>();
    }
Beispiel #5
0
        /// <summary>
        /// Builds an <see cref="NgramIdFinder"/> that adds the n-grams it is handed to
        /// <paramref name="pool"/>, subject to the per-length limits in <paramref name="lims"/>,
        /// and keeps <paramref name="counts"/> up to date.
        /// </summary>
        /// <param name="counts">Per-length counters, updated in place; index is n-gram length minus one.</param>
        /// <param name="lims">Per-length maximum number of n-grams; same indexing as <paramref name="counts"/>.</param>
        /// <param name="pool">The pool receiving the n-grams.</param>
        /// <param name="requireIdf">Whether the returned finder must report the n-gram's id.</param>
        /// <param name="host">Host used for assertions.</param>
        private static NgramIdFinder GetNgramIdFinderAdd(int[] counts, int[] lims, SequencePool pool, bool requireIdf, IHost host)
        {
            Contracts.AssertValue(host);
            host.Assert(Utils.Size(lims) > 0);
            host.Assert(Utils.Size(lims) == Utils.Size(counts));

            int ngramLength = lims.Length;

            // Lengths whose limit is not positive can never accept an n-gram,
            // so they count as full from the start.
            int numFull = lims.Count(l => l <= 0);

            NgramIdFinder finder = (uint[] ngram, int lim, int icol, ref bool more) =>
            {
                host.Assert(0 < lim && lim <= Utils.Size(ngram));
                host.Assert(lim <= Utils.Size(counts));
                host.Assert(lim <= Utils.Size(lims));
                host.Assert(icol == 0);

                int level = lim - 1;
                int slot = -1;

                // Only try to add while this length still has room. A successful add bumps
                // the counter, and reaching the limit marks the length as full.
                if (counts[level] < lims[level] && pool.TryAdd(ngram, 0, lim, out slot))
                {
                    counts[level]++;
                    if (counts[level] >= lims[level])
                    {
                        numFull++;
                    }
                }

                if (!requireIdf)
                {
                    // 'more' controls whether more ngrams/skip-grams should be processed in
                    // the current row: stop once every length is full.
                    more = numFull < ngramLength;
                    return -1;
                }

                // For IDF we are interested in counting the occurrence of ngrams/skip-grams,
                // so 'more' is left untouched. 'slot' is either the id of the added ngram
                // or -1; in the latter case look the id up in the pool.
                if (slot != -1)
                {
                    return slot;
                }

                return pool.Get(ngram, 0, lim);
            };

            return finder;
        }
Beispiel #6
0
 /// <summary>
 /// Initializes a new instance of the <see cref="JsonWriter"/> struct.
 /// </summary>
 /// <param name="sequencePool">The pool to draw an <see cref="IBufferWriter{T}"/> from, should one be required.</param>
 /// <param name="array">An initial array to start with, so that <paramref name="sequencePool"/> need not be accessed if it can be avoided.</param>
 internal JsonWriter(SequencePool sequencePool, byte[] array)
     : this()
     => Writer = new BufferWriter(sequencePool, array);
Beispiel #7
0
        /// <summary>
        /// Builds the n-gram dictionary for every column in <c>Infos</c> by scanning
        /// <paramref name="trainingData"/>, and computes inverse document frequencies for the
        /// columns whose <c>RequireIdf()</c> is true.
        /// </summary>
        /// <param name="args">Transform arguments; the per-column <c>AllLengths</c> / <c>MaxNumTerms</c>
        /// settings take precedence over the global ones when present.</param>
        /// <param name="trainingData">The data to scan; only the source columns of <c>Infos</c> are activated.</param>
        /// <param name="invDocFreqs">Receives one array per column. Entries stay null for columns
        /// that never went through the IDF branch; otherwise each non-zero entry is
        /// <c>log(totalDocs / documentCount)</c>.</param>
        /// <returns>One <see cref="SequencePool"/> per column holding the collected n-grams.</returns>
        private SequencePool[] Train(Arguments args, IDataView trainingData, out double[][] invDocFreqs)
        {
            // Contains the maximum number of grams to store in the dictionary, for each level of ngrams,
            // from 1 (in position 0) up to ngramLength (in position ngramLength-1)
            var lims = new int[Infos.Length][];

            for (int iinfo = 0; iinfo < Infos.Length; iinfo++)
            {
                // Column-level settings override the global ones.
                var all         = args.Column[iinfo].AllLengths ?? args.AllLengths;
                var ngramLength = _exes[iinfo].NgramLength;
                var maxNumTerms = Utils.Size(args.Column[iinfo].MaxNumTerms) > 0 ? args.Column[iinfo].MaxNumTerms : args.MaxNumTerms;
                if (!all)
                {
                    // Only the longest n-grams are kept: at most one (positive) limit may be given,
                    // and it applies to the last level. All shorter levels keep the default limit 0.
                    Host.CheckUserArg(Utils.Size(maxNumTerms) == 0 ||
                                      Utils.Size(maxNumTerms) == 1 && maxNumTerms[0] > 0, nameof(args.MaxNumTerms));
                    lims[iinfo] = new int[ngramLength];
                    lims[iinfo][ngramLength - 1] = Utils.Size(maxNumTerms) == 0 ? Arguments.DefaultMaxTerms : maxNumTerms[0];
                }
                else
                {
                    // All lengths are kept: up to ngramLength non-negative limits may be given, the
                    // last of which must be positive. Levels without an explicit limit reuse the last
                    // given limit, or the default when none was given.
                    Host.CheckUserArg(Utils.Size(maxNumTerms) <= ngramLength, nameof(args.MaxNumTerms));
                    Host.CheckUserArg(Utils.Size(maxNumTerms) == 0 || maxNumTerms.All(i => i >= 0) && maxNumTerms[maxNumTerms.Length - 1] > 0, nameof(args.MaxNumTerms));
                    var extend = Utils.Size(maxNumTerms) == 0 ? Arguments.DefaultMaxTerms : maxNumTerms[maxNumTerms.Length - 1];
                    lims[iinfo] = Utils.BuildArray(ngramLength,
                                                   i => i < Utils.Size(maxNumTerms) ? maxNumTerms[i] : extend);
                }
            }

            // Per-column working state: n-gram builder, source getter and source buffer.
            var helpers = new NgramBufferBuilder[Infos.Length];
            var getters = new ValueGetter <VBuffer <uint> > [Infos.Length];
            var src     = new VBuffer <uint> [Infos.Length];

            // Keep track of how many grams are in the pool for each value of n. Position
            // i in counts[iinfo] counts how many (i+1)-grams are in the pool for column iinfo.
            var counts    = new int[Infos.Length][];
            var ngramMaps = new SequencePool[Infos.Length];

            // Activate only the input columns that are the source of one of our columns.
            bool[] activeInput = new bool[trainingData.Schema.ColumnCount];
            foreach (var info in Infos)
            {
                activeInput[info.Source] = true;
            }
            using (var cursor = trainingData.GetRowCursor(col => activeInput[col]))
                using (var pch = Host.StartProgressChannel("Building n-gram dictionary"))
                {
                    for (int iinfo = 0; iinfo < Infos.Length; iinfo++)
                    {
                        Host.Assert(Infos[iinfo].TypeSrc.IsVector && Infos[iinfo].TypeSrc.ItemType.IsKey);
                        var ngramLength = _exes[iinfo].NgramLength;
                        var skipLength  = _exes[iinfo].SkipLength;

                        getters[iinfo]   = RowCursorUtils.GetVecGetterAs <uint>(NumberType.U4, cursor, Infos[iinfo].Source);
                        src[iinfo]       = default(VBuffer <uint>);
                        counts[iinfo]    = new int[ngramLength];
                        ngramMaps[iinfo] = new SequencePool();

                        // Note: GetNgramIdFinderAdd will control how many ngrams of a specific length will
                        // be added (using lims[iinfo]), therefore we set slotLim to the maximum
                        helpers[iinfo] = new NgramBufferBuilder(ngramLength, skipLength, Utils.ArrayMaxSize,
                                                                GetNgramIdFinderAdd(counts[iinfo], lims[iinfo], ngramMaps[iinfo], _exes[iinfo].RequireIdf(), Host));
                    }

                    // NOTE(review): cInfoFull and infoFull are never modified anywhere in this method,
                    // so the row loop below only ends when the cursor is exhausted and the
                    // !infoFull[iinfo] test is always true. Confirm whether early exit was intended.
                    int    cInfoFull = 0;
                    bool[] infoFull  = new bool[Infos.Length];

                    invDocFreqs = new double[Infos.Length][];

                    long   totalDocs = 0;
                    Double rowCount  = trainingData.GetRowCount() ?? Double.NaN;
                    var    buffers   = new VBuffer <float> [Infos.Length];
                    // The progress callback captures totalDocs, so it reports the running document count.
                    pch.SetHeader(new ProgressHeader(new[] { "Total n-grams" }, new[] { "documents" }),
                                  e => e.SetProgress(0, totalDocs, rowCount));
                    while (cInfoFull < Infos.Length && cursor.MoveNext())
                    {
                        totalDocs++;
                        for (int iinfo = 0; iinfo < Infos.Length; iinfo++)
                        {
                            getters[iinfo](ref src[iinfo]);
                            // A key count of 0 is replaced by uint.MaxValue before being passed to AddNgrams.
                            var keyCount = (uint)Infos[iinfo].TypeSrc.ItemType.KeyCount;
                            if (keyCount == 0)
                            {
                                keyCount = uint.MaxValue;
                            }
                            if (!infoFull[iinfo])
                            {
                                // For IDF the builder is reset on every row, so that the result read
                                // below reflects the current row only.
                                if (_exes[iinfo].RequireIdf())
                                {
                                    helpers[iinfo].Reset();
                                }

                                helpers[iinfo].AddNgrams(in src[iinfo], 0, keyCount);
                                if (_exes[iinfo].RequireIdf())
                                {
                                    // Grow the per-n-gram array to cover every n-gram seen so far, then
                                    // add one for each n-gram whose value in this row is at least 1.
                                    int totalNgrams = counts[iinfo].Sum();
                                    Utils.EnsureSize(ref invDocFreqs[iinfo], totalNgrams);
                                    helpers[iinfo].GetResult(ref buffers[iinfo]);
                                    foreach (var pair in buffers[iinfo].Items())
                                    {
                                        if (pair.Value >= 1)
                                        {
                                            invDocFreqs[iinfo][pair.Key] += 1;
                                        }
                                    }
                                }
                            }
                            AssertValid(counts[iinfo], lims[iinfo], ngramMaps[iinfo]);
                        }
                    }

                    pch.Checkpoint(counts.Sum(c => c.Sum()), totalDocs);
                    // Turn the per-n-gram document counts into log(totalDocs / count); zero counts stay zero.
                    for (int iinfo = 0; iinfo < Infos.Length; iinfo++)
                    {
                        for (int i = 0; i < Utils.Size(invDocFreqs[iinfo]); i++)
                        {
                            if (invDocFreqs[iinfo][i] != 0)
                            {
                                invDocFreqs[iinfo][i] = Math.Log(totalDocs / invDocFreqs[iinfo][i]);
                            }
                        }
                    }

                    // Record, per column, which n-gram lengths ended up with at least one entry.
                    for (int iinfo = 0; iinfo < Infos.Length; iinfo++)
                    {
                        AssertValid(counts[iinfo], lims[iinfo], ngramMaps[iinfo]);

                        int ngramLength = _exes[iinfo].NgramLength;
                        for (int i = 0; i < ngramLength; i++)
                        {
                            _exes[iinfo].NonEmptyLevels[i] = counts[iinfo][i] > 0;
                        }
                    }

                    return(ngramMaps);
                }
        }
 /// <summary>
 /// Runs the base initialization and then gives this instance a fresh <see cref="SequencePool"/>.
 /// </summary>
 protected override void OnInit()
 {
     base.OnInit();

     // The pool is created only after the base class has finished its own setup.
     m_pool = new SequencePool();
 }