/// <summary>
/// Reads the label column <paramref name="labelCol"/> from <paramref name="trans"/>, converts it to
/// integer labels, and stores the result in <c>_labels</c> while setting <c>_numLabels</c>.
/// </summary>
/// <remarks>
/// I4, R4, R8 and boolean labels are binned and then shifted down by the minimum bin value.
/// Every other label type goes through <c>GetKeyLabels</c> and is stored without any shift.
/// </remarks>
private void GetLabels(Transposer trans, ColumnType labelType, int labelCol)
            {
                int minBin;
                int limBin;
                VBuffer<int> binned = default;

                // Note: NAs have their own separate bin.
                if (labelType == NumberType.I4)
                {
                    VBuffer<int> raw = default;
                    trans.GetSingleSlotValue(labelCol, ref raw);
                    BinInts(in raw, ref binned, _numBins, out minBin, out limBin);
                    _numLabels = limBin - minBin;
                }
                else if (labelType == NumberType.R4)
                {
                    VBuffer<Single> raw = default;
                    trans.GetSingleSlotValue(labelCol, ref raw);
                    BinSingles(in raw, ref binned, _numBins, out minBin, out limBin);
                    _numLabels = limBin - minBin;
                }
                else if (labelType == NumberType.R8)
                {
                    VBuffer<Double> raw = default;
                    trans.GetSingleSlotValue(labelCol, ref raw);
                    BinDoubles(in raw, ref binned, _numBins, out minBin, out limBin);
                    _numLabels = limBin - minBin;
                }
                else if (labelType is BoolType)
                {
                    VBuffer<bool> raw = default;
                    trans.GetSingleSlotValue(labelCol, ref raw);
                    BinBools(in raw, ref binned);
                    // Fixed range for booleans: three labels spanning [-1, 2).
                    _numLabels = 3;
                    minBin = -1;
                    limBin = 2;
                }
                else
                {
                    // Remaining types are handled as keys: GetKeyLabels is closed over the
                    // label's raw type through reflection and invoked on this instance.
                    ulong labelKeyCount = labelType.GetKeyCount();
                    Contracts.Assert(labelKeyCount < Utils.ArrayMaxSize);
                    KeyLabelGetter<int> getterTemplate = GetKeyLabels<int>;
                    var openMethod = getterTemplate.GetMethodInfo().GetGenericMethodDefinition();
                    var closedMethod = openMethod.MakeGenericMethod(labelType.RawType);
                    var invokeArgs = new object[] { trans, labelCol, labelType };
                    _labels = (VBuffer<int>)closedMethod.Invoke(this, invokeArgs);
                    _numLabels = labelType.GetKeyCountAsInt32(_host) + 1;

                    // No need to densify or shift in this case.
                    return;
                }

                // Densify, then subtract the minimum bin from every label.
                VBufferUtils.Densify(ref binned);
                Contracts.Assert(binned.IsDense);
                var editor = VBufferEditor.CreateFromBuffer(ref binned);
                var shifted = editor.Values;

                for (int i = 0; i < binned.Length; i++)
                {
                    shifted[i] -= minBin;
                    Contracts.Assert(shifted[i] < _numLabels);
                }

                _labels = editor.Commit();
            }
Пример #2
0
        /// <summary>
        /// Cursors over <paramref name="data"/>, writes every active column through its own write pipe,
        /// and hands each finished block to <paramref name="toCompress"/>. Any exception raised along the
        /// way is passed to <paramref name="exMarshaller"/> under the "cursoring" tag.
        /// </summary>
        /// <param name="toCompress">Collection receiving finished blocks; marked complete on success.</param>
        /// <param name="data">The data view to cursor over.</param>
        /// <param name="activeColumns">Columns to write; their source indices select the cursor's active columns.</param>
        /// <param name="rowsPerBlock">Number of rows written before a block is ended; asserted positive.</param>
        /// <param name="sw">Only asserted non-null here.</param>
        /// <param name="ch">Channel used for assertions.</param>
        /// <param name="pch">Optional progress channel; may be null.</param>
        /// <param name="exMarshaller">Supplies the token passed to <c>toCompress.Add</c> and receives exceptions.</param>
        private void FetchWorker(BlockingCollection<Block> toCompress, IDataView data,
                                 ColumnCodec[] activeColumns, int rowsPerBlock, Stopwatch sw, IChannel ch, IProgressChannel pch, ExceptionMarshaller exMarshaller)
        {
            Contracts.AssertValue(ch);
            Contracts.AssertValueOrNull(pch);
            ch.AssertValue(exMarshaller);
            try
            {
                ch.AssertValue(toCompress);
                ch.AssertValue(data);
                ch.AssertValue(activeColumns);
                ch.AssertValue(sw);
                ch.Assert(rowsPerBlock > 0);

                // The main thread handles fetching from the cursor, and storing it into blocks passed to toCompress.
                var wantedColumns = new HashSet<int>(activeColumns.Select(col => col.SourceIndex));
                long blockIndex = 0;
                int rowsLeftInBlock = rowsPerBlock;
                using (RowCursor cursor = data.GetRowCursor(wantedColumns.Contains))
                {
                    var pipes = new WritePipe[activeColumns.Length];
                    for (int c = 0; c < activeColumns.Length; ++c)
                    {
                        pipes[c] = WritePipe.Create(this, cursor, activeColumns[c]);
                    }
                    foreach (WritePipe pipe in pipes)
                    {
                        pipe.BeginBlock();
                    }

                    long rowsSeen = 0;
                    pch?.SetHeader(new ProgressHeader(new[] { "rows" }), e => e.SetProgress(0, rowsSeen));

                    while (cursor.MoveNext())
                    {
                        foreach (WritePipe pipe in pipes)
                        {
                            pipe.FetchAndWrite();
                        }

                        rowsLeftInBlock--;
                        if (rowsLeftInBlock == 0)
                        {
                            // Block is full: emit one block per column and immediately start the next.
                            for (int c = 0; c < pipes.Length; ++c)
                            {
                                // REVIEW: It may be better if EndBlock got moved to a different worker thread.
                                toCompress.Add(new Block(pipes[c].EndBlock(), c, blockIndex), exMarshaller.Token);
                                pipes[c].BeginBlock();
                            }
                            rowsLeftInBlock = rowsPerBlock;
                            blockIndex++;
                        }

                        rowsSeen++;
                    }

                    // Flush a trailing, partially filled block if any rows were written into it.
                    if (rowsLeftInBlock < rowsPerBlock)
                    {
                        for (int c = 0; c < pipes.Length; ++c)
                        {
                            toCompress.Add(new Block(pipes[c].EndBlock(), c, blockIndex), exMarshaller.Token);
                        }
                    }

                    Contracts.Assert(rowsSeen == (blockIndex + 1) * rowsPerBlock - rowsLeftInBlock);
                    _rowCount = rowsSeen;
                    pch?.Checkpoint(rowsSeen);
                }

                toCompress.CompleteAdding();
            }
            catch (Exception ex)
            {
                exMarshaller.Set("cursoring", ex);
            }
        }
Пример #3
0
 /// <summary>
 /// Builds the PFA token for the predicted label by calling "a.argmax" on the single mapper output.
 /// </summary>
 /// <param name="mapperOutputs">Mapper output names; asserted to contain exactly one entry.</param>
 /// <returns>The token produced by <c>PfaUtils.Call</c>.</returns>
 private protected override JToken PredictedLabelPfa(string[] mapperOutputs)
 {
     Contracts.Assert(Utils.Size(mapperOutputs) == 1);

     string scoreOutput = mapperOutputs[0];
     return PfaUtils.Call("a.argmax", scoreOutput);
 }
Пример #4
0
 /// <summary>
 /// Creates a queue backed by a newly allocated array of <paramref name="capacity"/> elements.
 /// </summary>
 /// <param name="capacity">Size of the backing array; asserted to be greater than zero.</param>
 public FixedSizeQueue(int capacity)
 {
     Contracts.Assert(capacity > 0, "Array capacity should be greater than zero");
     _array = new T[capacity];
     // Check the freshly constructed instance against the class invariants.
     AssertValid();
 }
Пример #5
0
 /// <summary>
 /// Returns the element stored at <c>_startIndex</c> of the backing array without modifying the queue.
 /// </summary>
 /// <returns>The element at the start index; the queue is asserted to be non-empty.</returns>
 public T PeekFirst()
 {
     AssertValid();
     Contracts.Assert(_count != 0, "Array is empty");

     T head = _array[_startIndex];
     return head;
 }
 /// <summary>
 /// Creates a <c>OneValueMap</c> over <typeparamref name="TVal"/> for the given primitive type.
 /// </summary>
 /// <param name="type">The primitive type; its raw type is asserted to be <typeparamref name="TVal"/>.</param>
 /// <returns>A new map constructed from <paramref name="type"/>.</returns>
 public static OneValueMap<TVal> CreatePrimitive<TVal>(PrimitiveType type)
 {
     Contracts.AssertValue(type);
     Contracts.Assert(typeof(TVal) == type.RawType);

     var map = new OneValueMap<TVal>(type);
     return map;
 }
        /// <summary>
        /// Validates the multiclass label of <paramref name="data"/>, allocates the per-class prior array,
        /// and tries to populate <c>_labelNames</c> from the label column's key-value metadata.
        /// </summary>
        /// <remarks>
        /// <c>_labelNames</c> is left null when the metadata is missing, is not a known-size text vector of
        /// exactly <c>_numClasses</c> items, is not dense, or contains an empty or duplicate name.
        /// </remarks>
        protected override void CheckLabel(RoleMappedData data)
        {
            Contracts.AssertValue(data);
            // REVIEW: For floating point labels, this will make a pass over the data.
            // Should we instead leverage the pass made by the LBFGS base class? Ideally, it wouldn't
            // make a pass over the data...
            data.CheckMultiClassLabel(out _numClasses);

            // Initialize prior counts.
            _prior = new Double[_numClasses];

            // Try to get the label key values metadata.
            var schema = data.Data.Schema;
            var labelIdx = data.Schema.Label.Index;
            var keyValuesType = schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.KeyValues, labelIdx);

            bool hasUsableNames = keyValuesType != null
                && keyValuesType.IsKnownSizeVector
                && keyValuesType.ItemType.IsText
                && keyValuesType.VectorSize == _numClasses;
            if (!hasUsableNames)
            {
                _labelNames = null;
                return;
            }

            VBuffer<ReadOnlyMemory<char>> labelNames = default;
            schema.GetMetadata(MetadataUtils.Kinds.KeyValues, labelIdx, ref labelNames);

            // If label names is not dense or contain NA or default value, then it follows that
            // at least one class does not have a valid name for its label. If the label names we
            // try to get from the metadata are not unique, we may also not use them in model summary.
            // In both cases we set _labelNames to null and use the "Class_n", where n is the class number
            // for model summary saving instead.
            if (!labelNames.IsDense)
            {
                _labelNames = null;
                return;
            }

            _labelNames = new string[_numClasses];
            ReadOnlySpan<ReadOnlyMemory<char>> values = labelNames.GetValues();

            // Tracks names already seen so that duplicates can be rejected.
            var seenNames = new HashSet<string>();

            for (int i = 0; i < _numClasses; i++)
            {
                ReadOnlyMemory<char> current = values[i];

                // An empty entry is never converted; a non-empty one must also be new to the set.
                string text = current.IsEmpty ? null : current.ToString();
                if (text == null || !seenNames.Add(text))
                {
                    _labelNames = null;
                    break;
                }

                _labelNames[i] = text;

                Contracts.Assert(!string.IsNullOrEmpty(_labelNames[i]));
            }

            Contracts.Assert(_labelNames == null || _labelNames.Length == _numClasses);
        }
 /// <summary>
 /// Initializes the objective function for <paramref name="trainData"/> and caches its regression labels.
 /// </summary>
 /// <param name="trainData">Training dataset; its labels are read via <c>FastTreeRegressionTrainer.GetDatasetRegressionLabels</c>.</param>
 /// <param name="args">Arguments forwarded to the base constructor.</param>
 private ObjectiveFunctionImplBase(Dataset trainData, Arguments args)
     : base(trainData, args, double.MaxValue) // No notion of maximum step size.
 {
     _labels = FastTreeRegressionTrainer.GetDatasetRegressionLabels(trainData);
     // One label is expected per document in the dataset.
     Contracts.Assert(_labels.Length == trainData.NumDocs);
 }
Пример #9
0
        /// <summary>
        /// Type-checks a four-argument invocation whose arguments are (string, number, number, string), each
        /// given either as a scalar or as a one-column table, with at least one of them being a table.
        /// </summary>
        /// <param name="binding">Not used by this override.</param>
        /// <param name="args">Argument nodes; errors are attached to these.</param>
        /// <param name="argTypes">Types of <paramref name="args"/>, in the same order.</param>
        /// <param name="errors">Receives an error for every argument that fails its check.</param>
        /// <param name="returnType">The first argument's type when it is a table; otherwise a one-column table of strings.</param>
        /// <param name="nodeToCoercedTypeMap">Collects the scalar arguments that need coercion to string or number.</param>
        /// <returns><c>true</c> if the base check and every per-argument check succeeded.</returns>
        public override bool CheckInvocation(TexlBinding binding, TexlNode[] args, DType[] argTypes, IErrorContainer errors, out DType returnType, out Dictionary<TexlNode, DType> nodeToCoercedTypeMap)
        {
            Contracts.AssertValue(args);
            Contracts.AssertAllValues(args);
            Contracts.AssertValue(argTypes);
            Contracts.Assert(args.Length == argTypes.Length);
            Contracts.AssertValue(errors);
            Contracts.Assert(MinArity <= args.Length && args.Length <= MaxArity);

            bool isValid = base.CheckInvocation(args, argTypes, errors, out returnType, out nodeToCoercedTypeMap);

            DType firstType = argTypes[0];
            DType secondType = argTypes[1];
            DType thirdType = argTypes[2];
            DType fourthType = argTypes[3];

            // Arg0 should be either a string or a column of strings.
            // Its type dictates the function return type.
            if (firstType.IsTable)
            {
                // Ensure we have a one-column table of strings
                if (!CheckStringColumnType(firstType, args[0], errors, ref nodeToCoercedTypeMap))
                {
                    isValid = false;
                }

                // Borrow the return type from the 1st arg
                returnType = firstType;
            }
            else
            {
                returnType = DType.CreateTable(new TypedName(DType.String, OneColumnTableResultName));
                if (!DType.String.Accepts(firstType))
                {
                    if (!firstType.CoercesTo(DType.String))
                    {
                        isValid = false;
                        errors.EnsureError(DocumentErrorSeverity.Severe, args[0], TexlStrings.ErrStringExpected);
                    }
                    else
                    {
                        CollectionUtils.Add(ref nodeToCoercedTypeMap, args[0], DType.String);
                    }
                }
            }

            // Arg1 should be either a number or a column of numbers.
            if (secondType.IsTable)
            {
                if (!CheckNumericColumnType(secondType, args[1], errors, ref nodeToCoercedTypeMap))
                {
                    isValid = false;
                }
            }
            else if (!DType.Number.Accepts(secondType))
            {
                if (!secondType.CoercesTo(DType.Number))
                {
                    isValid = false;
                    errors.EnsureError(DocumentErrorSeverity.Severe, args[1], TexlStrings.ErrNumberExpected);
                }
                else
                {
                    CollectionUtils.Add(ref nodeToCoercedTypeMap, args[1], DType.Number);
                }
            }

            // Arg2 should be either a number or a column of numbers.
            if (thirdType.IsTable)
            {
                if (!CheckNumericColumnType(thirdType, args[2], errors, ref nodeToCoercedTypeMap))
                {
                    isValid = false;
                }
            }
            else if (!DType.Number.Accepts(thirdType))
            {
                if (!thirdType.CoercesTo(DType.Number))
                {
                    isValid = false;
                    errors.EnsureError(DocumentErrorSeverity.Severe, args[2], TexlStrings.ErrNumberExpected);
                }
                else
                {
                    CollectionUtils.Add(ref nodeToCoercedTypeMap, args[2], DType.Number);
                }
            }

            // Arg3 should be either a string or a column of strings.
            if (fourthType.IsTable)
            {
                if (!CheckStringColumnType(fourthType, args[3], errors, ref nodeToCoercedTypeMap))
                {
                    isValid = false;
                }
            }
            else if (!DType.String.Accepts(fourthType))
            {
                if (!fourthType.CoercesTo(DType.String))
                {
                    isValid = false;
                    errors.EnsureError(DocumentErrorSeverity.Severe, args[3], TexlStrings.ErrStringExpected);
                }
                else
                {
                    CollectionUtils.Add(ref nodeToCoercedTypeMap, args[3], DType.String);
                }
            }

            // At least one arg has to be a table; otherwise every argument is flagged.
            bool anyTable = firstType.IsTable || secondType.IsTable || thirdType.IsTable || fourthType.IsTable;
            if (!anyTable)
            {
                isValid = false;
                for (int i = 0; i < 4; i++)
                {
                    errors.EnsureError(DocumentErrorSeverity.Severe, args[i], TexlStrings.ErrTypeError);
                }
            }

            return isValid;
        }
Пример #10
0
 /// <summary>
 /// Writes this object's <c>_allowMismatch</c> value back into the host's <c>_allowMismatch</c> field.
 /// </summary>
 public void Dispose()
 {
     // The host flag is expected to still be set at the point of disposal.
     Contracts.Assert(_host._allowMismatch);
     // NOTE(review): presumably this restores the value captured when this object was created — confirm against the constructor.
     _host._allowMismatch = _allowMismatch;
 }
Пример #11
0
        /// <summary>
        /// Compares the output file at <paramref name="outPath"/> with the baseline at <paramref name="basePath"/>
        /// line by line and reports the first difference through <c>Fail</c>.
        /// </summary>
        /// <param name="relPath">Path used only in the log and failure messages.</param>
        /// <param name="basePath">Baseline file to compare against.</param>
        /// <param name="outPath">Output file being checked.</param>
        /// <param name="skip">When positive, leading output lines are consumed first and the baseline is advanced
        /// to the line equal to the last output line consumed.</param>
        /// <param name="precision">Forwarded to <c>GetNumbersFromFile</c> on non-Windows platforms.</param>
        /// <returns><c>true</c> if both files end together without a mismatch; otherwise <c>false</c>.</returns>
        protected bool CheckEqualityFromPathsCore(string relPath, string basePath, string outPath, int skip = 0, decimal precision = 10000000)
        {
            Contracts.Assert(skip >= 0);

            using (StreamReader baseline = OpenReader(basePath))
            using (StreamReader result = OpenReader(outPath))
            {
                int lineNumber = 0;
                if (skip > 0)
                {
                    // Consume output lines until the counter exceeds skip; the last one read is the anchor.
                    string anchor = null;
                    while (lineNumber <= skip)
                    {
                        anchor = result.ReadLine();
                        if (anchor == null)
                        {
                            Fail("Output is shorter than the skip value of {0}!", skip);
                            return false;
                        }
                        lineNumber++;
                    }

                    // Advance the baseline until a line equal to the anchor has been read.
                    while (true)
                    {
                        string candidate = baseline.ReadLine();
                        if (candidate == null)
                        {
                            Fail("Couldn't match output file line to a line in the baseline!");
                            return false;
                        }
                        if (candidate == anchor)
                        {
                            break;
                        }
                    }
                }

                while (true)
                {
                    // read lines while we can
                    string expected = baseline.ReadLine();
                    string actual = result.ReadLine();

                    if (expected == null && actual == null)
                    {
                        Log("Output matches baseline: '{0}'", relPath);
                        return true;
                    }

                    lineNumber++;

                    if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
                    {
                        GetNumbersFromFile(ref expected, ref actual, precision);
                    }

                    if (expected == actual)
                    {
                        continue;
                    }

                    if (expected == null || actual == null)
                    {
                        Fail("Output and baseline different lengths: '{0}'", relPath);
                    }
                    else
                    {
                        Fail(_allowMismatch, "Output and baseline mismatch at line {1}: '{0}'", relPath, lineNumber);
                    }
                    return false;
                }
            }
        }
Пример #12
0
 /// <summary>
 /// Initializes a primitive type from its raw type and kind, forwarding both to the base constructor.
 /// </summary>
 /// <param name="rawType">The raw .NET type; asserted not to implement <see cref="IDisposable"/>.</param>
 /// <param name="rawKind">The data kind passed to the base constructor.</param>
 internal PrimitiveType(Type rawType, DataKind rawKind)
     : base(rawType, rawKind)
 {
     Contracts.Assert(IsPrimitive);
     // Raw types assignable to IDisposable are rejected for primitive types.
     Contracts.Assert(!typeof(IDisposable).IsAssignableFrom(RawType));
 }
Пример #13
0
 /// <summary>
 /// Initializes a structured type from its raw type and kind, forwarding both to the base constructor.
 /// </summary>
 /// <param name="rawType">The raw .NET type passed to the base constructor.</param>
 /// <param name="rawKind">The data kind passed to the base constructor.</param>
 internal StructuredType(Type rawType, DataKind rawKind)
     : base(rawType, rawKind)
 {
     // A structured type must not report itself as primitive.
     Contracts.Assert(!IsPrimitive);
 }
Пример #14
0
 /// <summary>
 /// Initializes a structured type from its raw type only, forwarding it to the base constructor.
 /// </summary>
 /// <param name="rawType">The raw .NET type passed to the base constructor.</param>
 protected StructuredType(Type rawType)
     : base(rawType)
 {
     // A structured type must not report itself as primitive.
     Contracts.Assert(!IsPrimitive);
 }
Пример #15
0
        /// <summary>
        /// Writes a textual listing of <paramref name="schema"/> to <paramref name="writer"/>: one line per
        /// column, optionally followed by its metadata or its slot names, depending on <paramref name="args"/>.
        /// </summary>
        /// <param name="writer">Destination for the listing.</param>
        /// <param name="args">Options controlling what is shown (metadata values/types, slots, verbosity).</param>
        /// <param name="schema">The schema to print.</param>
        /// <param name="tschema">Optional transpose schema; columns with a slot type are tagged " (T)".</param>
        private static void PrintSchema(TextWriter writer, Arguments args, Schema schema, ITransposeSchema tschema)
        {
            Contracts.AssertValue(writer);
            Contracts.AssertValue(args);
            Contracts.AssertValue(schema);
            Contracts.AssertValueOrNull(tschema);
#if !CORECLR
            if (args.ShowJson)
            {
                writer.WriteLine("Json Schema not supported.");
                return;
            }
#endif
            int columnCount = schema.Count;

            var output = new IndentedTextWriter(writer, "  ");
            output.WriteLine("{0} columns:", columnCount);
            using (output.Nest())
            {
                VBuffer<ReadOnlyMemory<char>> slotNames = default;
                for (int col = 0; col < columnCount; col++)
                {
                    var column = schema[col];
                    var type = column.Type;
                    var slotType = tschema?.GetSlotType(col);
                    string transposedTag = slotType == null ? "" : " (T)";
                    output.WriteLine("{0}: {1}{2}", column.Name, type, transposedTag);

                    // Metadata display takes precedence over slot-name display.
                    bool showValues = args.ShowMetadataValues;
                    if (showValues || args.ShowMetadataTypes)
                    {
                        ShowMetadata(output, schema, col, showValues);
                        continue;
                    }

                    if (!args.ShowSlots || !type.IsKnownSizeVector())
                    {
                        continue;
                    }

                    ColumnType slotNamesType = column.Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.SlotNames)?.Type;
                    if (slotNamesType == null)
                    {
                        continue;
                    }
                    if (slotNamesType.GetVectorSize() != type.GetVectorSize() || !(slotNamesType.GetItemType() is TextType))
                    {
                        Contracts.Assert(false, "Unexpected slot names type");
                        continue;
                    }

                    column.Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref slotNames);
                    if (slotNames.Length != type.GetVectorSize())
                    {
                        Contracts.Assert(false, "Unexpected length of slot names vector");
                        continue;
                    }

                    using (output.Nest())
                    {
                        // Verbose mode enumerates every slot; otherwise empty names are left out.
                        bool verbose = args.Verbose == true;
                        foreach (var entry in slotNames.Items(all: verbose))
                        {
                            if (!verbose && entry.Value.IsEmpty)
                            {
                                continue;
                            }
                            output.WriteLine("{0}:{1}", entry.Key, entry.Value);
                        }
                    }
                }
            }
        }
Пример #16
0
        /// <summary>
        /// Incrementally updates a uniform moving average from the value leaving the window
        /// (<paramref name="lastDropped"/>) and the value entering it, falling back to
        /// <c>ComputeMovingAverageUniformInitialisation</c> on the first call or when an infinite value
        /// leaves or enters the window.
        ///
        /// Possible returns:
        ///
        /// Finite value: no infinite value in the sliding window and at least one non-NaN value
        /// NaN value: only NaN values in the sliding window or +/- Infinite
        /// Infinite value: one infinite value in the sliding window (sign is not relevant)
        /// </summary>
        /// <param name="others">Queue of previous values; read at index <c>others.Count - lag</c> when <paramref name="lag"/> is non-zero.</param>
        /// <param name="input">The current value; it is the entering value when <paramref name="lag"/> is zero.</param>
        /// <param name="lag">Zero selects <paramref name="input"/> as the entering value; otherwise the entering value is taken from <paramref name="others"/>.</param>
        /// <param name="lastDropped">The value leaving the window.</param>
        /// <param name="currentSum">Running sum, updated in place.</param>
        /// <param name="initUniformMovingAverage">When true, it is reset to false and the initialisation routine computes the result.</param>
        /// <param name="nbNanValues">Running count of NaN values, updated in place.</param>
        /// <returns>The moving average as described above.</returns>
        internal static Single ComputeMovingAverageUniform(FixedSizeQueue <Single> others, Single input, int lag,
                                                           Single lastDropped, ref Single currentSum,
                                                           ref bool initUniformMovingAverage,
                                                           ref int nbNanValues)
        {
            if (initUniformMovingAverage)
            {
                // First call: clear the flag and compute everything from scratch.
                initUniformMovingAverage = false;
                return(ComputeMovingAverageUniformInitialisation(others, input, lag,
                                                                 lastDropped, ref currentSum, ref nbNanValues));
            }
            else
            {
                // Remove the contribution of the value that left the window.
                if (Single.IsNaN(lastDropped))
                {
                    --nbNanValues;
                }
                else if (!FloatUtils.IsFinite(lastDropped))
                {
                    // One infinite value left,
                    // we need to recompute everything as we don't know how many infinite values are in the sliding window.
                    return(ComputeMovingAverageUniformInitialisation(others, input, lag,
                                                                     lastDropped, ref currentSum, ref nbNanValues));
                }
                else
                {
                    currentSum -= lastDropped;
                }

                // lastDropped is finite or NaN
                Contracts.Assert(FloatUtils.IsFinite(lastDropped) || Single.IsNaN(lastDropped));

                // The value entering the window: the input itself without lag, otherwise an element of the queue.
                var newValue = lag == 0 ? input : others[others.Count - lag];
                if (!Single.IsNaN(newValue) && !FloatUtils.IsFinite(newValue))
                {
                    // One infinite value entered,
                    // we need to recompute everything as we don't know how many infinite values are in the sliding window.
                    return(ComputeMovingAverageUniformInitialisation(others, input, lag,
                                                                     lastDropped, ref currentSum, ref nbNanValues));
                }

                // lastDropped is finite or NaN, and newValue is finite or NaN
                Contracts.Assert(FloatUtils.IsFinite(newValue) || Single.IsNaN(newValue));

                // The running sum is already infinite: it is returned as the result.
                if (!Single.IsNaN(currentSum) && !FloatUtils.IsFinite(currentSum))
                {
                    if (Single.IsNaN(newValue))
                    {
                        ++nbNanValues;
                        return(currentSum);
                    }
                    else
                    {
                        // NOTE(review): newValue is already known to be finite here (infinite values returned
                        // above, NaN is handled by the branch above), so the second operand looks unreachable — confirm.
                        return(FloatUtils.IsFinite(newValue) ? currentSum : (currentSum + newValue));
                    }
                }

                // lastDropped is finite or NaN, newValue is finite or NaN, currentSum is finite or NaN
                Contracts.Assert(FloatUtils.IsFinite(currentSum) || Single.IsNaN(currentSum));

                if (Single.IsNaN(newValue))
                {
                    // A NaN entered: count it and average over the remaining non-NaN values.
                    ++nbNanValues;
                    int nb = (lag == 0 ? others.Count + 1 : others.Count - lag + 1) - nbNanValues;
                    return(nb == 0 ? Single.NaN : currentSum / nb);
                }
                else
                {
                    int nb = lag == 0 ? others.Count + 1 - nbNanValues : others.Count + 1 - nbNanValues - lag;
                    // NOTE(review): this adds input, whereas the entering value tested above is newValue;
                    // the two differ when lag != 0 — confirm this is intended.
                    currentSum += input;
                    return(nb == 0 ? Single.NaN : currentSum / nb);
                }
            }
        }
Пример #17
0
            /// <summary>
            /// Finishes the current output line. In dense mode the whole line is written through
            /// <c>WriteDenseTo</c>; otherwise a column boundary is chosen, everything before it is written
            /// densely, and the remainder is written as "index:value" pairs preceded by the sparse length.
            /// </summary>
            /// <param name="defaultStr">Forwarded to <c>WriteDenseTo</c>; when non-null it also replaces the
            /// pipe's own default in the placeholder sparse entry.</param>
            private void EndLine(string defaultStr = null)
            {
                // All columns must have been processed before the line is ended.
                Contracts.Assert(_col == _pipes.Length);

                if (_dense)
                {
                    WriteDenseTo(_dstBase, defaultStr);
                    return;
                }

                // Find the sparse split point.
                // REVIEW: Should we allow splitting at any slot or only at column boundaries?
                // This currently does the latter.
                // Start from a split at column 0, scored as _sparseWeight times _slotLim.
                int    colBest   = 0;
                Double bestScore = _sparseWeight * _slotLim;

                for (int col = 1; col <= _pipes.Length; col++)
                {
                    // NOTE(review): presumably _mpcoldst[col] is the dense width before column col and
                    // _mpcolslot[col] the number of slots recorded before it — confirm against where they are filled.
                    int cd = _mpcoldst[col];
                    int cs = _slotLim - _mpcolslot[col];

                    // Lower score wins; ties keep the earlier split point.
                    Double score = cd + _sparseWeight * cs;
                    if (bestScore > score)
                    {
                        bestScore = score;
                        colBest   = col;
                    }
                }

                // If the length of the sparse section is small compared to the dense count,
                // don't bother with sparse.
                int lenDense  = _mpcoldst[colBest];
                int lenSparse = _dstBase - lenDense;

                if (lenSparse < 5 || lenSparse < lenDense / 5)
                {
                    // Moving the split past the last column makes the whole line dense.
                    colBest = _pipes.Length;
                }

                string sep = "";

                if (colBest > 0)
                {
                    // Write the dense prefix; anything that follows needs a separator.
                    WriteDenseTo(_mpcoldst[colBest], defaultStr);
                    sep = _sepStr;
                }

                if (colBest >= _pipes.Length)
                {
                    // Nothing left to write sparsely.
                    return;
                }

                // Write the rest sparsely.
                // The sparse section starts with its length.
                _writer.Write(sep);
                sep = _sepStr;
                _writer.Write(lenSparse);

                int slot = _mpcolslot[colBest];

                if (slot == _slotLim)
                {
                    // Need to write at least one sparse specification.
                    _writer.Write(sep);
                    _writer.Write("0:");
                    _writer.Write(defaultStr ?? _pipes[colBest].Default);
                    return;
                }

                // Start of this slot's characters in _rgch: the previous slot's limit, or 0 for the first slot.
                int ichMin = slot > 0 ? _mpslotichLim[slot - 1] : 0;

                for (; slot < _slotLim; slot++)
                {
                    _writer.Write(sep);
                    // Indices in the sparse section are relative to the end of the dense prefix.
                    _writer.Write(_mpslotdst[slot] - lenDense);
                    _writer.Write(':');
                    int ichLim = _mpslotichLim[slot];
                    _writer.Write(_rgch, ichMin, ichLim - ichMin);
                    ichMin = ichLim;
                }
            }
        /// <summary>
        /// Builds a getter that concatenates the characters of every non-empty text item of the source vector
        /// into a single <c>ushort</c> vector, wrapping each item in <c>TextStartMarker</c> and
        /// <c>TextEndMarker</c> when <c>_useMarkerChars</c> is set.
        /// </summary>
        /// <param name="input">Row from which the source vector is read.</param>
        /// <param name="iinfo">Index into <c>Infos</c>; its source type is asserted to be a vector of text.</param>
        /// <returns>A getter that reuses the destination's value array when it is large enough.</returns>
        private ValueGetter<VBuffer<ushort>> MakeGetterVec(IRow input, int iinfo)
        {
            Host.AssertValue(input);
            Host.Assert(Infos[iinfo].TypeSrc.IsVector);
            Host.Assert(Infos[iinfo].TypeSrc.ItemType.IsText);

            int vectorSize = Infos[iinfo].TypeSrc.VectorSize;

            Contracts.Assert(vectorSize >= 0);

            var getSrc = GetSrcGetter<VBuffer<DvText>>(input, iinfo);
            VBuffer<DvText> src = default;

            return (ref VBuffer<ushort> dst) =>
            {
                getSrc(ref src);

                // First pass: total number of output characters, including markers.
                int total = 0;
                for (int i = 0; i < src.Count; i++)
                {
                    DvText item = src.Values[i];
                    if (!item.HasChars)
                    {
                        continue;
                    }

                    total += item.Length;
                    if (_useMarkerChars)
                    {
                        total += TextMarkersCount;
                    }
                }

                var buffer = dst.Values;
                if (total > 0)
                {
                    if (Utils.Size(buffer) < total)
                    {
                        buffer = new ushort[total];
                    }

                    // Second pass: copy the characters of each non-empty item.
                    int written = 0;
                    for (int i = 0; i < src.Count; i++)
                    {
                        DvText item = src.Values[i];
                        if (!item.HasChars)
                        {
                            continue;
                        }

                        if (_useMarkerChars)
                        {
                            buffer[written++] = TextStartMarker;
                        }

                        int itemLength = item.Length;
                        for (int ich = 0; ich < itemLength; ich++)
                        {
                            buffer[written++] = item[ich];
                        }

                        if (_useMarkerChars)
                        {
                            buffer[written++] = TextEndMarker;
                        }
                    }
                    Contracts.Assert(written == total);
                }

                dst = new VBuffer<ushort>(total, buffer, dst.Indices);
            };
        }
 /// <summary>
 /// Creates a <c>VecValueMap</c> over <typeparamref name="TVal"/> for the given vector type.
 /// </summary>
 /// <param name="type">The vector type; its item raw type is asserted to be <typeparamref name="TVal"/>.</param>
 /// <returns>A new map constructed from <paramref name="type"/>.</returns>
 public static VecValueMap<TVal> CreateVector<TVal>(VectorType type)
 {
     Contracts.AssertValue(type);
     Contracts.Assert(typeof(TVal) == type.ItemType.RawType);

     var map = new VecValueMap<TVal>(type);
     return map;
 }
Пример #20
0
        /// <summary>
        /// Builds an in-memory data view with columns A through F and verifies <see cref="Transposer"/>
        /// against it twice: first without forced saving (columns selected by name), then with forced
        /// saving (columns selected by index).
        /// </summary>
        public void TransposerTest()
        {
            const int rowCount = 1000;
            var rgen = new Random(0);
            var builder = new ArrayDataViewBuilder(Env);

            // A: a sparse-ish column, used to check splitting.
            var dataA = GenerateHelper(rowCount, 0.1, rgen, () => rgen.Next(), 50, 5, 10, 15);
            // Coverage for the null vbuffer case.
            dataA[rowCount / 2] = new VBuffer<int>(50, 0, null, null);
            builder.AddColumn("A", NumberType.I4, dataA);

            // B: a dense-ish column, used to check splitting.
            builder.AddColumn("B", NumberType.R8, GenerateHelper(rowCount, 0.8, rgen, rgen.NextDouble, 50, 0, 25, 49));

            // C: a column we do nothing with.
            builder.AddColumn("C", NumberType.I2, GenerateHelper(rowCount, 0.1, rgen, () => (short)1, 30, 3, 10, 24));

            // D: a column small enough that it does not have to be split.
            builder.AddColumn("D", NumberType.R8, GenerateHelper(rowCount, 0.1, rgen, rgen.NextDouble, 3, 1));

            // E: a sparse scalar column.
            builder.AddColumn("E", NumberType.U4, GenerateHelper(rowCount, 0.1, rgen, () => (uint)rgen.Next(int.MinValue, int.MaxValue)));

            // F: a dense-ish scalar column.
            builder.AddColumn("F", NumberType.I4, GenerateHelper(rowCount, 0.8, rgen, () => rgen.Next()));

            IDataView view = builder.GetDataView();

            // First pass: no forced save, so the result mixes passthrough and saved columns.
            // "D" is listed twice to confirm that a duplicate specification has no ill effects.
            string[] names = { "B", "A", "E", "D", "F", "D" };
            using (var trans = Transposer.Create(Env, view, false, names))
            {
                // Names first: every requested column must sit at the same index as in the source view.
                foreach (string name in names)
                {
                    Assert.True(trans.Schema.TryGetColumnIndex(name, out int index), $"Transpose schema couldn't find column '{name}'");
                    bool result = view.Schema.TryGetColumnIndex(name, out int trueIndex);
                    Contracts.Assert(result);
                    Assert.True(trueIndex == index, $"Transpose schema had column '{name}' at unexpected index");
                }

                // Then contents. C was never requested, so it must not be transposable.
                Assert.Null(trans.TransposeSchema.GetSlotType(2));
                TransposeCheckHelper<int>(view, 0, trans);    // A
                TransposeCheckHelper<Double>(view, 1, trans); // B
                TransposeCheckHelper<Double>(view, 3, trans); // D
                TransposeCheckHelper<uint>(view, 4, trans);   // E
                TransposeCheckHelper<int>(view, 5, trans);    // F
            }

            // Second pass: force save and recheck the columns that were passthrough before.
            // This mainly covers binary saving / loading of scalars, which would otherwise
            // always be passthrough. It also exercises selecting columns by index.
            using (var savedTrans = Transposer.Create(Env, view, true, 3, 5, 4))
            {
                // A, B and C were not requested, so none of them may show up as transposed.
                for (int col = 0; col <= 2; ++col)
                    Assert.Null(savedTrans.TransposeSchema.GetSlotType(col));

                TransposeCheckHelper<Double>(view, 3, savedTrans); // D
                TransposeCheckHelper<uint>(view, 4, savedTrans);   // E
                TransposeCheckHelper<int>(view, 5, savedTrans);    // F
            }
        }
        /// <summary>
        /// Consume the characters of the next token and append them to the string builder.
        /// </summary>
        /// <param name="bldr">Builder the token's characters are appended to.</param>
        public void GetToken(StringBuilder bldr)
        {
            // Length of the builder before this token; equal lengths mean nothing was gathered yet.
            int start = bldr.Length;

            // Skip spaces, comments, etc.
            SkipWhiteSpace();

            while (!_curs.Eof)
            {
                char ch = _curs.ChCur;
                bool nothingGathered = bldr.Length == start;

                if (ch == '{')
                {
                    // At the start of a token the curly group is gathered; otherwise it ends the token.
                    if (nothingGathered)
                        GatherCurlyGroup(bldr);
                    return;
                }

                if (ch == '}')
                {
                    if (nothingGathered)
                    {
                        bldr.Append(ch);
                        _curs.ChNext();
                        // Naked } is an error.
                        _error = true;
                    }
                    return;
                }

                if (ch == '=')
                {
                    // '=' is a one-character token at the start; otherwise it ends the current token.
                    if (nothingGathered)
                    {
                        bldr.Append(ch);
                        _curs.ChNext();
                    }
                    return;
                }

                if (ch == '\\' && _escapes)
                {
                    GatherSlash(bldr, true);
                    continue;
                }

                if (ch == '"')
                {
                    GatherString(bldr, true);
                    continue;
                }

                if (ch == '#')
                {
                    // Since we skipped comments, we should only get here if we've collected something.
                    Contracts.Assert(bldr.Length > start);
                    return;
                }

                if (char.IsWhiteSpace(ch))
                    return;

                // Ordinary character (including a backslash when escapes are off): take it literally.
                bldr.Append(ch);
                _curs.ChNext();
            }
        }
Пример #22
0
 /// <summary>
 /// Invokes the getter, passing the buffer slot at <paramref name="idx"/> by reference.
 /// </summary>
 /// <param name="idx">Slot index; asserted to lie within the bounds of the buffer.</param>
 public override void Fill(int idx)
 {
     Contracts.Assert(idx >= 0 && idx < Buffer.Length);
     _getter(ref Buffer[idx]);
 }
Пример #23
0
 /// <summary>
 /// Asserts the invariants of the backing storage: the array is non-empty, the start index
 /// lies inside it, and the element count does not exceed its length.
 /// </summary>
 private void AssertValid()
 {
     // A size is never negative, so only a strictly positive bound is a real check. It is
     // also already implied by the start-index assertion below, which needs Length > 0.
     // NOTE(review): presumably Utils.Size tolerates a null array, so this fires before
     // _array.Length is dereferenced - confirm.
     Contracts.Assert(Utils.Size(_array) > 0);
     Contracts.Assert(0 <= _startIndex && _startIndex < _array.Length);
     Contracts.Assert(0 <= _count && _count <= _array.Length);
 }
Пример #24
0
 /// <summary>
 /// Copies the value held in the buffer slot at <paramref name="idx"/>, as stored there by a
 /// previous <see cref="Fill"/> call, into <paramref name="value"/>.
 /// </summary>
 /// <param name="idx">Slot index; asserted to lie within the bounds of the buffer.</param>
 /// <param name="value">Destination that receives the copy.</param>
 public void Fetch(int idx, ref T value)
 {
     Contracts.Assert(idx >= 0 && idx < Buffer.Length);
     Copy(in Buffer[idx], ref value);
 }
Пример #25
0
 /// <summary>
 /// Returns the element at the last occupied position, i.e. <c>_count - 1</c> places after
 /// <c>_startIndex</c>, wrapping around the end of the backing array.
 /// </summary>
 /// <returns>The element stored at that position.</returns>
 public T PeekLast()
 {
     AssertValid();
     Contracts.Assert(_count != 0, "Array is empty");

     // Offset of the last element before wrapping into the array bounds.
     int unwrapped = _startIndex + _count - 1;
     return _array[unwrapped % _array.Length];
 }
Пример #26
0
 /// <summary>
 /// Reports the output type for the column at <paramref name="iinfo"/>:
 /// <c>NumberType.I8</c> when <c>UseCounter[iinfo]</c> is set, <c>NumberType.Float</c> otherwise.
 /// </summary>
 /// <param name="iinfo">Column info index; asserted to be in <c>[0, InfoCount)</c>.</param>
 protected override ColumnType GetColumnTypeCore(int iinfo)
 {
     // Non-short-circuiting '&': both comparisons are always evaluated.
     Contracts.Assert(0 <= iinfo & iinfo < InfoCount);

     if (UseCounter[iinfo])
         return NumberType.I8;

     return NumberType.Float;
 }
Пример #27
0
 /// <summary>
 /// Adds <paramref name="fn"/> to <c>_loadNameToCodecCreator</c> under the key <paramref name="name"/>.
 /// </summary>
 /// <param name="name">Key to register under; asserted not to be present already.</param>
 /// <param name="fn">Delegate stored for that key.</param>
 private void RegisterOtherCodec(string name, GetCodecFromStreamDelegate fn)
 {
     // Registering the same name twice is treated as a programming error.
     Contracts.Assert(!_loadNameToCodecCreator.ContainsKey(name));
     _loadNameToCodecCreator.Add(name, fn);
 }
Пример #28
0
 /// <summary>
 /// Sets <paramref name="dst"/> to <c>true</c> for any valid column info index.
 /// </summary>
 /// <param name="iinfo">Column info index; asserted to be in <c>[0, InfoCount)</c>.</param>
 /// <param name="dst">Receives <c>true</c>.</param>
 private void IsNormalized(int iinfo, ref bool dst)
 {
     // Non-short-circuiting '&': both comparisons are always evaluated.
     Contracts.Assert(iinfo >= 0 & iinfo < InfoCount);
     dst = true;
 }
Пример #29
0
 // Returns a mask in which every bit below position ibit is set and all other bits are clear,
 // i.e. (1 << ibit) - 1. Only valid for 0 <= ibit < 32.
 // Use MaskBelowEx to extend to 0 <= ibit < 64, in particular, for 32.
 public static uint UMaskBelow(int ibit)
 {
     Contracts.Assert(0 <= ibit && ibit < CbitUint, "UMaskBelow is designed to work for 0 <= ibit < 32");

     uint bitAtIbit = 1U << ibit;
     return bitAtIbit - 1U;
 }
            /// <summary>
            /// Validates the label and source columns, transposes them, and computes the mutual
            /// information scores of every source column against the label.
            /// </summary>
            /// <param name="input">Data view containing the label and source columns.</param>
            /// <param name="labelColumnName">Name of the label column.</param>
            /// <param name="columns">Names of the source columns to score.</param>
            /// <param name="numBins">Bin count, stored in <c>_numBins</c> before the labels are read.</param>
            /// <param name="colSizes">Output array: entry <c>i</c> receives the value count of
            /// <c>columns[i]</c>. Its length is not checked here.</param>
            /// <returns>One score array per entry of <paramref name="columns"/>, in the same order.</returns>
            public float[][] GetScores(IDataView input, string labelColumnName, string[] columns, int numBins, int[] colSizes)
            {
                _numBins = numBins;
                var schema = input.Schema;
                var columnCount = columns.Length;

                // Label column: it must exist and have a supported type.
                if (!schema.TryGetColumnIndex(labelColumnName, out int labelCol))
                {
                    throw _host.ExceptUserArg(nameof(MutualInformationFeatureSelectingEstimator.Arguments.LabelColumn),
                                              "Label column '{0}' not found", labelColumnName);
                }

                var labelType = schema[labelCol].Type;
                if (!IsValidColumnType(labelType))
                {
                    throw _host.ExceptUserArg(nameof(MutualInformationFeatureSelectingEstimator.Arguments.LabelColumn),
                                              "Label column '{0}' does not have compatible type", labelColumnName);
                }

                // Source indices to transpose: the requested columns, with the label in the last slot.
                var colSrcs = new int[columnCount + 1];
                colSrcs[columnCount] = labelCol;

                for (int k = 0; k < columnCount; k++)
                {
                    var name = columns[k];
                    if (!schema.TryGetColumnIndex(name, out int srcIndex))
                    {
                        throw _host.ExceptUserArg(nameof(MutualInformationFeatureSelectingEstimator.Arguments.Column),
                                                  "Source column '{0}' not found", name);
                    }

                    var srcType = schema[srcIndex].Type;
                    if (srcType is VectorType vecType && !vecType.IsKnownSize)
                    {
                        throw _host.ExceptUserArg(nameof(MutualInformationFeatureSelectingEstimator.Arguments.Column),
                                                  "Variable length column '{0}' is not allowed", name);
                    }

                    if (!IsValidColumnType(srcType.GetItemType()))
                    {
                        throw _host.ExceptUserArg(nameof(MutualInformationFeatureSelectingEstimator.Arguments.Column),
                                                  "Column '{0}' of type '{1}' does not have compatible type.", name, srcType);
                    }

                    colSrcs[k] = srcIndex;
                    colSizes[k] = srcType.GetValueCount();
                }

                var scores = new float[columnCount][];

                using (var ch = _host.Start("Computing mutual information scores"))
                using (var pch = _host.StartProgressChannel("Computing mutual information scores"))
                using (var trans = Transposer.Create(_host, input, false, colSrcs))
                {
                    // The progress callback below captures this variable, so the loop must advance
                    // this same variable rather than declare its own counter.
                    int current = 0;
                    var header = new ProgressHeader(new[] { "columns" });

                    // Look the label up again, this time in the transposer's schema.
                    bool found = trans.Schema.TryGetColumnIndex(labelColumnName, out labelCol);
                    Contracts.Assert(found);

                    GetLabels(trans, labelType, labelCol);
                    _contingencyTable = new int[_numLabels][];
                    _labelSums = new int[_numLabels];

                    pch.SetHeader(header, e => e.SetProgress(0, current, columnCount));
                    for (current = 0; current < columnCount; current++)
                    {
                        found = trans.Schema.TryGetColumnIndex(columns[current], out int col);
                        Contracts.Assert(found);
                        ch.Trace("Computing scores for column '{0}'", columns[current]);
                        scores[current] = ComputeMutualInformation(trans, col);
#if DEBUG
                        ch.Trace("Scores for column '{0}': {1}", columns[current], string.Join(", ", scores[current]));
#endif
                        pch.Checkpoint(current + 1);
                    }
                }

                return scores;
            }