Beispiel #1
0
        /// <summary>
        /// Returns the content text stored for the feature at <paramref name="ifeat"/>, or a
        /// default transform built from the feature's name when the stored text has no characters.
        /// </summary>
        /// <param name="ifeat">Feature index; asserted to satisfy 0 &lt;= ifeat &lt; Count.</param>
        /// <returns>
        /// The stored content string, or the result of
        /// <c>DatasetUtils.GetDefaultTransform(GetName(ifeat))</c> when nothing is stored.
        /// </returns>
        public string GetContent(int ifeat)
        {
            Contracts.Assert(0 <= ifeat && ifeat < Count);

            DvText stored = _content.GetItemOrDefault(ifeat);
            if (stored.HasChars)
            {
                return stored.ToString();
            }

            // No usable content for this slot: derive the default from the feature name.
            return DatasetUtils.GetDefaultTransform(GetName(ifeat));
        }
Beispiel #2
0
        /// <summary>
        /// Returns the name stored for the feature at <paramref name="ifeat"/>, or a synthetic
        /// name of the form "f" followed by the index when the stored name has no characters.
        /// </summary>
        /// <param name="ifeat">Feature index; asserted to satisfy 0 &lt;= ifeat &lt; Count.</param>
        /// <returns>The stored name, or the generated fallback name.</returns>
        public string GetName(int ifeat)
        {
            Contracts.Assert(0 <= ifeat && ifeat < Count);

            DvText stored = _names.GetItemOrDefault(ifeat);
            if (stored.HasChars)
            {
                return stored.ToString();
            }

            // No usable name for this slot: synthesize one from the index.
            return string.Format("f{0}", ifeat);
        }
        /// <summary>
        /// Loads QuotingData.csv with quoted-string support enabled and verifies that exactly three
        /// rows come back, each with the expected numeric ID (column 0) and text (column 1).
        /// </summary>
        public void CanSuccessfullyRetrieveQuotedData()
        {
            // Expected rows, in file order.
            float[] expectedIds = { 1, 2, 3 };
            string[] expectedTexts =
            {
                "This text contains comma, within quotes.",
                "This text contains extra punctuations and special characters.;*<>?!@#$%^&*()_+=-{}|[]:;'",
                "This text has no quotes"
            };

            string dataPath = GetDataPath("QuotingData.csv");
            var loader = new Data.TextLoader(dataPath).CreateFrom<QuoteInput>(useHeader: true, separator: ',', allowQuotedStrings: true, supportSparse: false);

            using (var environment = new TlcEnvironment())
            {
                Experiment experiment = environment.CreateExperiment();
                ILearningPipelineDataStep output = loader.ApplyStep(null, experiment) as ILearningPipelineDataStep;

                experiment.Compile();
                loader.SetInput(environment, experiment);
                experiment.Run();

                IDataView data = experiment.GetOutput(output.Data);
                Assert.NotNull(data);

                using (var cursor = data.GetRowCursor(col => true))
                {
                    var idGetter = cursor.GetGetter<float>(0);
                    var textGetter = cursor.GetGetter<DvText>(1);

                    for (int row = 0; row < expectedIds.Length; row++)
                    {
                        Assert.True(cursor.MoveNext());

                        float id = 0;
                        idGetter(ref id);
                        Assert.Equal(expectedIds[row], id);

                        DvText text = new DvText();
                        textGetter(ref text);
                        Assert.Equal(expectedTexts[row], text.ToString());
                    }

                    // The file must not contain any further rows.
                    Assert.False(cursor.MoveNext());
                }
            }
        }
Beispiel #4
0
            /// <summary>
            /// Builds an action that reads the <c>DvText</c> value of column <paramref name="col"/>
            /// from <paramref name="input"/> and passes its string form, together with the target
            /// row object, to <paramref name="poke"/>.
            /// </summary>
            /// <param name="input">Row to read the text column from.</param>
            /// <param name="col">Index of the text column in <paramref name="input"/>.</param>
            /// <param name="poke">Delegate asserted to be a <c>Poke&lt;TRow, string&gt;</c>.</param>
            /// <returns>An action that copies the current text value into the given row object.</returns>
            private static Action<TRow> CreateTextToStringSetter(IRow input, int col, Delegate poke)
            {
                var textGetter = input.GetGetter<DvText>(col);
                var stringPoke = poke as Poke<TRow, string>;

                Contracts.AssertValue(stringPoke);

                // A single buffer is captured by the closure and reused on every invocation.
                DvText buffer = default(DvText);

                return target =>
                {
                    textGetter(ref buffer);
                    string text = buffer.ToString();
                    stringPoke(target, text);
                };
            }
        /// <summary>
        /// Creates the getter for output column <paramref name="iinfo"/>: it reads an image path
        /// from the source text column and produces a <c>Bitmap</c> loaded from that path.
        /// </summary>
        /// <param name="ch">Channel used to report load failures.</param>
        /// <param name="input">Row providing the source text column.</param>
        /// <param name="iinfo">Column info index; asserted to be within <c>Infos</c>.</param>
        /// <param name="disposer">Always set to null.</param>
        /// <returns>
        /// A <c>ValueGetter&lt;Bitmap&gt;</c> that yields null when the path text is empty or the
        /// bitmap cannot be constructed.
        /// </returns>
        protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, out Action disposer)
        {
            Host.AssertValue(ch, nameof(ch));
            Host.AssertValue(input);
            Host.Assert(0 <= iinfo && iinfo < Infos.Length);
            disposer = null;

            var pathGetter = GetSrcGetter<DvText>(input, iinfo);
            DvText pathText = default;

            ValueGetter<Bitmap> getter = (ref Bitmap dst) =>
            {
                // Release whatever bitmap the caller handed in before producing a new value.
                dst?.Dispose();
                dst = null;

                pathGetter(ref pathText);

                if (pathText.Length > 0)
                {
                    try
                    {
                        string relative = pathText.ToString();
                        string fullPath = string.IsNullOrWhiteSpace(_imageFolder)
                            ? relative
                            : Path.Combine(_imageFolder, relative);
                        dst = new Bitmap(fullPath);
                    }
                    catch (Exception e)
                    {
                        // REVIEW: Everything is caught on purpose. new Bitmap(string) has been seen to throw
                        // ArgumentException for a missing file although its documentation names
                        // FileNotFoundException, so the set of exceptions for other failures (corrupted
                        // file, etc.) is unclear.

                        // REVIEW: Log failures. For now the message and stack trace go to the channel as info.
                        ch.Info(e.Message);
                        ch.Info(e.StackTrace);
                        dst = null;
                    }
                }
            };

            return getter;
        }
            /// <summary>
            /// Creates the getter for output column <paramref name="iinfo"/>: it reads an image path
            /// from the mapped source text column and produces a <c>Bitmap</c> loaded from that path.
            /// </summary>
            /// <param name="input">Row providing the source text column.</param>
            /// <param name="iinfo">Index into the parent's column pairs; asserted to be in range.</param>
            /// <param name="disposer">Always set to null.</param>
            /// <returns>
            /// A <c>ValueGetter&lt;Bitmap&gt;</c> that yields null when the path text is empty or the
            /// bitmap cannot be constructed.
            /// </returns>
            protected override Delegate MakeGetter(IRow input, int iinfo, out Action disposer)
            {
                Contracts.AssertValue(input);
                Contracts.Assert(0 <= iinfo && iinfo < _parent.ColumnPairs.Length);

                disposer = null;
                var pathGetter = input.GetGetter<DvText>(ColMapNewToOld[iinfo]);
                DvText pathText = default;

                ValueGetter<Bitmap> getter = (ref Bitmap dst) =>
                {
                    // Release whatever bitmap the caller handed in before producing a new value.
                    dst?.Dispose();
                    dst = null;

                    pathGetter(ref pathText);

                    if (pathText.Length > 0)
                    {
                        try
                        {
                            string relative = pathText.ToString();
                            string fullPath = string.IsNullOrWhiteSpace(_parent.ImageFolder)
                                ? relative
                                : Path.Combine(_parent.ImageFolder, relative);
                            dst = new Bitmap(fullPath);
                        }
                        catch (Exception)
                        {
                            // REVIEW: Everything is caught on purpose. new Bitmap(string) has been seen to throw
                            // ArgumentException for a missing file although its documentation names
                            // FileNotFoundException, so the set of exceptions for other failures (corrupted
                            // file, etc.) is unclear.

                            // REVIEW: Log failures. Currently the failure is swallowed and null is passed through.
                            dst = null;
                        }
                    }
                };

                return getter;
            }
        /// <summary>
        /// Loads TrimData.csv with whitespace trimming enabled and verifies that exactly two rows
        /// come back, each with the expected numeric ID (column 0) and trimmed text (column 1).
        /// </summary>
        public void CanSuccessfullyTrimSpaces()
        {
            // Expected rows, in file order.
            float[] expectedIds = { 1, 2 };
            string[] expectedTexts =
            {
                "There is a space at the end",
                "There is no space at the end"
            };

            string dataPath = GetDataPath("TrimData.csv");
            var loader = new Data.TextLoader(dataPath).CreateFrom<QuoteInput>(useHeader: true, separator: ',', allowQuotedStrings: false, supportSparse: false, trimWhitespace: true);

            using (var environment = new TlcEnvironment())
            {
                Experiment experiment = environment.CreateExperiment();
                ILearningPipelineDataStep output = loader.ApplyStep(null, experiment) as ILearningPipelineDataStep;

                experiment.Compile();
                loader.SetInput(environment, experiment);
                experiment.Run();

                IDataView data = experiment.GetOutput(output.Data);
                Assert.NotNull(data);

                using (var cursor = data.GetRowCursor(col => true))
                {
                    var idGetter = cursor.GetGetter<float>(0);
                    var textGetter = cursor.GetGetter<DvText>(1);

                    for (int row = 0; row < expectedIds.Length; row++)
                    {
                        Assert.True(cursor.MoveNext());

                        float id = 0;
                        idGetter(ref id);
                        Assert.Equal(expectedIds[row], id);

                        DvText text = new DvText();
                        textGetter(ref text);
                        Assert.Equal(expectedTexts[row], text.ToString());
                    }

                    // The file must not contain any further rows.
                    Assert.False(cursor.MoveNext());
                }
            }
        }
        /// <summary>
        /// Applies the operation between a text column and a text scalar by appending
        /// <paramref name="value"/> to the string form of every element.
        /// </summary>
        /// <param name="c1">Left operand; only columns of kind <c>DataKind.TX</c> are supported.</param>
        /// <param name="value">Text appended to each element.</param>
        /// <returns>A new <c>NumericColumn</c> wrapping the resulting text column.</returns>
        /// <exception cref="DataTypeError">The column kind is anything other than <c>DataKind.TX</c>.</exception>
        public static NumericColumn Operation(NumericColumn c1, DvText value)
        {
            if (c1.Kind != DataKind.TX)
            {
                throw new DataTypeError(string.Format("{0} not implemented for column {1}.", OperationName, c1.Kind));
            }

            // NOTE(review): this overload presumably exposes c1's values and allocates the result column - confirm.
            DvText[] source;
            DataColumn<DvText> result;
            Operation(c1, out source, out result);

            for (int row = 0; row < result.Length; ++row)
            {
                string joined = source[row].ToString() + value.ToString();
                result.Set(row, new DvText(joined));
            }

            return new NumericColumn(result);
        }
Beispiel #9
0
                /// <summary>
                /// Heuristically decides whether a cell value looks like a column header.
                /// </summary>
                /// <param name="value">Cell text to inspect.</param>
                /// <returns>
                /// <c>false</c> when the text is longer than 100 characters; <c>true</c> when it starts
                /// (case-insensitively) with one of the known header prefixes; <c>null</c> when neither
                /// rule applies.
                /// </returns>
                private bool? IsLookLikeHeader(DvText value)
                {
                    string text = value.ToString();

                    // Very long cells are treated as data, never as a header.
                    if (text.Length > 100)
                    {
                        return false;
                    }

                    string[] prefixPatterns = { "^Label", "^Feature", "^Market", "^m_", "^Weight" };

                    for (int i = 0; i < prefixPatterns.Length; i++)
                    {
                        if (Regex.IsMatch(text, prefixPatterns[i], RegexOptions.IgnoreCase))
                        {
                            return true;
                        }
                    }

                    // No evidence either way.
                    return null;
                }
        /// <summary>
        /// Reads every row of <paramref name="data"/> and converts it into a
        /// <c>PipelineResultRow</c> built from the graph, metric and pipeline-id columns.
        /// </summary>
        /// <param name="env">Host environment used to create exceptions.</param>
        /// <param name="data">Data view holding one pipeline result per row.</param>
        /// <param name="graphColName">Name of the text column holding the graph JSON.</param>
        /// <param name="metricColName">Name of the double column holding the metric value.</param>
        /// <param name="idColName">Name of the text column holding the pipeline id.</param>
        /// <returns>One <c>PipelineResultRow</c> per row of <paramref name="data"/>, in cursor order.</returns>
        /// <remarks>Throws the exception produced by <c>env.ExceptNotSupp</c> when a named column is missing.</remarks>
        public static PipelineResultRow[] ExtractResults(IHostEnvironment env, IDataView data, string graphColName, string metricColName, string idColName)
        {
            var results = new List<PipelineResultRow>();
            var schema = data.Schema;

            if (!schema.TryGetColumnIndex(graphColName, out var graphCol))
            {
                throw env.ExceptNotSupp($"Column name {graphColName} not found");
            }
            if (!schema.TryGetColumnIndex(metricColName, out var metricCol))
            {
                throw env.ExceptNotSupp($"Column name {metricColName} not found");
            }
            if (!schema.TryGetColumnIndex(idColName, out var pipelineIdCol))
            {
                throw env.ExceptNotSupp($"Column name {idColName} not found");
            }

            using (var cursor = data.GetRowCursor(col => true))
            {
                // Getters are created once up front instead of once per row; each call reads
                // the cursor's current row, so re-creating them inside the loop is wasted work.
                var metricGetter = cursor.GetGetter<double>(metricCol);
                var graphGetter = cursor.GetGetter<DvText>(graphCol);
                var pipelineIdGetter = cursor.GetGetter<DvText>(pipelineIdCol);

                // Buffers are overwritten by the getters on every row.
                double metricValue = 0;
                DvText graphJson = new DvText();
                DvText pipelineId = new DvText();

                while (cursor.MoveNext())
                {
                    metricGetter(ref metricValue);
                    graphGetter(ref graphJson);
                    pipelineIdGetter(ref pipelineId);

                    results.Add(new PipelineResultRow(graphJson.ToString(), metricValue, pipelineId.ToString()));
                }
            }

            return results.ToArray();
        }
Beispiel #11
0
        /// <summary>
        /// Loads the images listed in images/images.tsv, resizes each one to 100x100 using
        /// <c>ResizingKind.IsoPad</c>, and saves every resized bitmap as a JPEG under the test
        /// output path. Fails if a required column is missing or any bitmap comes back null.
        /// </summary>
        public void TestSaveImages()
        {
            using (var env = new ConsoleEnvironment())
            {
                var dataFile = GetDataPath("images/images.tsv");
                var imageFolder = Path.GetDirectoryName(dataFile);
                var data = env.CreateLoader("Text{col=ImagePath:TX:0 col=Name:TX:1}", new MultiFileSource(dataFile));
                var images = ImageLoaderTransform.Create(env, new ImageLoaderTransform.Arguments()
                {
                    Column = new ImageLoaderTransform.Column[1]
                    {
                        new ImageLoaderTransform.Column()
                        {
                            Source = "ImagePath", Name = "ImageReal"
                        }
                    },
                    ImageFolder = imageFolder
                }, data);

                IDataView cropped = ImageResizerTransform.Create(env, new ImageResizerTransform.Arguments()
                {
                    Column = new ImageResizerTransform.Column[1] {
                        new ImageResizerTransform.Column()
                        {
                            Name = "ImageCropped", Source = "ImageReal", ImageHeight = 100, ImageWidth = 100, Resizing = ImageResizerTransform.ResizingKind.IsoPad
                        }
                    }
                }, images);

                // Fail fast when a column is missing: ignoring the lookup result would let the
                // test continue with an index that does not refer to the intended column.
                Assert.True(cropped.Schema.TryGetColumnIndex("ImagePath", out int pathColumn));
                Assert.True(cropped.Schema.TryGetColumnIndex("ImageCropped", out int cropBitmapColumn));

                using (var cursor = cropped.GetRowCursor((x) => true))
                {
                    var pathGetter = cursor.GetGetter<DvText>(pathColumn);
                    DvText path = default;
                    var bitmapCropGetter = cursor.GetGetter<Bitmap>(cropBitmapColumn);
                    Bitmap bitmap = default;
                    while (cursor.MoveNext())
                    {
                        pathGetter(ref path);
                        bitmapCropGetter(ref bitmap);
                        Assert.NotNull(bitmap);

                        // Output name: "<source file name without extension>.cropped.jpg".
                        var fileToSave = GetOutputPath(Path.GetFileNameWithoutExtension(path.ToString()) + ".cropped.jpg");
                        bitmap.Save(fileToSave, System.Drawing.Imaging.ImageFormat.Jpeg);
                    }
                }
            }
            Done();
        }
        /// <summary>
        /// Guesses which trainer signature fits a dataset from the number of label columns and
        /// the distinct, non-empty label values seen in the data passed through <c>Take(1000)</c>.
        /// </summary>
        /// <param name="data">Data view to sample label values from.</param>
        /// <param name="columns">Columns with their inferred purposes.</param>
        /// <returns>
        /// Clustering when there is no label column; multi-output regression when there is more
        /// than one; otherwise, by number of distinct label values: 1 gives anomaly detection,
        /// 2 gives binary classification, and more than 2 gives regression, ranking or multi-class
        /// classification as decided below. Returns <c>null</c> when no rule matches.
        /// </returns>
        public static Type InferPredictorCategoryType(IDataView data, PurposeInference.Column[] columns)
        {
            List<PurposeInference.Column> labels = columns.Where(col => col.Purpose == ColumnPurpose.Label).ToList();

            if (labels.Count == 0)
            {
                return typeof(SignatureClusteringTrainer);
            }

            if (labels.Count > 1)
            {
                return typeof(SignatureMultiOutputRegressorTrainer);
            }

            PurposeInference.Column label = labels[0];
            HashSet<string> uniqueLabelValues = new HashSet<string>();

            // NOTE(review): Take(1000) presumably limits the scan to the first 1000 rows - confirm.
            data = data.Take(1000);
            using (var cursor = data.GetRowCursor(index => index == label.ColumnIndex))
            {
                ValueGetter<DvText> getter = DataViewUtils.PopulateGetterArray(cursor, new List<int> {
                    label.ColumnIndex
                })[0];
                while (cursor.MoveNext())
                {
                    var currentLabel = new DvText();
                    getter(ref currentLabel);
                    string currentLabelString = currentLabel.ToString();

                    // HashSet.Add already ignores duplicates, so no separate Contains check is needed.
                    if (!String.IsNullOrEmpty(currentLabelString))
                    {
                        uniqueLabelValues.Add(currentLabelString);
                    }
                }
            }

            if (uniqueLabelValues.Count == 1)
            {
                return typeof(SignatureAnomalyDetectorTrainer);
            }

            if (uniqueLabelValues.Count == 2)
            {
                return typeof(SignatureBinaryClassifierTrainer);
            }

            if (uniqueLabelValues.Count > 2)
            {
                // A numeric label is treated as a regression target when any value parses as a
                // float that is above 50, negative, or written with a decimal point.
                // Label text comes from data, not from the user's UI, so it is parsed with the
                // invariant culture; the number styles are the defaults float.TryParse(string, out float) uses.
                if ((label.ItemKind == DataKind.R4) &&
                    uniqueLabelValues.Any(val =>
                {
                    float fVal;
                    bool parsed = float.TryParse(
                        val,
                        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out fVal);
                    return parsed && (fVal > 50 || fVal < 0 || val.Contains('.'));
                }))
                {
                    return typeof(SignatureRegressorTrainer);
                }

                if (label.ItemKind == DataKind.R4 ||
                    label.ItemKind == DataKind.TX ||
                    data.Schema.GetColumnType(label.ColumnIndex).IsKey)
                {
                    // A group column signals ranking; otherwise it is multi-class classification.
                    if (columns.Any(col => col.Purpose == ColumnPurpose.Group))
                    {
                        return typeof(SignatureRankerTrainer);
                    }

                    return typeof(SignatureMultiClassClassifierTrainer);
                }
            }

            return null;
        }
Beispiel #13
0
 /// <summary>
 /// Converts the text to its string form, encodes it as UTF-8 and copies the bytes into a <c>ByteString</c>.
 /// </summary>
 private static ByteString StringToByteString(DvText str)
 {
     byte[] utf8Bytes = Encoding.UTF8.GetBytes(str.ToString());
     return ByteString.CopyFrom(utf8Bytes);
 }
        /// <summary>
        /// Reads every row of <paramref name="data"/> and converts it into a
        /// <c>PipelineResultRow</c> built from the graph, metric, pipeline-id, training-metric,
        /// first-input and predictor-model columns.
        /// </summary>
        /// <param name="env">Host environment used to create exceptions.</param>
        /// <param name="data">Data view holding one pipeline result per row.</param>
        /// <param name="graphColName">Name of the text column holding the graph JSON.</param>
        /// <param name="metricColName">Name of the double column holding the metric value.</param>
        /// <param name="idColName">Name of the text column holding the pipeline id.</param>
        /// <param name="trainingMetricColName">Name of the double column holding the training metric value.</param>
        /// <param name="firstInputColName">Name of the text column holding the first input.</param>
        /// <param name="predictorModelColName">Name of the text column holding the predictor model.</param>
        /// <returns>One <c>PipelineResultRow</c> per row of <paramref name="data"/>, in cursor order.</returns>
        /// <remarks>Throws the exception produced by <c>env.ExceptParam</c> when a named column is missing.</remarks>
        public static PipelineResultRow[] ExtractResults(IHostEnvironment env, IDataView data,
                                                         string graphColName, string metricColName, string idColName, string trainingMetricColName,
                                                         string firstInputColName, string predictorModelColName)
        {
            var results = new List<PipelineResultRow>();
            var schema = data.Schema;

            // Resolves a column name to its index, or throws a parameter exception naming the offending argument.
            int RequireColumn(string columnName, string paramName)
            {
                if (!schema.TryGetColumnIndex(columnName, out var index))
                {
                    throw env.ExceptParam(paramName, $"Column name {columnName} not found");
                }

                return index;
            }

            // Lookup order matters only for which missing column is reported first.
            int graphCol = RequireColumn(graphColName, nameof(graphColName));
            int metricCol = RequireColumn(metricColName, nameof(metricColName));
            int trainingMetricCol = RequireColumn(trainingMetricColName, nameof(trainingMetricColName));
            int pipelineIdCol = RequireColumn(idColName, nameof(idColName));
            int firstInputCol = RequireColumn(firstInputColName, nameof(firstInputColName));
            int predictorModelCol = RequireColumn(predictorModelColName, nameof(predictorModelColName));

            using (var cursor = data.GetRowCursor(col => true))
            {
                var metricGetter = cursor.GetGetter<double>(metricCol);
                var graphGetter = cursor.GetGetter<DvText>(graphCol);
                var pipelineIdGetter = cursor.GetGetter<DvText>(pipelineIdCol);
                var trainingMetricGetter = cursor.GetGetter<double>(trainingMetricCol);
                var firstInputGetter = cursor.GetGetter<DvText>(firstInputCol);
                var predictorModelGetter = cursor.GetGetter<DvText>(predictorModelCol);

                // Buffers are overwritten by the getters on every row.
                double metricValue = 0;
                double trainingMetricValue = 0;
                DvText graphJson = new DvText();
                DvText pipelineId = new DvText();
                DvText firstInput = new DvText();
                DvText predictorModel = new DvText();

                while (cursor.MoveNext())
                {
                    metricGetter(ref metricValue);
                    graphGetter(ref graphJson);
                    pipelineIdGetter(ref pipelineId);
                    trainingMetricGetter(ref trainingMetricValue);
                    firstInputGetter(ref firstInput);
                    predictorModelGetter(ref predictorModel);

                    var row = new PipelineResultRow(
                        graphJson.ToString(),
                        metricValue,
                        pipelineId.ToString(),
                        trainingMetricValue,
                        firstInput.ToString(),
                        predictorModel.ToString());
                    results.Add(row);
                }
            }

            return results.ToArray();
        }
Beispiel #15
0
        /// <summary>
        /// Feeds a three-element in-memory collection through an experiment and verifies that the
        /// resulting data view yields exactly those three rows, with the number in column 0 and
        /// the string in column 1.
        /// </summary>
        public void CanSuccessfullyEnumerated()
        {
            // Expected rows, in collection order.
            float[] expectedNumbers = { 1, 2, 3 };
            string[] expectedStrings = { "1", "2", "3" };

            var items = new List<Input>()
            {
                new Input { Number1 = 1, String1 = "1" },
                new Input { Number1 = 2, String1 = "2" },
                new Input { Number1 = 3, String1 = "3" }
            };
            var collection = CollectionDataSource.Create(items);

            using (var environment = new TlcEnvironment())
            {
                Experiment experiment = environment.CreateExperiment();
                ILearningPipelineDataStep output = collection.ApplyStep(null, experiment) as ILearningPipelineDataStep;

                experiment.Compile();
                collection.SetInput(environment, experiment);
                experiment.Run();

                IDataView data = experiment.GetOutput(output.Data);
                Assert.NotNull(data);

                using (var cursor = data.GetRowCursor(col => true))
                {
                    var numberGetter = cursor.GetGetter<float>(0);
                    var stringGetter = cursor.GetGetter<DvText>(1);

                    for (int row = 0; row < expectedNumbers.Length; row++)
                    {
                        Assert.True(cursor.MoveNext());

                        float number = 0;
                        numberGetter(ref number);
                        Assert.Equal(expectedNumbers[row], number);

                        DvText text = new DvText();
                        stringGetter(ref text);
                        Assert.Equal(expectedStrings[row], text.ToString());
                    }

                    // The collection must not produce any further rows.
                    Assert.False(cursor.MoveNext());
                }
            }
        }