/// <summary>
/// Gets the content string associated with the feature at index <paramref name="ifeat"/>.
/// </summary>
/// <param name="ifeat">Feature index; asserted to satisfy 0 &lt;= ifeat &lt; Count.</param>
/// <returns>
/// The stored content text when it has characters; otherwise the value returned by
/// <c>DatasetUtils.GetDefaultTransform</c> for the feature's name (see <c>GetName</c>).
/// </returns>
public string GetContent(int ifeat)
{
    Contracts.Assert(0 <= ifeat && ifeat < Count);

    DvText stored = _content.GetItemOrDefault(ifeat);
    if (stored.HasChars)
    {
        return stored.ToString();
    }

    // Nothing stored for this feature: derive the default from its name.
    return DatasetUtils.GetDefaultTransform(GetName(ifeat));
}
/// <summary>
/// Gets the name of the feature at index <paramref name="ifeat"/>.
/// </summary>
/// <param name="ifeat">Feature index; asserted to satisfy 0 &lt;= ifeat &lt; Count.</param>
/// <returns>
/// The stored name when it has characters; otherwise a generated name made of
/// the letter 'f' followed by the index.
/// </returns>
public string GetName(int ifeat)
{
    Contracts.Assert(0 <= ifeat && ifeat < Count);

    DvText stored = _names.GetItemOrDefault(ifeat);
    if (stored.HasChars)
    {
        return stored.ToString();
    }

    // No explicit name: fall back to the positional one.
    return string.Format("f{0}", ifeat);
}
/// <summary>
/// Loads QuotingData.csv with a header, comma separator and quoted strings allowed, runs the
/// loader through an experiment, and checks that exactly three rows come back with the
/// expected id (column 0) and text (column 1).
/// </summary>
public void CanSuccessfullyRetrieveQuotedData()
{
    string dataPath = GetDataPath("QuotingData.csv");
    var loader = new Data.TextLoader(dataPath).CreateFrom<QuoteInput>(
        useHeader: true,
        separator: ',',
        allowQuotedStrings: true,
        supportSparse: false);

    // Rows the file is expected to produce, in order.
    float[] expectedIds = { 1, 2, 3 };
    string[] expectedTexts =
    {
        "This text contains comma, within quotes.",
        "This text contains extra punctuations and special characters.;*<>?!@#$%^&*()_+=-{}|[]:;'",
        "This text has no quotes"
    };

    using (var environment = new TlcEnvironment())
    {
        Experiment experiment = environment.CreateExperiment();
        ILearningPipelineDataStep output = loader.ApplyStep(null, experiment) as ILearningPipelineDataStep;

        experiment.Compile();
        loader.SetInput(environment, experiment);
        experiment.Run();

        IDataView data = experiment.GetOutput(output.Data);
        Assert.NotNull(data);

        using (var cursor = data.GetRowCursor(column => true))
        {
            var idGetter = cursor.GetGetter<float>(0);
            var textGetter = cursor.GetGetter<DvText>(1);

            for (int row = 0; row < expectedIds.Length; row++)
            {
                Assert.True(cursor.MoveNext());

                float id = 0;
                idGetter(ref id);
                Assert.Equal(expectedIds[row], id);

                DvText text = new DvText();
                textGetter(ref text);
                Assert.Equal(expectedTexts[row], text.ToString());
            }

            // No further rows are expected after the third one.
            Assert.False(cursor.MoveNext());
        }
    }
}
/// <summary>
/// Builds an action that reads the <c>DvText</c> value of column <paramref name="col"/> from
/// <paramref name="input"/> and pokes its string form into a <c>TRow</c> instance.
/// </summary>
/// <param name="input">Row the text getter is obtained from.</param>
/// <param name="col">Index of the column to read.</param>
/// <param name="poke">Delegate expected to be a <c>Poke&lt;TRow, string&gt;</c>; asserted after the cast.</param>
/// <returns>An action that fills the target row from the current value of the column.</returns>
private static Action<TRow> CreateTextToStringSetter(IRow input, int col, Delegate poke)
{
    var textGetter = input.GetGetter<DvText>(col);
    var stringPoke = poke as Poke<TRow, string>;
    Contracts.AssertValue(stringPoke);

    // A single buffer is captured by the closure and reused on every invocation.
    DvText buffer = default(DvText);
    Action<TRow> setter = row =>
    {
        textGetter(ref buffer);
        stringPoke(row, buffer.ToString());
    };
    return setter;
}
/// <summary>
/// Creates the getter that turns the text path in the source column of <paramref name="iinfo"/>
/// into a <c>Bitmap</c>.
/// </summary>
/// <param name="ch">Channel that receives the message and stack trace of any load failure.</param>
/// <param name="input">Row the source path is read from.</param>
/// <param name="iinfo">Index into <c>Infos</c>; asserted to be in range.</param>
/// <param name="disposer">Always set to null by this method.</param>
/// <returns>
/// A <c>ValueGetter&lt;Bitmap&gt;</c>. Each call disposes the bitmap previously held in the
/// destination, then yields a newly loaded bitmap, or null when the path is empty or loading fails.
/// </returns>
protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, out Action disposer)
{
    Host.AssertValue(ch, nameof(ch));
    Host.AssertValue(input);
    Host.Assert(0 <= iinfo && iinfo < Infos.Length);
    disposer = null;

    var pathGetter = GetSrcGetter<DvText>(input, iinfo);
    DvText pathText = default;

    ValueGetter<Bitmap> imageGetter = (ref Bitmap dst) =>
    {
        // Release whatever bitmap the caller's buffer still holds.
        if (dst != null)
        {
            dst.Dispose();
            dst = null;
        }

        pathGetter(ref pathText);
        if (pathText.Length <= 0)
        {
            // Empty path: leave the destination null.
            return;
        }

        // Exceptions are caught and turned into a null bitmap.
        try
        {
            string relativePath = pathText.ToString();
            string fullPath = string.IsNullOrWhiteSpace(_imageFolder)
                ? relativePath
                : Path.Combine(_imageFolder, relativePath);
            dst = new Bitmap(fullPath);
        }
        catch (Exception e)
        {
            // REVIEW: Everything is caught because the documentation for new Bitmap(string)
            // appears to be incorrect: a missing file produces an ArgumentException although
            // FileNotFoundException is documented. It is unclear what other failures
            // (e.g. a corrupted file) throw.
            // REVIEW: Log failures.
            ch.Info(e.Message);
            ch.Info(e.StackTrace);
            dst = null;
        }
    };

    return imageGetter;
}
/// <summary>
/// Creates the getter that turns the text path in the source column mapped from
/// <paramref name="iinfo"/> into a <c>Bitmap</c>.
/// </summary>
/// <param name="input">Row the source path is read from.</param>
/// <param name="iinfo">Index into the parent's <c>ColumnPairs</c>; asserted to be in range.</param>
/// <param name="disposer">Always set to null by this method.</param>
/// <returns>
/// A <c>ValueGetter&lt;Bitmap&gt;</c>. Each call disposes the bitmap previously held in the
/// destination, then yields a newly loaded bitmap, or null when the path is empty or loading fails.
/// </returns>
protected override Delegate MakeGetter(IRow input, int iinfo, out Action disposer)
{
    Contracts.AssertValue(input);
    Contracts.Assert(0 <= iinfo && iinfo < _parent.ColumnPairs.Length);
    disposer = null;

    var pathGetter = input.GetGetter<DvText>(ColMapNewToOld[iinfo]);
    DvText pathText = default;

    ValueGetter<Bitmap> imageGetter = (ref Bitmap dst) =>
    {
        // Release whatever bitmap the caller's buffer still holds.
        if (dst != null)
        {
            dst.Dispose();
            dst = null;
        }

        pathGetter(ref pathText);
        if (pathText.Length <= 0)
        {
            // Empty path: leave the destination null.
            return;
        }

        // Exceptions are caught and turned into a null bitmap.
        try
        {
            string relativePath = pathText.ToString();
            string fullPath = string.IsNullOrWhiteSpace(_parent.ImageFolder)
                ? relativePath
                : Path.Combine(_parent.ImageFolder, relativePath);
            dst = new Bitmap(fullPath);
        }
        catch (Exception)
        {
            // REVIEW: Everything is caught because the documentation for new Bitmap(string)
            // appears to be incorrect: a missing file produces an ArgumentException although
            // FileNotFoundException is documented. It is unclear what other failures
            // (e.g. a corrupted file) throw.
            // REVIEW: Log failures.
            dst = null;
        }
    };

    return imageGetter;
}
/// <summary>
/// Loads TrimData.csv with a header, comma separator and whitespace trimming enabled, runs the
/// loader through an experiment, and checks that exactly two rows come back with the
/// expected id (column 0) and text (column 1).
/// </summary>
public void CanSuccessfullyTrimSpaces()
{
    string dataPath = GetDataPath("TrimData.csv");
    var loader = new Data.TextLoader(dataPath).CreateFrom<QuoteInput>(
        useHeader: true,
        separator: ',',
        allowQuotedStrings: false,
        supportSparse: false,
        trimWhitespace: true);

    // Rows the file is expected to produce, in order.
    float[] expectedIds = { 1, 2 };
    string[] expectedTexts =
    {
        "There is a space at the end",
        "There is no space at the end"
    };

    using (var environment = new TlcEnvironment())
    {
        Experiment experiment = environment.CreateExperiment();
        ILearningPipelineDataStep output = loader.ApplyStep(null, experiment) as ILearningPipelineDataStep;

        experiment.Compile();
        loader.SetInput(environment, experiment);
        experiment.Run();

        IDataView data = experiment.GetOutput(output.Data);
        Assert.NotNull(data);

        using (var cursor = data.GetRowCursor(column => true))
        {
            var idGetter = cursor.GetGetter<float>(0);
            var textGetter = cursor.GetGetter<DvText>(1);

            for (int row = 0; row < expectedIds.Length; row++)
            {
                Assert.True(cursor.MoveNext());

                float id = 0;
                idGetter(ref id);
                Assert.Equal(expectedIds[row], id);

                DvText text = new DvText();
                textGetter(ref text);
                Assert.Equal(expectedTexts[row], text.ToString());
            }

            // No further rows are expected after the second one.
            Assert.False(cursor.MoveNext());
        }
    }
}
/// <summary>
/// Applies the operation between a text column and a text scalar: every element of the
/// result is the string form of the corresponding source element followed by the string
/// form of <paramref name="value"/>.
/// </summary>
/// <param name="c1">Left operand; only columns of kind <c>DataKind.TX</c> are supported.</param>
/// <param name="value">Text appended to every element.</param>
/// <returns>A new <c>NumericColumn</c> wrapping the result column.</returns>
/// <exception cref="DataTypeError">Thrown when <paramref name="c1"/> is not a text column.</exception>
public static NumericColumn Operation(NumericColumn c1, DvText value)
{
    switch (c1.Kind)
    {
        case DataKind.TX:
            {
                DvText[] a;
                DataColumn<DvText> res;
                // NOTE(review): presumably exposes c1's values as `a` and allocates `res`
                // with a matching length - confirm against the three-argument overload.
                Operation(c1, out a, out res);

                // The appended text does not change between rows, so convert it once
                // instead of once per element.
                string suffix = value.ToString();
                for (int i = 0; i < res.Length; ++i)
                {
                    res.Set(i, new DvText(a[i].ToString() + suffix));
                }

                return new NumericColumn(res);
            }

        default:
            throw new DataTypeError(string.Format("{0} not implemented for column {1}.", OperationName, c1.Kind));
    }
}
/// <summary>
/// Heuristically decides whether a text value looks like a column header.
/// </summary>
/// <param name="value">Text to inspect.</param>
/// <returns>
/// <c>false</c> when the text is longer than 100 characters; <c>true</c> when it starts
/// (case-insensitively) with one of the known header prefixes; <c>null</c> when neither applies.
/// </returns>
private bool? IsLookLikeHeader(DvText value)
{
    string text = value.ToString();
    if (text.Length > 100)
    {
        return false;
    }

    // Anchored patterns: each one matches only at the start of the text.
    string[] headerPatterns = { "^Label", "^Feature", "^Market", "^m_", "^Weight" };
    for (int i = 0; i < headerPatterns.Length; i++)
    {
        if (Regex.IsMatch(text, headerPatterns[i], RegexOptions.IgnoreCase))
        {
            return true;
        }
    }

    return null;
}
/// <summary>
/// Reads every row of <paramref name="data"/> and converts it into a <c>PipelineResultRow</c>
/// built from the graph, metric and pipeline-id columns.
/// </summary>
/// <param name="env">Host environment used to create the exception for a missing column.</param>
/// <param name="data">Data view holding one result per row.</param>
/// <param name="graphColName">Name of the text column holding the graph JSON.</param>
/// <param name="metricColName">Name of the double column holding the metric value.</param>
/// <param name="idColName">Name of the text column holding the pipeline id.</param>
/// <returns>One <c>PipelineResultRow</c> per row of <paramref name="data"/>, in cursor order.</returns>
/// <remarks>Throws the exception produced by <c>env.ExceptNotSupp</c> when any of the named columns is absent.</remarks>
public static PipelineResultRow[] ExtractResults(IHostEnvironment env, IDataView data, string graphColName, string metricColName, string idColName)
{
    var results = new List<PipelineResultRow>();
    var schema = data.Schema;

    if (!schema.TryGetColumnIndex(graphColName, out var graphCol))
    {
        throw env.ExceptNotSupp($"Column name {graphColName} not found");
    }

    if (!schema.TryGetColumnIndex(metricColName, out var metricCol))
    {
        throw env.ExceptNotSupp($"Column name {metricColName} not found");
    }

    if (!schema.TryGetColumnIndex(idColName, out var pipelineIdCol))
    {
        throw env.ExceptNotSupp($"Column name {idColName} not found");
    }

    using (var cursor = data.GetRowCursor(col => true))
    {
        // Getters are bound to the cursor, not to a row, so they are created once
        // up front rather than once per row.
        var metricGetter = cursor.GetGetter<double>(metricCol);
        var graphGetter = cursor.GetGetter<DvText>(graphCol);
        var pipelineIdGetter = cursor.GetGetter<DvText>(pipelineIdCol);

        double metricValue = 0;
        DvText graphJson = new DvText();
        DvText pipelineId = new DvText();

        while (cursor.MoveNext())
        {
            metricGetter(ref metricValue);
            graphGetter(ref graphJson);
            pipelineIdGetter(ref pipelineId);

            results.Add(new PipelineResultRow(graphJson.ToString(), metricValue, pipelineId.ToString()));
        }
    }

    return results.ToArray();
}
/// <summary>
/// Loads the images listed in images/images.tsv, resizes each one to 100x100 with
/// <c>IsoPad</c> resizing, and saves every resulting bitmap as a JPEG in the test output folder.
/// </summary>
public void TestSaveImages()
{
    using (var env = new ConsoleEnvironment())
    {
        var dataFile = GetDataPath("images/images.tsv");
        var imageFolder = Path.GetDirectoryName(dataFile);
        var data = env.CreateLoader("Text{col=ImagePath:TX:0 col=Name:TX:1}", new MultiFileSource(dataFile));

        var images = ImageLoaderTransform.Create(env, new ImageLoaderTransform.Arguments()
        {
            Column = new ImageLoaderTransform.Column[1]
            {
                new ImageLoaderTransform.Column() { Source = "ImagePath", Name = "ImageReal" }
            },
            ImageFolder = imageFolder
        }, data);

        IDataView cropped = ImageResizerTransform.Create(env, new ImageResizerTransform.Arguments()
        {
            Column = new ImageResizerTransform.Column[1]
            {
                new ImageResizerTransform.Column()
                {
                    Name = "ImageCropped",
                    Source = "ImageReal",
                    ImageHeight = 100,
                    ImageWidth = 100,
                    Resizing = ImageResizerTransform.ResizingKind.IsoPad
                }
            }
        }, images);

        // Fail loudly if a column is missing instead of silently reading column 0.
        Assert.True(cropped.Schema.TryGetColumnIndex("ImagePath", out int pathColumn));
        Assert.True(cropped.Schema.TryGetColumnIndex("ImageCropped", out int cropBitmapColumn));

        using (var cursor = cropped.GetRowCursor((x) => true))
        {
            var pathGetter = cursor.GetGetter<DvText>(pathColumn);
            DvText path = default;
            var bitmapCropGetter = cursor.GetGetter<Bitmap>(cropBitmapColumn);
            Bitmap bitmap = default;

            while (cursor.MoveNext())
            {
                pathGetter(ref path);
                bitmapCropGetter(ref bitmap);
                Assert.NotNull(bitmap);

                // The output file is named after the source image, with a ".cropped.jpg" suffix.
                var fileToSave = GetOutputPath(Path.GetFileNameWithoutExtension(path.ToString()) + ".cropped.jpg");
                bitmap.Save(fileToSave, System.Drawing.Imaging.ImageFormat.Jpeg);
            }
        }
    }

    Done();
}
/// <summary>
/// Guesses which trainer signature fits a dataset from its label column(s).
/// </summary>
/// <param name="data">Data to inspect; label values are read from the view returned by <c>data.Take(1000)</c>.</param>
/// <param name="columns">Columns with their inferred purposes.</param>
/// <returns>
/// <list type="bullet">
/// <item><description>No label column: clustering.</description></item>
/// <item><description>More than one label column: multi-output regression.</description></item>
/// <item><description>One distinct non-empty label value: anomaly detection.</description></item>
/// <item><description>Two distinct values: binary classification.</description></item>
/// <item><description>More than two values, label of kind R4, and some value parses as a float that is
/// above 50, below 0, or written with a '.': regression.</description></item>
/// <item><description>More than two values and label of kind R4, TX or a key type: ranking when a
/// Group column exists, otherwise multi-class classification.</description></item>
/// <item><description>Anything else (including no non-empty label values): <c>null</c>.</description></item>
/// </list>
/// </returns>
public static Type InferPredictorCategoryType(IDataView data, PurposeInference.Column[] columns)
{
    List<PurposeInference.Column> labels = columns.Where(col => col.Purpose == ColumnPurpose.Label).ToList();
    if (labels.Count == 0)
    {
        return typeof(SignatureClusteringTrainer);
    }

    if (labels.Count > 1)
    {
        return typeof(SignatureMultiOutputRegressorTrainer);
    }

    PurposeInference.Column label = labels[0];
    HashSet<string> uniqueLabelValues = new HashSet<string>();

    data = data.Take(1000);
    using (var cursor = data.GetRowCursor(index => index == label.ColumnIndex))
    {
        ValueGetter<DvText> getter = DataViewUtils.PopulateGetterArray(cursor, new List<int> { label.ColumnIndex })[0];
        while (cursor.MoveNext())
        {
            var currentLabel = new DvText();
            getter(ref currentLabel);
            string currentLabelString = currentLabel.ToString();

            // HashSet.Add already ignores duplicates, so no Contains check is needed.
            if (!string.IsNullOrEmpty(currentLabelString))
            {
                uniqueLabelValues.Add(currentLabelString);
            }
        }
    }

    if (uniqueLabelValues.Count == 1)
    {
        return typeof(SignatureAnomalyDetectorTrainer);
    }

    if (uniqueLabelValues.Count == 2)
    {
        return typeof(SignatureBinaryClassifierTrainer);
    }

    if (uniqueLabelValues.Count > 2)
    {
        if (label.ItemKind == DataKind.R4 && uniqueLabelValues.Any(LooksLikeRegressionTarget))
        {
            return typeof(SignatureRegressorTrainer);
        }

        if (label.ItemKind == DataKind.R4 || label.ItemKind == DataKind.TX || data.Schema.GetColumnType(label.ColumnIndex).IsKey)
        {
            return columns.Any(col => col.Purpose == ColumnPurpose.Group)
                ? typeof(SignatureRankerTrainer)
                : typeof(SignatureMultiClassClassifierTrainer);
        }
    }

    return null;

    // A label value hints at regression when it is numeric and either outside [0, 50]
    // or written with a decimal point. Label text is data, not UI text, so it is parsed
    // with the invariant culture (same number styles as the default float.TryParse) to
    // keep the inference independent of the machine's locale.
    bool LooksLikeRegressionTarget(string val)
    {
        float fVal;
        bool parsed = float.TryParse(
            val,
            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
            System.Globalization.CultureInfo.InvariantCulture,
            out fVal);
        return parsed && (fVal > 50 || fVal < 0 || val.Contains('.'));
    }
}
/// <summary>
/// Converts a text value into a <c>ByteString</c> holding the UTF-8 encoding of its string form.
/// </summary>
/// <param name="str">Text to encode.</param>
/// <returns>A <c>ByteString</c> copied from the UTF-8 bytes of <paramref name="str"/>.</returns>
private static ByteString StringToByteString(DvText str)
{
    string text = str.ToString();
    byte[] utf8Bytes = Encoding.UTF8.GetBytes(text);
    return ByteString.CopyFrom(utf8Bytes);
}
/// <summary>
/// Reads every row of <paramref name="data"/> and converts it into a <c>PipelineResultRow</c>
/// built from the graph, metric, pipeline-id, training-metric, first-input and predictor-model columns.
/// </summary>
/// <param name="env">Host environment used to create the exception for a missing column.</param>
/// <param name="data">Data view holding one result per row.</param>
/// <param name="graphColName">Name of the text column holding the graph JSON.</param>
/// <param name="metricColName">Name of the double column holding the metric value.</param>
/// <param name="idColName">Name of the text column holding the pipeline id.</param>
/// <param name="trainingMetricColName">Name of the double column holding the training metric value.</param>
/// <param name="firstInputColName">Name of the text column holding the first input.</param>
/// <param name="predictorModelColName">Name of the text column holding the predictor model.</param>
/// <returns>One <c>PipelineResultRow</c> per row of <paramref name="data"/>, in cursor order.</returns>
/// <remarks>Throws the exception produced by <c>env.ExceptParam</c> for the first named column that is absent.</remarks>
public static PipelineResultRow[] ExtractResults(IHostEnvironment env, IDataView data, string graphColName, string metricColName, string idColName, string trainingMetricColName, string firstInputColName, string predictorModelColName)
{
    var rows = new List<PipelineResultRow>();
    var schema = data.Schema;

    // Resolve every column index first; the order of these checks decides which
    // missing column is reported.
    if (!schema.TryGetColumnIndex(graphColName, out var graphCol))
    {
        throw env.ExceptParam(nameof(graphColName), $"Column name {graphColName} not found");
    }

    if (!schema.TryGetColumnIndex(metricColName, out var metricCol))
    {
        throw env.ExceptParam(nameof(metricColName), $"Column name {metricColName} not found");
    }

    if (!schema.TryGetColumnIndex(trainingMetricColName, out var trainingMetricCol))
    {
        throw env.ExceptParam(nameof(trainingMetricColName), $"Column name {trainingMetricColName} not found");
    }

    if (!schema.TryGetColumnIndex(idColName, out var pipelineIdCol))
    {
        throw env.ExceptParam(nameof(idColName), $"Column name {idColName} not found");
    }

    if (!schema.TryGetColumnIndex(firstInputColName, out var firstInputCol))
    {
        throw env.ExceptParam(nameof(firstInputColName), $"Column name {firstInputColName} not found");
    }

    if (!schema.TryGetColumnIndex(predictorModelColName, out var predictorModelCol))
    {
        throw env.ExceptParam(nameof(predictorModelColName), $"Column name {predictorModelColName} not found");
    }

    using (var cursor = data.GetRowCursor(col => true))
    {
        var metricGetter = cursor.GetGetter<double>(metricCol);
        var graphGetter = cursor.GetGetter<DvText>(graphCol);
        var pipelineIdGetter = cursor.GetGetter<DvText>(pipelineIdCol);
        var trainingMetricGetter = cursor.GetGetter<double>(trainingMetricCol);
        var firstInputGetter = cursor.GetGetter<DvText>(firstInputCol);
        var predictorModelGetter = cursor.GetGetter<DvText>(predictorModelCol);

        // Value buffers are reused for every row.
        double metricValue = 0;
        double trainingMetricValue = 0;
        DvText graphJson = new DvText();
        DvText pipelineId = new DvText();
        DvText firstInput = new DvText();
        DvText predictorModel = new DvText();

        while (cursor.MoveNext())
        {
            metricGetter(ref metricValue);
            graphGetter(ref graphJson);
            pipelineIdGetter(ref pipelineId);
            trainingMetricGetter(ref trainingMetricValue);
            firstInputGetter(ref firstInput);
            predictorModelGetter(ref predictorModel);

            var row = new PipelineResultRow(
                graphJson.ToString(),
                metricValue,
                pipelineId.ToString(),
                trainingMetricValue,
                firstInput.ToString(),
                predictorModel.ToString());
            rows.Add(row);
        }
    }

    return rows.ToArray();
}
/// <summary>
/// Builds a collection data source from three in-memory <c>Input</c> items, runs it through
/// an experiment, and checks that exactly three rows come back with the expected number
/// (column 0) and string (column 1).
/// </summary>
public void CanSuccessfullyEnumerated()
{
    var collection = CollectionDataSource.Create(new List<Input>()
    {
        new Input { Number1 = 1, String1 = "1" },
        new Input { Number1 = 2, String1 = "2" },
        new Input { Number1 = 3, String1 = "3" }
    });

    // Rows the source is expected to produce, in order.
    float[] expectedIds = { 1, 2, 3 };
    string[] expectedTexts = { "1", "2", "3" };

    using (var environment = new TlcEnvironment())
    {
        Experiment experiment = environment.CreateExperiment();
        ILearningPipelineDataStep output = collection.ApplyStep(null, experiment) as ILearningPipelineDataStep;

        experiment.Compile();
        collection.SetInput(environment, experiment);
        experiment.Run();

        IDataView data = experiment.GetOutput(output.Data);
        Assert.NotNull(data);

        using (var cursor = data.GetRowCursor(column => true))
        {
            var idGetter = cursor.GetGetter<float>(0);
            var textGetter = cursor.GetGetter<DvText>(1);

            for (int row = 0; row < expectedIds.Length; row++)
            {
                Assert.True(cursor.MoveNext());

                float id = 0;
                idGetter(ref id);
                Assert.Equal(expectedIds[row], id);

                DvText text = new DvText();
                textGetter(ref text);
                Assert.Equal(expectedTexts[row], text.ToString());
            }

            // No further rows are expected after the third one.
            Assert.False(cursor.MoveNext());
        }
    }
}