/// <summary>
/// For each sample data file: detects the column split, infers column types and
/// purposes, runs dataset feature inference, writes the resulting JSON to an
/// output file, and hands that file to <c>CheckEquality</c>.
/// </summary>
public void DatasetInferenceTest()
{
    var dataFiles = new[]
    {
        GetDataPath(@"..\UCI\adult.train"),
        GetDataPath(@"..\UCI\adult.test"),
        GetDataPath(@"..\UnitTest\breast-cancer.txt"),
    };

    IHostEnvironment env = new MLContext();
    var host = env.Register("InferDatasetFeatures", seed: 0, verbose: false);

    using (var channel = host.Start("InferDatasetFeatures"))
    {
        for (int index = 0; index < dataFiles.Length; index++)
        {
            var sample = TextFileSample.CreateFromFullFile(host, dataFiles[index]);

            // Step 1: work out the separator and the number of columns.
            var split = TextFileContents.TrySplitColumns(host, sample, TextFileContents.DefaultSeparators);
            if (!split.IsSuccess)
            {
                throw channel.ExceptDecode("Couldn't detect separator.");
            }

            // Step 2: infer the column types using the split settings found above.
            // NOTE(review): this call passes `Env` (a member declared outside this method)
            // rather than the local `env`/`host` - confirm that is intentional.
            var inferredTypes = ColumnTypeInference.InferTextFileColumnTypes(
                Env,
                sample,
                new ColumnTypeInference.Arguments
                {
                    Separator = split.Separator,
                    AllowSparse = split.AllowSparse,
                    AllowQuote = split.AllowQuote,
                    ColumnCount = split.ColumnCount
                });
            if (!inferredTypes.IsSuccess)
            {
                // NOTE(review): a failed type inference ends the test here without
                // reaching Done() or checking the remaining files - confirm this is wanted.
                return;
            }

            // Step 3: infer the purpose of each column.
            bool hasHeader;
            ColumnGroupingInference.GroupingColumn[] purposes =
                InferenceUtils.InferColumnPurposes(channel, host, sample, split, out hasHeader);

            Guid id = new Guid("60C77F4E-DB62-4351-8311-9B392A12968E");

            // Step 4: assemble the arguments and run dataset feature inference.
            var typeData = inferredTypes.Data;
            var featureColumns = purposes
                .Select(p => new DatasetFeatureInference.Column(
                    p.SuggestedName, p.Purpose, p.ItemKind, p.ColumnRangeSelector))
                .ToArray();
            var featureArgs = new DatasetFeatureInference.Arguments(
                typeData,
                featureColumns,
                sample.FullFileSize,
                sample.ApproximateRowCount,
                false,
                id,
                true);
            string json = DatasetFeatureInference.InferDatasetFeatures(env, featureArgs);

            // Step 5: write the JSON to a per-dataset output file and check it.
            var resultFileName = string.Format("dataset-inference-result-{0:00}.txt", index);
            string resultPath = GetOutputPath(@"..\Common\Inference", resultFileName);
            using (var writer = new StreamWriter(File.Create(resultPath)))
            {
                writer.WriteLine(json);
            }

            CheckEquality(@"..\Common\Inference", resultFileName);
        }
    }

    Done();
}
/// <summary>
/// Splits the columns of TestData/DatasetWithNewlineBetweenQuotes.txt using the default
/// separators and expects a successful result with four columns separated by ','.
/// </summary>
public void TrySplitColumns_should_split_on_dataset_with_newline_between_double_quotes()
{
    // Arrange
    var mlContext = new MLContext();
    var datasetPath = Path.Combine("TestData", "DatasetWithNewlineBetweenQuotes.txt");
    var fileSample = TextFileSample.CreateFromFullFile(datasetPath);

    // Act
    var splitResult = TextFileContents.TrySplitColumns(
        mlContext,
        fileSample,
        TextFileContents.DefaultSeparators);

    // Assert (order kept so the first reported failure is unchanged)
    splitResult.ColumnCount.Should().Be(4);
    splitResult.Separator.Should().Be(',');
    splitResult.IsSuccess.Should().BeTrue();
}
/// <summary>
/// Loads a sample of the data file, detects its separator and column count, infers the
/// column purposes, serializes them as indented JSON and passes the JSON to
/// <c>PrintSchema</c> (with a writer over <c>_outFile</c> when one is set).
/// </summary>
/// <param name="ch">Channel that receives the progress and error messages.</param>
public void RunCore(IChannel ch)
{
    _host.AssertValue(ch);

    // A separate non-verbose host is used so that verbose messages from the text loader are ignored.
    var quietHost = _host.Register("inner host", seed: 0, verbose: false);

    ch.Info("Loading file sample into memory.");
    var fileSample = TextFileSample.CreateFromFullFile(quietHost, _dataFile);

    ch.Info("Detecting separator and columns");
    var split = TextFileContents.TrySplitColumns(quietHost, fileSample, TextFileContents.DefaultSeparators);
    if (!split.IsSuccess)
    {
        throw Contracts.ExceptDecode("Couldn't detect separator.");
    }

    ch.Info("Separator detected as '{0}', there are {1} columns.", split.Separator, split.ColumnCount);

    bool hasHeader;
    ColumnGroupingInference.GroupingColumn[] purposes =
        InferenceUtils.InferColumnPurposes(ch, quietHost, fileSample, split, out hasHeader);

    // Serialization is best-effort: on any failure an error is logged and the JSON stays empty.
    string json = "";
    try
    {
        json = JsonConvert.SerializeObject(purposes, Formatting.Indented);
    }
    catch
    {
        ch.Error("Error serializing the schema file. Check its content.");
    }

    // Nothing to print when serialization failed or produced no text.
    if (string.IsNullOrEmpty(json))
    {
        return;
    }

    // No output file configured: PrintSchema is called without a writer.
    if (_outFile == null)
    {
        PrintSchema(json, null, ch);
        return;
    }

    using (var writer = new StreamWriter(_outFile))
    {
        PrintSchema(json, writer, ch);
    }
}
/// <summary>
/// Creates a <c>DotLessCssCssLoader</c> whose content source is a
/// <c>FixedListCssContentLoader</c> holding only <paramref name="content"/>, and whose
/// marker callback returns <paramref name="injectedMarkers"/>.
/// </summary>
/// <param name="content">The single file contents entry the loader is built over.</param>
/// <param name="injectedMarkers">Markers returned by the callback given to the loader.</param>
/// <returns>The configured loader.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="content"/> or <paramref name="injectedMarkers"/> is null.
/// </exception>
private ITextFileLoader GetDefaultLoader(TextFileContents content, string[] injectedMarkers)
{
    if (content == null)
    {
        throw new ArgumentNullException("content");
    }

    if (injectedMarkers == null)
    {
        throw new ArgumentNullException("injectedMarkers");
    }

    // Built in the same order the original constructor arguments were evaluated.
    var fixedContent = new FixedListCssContentLoader(new[] { content });
    var logger = new NullLogger();

    // Errors are configured as LogAndRaiseException, with a NullLogger as the logger.
    var loader = new DotLessCssCssLoader(
        fixedContent,
        () => injectedMarkers,
        "REPLACEME",
        ErrorBehaviourOptions.LogAndRaiseException,
        logger);

    return loader;
}
/// <summary>
/// Round-trips a <c>TextFileContents</c> through
/// <c>DiskCachingTextFileLoader.GetFileContentRepresentation</c> and
/// <c>DiskCachingTextFileLoader.GetFileContents</c>, and expects the result to equal the
/// original according to <c>TextFileContentsComparer</c>.
/// </summary>
public void StraightDownTheRoadCopyShouldSucceed()
{
    // Arrange
    var lastModified = new DateTime(2013, 2, 6, 18, 19, 0);
    var original = new TextFileContents("styles.css", lastModified, ".Woo{color:black}");

    // Act: turn the contents into their string representation, then read them back.
    var serialized = DiskCachingTextFileLoader.GetFileContentRepresentation(original, 68);
    TextFileContents roundTripped;
    using (var textReader = new StringReader(serialized))
    {
        roundTripped = DiskCachingTextFileLoader.GetFileContents(textReader);
    }

    // Assert
    var comparer = new TextFileContentsComparer();
    Assert.Equal<TextFileContents>(original, roundTripped, comparer);
}