public void CanParseLargeRandomStream()
{
    // Builds an in-memory "text file" of random bytes and verifies the
    // sampler can ingest it and report a positive file size.
    const int numRows = 100000;
    const int rowSize = 100;
    var newline = Encoding.UTF8.GetBytes("\r\n");

    using (var stream = new MemoryStream())
    {
        for (var rowIndex = 0; rowIndex < numRows; rowIndex++)
        {
            var buffer = new byte[rowSize];
            AutoMlUtils.Random.Value.NextBytes(buffer);

            // Replace any zero bytes: the text file sampler would otherwise
            // interpret the data as UTF-16/UTF-32 without a BOM.
            for (var j = 0; j < buffer.Length; j++)
            {
                if (buffer[j] == 0)
                {
                    buffer[j] = 1;
                }
            }

            stream.Write(buffer, 0, rowSize);
            stream.Write(newline, 0, newline.Length);
        }

        // Rewind so the sampler reads from the start of the generated data.
        stream.Seek(0, SeekOrigin.Begin);

        var sample = TextFileSample.CreateFromFullStream(stream);
        Assert.NotNull(sample);
        Assert.True(sample.FullFileSize > 0);
    }
}
public void DatasetInferenceTest()
{
    // End-to-end dataset feature inference over several known datasets:
    // sample the file, detect separator/columns, infer column types and
    // purposes, then compare the serialized result against a baseline.
    var datasets = new[]
    {
        GetDataPath(@"..\UCI\adult.train"),
        GetDataPath(@"..\UCI\adult.test"),
        GetDataPath(@"..\UnitTest\breast-cancer.txt"),
    };

    IHostEnvironment env = new MLContext();
    var h = env.Register("InferDatasetFeatures", seed: 0, verbose: false);
    using (var ch = h.Start("InferDatasetFeatures"))
    {
        for (int i = 0; i < datasets.Length; i++)
        {
            var sample = TextFileSample.CreateFromFullFile(h, datasets[i]);
            var splitResult = TextFileContents.TrySplitColumns(h, sample, TextFileContents.DefaultSeparators);
            if (!splitResult.IsSuccess)
            {
                throw ch.ExceptDecode("Couldn't detect separator.");
            }

            // NOTE(review): `Env` (the test-base environment) is used here while the
            // locals `env`/`h` are used everywhere else in this method — looks
            // unintentional; confirm before unifying.
            var typeInfResult = ColumnTypeInference.InferTextFileColumnTypes(Env, sample,
                new ColumnTypeInference.Arguments
                {
                    Separator = splitResult.Separator,
                    AllowSparse = splitResult.AllowSparse,
                    AllowQuote = splitResult.AllowQuote,
                    ColumnCount = splitResult.ColumnCount
                });

            // NOTE(review): an early return here silently passes the test without
            // reaching Done() — consider failing instead; kept as-is for now.
            if (!typeInfResult.IsSuccess)
            {
                return;
            }

            // Declare directly at first use instead of pre-assigning null/false
            // placeholders that were immediately overwritten.
            ColumnGroupingInference.GroupingColumn[] columns =
                InferenceUtils.InferColumnPurposes(ch, h, sample, splitResult, out bool hasHeader);

            Guid id = new Guid("60C77F4E-DB62-4351-8311-9B392A12968E");
            var commandArgs = new DatasetFeatureInference.Arguments(
                typeInfResult.Data,
                columns.Select(col => new DatasetFeatureInference.Column(
                    col.SuggestedName, col.Purpose, col.ItemKind, col.ColumnRangeSelector)).ToArray(),
                sample.FullFileSize, sample.ApproximateRowCount, false, id, true);

            string jsonString = DatasetFeatureInference.InferDatasetFeatures(env, commandArgs);

            // Write the inference result and diff it against the checked-in baseline.
            var outFile = string.Format("dataset-inference-result-{0:00}.txt", i);
            string dataPath = GetOutputPath(@"..\Common\Inference", outFile);
            using (var sw = new StreamWriter(File.Create(dataPath)))
                sw.WriteLine(jsonString);
            CheckEquality(@"..\Common\Inference", outFile);
        }
    }
    Done();
}
public void TrySplitColumns_should_split_on_dataset_with_newline_between_double_quotes()
{
    // Arrange: a dataset containing a newline embedded inside a quoted field.
    var context = new MLContext();
    var dataset = Path.Combine("TestData", "DatasetWithNewlineBetweenQuotes.txt");
    var sample = TextFileSample.CreateFromFullFile(dataset);

    // Act
    var result = TextFileContents.TrySplitColumns(context, sample, TextFileContents.DefaultSeparators);

    // Assert: check overall success first so a failed split reports the root
    // cause instead of a confusing column-count or separator mismatch.
    result.IsSuccess.Should().BeTrue();
    result.ColumnCount.Should().Be(4);
    result.Separator.Should().Be(',');
}
public void RunCore(IChannel ch)
{
    // Infers the column schema of _dataFile and prints it as JSON, either to
    // _outFile (when set) or to the default output via PrintSchema.
    _host.AssertValue(ch);

    // Inner env is used to ignore verbose messages from the text loader.
    var envInner = _host.Register("inner host", seed: 0, verbose: false);

    ch.Info("Loading file sample into memory.");
    var sample = TextFileSample.CreateFromFullFile(envInner, _dataFile);

    ch.Info("Detecting separator and columns");
    var splitResult = TextFileContents.TrySplitColumns(envInner, sample, TextFileContents.DefaultSeparators);
    if (!splitResult.IsSuccess)
    {
        throw Contracts.ExceptDecode("Couldn't detect separator.");
    }
    ch.Info("Separator detected as '{0}', there are {1} columns.", splitResult.Separator, splitResult.ColumnCount);

    ColumnGroupingInference.GroupingColumn[] groupingResult =
        InferenceUtils.InferColumnPurposes(ch, envInner, sample, splitResult, out bool hasHeader);

    // Serialization is best-effort: on failure, log and emit nothing.
    string json = "";
    try
    {
        json = JsonConvert.SerializeObject(groupingResult, Formatting.Indented);
    }
    catch
    {
        ch.Error("Error serializing the schema file. Check its content.");
    }

    if (string.IsNullOrEmpty(json))
    {
        return;
    }

    if (_outFile != null)
    {
        using (var sw = new StreamWriter(_outFile))
            PrintSchema(json, sw, ch);
    }
    else
    {
        PrintSchema(json, null, ch);
    }
}