示例#1
0
        /// <summary>
        /// Produces an <see cref="IDataView"/> over a text file by way of a <see cref="TextLoader"/>.
        /// Since an <see cref="IDataView"/> is lazy, no data is actually read at this point; only the schema is validated.
        /// </summary>
        /// <typeparam name="TInput">The data model type handed to the text loader factory; the columns are
        /// inferred by this method rather than taken from <paramref name="options"/>.</typeparam>
        /// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
        /// <param name="path">A file, or a path designating several files, to load from.</param>
        /// <param name="options">The settings of the load operation. Its Columns field may be left unspecified,
        /// because the columns are inferred by this method.</param>
        /// <returns>The data view.</returns>
        public static IDataView LoadFromTextFile<TInput>(this DataOperationsCatalog catalog, string path,
            TextLoader.Options options)
        {
            // Path checking is delegated to the shared helper before anything else is built.
            CheckValidPathContents(path);

            var env = CatalogUtils.GetEnvironment(catalog);
            var loader = TextLoader.CreateTextLoader<TInput>(env, options);
            var source = new MultiFileSource(path);
            return loader.Load(source);
        }
示例#2
0
 /// <summary>
 /// Builds a <see cref="TextLoader"/> whose dataset schema is inferred from a data model type.
 /// </summary>
 /// <typeparam name="TInput">Describes the schema of the data to load. Public fields or properties decorated
 /// with <see cref="LoadColumnAttribute"/> (and possibly other attributes) determine the column names and
 /// data types in the schema of the loaded data.</typeparam>
 /// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
 /// <param name="separatorChar">The character that separates columns. Default is '\t'.</param>
 /// <param name="hasHeader">Whether the file has a header with feature names. If a <paramref name="dataSample"/>
 /// is supplied, <see langword="true"/> means that the first line of the <paramref name="dataSample"/> is used for
 /// feature names and that the first line is skipped when <see cref="TextLoader.Load(IMultiStreamSource)"/> is called.
 /// If no <paramref name="dataSample"/> is supplied, <see langword="true"/> only means that the loader skips the
 /// first line when <see cref="TextLoader.Load(IMultiStreamSource)"/> is called; the columns will not carry slot
 /// name annotations, because the output schema is built when the loader is created and not when
 /// <see cref="TextLoader.Load(IMultiStreamSource)"/> is called.</param>
 /// <param name="dataSample">Optional location of a data sample. When present, the sample can be used to infer
 /// slot name annotations.</param>
 /// <param name="allowQuoting">Whether the input may contain double-quoted values. This setting tells separator
 /// characters inside an input value apart from real separators. When <see langword="true"/>, separators inside
 /// double quotes are part of the input value. When <see langword="false"/>, every separator, including those
 /// within quotes, starts a new column.</param>
 /// <param name="trimWhitespace">Remove trailing whitespace from lines.</param>
 /// <param name="allowSparse">Whether the input may contain sparse representations. For instance, a row holding
 /// "5 2:6 4:3" has 5 columns of which only columns 2 and 4 are non-zero, with values 6 and 3 respectively.
 /// Column indices are zero-based, so columns 2 and 4 are the 3rd and 5th columns.
 /// Dense values may also be followed by sparse values written this way. For instance, a row holding
 /// "1 2 5 2:6 4:3" has two dense columns with values 1 and 2, followed by 5 sparsely represented columns with
 /// values 0, 0, 6, 0, and 3. Sparse column indices start from 0, even though 0 then denotes the third column.</param>
 /// <returns>The text loader.</returns>
 public static TextLoader CreateTextLoader<TInput>(this DataOperationsCatalog catalog,
     char separatorChar = TextLoader.Defaults.Separator,
     bool hasHeader = TextLoader.Defaults.HasHeader,
     IMultiStreamSource dataSample = null,
     bool allowQuoting = TextLoader.Defaults.AllowQuoting,
     bool trimWhitespace = TextLoader.Defaults.TrimWhitespace,
     bool allowSparse = TextLoader.Defaults.AllowSparse)
 {
     var env = CatalogUtils.GetEnvironment(catalog);

     // Note that the factory takes its arguments in a different order than this method declares them.
     return TextLoader.CreateTextLoader<TInput>(
         env,
         hasHeader,
         separatorChar,
         allowQuoting,
         allowSparse,
         trimWhitespace,
         dataSample: dataSample);
 }
        /// <summary>
        /// Produces an <see cref="IDataView"/> over a text file by way of a <see cref="TextLoader"/>.
        /// Since an <see cref="IDataView"/> is lazy, no data is actually read at this point; only the schema is validated.
        /// </summary>
        /// <typeparam name="TInput">The data model type handed to the text loader factory; the columns are
        /// inferred by this method rather than taken from <paramref name="options"/>.</typeparam>
        /// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
        /// <param name="path">The file to load from. It must exist on disk.</param>
        /// <param name="options">The settings of the load operation. Its Columns field may be left unspecified,
        /// because the columns are inferred by this method.</param>
        /// <returns>The data view.</returns>
        public static IDataView LoadFromTextFile<TInput>(this DataOperationsCatalog catalog, string path,
            TextLoader.Options options)
        {
            Contracts.CheckNonEmpty(path, nameof(path));

            // Fail up front when the path does not point at an existing file.
            bool fileIsPresent = File.Exists(path);
            if (!fileIsPresent)
            {
                throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path);
            }

            var env = CatalogUtils.GetEnvironment(catalog);
            var loader = TextLoader.CreateTextLoader<TInput>(env, options);
            var source = new MultiFileSource(path);
            return loader.Load(source);
        }
示例#4
0
        /// <summary>
        /// Produces an <see cref="IDataView"/> over a text file by way of a <see cref="TextLoader"/>.
        /// Since an <see cref="IDataView"/> is lazy, no data is actually read at this point; only the schema is validated.
        /// </summary>
        /// <typeparam name="TInput">The data model type handed to the text loader factory.</typeparam>
        /// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
        /// <param name="path">The path to the file(s).</param>
        /// <param name="separatorChar">The character that separates columns. Default is '\t'.</param>
        /// <param name="hasHeader">Whether the file has a header. When <see langword="true"/>, the loader skips the
        /// first line when <see cref="TextLoader.Load(IMultiStreamSource)"/> is called.</param>
        /// <param name="allowQuoting">Whether the input may contain double-quoted values. This setting tells separator
        /// characters inside an input value apart from real separators. When <see langword="true"/>, separators inside
        /// double quotes are part of the input value. When <see langword="false"/>, every separator, including those
        /// within quotes, starts a new column.
        /// The setting also tells empty values apart from missing values. When <see langword="true"/>, consecutive
        /// separators denote a missing value and "" denotes an empty value. When <see langword="false"/>, consecutive
        /// separators denote an empty value, and missing values are denoted by the default missing value of each type
        /// as documented in <see cref="DataKind"/>.</param>
        /// <param name="trimWhitespace">Remove trailing whitespace from lines.</param>
        /// <param name="allowSparse">Whether the input may contain sparse representations. For instance, a row holding
        /// "5 2:6 4:3" has 5 columns of which only columns 2 and 4 are non-zero, with values 6 and 3 respectively.
        /// Column indices are zero-based, so columns 2 and 4 are the 3rd and 5th columns.
        /// Dense values may also be followed by sparse values written this way. For instance, a row holding
        /// "1 2 5 2:6 4:3" has two dense columns with values 1 and 2, followed by 5 sparsely represented columns with
        /// values 0, 0, 6, 0, and 3. Sparse column indices start from 0, even though 0 then denotes the third column.</param>
        /// <returns>The data view.</returns>
        public static IDataView LoadFromTextFile<TInput>(this DataOperationsCatalog catalog,
            string path,
            char separatorChar = TextLoader.Defaults.Separator,
            bool hasHeader = TextLoader.Defaults.HasHeader,
            bool allowQuoting = TextLoader.Defaults.AllowQuoting,
            bool trimWhitespace = TextLoader.Defaults.TrimWhitespace,
            bool allowSparse = TextLoader.Defaults.AllowSparse)
        {
            // Path checking is delegated to the shared helper before anything else is built.
            CheckValidPathContents(path);

            // REVIEW: a 'trainable' text loader is almost always a mistake in this spot,
            // which is why no data sample is accepted or passed on.
            var env = CatalogUtils.GetEnvironment(catalog);
            var loader = TextLoader.CreateTextLoader<TInput>(
                env,
                hasHeader,
                separatorChar,
                allowQuoting,
                allowSparse,
                trimWhitespace);

            var source = new MultiFileSource(path);
            return loader.Load(source);
        }
示例#5
0
        /// <summary>
        /// Produces an <see cref="IDataView"/> over a text file by way of a <see cref="TextLoader"/>.
        /// Since an <see cref="IDataView"/> is lazy, no data is actually read at this point; only the schema is validated.
        /// </summary>
        /// <typeparam name="TInput">The data model type handed to the text loader factory.</typeparam>
        /// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
        /// <param name="path">The path to the file. The file must exist on disk.</param>
        /// <param name="separatorChar">The character that separates columns. Default is '\t'.</param>
        /// <param name="hasHeader">Whether the file has a header with feature names. Note: a TextLoader created with
        /// hasHeader = true but without a dataSample produces vector columns that carry no slot name annotations
        /// (slots being the elements of a given vector column), because the output schema is built when the TextLoader
        /// is created and not when <see cref="TextLoader.Load(IMultiStreamSource)"/> is called.
        /// Furthermore, dataSample = null combined with hasHeader = true tells the loader to skip the first line of
        /// the file it is given when Load() is called.</param>
        /// <param name="allowQuoting">Whether the input may contain quoted values, which can hold separator characters
        /// and colons, and which allow empty values to be told apart from missing values. When true, consecutive
        /// separators denote a missing value and "" denotes an empty value.
        /// When false, consecutive separators denote an empty value.</param>
        /// <param name="trimWhitespace">Remove trailing whitespace from lines.</param>
        /// <param name="allowSparse">Whether the input may contain sparse representations. For example, a row holding
        /// "5 2:6 4:3" means there are 5 columns that are all zero except for the 3rd and 5th columns, which have
        /// values 6 and 3.</param>
        /// <returns>The data view.</returns>
        public static IDataView LoadFromTextFile<TInput>(this DataOperationsCatalog catalog,
            string path,
            char separatorChar = TextLoader.Defaults.Separator,
            bool hasHeader = TextLoader.Defaults.HasHeader,
            bool allowQuoting = TextLoader.Defaults.AllowQuoting,
            bool trimWhitespace = TextLoader.Defaults.TrimWhitespace,
            bool allowSparse = TextLoader.Defaults.AllowSparse)
        {
            Contracts.CheckNonEmpty(path, nameof(path));

            // Fail up front when the path does not point at an existing file.
            bool fileIsPresent = File.Exists(path);
            if (!fileIsPresent)
            {
                throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path);
            }

            // REVIEW: a 'trainable' text loader is almost always a mistake in this spot,
            // which is why no data sample is accepted or passed on.
            var env = CatalogUtils.GetEnvironment(catalog);
            var loader = TextLoader.CreateTextLoader<TInput>(
                env,
                hasHeader,
                separatorChar,
                allowQuoting,
                allowSparse,
                trimWhitespace);

            var source = new MultiFileSource(path);
            return loader.Load(source);
        }
示例#6
0
 /// <summary>
 /// Builds a <see cref="TextLoader"/> whose dataset schema is inferred from a data model type.
 /// </summary>
 /// <typeparam name="TInput">The data model type from which the dataset schema is inferred.</typeparam>
 /// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
 /// <param name="options">The settings of the load operation. Its Columns field may be left unspecified,
 /// because the columns are inferred by this method.</param>
 /// <param name="dataSample">Optional location of a data sample. When present, the sample can be used to infer
 /// information about the columns, such as slot names.</param>
 /// <returns>The text loader.</returns>
 public static TextLoader CreateTextLoader<TInput>(this DataOperationsCatalog catalog,
     TextLoader.Options options,
     IMultiStreamSource dataSample = null)
 {
     var env = CatalogUtils.GetEnvironment(catalog);
     return TextLoader.CreateTextLoader<TInput>(env, options, dataSample);
 }