/// <summary>
        /// Read a data view from a text file using <see cref="TextLoader"/>.
        /// The file is wrapped in a <see cref="MultiFileSource"/>, which is handed both to the
        /// <see cref="TextLoader"/> constructor and to its <c>Read</c> call.
        /// </summary>
        /// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog. Checked for null.</param>
        /// <param name="path">Specifies a file from which to read. Checked to be non-empty.</param>
        /// <param name="args">Defines the settings of the load operation. Passed to the loader as-is (null when omitted).</param>
        /// <returns>The data view produced by the <see cref="TextLoader"/> for <paramref name="path"/>.</returns>
        public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog, string path, TextLoader.Arguments args = null)
        {
            // An extension method can be invoked on a null receiver, so validate it explicitly
            // (same guard the Save* methods on this catalog use).
            Contracts.CheckValue(catalog, nameof(catalog));
            Contracts.CheckNonEmpty(path, nameof(path));

            var env = catalog.GetEnvironment();
            var source = new MultiFileSource(path);

            return new TextLoader(env, args, source).Read(source);
        }
Пример #2
0
        /// <summary>
        /// Creates an <see cref="IDataView"/> over a text file by means of a <see cref="TextLoader"/>.
        /// Because an <see cref="IDataView"/> is lazy, nothing is actually loaded here; only the schema is validated.
        /// </summary>
        /// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
        /// <param name="path">A file, or a path of files, to load from. Validated by <c>CheckValidPathContents</c>.</param>
        /// <param name="options">The settings of the load operation.</param>
        /// <returns>The data view returned by the loader's <c>Load</c> call.</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        /// [!code-csharp[LoadFromTextFile](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromText.cs)]
        /// ]]>
        /// </format>
        /// </example>
        public static IDataView LoadFromTextFile(
            this DataOperationsCatalog catalog,
            string path,
            TextLoader.Options options = null)
        {
            CheckValidPathContents(path);

            var hostEnv = catalog.GetEnvironment();
            var files = new MultiFileSource(path);

            // The same source serves as the loader's data sample and as the data to load.
            var textLoader = new TextLoader(hostEnv, options, dataSample: files);
            return textLoader.Load(files);
        }
Пример #3
0
        /// <summary>
        /// Read a data view from a binary file using <see cref="BinaryLoader"/>.
        /// The loader is constructed with a fresh <see cref="BinaryLoader.Arguments"/> instance.
        /// </summary>
        /// <param name="catalog">The catalog. Checked for null.</param>
        /// <param name="path">The path to the file to read from. Checked to be non-empty.</param>
        /// <returns>The <see cref="BinaryLoader"/> created over <paramref name="path"/>.</returns>
        public static IDataView ReadFromBinary(this DataOperationsCatalog catalog, string path)
        {
            // An extension method can be invoked on a null receiver, so validate it explicitly
            // (same guard the Save* methods on this catalog use).
            Contracts.CheckValue(catalog, nameof(catalog));
            Contracts.CheckNonEmpty(path, nameof(path));

            var env = catalog.GetEnvironment();

            return new BinaryLoader(env, new BinaryLoader.Arguments(), path);
        }
Пример #4
0
        /// <summary>
        /// Read a data view from an <see cref="IMultiStreamSource"/> on a binary file using <see cref="BinaryLoader"/>.
        /// The loader is constructed with a fresh <see cref="BinaryLoader.Arguments"/> instance.
        /// </summary>
        /// <param name="catalog">The catalog. Checked for null.</param>
        /// <param name="fileSource">The file source to read from. This can be a <see cref="MultiFileSource"/>, for example. Checked for null.</param>
        /// <returns>The <see cref="BinaryLoader"/> created over <paramref name="fileSource"/>.</returns>
        public static IDataView ReadFromBinary(this DataOperationsCatalog catalog, IMultiStreamSource fileSource)
        {
            // An extension method can be invoked on a null receiver, so validate it explicitly
            // (same guard the Save* methods on this catalog use).
            Contracts.CheckValue(catalog, nameof(catalog));
            Contracts.CheckValue(fileSource, nameof(fileSource));

            var env = catalog.GetEnvironment();

            return new BinaryLoader(env, new BinaryLoader.Arguments(), fileSource);
        }
Пример #5
0
        /// <summary>
        /// Writes a data view to a stream in binary form using a <see cref="BinarySaver"/>.
        /// </summary>
        /// <param name="catalog">The catalog. Checked for null.</param>
        /// <param name="data">The data view to save. Checked for null.</param>
        /// <param name="stream">The stream to write to. Checked for null.</param>
        /// <param name="keepHidden">Whether to keep hidden columns in the dataset.</param>
        public static void SaveAsBinary(
            this DataOperationsCatalog catalog,
            IDataView data,
            Stream stream,
            bool keepHidden = false)
        {
            Contracts.CheckValue(catalog, nameof(catalog));
            Contracts.CheckValue(data, nameof(data));
            Contracts.CheckValue(stream, nameof(stream));

            var hostEnv = catalog.GetEnvironment();
            var binarySaver = new BinarySaver(hostEnv, new BinarySaver.Arguments());

            // The channel only lives for the duration of the save and is disposed afterwards.
            using (var channel = hostEnv.Start("Saving data"))
            {
                DataSaverUtils.SaveDataView(channel, binarySaver, data, stream, keepHidden);
            }
        }
Пример #6
0
        /// <summary>
        /// Load a <see cref="IDataView"/> from a text file using <see cref="TextLoader"/>.
        /// Note that <see cref="IDataView"/>'s are lazy, so no actual loading happens here, just schema validation.
        /// </summary>
        /// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog. Checked for null.</param>
        /// <param name="path">Specifies a file from which to load. Checked to be non-empty and to refer to an
        /// existing file (via <see cref="File.Exists(string)"/>).</param>
        /// <param name="options">Defines the settings of the load operation.</param>
        /// <returns>The data view returned by the loader's <c>Load</c> call.</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        /// [!code-csharp[LoadFromTextFile](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromText.cs)]
        /// ]]>
        /// </format>
        /// </example>
        public static IDataView LoadFromTextFile(this DataOperationsCatalog catalog, string path,
                                                 TextLoader.Options options = null)
        {
            // An extension method can be invoked on a null receiver, so validate it explicitly
            // (same guard the Save* methods on this catalog use).
            Contracts.CheckValue(catalog, nameof(catalog));
            Contracts.CheckNonEmpty(path, nameof(path));
            if (!File.Exists(path))
            {
                throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path);
            }

            var env = catalog.GetEnvironment();
            var source = new MultiFileSource(path);

            // The same source serves as the loader's data sample and as the data to load.
            return new TextLoader(env, options, dataSample: source).Load(source);
        }
Пример #7
0
        /// <summary>
        /// Load a <see cref="IDataView"/> from a binary file.
        /// Note that <see cref="IDataView"/>'s are lazy, so no actual loading happens here, just schema validation.
        /// </summary>
        /// <param name="catalog">The catalog. Checked for null.</param>
        /// <param name="path">The path to the file to load from. Checked to be non-empty and to refer to an
        /// existing file (via <see cref="File.Exists(string)"/>).</param>
        /// <returns>The <see cref="BinaryLoader"/> created over <paramref name="path"/>.</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        /// [!code-csharp[LoadFromBinary](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromBinary.cs)]
        /// ]]>
        /// </format>
        /// </example>
        public static IDataView LoadFromBinary(this DataOperationsCatalog catalog, string path)
        {
            // An extension method can be invoked on a null receiver, so validate it explicitly
            // (same guard the Save* methods on this catalog use).
            Contracts.CheckValue(catalog, nameof(catalog));
            Contracts.CheckNonEmpty(path, nameof(path));
            if (!File.Exists(path))
            {
                throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path);
            }

            var env = catalog.GetEnvironment();

            return new BinaryLoader(env, new BinaryLoader.Arguments(), path);
        }
        /// <summary>
        /// Read a data view from a text file using <see cref="TextLoader"/>.
        /// The loader is built from the explicit column definitions, with no data sample supplied.
        /// </summary>
        /// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog. Checked for null.</param>
        /// <param name="path">The path to the file. Checked to be non-empty.</param>
        /// <param name="columns">The columns of the schema.</param>
        /// <param name="hasHeader">Whether the file has a header. Defaults to <c>TextLoader.DefaultArguments.HasHeader</c>.</param>
        /// <param name="separatorChar">The character used as separator between data points in a row.
        /// Defaults to <c>TextLoader.DefaultArguments.Separator</c>.</param>
        /// <returns>The data view.</returns>
        public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog,
                                                 string path,
                                                 TextLoader.Column[] columns,
                                                 bool hasHeader     = TextLoader.DefaultArguments.HasHeader,
                                                 char separatorChar = TextLoader.DefaultArguments.Separator)
        {
            // An extension method can be invoked on a null receiver, so validate it explicitly
            // (same guard the Save* methods on this catalog use).
            Contracts.CheckValue(catalog, nameof(catalog));
            Contracts.CheckNonEmpty(path, nameof(path));

            var env = catalog.GetEnvironment();

            // REVIEW: it is almost always a mistake to have a 'trainable' text loader here.
            // Therefore, we are going to disallow data sample.
            var reader = new TextLoader(env, columns, hasHeader, separatorChar, dataSample: null);

            return reader.Read(new MultiFileSource(path));
        }
        /// <summary>
        /// Writes a data view to a stream as text using a <see cref="TextSaver"/>.
        /// </summary>
        /// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog. Checked for null.</param>
        /// <param name="data">The data view to save. Checked for null.</param>
        /// <param name="stream">The stream to write to. Checked for null.</param>
        /// <param name="separatorChar">The column separator; handed to the saver as a one-character string.</param>
        /// <param name="headerRow">Whether to write the header row.</param>
        /// <param name="schema">Whether to write the header comment with the schema.</param>
        /// <param name="keepHidden">Whether to keep hidden columns in the dataset.</param>
        public static void SaveAsText(
            this DataOperationsCatalog catalog,
            IDataView data,
            Stream stream,
            char separatorChar = TextLoader.DefaultArguments.Separator,
            bool headerRow = TextLoader.DefaultArguments.HasHeader,
            bool schema = true,
            bool keepHidden = false)
        {
            Contracts.CheckValue(catalog, nameof(catalog));
            Contracts.CheckValue(data, nameof(data));
            Contracts.CheckValue(stream, nameof(stream));

            var hostEnv = catalog.GetEnvironment();

            var saverArgs = new TextSaver.Arguments
            {
                Separator = separatorChar.ToString(),
                OutputHeader = headerRow,
                OutputSchema = schema
            };
            var textSaver = new TextSaver(hostEnv, saverArgs);

            // The channel only lives for the duration of the save and is disposed afterwards.
            using (var channel = hostEnv.Start("Saving data"))
            {
                DataSaverUtils.SaveDataView(channel, textSaver, data, stream, keepHidden);
            }
        }
 /// <summary>
 /// Drops every row for which the supplied predicate returns true. The predicate also receives a state object,
 /// which lets the filter maintain per-cursor state.
 /// </summary>
 /// <typeparam name="TSrc">The class defining which columns to take from the incoming data.</typeparam>
 /// <typeparam name="TState">The type that describes per-cursor state.</typeparam>
 /// <param name="catalog">The data operations catalog.</param>
 /// <param name="input">The input data.</param>
 /// <param name="filterPredicate">A predicate taking an input of type <typeparamref name="TSrc"/> and a state object of type
 /// <typeparamref name="TState"/>; it returns true if the row should be filtered (dropped) and false otherwise.</param>
 /// <param name="stateInitAction">The action to initialize the state object, that is called once before the cursor is initialized.</param>
 /// <returns>A <c>StatefulCustomMappingFilter</c> built over <paramref name="input"/>.</returns>
 public static IDataView FilterByStatefulCustomPredicate<TSrc, TState>(
     this DataOperationsCatalog catalog,
     IDataView input,
     Func<TSrc, TState, bool> filterPredicate,
     Action<TState> stateInitAction)
     where TSrc : class, new()
     where TState : class, new()
 {
     var hostEnv = catalog.GetEnvironment();
     return new StatefulCustomMappingFilter<TSrc, TState>(hostEnv, input, filterPredicate, stateInitAction);
 }
 /// <summary>
 /// Drops every row for which the supplied predicate returns true.
 /// </summary>
 /// <typeparam name="TSrc">The class defining which columns to take from the incoming data.</typeparam>
 /// <param name="catalog">The data operations catalog.</param>
 /// <param name="input">The input data.</param>
 /// <param name="filterPredicate">A predicate taking an input of type <typeparamref name="TSrc"/>; it returns true if the
 /// row should be filtered (dropped) and false otherwise.</param>
 /// <returns>A <c>CustomMappingFilter</c> built over <paramref name="input"/>.</returns>
 public static IDataView FilterByCustomPredicate<TSrc>(
     this DataOperationsCatalog catalog,
     IDataView input,
     Func<TSrc, bool> filterPredicate)
     where TSrc : class, new()
 {
     var hostEnv = catalog.GetEnvironment();
     return new CustomMappingFilter<TSrc>(hostEnv, input, filterPredicate);
 }