Exemplo n.º 1
0
 /// <summary>
 /// Adjusts the internal normalizers according to the sample data bundle and
 /// then normalizes both the input and the output vectors of the bundle.
 /// </summary>
 /// <param name="bundle">Sample data bundle to be normalized</param>
 public void Normalize(TimeSeriesBundle bundle)
 {
     //Normalizers are adjusted to the bundle data before they are applied
     this.AdjustNormalizers(bundle);
     //Input part of the bundle
     this.NormalizeInputVectorCollection(bundle.InputVectorCollection);
     //Output part of the bundle
     this.NormalizeOutputVectorCollection(bundle.OutputVectorCollection);
 }
Exemplo n.º 2
0
 /// <summary>
 /// Creates a TimeSeriesBundle from the vector collection.
 /// Every row except the last one contributes an input vector and every row
 /// except the first one contributes an output vector, so the input vector taken
 /// from row N sits at the same bundle position as the output vector taken from row N+1.
 /// </summary>
 /// <param name="vectorCollection">
 /// Collection of vectors. Each vector has to contain one value per defined field.
 /// </param>
 /// <param name="normalize">Specifies whether to normalize data in the created bundle</param>
 /// <param name="bundle">Created bundle</param>
 /// <returns>
 /// The input vector built from the last row (it is not part of the bundle).
 /// When normalize is true, the returned vector is passed through NormalizeInputVector as well.
 /// </returns>
 /// <exception cref="ArgumentNullException">vectorCollection is null</exception>
 /// <exception cref="ArgumentException">
 /// vectorCollection is empty or the length of its first vector differs from the number of defined fields
 /// </exception>
 public double[] CreateBundleFromVectorCollection(List<double[]> vectorCollection,
                                                  bool normalize,
                                                  out TimeSeriesBundle bundle
                                                  )
 {
     CheckStructure();
     //Argument checks (the first vector is inspected below, so the collection must not be null or empty)
     if (vectorCollection == null)
     {
         throw new ArgumentNullException(nameof(vectorCollection));
     }
     if (vectorCollection.Count == 0)
     {
         throw new ArgumentException("vectorCollection contains no vectors.", nameof(vectorCollection));
     }
     if (vectorCollection[0].Length != _fieldNameTypeCollection.Count)
     {
         throw new ArgumentException($"Inconsistent number of fields ({vectorCollection[0].Length}) in vectorCollection and number of defined fields ({_fieldNameTypeCollection.Count}).", nameof(vectorCollection));
     }
     //Input field indexes (positions of the input fields within a source vector)
     int[] inputFieldIdxs = new int[_inputFieldNameCollection.Count];
     for (int i = 0; i < inputFieldIdxs.Length; i++)
     {
         inputFieldIdxs[i] = _fieldNameCollection.IndexOf(_inputFieldNameCollection[i]);
     }
     //Output field indexes (positions of the output fields within a source vector)
     int[] outputFieldIdxs = new int[_outputFieldNameCollection.Count];
     for (int i = 0; i < outputFieldIdxs.Length; i++)
     {
         outputFieldIdxs[i] = _fieldNameCollection.IndexOf(_outputFieldNameCollection[i]);
     }
     double[] remainingInputVector = null;
     bundle = new TimeSeriesBundle();
     int lastRow = vectorCollection.Count - 1;
     for (int row = 0; row <= lastRow; row++)
     {
         double[] sourceVector = vectorCollection[row];
         //Input vector
         double[] inputVector = new double[inputFieldIdxs.Length];
         for (int i = 0; i < inputFieldIdxs.Length; i++)
         {
             inputVector[i] = sourceVector[inputFieldIdxs[i]];
         }
         if (row < lastRow)
         {
             bundle.InputVectorCollection.Add(inputVector);
         }
         else
         {
             //The last row has no following row to supply its output, so its input stays out of the bundle
             remainingInputVector = inputVector;
         }
         //Output vector (the first row has no preceding input to be paired with)
         if (row > 0)
         {
             double[] outputVector = new double[outputFieldIdxs.Length];
             for (int i = 0; i < outputFieldIdxs.Length; i++)
             {
                 outputVector[i] = sourceVector[outputFieldIdxs[i]];
             }
             bundle.OutputVectorCollection.Add(outputVector);
         }
     }
     //Normalization ?
     if (normalize)
     {
         Normalize(bundle);
         NormalizeInputVector(remainingInputVector);
     }
     return remainingInputVector;
 }
Exemplo n.º 3
0
 /// <summary>
 /// Resets the internal normalizers and adjusts them again using every input
 /// vector and every output vector of the sample data bundle.
 /// </summary>
 /// <param name="bundle">Sample data bundle the normalizers are adjusted to</param>
 public void AdjustNormalizers(TimeSeriesBundle bundle)
 {
     //Reset is done first, then the bundle vectors are processed one by one
     this.ResetNormalizers();
     //Input vectors
     foreach (double[] vector in bundle.InputVectorCollection)
     {
         this.AdjustInputNormalizers(vector);
     }
     //Output vectors
     foreach (double[] vector in bundle.OutputVectorCollection)
     {
         this.AdjustOutputNormalizers(vector);
     }
 }
Exemplo n.º 4
0
 /// <summary>
 /// Naturalizes both the input and the output vectors of the sample data bundle.
 /// </summary>
 /// <param name="bundle">Sample data bundle to be naturalized</param>
 public void Naturalize(TimeSeriesBundle bundle)
 {
     //Input part of the bundle
     this.NaturalizeInputVectorCollection(bundle.InputVectorCollection);
     //Output part of the bundle
     this.NaturalizeOutputVectorCollection(bundle.OutputVectorCollection);
 }
Exemplo n.º 5
0
        /// <summary>
        /// Loads the data from a csv file and prepares a normalized TimeSeriesBundle.
        /// The first line of the csv file must be field names. These field names must
        /// match the names of the input and output fields.
        /// </summary>
        /// <param name="fileName">
        /// Data file name
        /// </param>
        /// <param name="inputFieldNameCollection">
        /// Input fields
        /// </param>
        /// <param name="outputFieldNameCollection">
        /// Output fields
        /// </param>
        /// <param name="normRange">
        /// Range of normalized values
        /// </param>
        /// <param name="normReserveRatio">
        /// Reserve held by a normalizer to cover cases where future data exceeds a known range of sample data.
        /// </param>
        /// <param name="dataStandardization">
        /// Specifies whether to apply data standardization
        /// </param>
        /// <param name="singleNormalizer">
        /// Use true if all input and output fields are about the same range of values.
        /// When true, every field is defined with the shared "COMMON" identifier instead of its own name.
        /// </param>
        /// <param name="bundleNormalizer">
        /// Returned initialized instance of BundleNormalizer.
        /// </param>
        /// <param name="remainingInputVector">
        /// Returned the last input vector unused in the bundle.
        /// </param>
        /// <returns>
        /// The bundle created by BundleNormalizer.CreateBundleFromVectorCollection (normalization requested).
        /// </returns>
        /// <exception cref="FormatException">
        /// The file is empty or its first row does not contain enough delimited column names.
        /// </exception>
        public static TimeSeriesBundle Load(string fileName,
                                            List<string> inputFieldNameCollection,
                                            List<string> outputFieldNameCollection,
                                            Interval normRange,
                                            double normReserveRatio,
                                            bool dataStandardization,
                                            bool singleNormalizer,
                                            out BundleNormalizer bundleNormalizer,
                                            out double[] remainingInputVector
                                            )
        {
            TimeSeriesBundle bundle = null;

            //NOTE(review): reserve ratio and standardization flag are each passed twice - presumably
            //once for the input side and once for the output side; confirm against the BundleNormalizer constructor
            bundleNormalizer = new BundleNormalizer(normRange, normReserveRatio, dataStandardization, normReserveRatio, dataStandardization);
            //The file is only read, so it is opened with read access (works for read-only files as well)
            using (StreamReader streamReader = new StreamReader(new FileStream(fileName, FileMode.Open, FileAccess.Read)))
            {
                List<int> fieldIndexes = new List<int>();
                List<double[]> allData = new List<double[]>();
                //First row contains column names (data fields)
                string delimitedColumnNames = streamReader.ReadLine();
                if (delimitedColumnNames == null)
                {
                    //ReadLine returns null when there is nothing to read
                    throw new FormatException("The file is empty, the 1st row containing delimited column names is missing.");
                }
                //What data delimiter is used?
                char csvDelimiter = DelimitedStringValues.RecognizeDelimiter(delimitedColumnNames);
                //Split column names
                DelimitedStringValues columnNames = new DelimitedStringValues(csvDelimiter);
                columnNames.LoadFromString(delimitedColumnNames);
                //Check if the recognized data delimiter works properly
                if (columnNames.NumOfStringValues < inputFieldNameCollection.Count)
                {
                    throw new FormatException("1st row of the file doesn't contain delimited column names or the value delimiter was not properly recognized.");
                }
                //Define fields
                //A field is defined (and its csv column index remembered) only once,
                //even when the same name is used as both an input and an output field
                foreach (string name in inputFieldNameCollection)
                {
                    if (!bundleNormalizer.IsFieldDefined(name))
                    {
                        bundleNormalizer.DefineField(name, singleNormalizer ? "COMMON" : name);
                        fieldIndexes.Add(columnNames.IndexOf(name));
                    }
                    bundleNormalizer.DefineInputField(name);
                }
                foreach (string name in outputFieldNameCollection)
                {
                    if (!bundleNormalizer.IsFieldDefined(name))
                    {
                        bundleNormalizer.DefineField(name, singleNormalizer ? "COMMON" : name);
                        fieldIndexes.Add(columnNames.IndexOf(name));
                    }
                    bundleNormalizer.DefineOutputField(name);
                }
                //Finalize structure
                bundleNormalizer.FinalizeStructure();
                //Load all relevant data (one vector per csv row, values ordered as the fields were defined)
                DelimitedStringValues dataRow = new DelimitedStringValues(csvDelimiter);
                while (!streamReader.EndOfStream)
                {
                    dataRow.LoadFromString(streamReader.ReadLine());
                    double[] vector = new double[fieldIndexes.Count];
                    for (int i = 0; i < fieldIndexes.Count; i++)
                    {
                        vector[i] = dataRow.GetValue(fieldIndexes[i]).ParseDouble(true, $"Can't parse double value {dataRow.GetValue(fieldIndexes[i])}.");
                    }
                    allData.Add(vector);
                }
                //Create data bundle
                remainingInputVector = bundleNormalizer.CreateBundleFromVectorCollection(allData, true, out bundle);
            }
            return bundle;
        } //Load
Exemplo n.º 6
0
        //Static methods
        /// <summary>
        /// Loads the data from a csv file and prepares a normalized TimeSeriesBundle.
        /// The first line of the csv file must be field names. These field names must
        /// match the names of the input and output fields.
        /// The input vector of row N is stored at the same bundle position as the output vector of row N+1.
        /// </summary>
        /// <param name="fileName"> Data file name </param>
        /// <param name="inputFieldNameCollection"> Input field names </param>
        /// <param name="outputFieldNameCollection"> Output field names </param>
        /// <param name="outputFieldTaskCollection">
        /// Neural task related to output field.
        /// Classification task means the output field contains binary value so data
        /// standardization and normalizer reserve are suppressed.
        /// </param>
        /// <param name="normRange"> Range of normalized values </param>
        /// <param name="normReserveRatio">
        /// Reserve held by a normalizer to cover cases where future data exceeds a known range of sample data.
        /// </param>
        /// <param name="dataStandardization"> Specifies whether to apply data standardization </param>
        /// <param name="bundleNormalizer"> Returned initialized instance of BundleNormalizer </param>
        /// <param name="remainingInputVector"> Returned the last input vector unused in the bundle </param>
        /// <returns> Normalized bundle built from the file data </returns>
        /// <exception cref="FormatException">
        /// The file is empty or its first row does not contain enough delimited column names.
        /// </exception>
        public static TimeSeriesBundle LoadFromCsv(string fileName,
                                                   List<string> inputFieldNameCollection,
                                                   List<string> outputFieldNameCollection,
                                                   List<CommonEnums.TaskType> outputFieldTaskCollection,
                                                   Interval normRange,
                                                   double normReserveRatio,
                                                   bool dataStandardization,
                                                   out BundleNormalizer bundleNormalizer,
                                                   out double[] remainingInputVector
                                                   )
        {
            TimeSeriesBundle bundle = null;

            remainingInputVector = null;
            bundleNormalizer = new BundleNormalizer(normRange);
            //The file is only read, so it is opened with read access (works for read-only files as well)
            using (StreamReader streamReader = new StreamReader(new FileStream(fileName, FileMode.Open, FileAccess.Read)))
            {
                List<int> inputFieldIndexes = new List<int>();
                List<int> outputFieldIndexes = new List<int>();
                //First row contains column names (data fields)
                string delimitedColumnNames = streamReader.ReadLine();
                if (delimitedColumnNames == null)
                {
                    //ReadLine returns null when there is nothing to read
                    throw new FormatException("The file is empty, the 1st row containing delimited column names is missing.");
                }
                //What data delimiter is used?
                char csvDelimiter = DelimitedStringValues.RecognizeDelimiter(delimitedColumnNames);
                //Split column names
                DelimitedStringValues columnNames = new DelimitedStringValues(csvDelimiter);
                columnNames.LoadFromString(delimitedColumnNames);
                //Check if the recognized data delimiter works properly
                if (columnNames.NumOfStringValues < inputFieldNameCollection.Count)
                {
                    throw new FormatException("1st row of the file doesn't contain delimited column names or the value delimiter was not properly recognized.");
                }
                //Define fields
                foreach (string name in inputFieldNameCollection)
                {
                    if (!bundleNormalizer.IsFieldDefined(name))
                    {
                        bundleNormalizer.DefineField(name, name, normReserveRatio, dataStandardization);
                        inputFieldIndexes.Add(columnNames.IndexOf(name));
                    }
                    bundleNormalizer.DefineInputField(name);
                }
                for (int i = 0; i < outputFieldNameCollection.Count; i++)
                {
                    string outputFieldName = outputFieldNameCollection[i];
                    if (!bundleNormalizer.IsFieldDefined(outputFieldName))
                    {
                        //Classification output: no normalizer reserve and no standardization
                        bool classification = (outputFieldTaskCollection[i] == CommonEnums.TaskType.Classification);
                        bundleNormalizer.DefineField(outputFieldName,
                                                     outputFieldName,
                                                     classification ? 0 : normReserveRatio,
                                                     classification ? false : dataStandardization
                                                     );
                    }
                    //The column index is stored even when the field was already defined as an input field
                    outputFieldIndexes.Add(columnNames.IndexOf(outputFieldName));
                    bundleNormalizer.DefineOutputField(outputFieldName);
                }
                //Finalize structure
                bundleNormalizer.FinalizeStructure();
                //Load full data in string form
                List<DelimitedStringValues> fullData = new List<DelimitedStringValues>();
                while (!streamReader.EndOfStream)
                {
                    DelimitedStringValues row = new DelimitedStringValues(csvDelimiter);
                    row.LoadFromString(streamReader.ReadLine());
                    fullData.Add(row);
                }
                //Prepare input and output vectors
                List<double[]> inputVectorCollection = new List<double[]>(fullData.Count);
                List<double[]> outputVectorCollection = new List<double[]>(fullData.Count);
                int lastRowIdx = fullData.Count - 1;
                for (int i = 0; i <= lastRowIdx; i++)
                {
                    DelimitedStringValues dataRow = fullData[i];
                    //Input vector
                    double[] inputVector = new double[inputFieldIndexes.Count];
                    for (int j = 0; j < inputFieldIndexes.Count; j++)
                    {
                        inputVector[j] = dataRow.GetValue(inputFieldIndexes[j]).ParseDouble(true, $"Can't parse double value {dataRow.GetValue(inputFieldIndexes[j])}.");
                    }
                    if (i < lastRowIdx)
                    {
                        //Within the bundle
                        inputVectorCollection.Add(inputVector);
                    }
                    else
                    {
                        //remaining input vector out of the bundle
                        remainingInputVector = inputVector;
                    }
                    if (i > 0)
                    {
                        //Output vector (the first row has no preceding input vector)
                        double[] outputVector = new double[outputFieldIndexes.Count];
                        for (int j = 0; j < outputFieldIndexes.Count; j++)
                        {
                            outputVector[j] = dataRow.GetValue(outputFieldIndexes[j]).ParseDouble(true, $"Can't parse double value {dataRow.GetValue(outputFieldIndexes[j])}.");
                        }
                        outputVectorCollection.Add(outputVector);
                    }
                }
                //Create bundle
                bundle = new TimeSeriesBundle(inputVectorCollection, outputVectorCollection);
                //Normalize bundle and remaining input vector
                bundleNormalizer.Normalize(bundle);
                bundleNormalizer.NormalizeInputVector(remainingInputVector);
            }
            return bundle;
        } //LoadFromCsv