Beispiel #1
0
        /// <summary>
        /// Internal helper: rescales every column of the input into the range 0 to 1 and refreshes
        /// the min / max / bin-count metadata of the matching dimension while doing so.
        /// </summary>
        /// <param name="dataArray">Raw values; the second index selects the column (dimension).</param>
        /// <param name="metadataPreset">Optional bin-count overrides per column index; may be null.</param>
        /// <returns>A new array with the same bounds as the input, holding the rescaled values.</returns>
        private float[,] normaliseArray(float[,] dataArray, DataMetadata metadataPreset)
        {
            int rowCount = dataArray.GetUpperBound(0) + 1;
            int columnCount = dataArray.GetUpperBound(1) + 1;
            float[,] scaled = new float[rowCount, columnCount];

            for (int column = 0; column < columnCount; column++)
            {
                float[] source = GetCol(dataArray, column);
                float lowest = source.Min();
                float highest = source.Max();

                // Record the observed range; the default bin count is the value span plus one.
                DataSource.DimensionData.Metadata info = dimensionData[column].MetaData;
                info.minValue = lowest;
                info.maxValue = highest;
                info.binCount = (int)(highest - lowest + 1);

                // A preset entry for this column replaces the default bin count (the last match wins).
                if (metadataPreset != null)
                {
                    foreach (var preset in metadataPreset.BinSizePreset)
                    {
                        if (preset.index == column)
                        {
                            info.binCount = preset.binCount;
                        }
                    }
                }

                dimensionData[column].setMetadata(info);

                // Freshly allocated, so every entry already holds 0. That is the value used when
                // the column has no usable range, which avoids NaNs from a zero-width range.
                float[] target = new float[source.Length];

                if (lowest < highest)
                {
                    for (int row = 0; row < source.Length; row++)
                    {
                        target[row] = normaliseValue(source[row], lowest, highest, 0f, 1f);
                    }
                }

                SetCol<float>(scaled, column, target);
            }

            return scaled;
        }
Beispiel #2
0
        /// <summary>
        /// Pulls one column out of the data array, stores its range, categories and bin count in the
        /// metadata of the matching dimension, and returns the column rescaled to the range 0 to 1.
        /// </summary>
        /// <param name="dataArray">Raw values to read the column from.</param>
        /// <param name="metadataPreset">Optional bin-count overrides per column index; may be null.</param>
        /// <param name="col">Index of the column (dimension) to process.</param>
        /// <returns>The column's values after rescaling.</returns>
        private float[] NormaliseCol(float[,] dataArray, DataMetadata metadataPreset, int col)
        {
            float[] column = GetCol(dataArray, col);
            float lower = column.Min();
            float upper = column.Max();

            // A column without distinct values would give a zero-width range, which breaks the
            // maths further on (division by zero and the like). Widening the range by one on
            // each side is a stop-gap until this is solved properly.
            if (lower == upper)
            {
                lower -= 1.0f;
                upper += 1.0f;
            }

            DataSource.DimensionData.Metadata info = dimensionData[col].MetaData;
            info.minValue = lower;
            info.maxValue = upper;

            // Categories are the distinct raw values, rescaled with the same range as the data.
            var distinctValues = column.Distinct();
            info.categories = distinctValues
                .Select(value => normaliseValue(value, lower, upper, 0.0f, 1.0f))
                .ToArray();
            info.categoryCount = info.categories.Count();

            // Default bin count is the value span plus one, unless a preset names this column
            // (the last matching preset wins).
            info.binCount = (int)(upper - lower + 1);
            if (metadataPreset != null)
            {
                foreach (var preset in metadataPreset.BinSizePreset)
                {
                    if (preset.index == col)
                    {
                        info.binCount = preset.binCount;
                    }
                }
            }

            dimensionData[col].setMetadata(info);

            // Without a usable range every value becomes 0, avoiding NaNs or nonsensical output.
            bool hasRange = lower < upper;
            for (int row = 0; row < column.Length; row++)
            {
                column[row] = hasRange
                    ? normaliseValue(column[row], lower, upper, 0f, 1f)
                    : 0f;
            }

            return column;
        }
Beispiel #3
0
        /// <summary>
        /// Pulls one column out of the data array, stores its range, categories and bin count in the
        /// metadata of the matching dimension, and returns the column rescaled to the range 0 to 1.
        /// </summary>
        /// <remarks>
        /// When the column has no usable range (minimum not below maximum), both the returned
        /// values and the stored categories are set to 0 instead of being passed to normaliseValue,
        /// so the categories always agree with the data they describe.
        /// </remarks>
        /// <param name="dataArray">Raw values to read the column from.</param>
        /// <param name="metadataPreset">Optional bin-count overrides per column index; may be null.</param>
        /// <param name="col">Index of the column (dimension) to process.</param>
        /// <returns>The column's values after rescaling.</returns>
        private float[] NormaliseCol(float[,] dataArray, DataMetadata metadataPreset, int col)
        {
            float[] result = GetCol(dataArray, col);
            float minDimension = result.Min();
            float maxDimension = result.Max();

            // Rescaling is only attempted on a non-empty range; this one flag guards both the
            // categories and the data so they cannot diverge (NaN categories next to 0 data).
            bool hasRange = minDimension < maxDimension;

            // Materialise the distinct values once; they feed both the categories and their count.
            float[] distinctValues = result.Distinct().ToArray();

            DataSource.DimensionData.Metadata metadata = dimensionData[col].MetaData;

            metadata.minValue = minDimension;
            metadata.maxValue = maxDimension;
            metadata.categories = distinctValues
                .Select(x => hasRange ? normaliseValue(x, minDimension, maxDimension, 0.0f, 1.0f) : 0f)
                .ToArray();
            metadata.categoryCount = distinctValues.Length;

            // Default bin count is the value span plus one, unless a preset names this column
            // (the last matching preset wins).
            metadata.binCount = (int)(maxDimension - minDimension + 1);

            if (metadataPreset != null)
            {
                foreach (var binSizePreset in metadataPreset.BinSizePreset)
                {
                    if (binSizePreset.index == col)
                    {
                        metadata.binCount = binSizePreset.binCount;
                    }
                }
            }

            dimensionData[col].setMetadata(metadata);

            for (int j = 0; j < result.Length; j++)
            {
                // avoid NaNs or nonsensical normalization when there is no range
                result[j] = hasRange
                    ? normaliseValue(result[j], minDimension, maxDimension, 0f, 1f)
                    : 0f;
            }

            return result;
        }
Beispiel #4
0
        /// <summary>
        /// Parses delimited text into this data source. The lines are handed to loadHeaderImpl;
        /// if that succeeds, every following line is converted cell by cell into a float according
        /// to the type of its dimension, each column is passed through NormaliseCol and stored on
        /// its dimension, and finally raiseOnLoad() is called unless isOnLoadNull() reports true.
        /// </summary>
        /// <remarks>
        /// String cells are encoded as integer codes in order of first appearance (0, 1, 2, ...);
        /// the code/text mappings are kept in textualDimensionsList and textualDimensionsListReverse,
        /// keyed by the dimension identifier. Cells whose column index is not covered by
        /// dimensionData are ignored. Date and Time cells use float.Parse and therefore throw on
        /// text that is not numeric.
        /// </remarks>
        /// <param name="data">The complete text to load; the first non-empty line is the header.</param>
        /// <param name="metadataPreset">Forwarded unchanged to NormaliseCol for every dimension.</param>
        public void load(string data, DataMetadata metadataPreset)
        {
            dimensionData = new List<DimensionData>();
            textualDimensionsList = new Dictionary<string, Dictionary<int, string>>();
            textualDimensionsListReverse = new Dictionary<string, Dictionary<string, int>>();

            string[] lines = data.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
            if (!loadHeaderImpl(lines))
            {
                return;
            }

            float[,] dataArray = new float[lines.Length - 1, DimensionCount]; // ignore the first line of identifiers
            dataCount = dataArray.GetUpperBound(0) + 1;

            // line reading (line 0 is the header)
            for (int i = 1; i < lines.Length; i++)
            {
                string[] values = lines[i].Split(split);
                int row = i - 1;

                // dimension reading; cells beyond the known dimensions are skipped
                int columnCount = Math.Min(values.Length, dimensionData.Count);
                for (int k = 0; k < columnCount; k++)
                {
                    string cleanedValue = cleanDataString(values[k]);
                    var columnType = dimensionData[k].MetaData.type;

                    switch (columnType)
                    {
                        case DataType.Bool:
                            {
                                // Unparsable text leaves the flag false, i.e. 0.
                                bool flag = false;
                                bool.TryParse(cleanedValue, out flag);
                                dataArray[row, k] = Convert.ToSingle(flag);
                                break;
                            }
                        case DataType.Date:
                        case DataType.Time:
                            {
                                // NOTE(review): Date cells are split on a backslash and then combined
                                // exactly like Time cells (2 parts: a * 60 + b, 3 parts:
                                // a * 3600 + b * 60 + c) - confirm this is the intended date format.
                                char separator = columnType == DataType.Date ? '\\' : ':';
                                string[] parts = cleanedValue.Split(separator);
                                if (parts.Length == 2)
                                {
                                    dataArray[row, k] = float.Parse(parts[0]) * 60f + float.Parse(parts[1]);
                                }
                                else if (parts.Length == 3)
                                {
                                    dataArray[row, k] = float.Parse(parts[0]) * 3600f + float.Parse(parts[1]) * 60f + float.Parse(parts[2]);
                                }
                                else
                                {
                                    dataArray[row, k] = 0f;
                                }
                                break;
                            }
                        case DataType.Int:
                            {
                                // Unparsable text yields 0.
                                int number = 0;
                                int.TryParse(cleanedValue, out number);
                                dataArray[row, k] = (float)number;
                                break;
                            }
                        case DataType.Float:
                            {
                                // Culture-invariant parsing; unparsable text yields 0.
                                double number = 0.0f;
                                double.TryParse(cleanedValue, NumberStyles.Any, CultureInfo.InvariantCulture, out number);
                                dataArray[row, k] = (float)number;
                                break;
                            }
                        case DataType.Graph:
                            {
                                // The cell is a '|'-separated list of integer edge targets; empty
                                // tokens are skipped. The list is stored under the line index i,
                                // which is one higher than the row index used for dataArray.
                                string[] edges = cleanedValue.Split(new char[] { '|' });
                                List<int> localEdges = new List<int>();

                                for (int ed = 0; ed < edges.Length; ed++)
                                {
                                    if (edges[ed] != "")
                                    {
                                        localEdges.Add(int.Parse(edges[ed]));
                                    }
                                }
                                GraphEdges.Add(i, localEdges);
                                break;
                            }
                        case DataType.String:
                            {
                                string identifier = dimensionData[k].Identifier;
                                Dictionary<int, string> codeToText;
                                Dictionary<string, int> textToCode;

                                // Fetch the dictionaries of this dimension, creating them on first use.
                                if (textualDimensionsList.TryGetValue(identifier, out codeToText))
                                {
                                    textToCode = textualDimensionsListReverse[identifier];
                                }
                                else
                                {
                                    codeToText = new Dictionary<int, string>();
                                    textToCode = new Dictionary<string, int>();
                                    textualDimensionsList.Add(identifier, codeToText);
                                    textualDimensionsListReverse.Add(identifier, textToCode);
                                }

                                // Codes are handed out consecutively from 0, so the next free code
                                // is always the number of entries registered so far.
                                int code;
                                if (!textToCode.TryGetValue(cleanedValue, out code))
                                {
                                    code = textToCode.Count;
                                    textToCode.Add(cleanedValue, code);
                                    codeToText.Add(code, cleanedValue);
                                }

                                dataArray[row, k] = code;
                                break;
                            }
                        default:
                            {
                                dataArray[row, k] = 0f;
                                break;
                            }
                    } // end switch
                } // end k
            }

            // TODO: SORT MULTIPLE VALUES/CRITERIA

            // Populate data structure
            for (int i = 0; i < DimensionCount; ++i)
            {
                dimensionData[i].setData(NormaliseCol(dataArray, metadataPreset, i), textualDimensionsList);
            }

            // Raise load event
            if (!isOnLoadNull())
            {
                raiseOnLoad();
            }
        }
Beispiel #5
0
        /// <summary>
        /// Parses delimited text into this data source. The lines are handed to loadHeaderImpl;
        /// if that succeeds, every following line is converted cell by cell into a float according
        /// to the type of its dimension, each column is passed through NormaliseCol and stored on
        /// its dimension, and finally raiseOnLoad() is called unless isOnLoadNull() reports true.
        /// </summary>
        /// <remarks>
        /// String dimensions are handled in two passes: the first pass collects the distinct cleaned
        /// values per dimension, the second sorts them (as dates when the first value looks like
        /// d/MM/yyyy, otherwise with the default string ordering), assigns codes 0..n-1 in sorted
        /// order and writes the codes into the data array. Cells whose column index is not covered
        /// by dimensionData are ignored. Date and Time cells use float.Parse and therefore throw on
        /// text that is not numeric.
        /// </remarks>
        /// <param name="data">The complete text to load; the first non-empty line is the header.</param>
        /// <param name="metadataPreset">Forwarded unchanged to NormaliseCol for every dimension.</param>
        public void load(string data, DataMetadata metadataPreset)
        {
            dimensionData = new List<DimensionData>();
            textualDimensionsList = new Dictionary<string, Dictionary<int, string>>();
            textualDimensionsListReverse = new Dictionary<string, Dictionary<string, int>>();

            // key: dimension identifier, value: distinct cleaned values in order of first appearance
            Dictionary<string, List<string>> distinctStringValues = new Dictionary<string, List<string>>();
            // Same keys; the sets give constant-time "already seen?" checks for the lists above.
            Dictionary<string, HashSet<string>> seenStringValues = new Dictionary<string, HashSet<string>>();

            string[] lines = data.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
            if (!loadHeaderImpl(lines))
            {
                return;
            }

            float[,] dataArray = new float[lines.Length - 1, DimensionCount]; // ignore the first line of identifiers
            dataCount = dataArray.GetUpperBound(0) + 1;

            // First pass: line reading (line 0 is the header)
            for (int i = 1; i < lines.Length; i++)
            {
                string[] values = lines[i].Split(split);
                int row = i - 1;

                // dimension reading; cells beyond the known dimensions are skipped
                int columnCount = Math.Min(values.Length, dimensionData.Count);
                for (int k = 0; k < columnCount; k++)
                {
                    string cleanedValue = cleanDataString(values[k]);
                    var columnType = dimensionData[k].MetaData.type;

                    switch (columnType)
                    {
                        case DataType.Bool:
                            {
                                // Unparsable text leaves the flag false, i.e. 0.
                                bool flag = false;
                                bool.TryParse(cleanedValue, out flag);
                                dataArray[row, k] = Convert.ToSingle(flag);
                                break;
                            }
                        case DataType.Date:
                        case DataType.Time:
                            {
                                // NOTE(review): Date cells are split on a backslash and then combined
                                // exactly like Time cells (2 parts: a * 60 + b, 3 parts:
                                // a * 3600 + b * 60 + c) - confirm this is the intended date format.
                                char separator = columnType == DataType.Date ? '\\' : ':';
                                string[] parts = cleanedValue.Split(separator);
                                if (parts.Length == 2)
                                {
                                    dataArray[row, k] = float.Parse(parts[0]) * 60f + float.Parse(parts[1]);
                                }
                                else if (parts.Length == 3)
                                {
                                    dataArray[row, k] = float.Parse(parts[0]) * 3600f + float.Parse(parts[1]) * 60f + float.Parse(parts[2]);
                                }
                                else
                                {
                                    dataArray[row, k] = 0f;
                                }
                                break;
                            }
                        case DataType.Int:
                            {
                                // Unparsable text yields 0.
                                int number = 0;
                                int.TryParse(cleanedValue, out number);
                                dataArray[row, k] = (float)number;
                                break;
                            }
                        case DataType.Float:
                            {
                                // NOTE(review): this overload parses with the current culture, so the
                                // accepted decimal separator depends on the machine locale - confirm.
                                double number = 0.0f;
                                double.TryParse(cleanedValue, out number);
                                dataArray[row, k] = (float)number;
                                break;
                            }
                        case DataType.String:
                            {
                                // Only collect the distinct values here; codes are assigned after
                                // sorting, in the second pass below.
                                string identifier = dimensionData[k].Identifier;
                                List<string> stringValues;
                                HashSet<string> seenValues;

                                if (distinctStringValues.TryGetValue(identifier, out stringValues))
                                {
                                    seenValues = seenStringValues[identifier];
                                }
                                else
                                {
                                    stringValues = new List<string>();
                                    seenValues = new HashSet<string>();
                                    distinctStringValues[identifier] = stringValues;
                                    seenStringValues[identifier] = seenValues;
                                }

                                // Add returns false for a value that is already in the set.
                                if (seenValues.Add(cleanedValue))
                                {
                                    stringValues.Add(cleanedValue);
                                }
                                break;
                            }
                        default:
                            {
                                dataArray[row, k] = 0f;
                                break;
                            }
                    } // end switch
                } // end k
            }

            // Second pass: populate textual dimensions list
            foreach (string textualDimension in distinctStringValues.Keys)
            {
                // Create dictionaries that will be added to the textualDimensionsLists
                Dictionary<int, string> textualDimensionsEntry = new Dictionary<int, string>();
                Dictionary<string, int> textualDimensionsEntryReverse = new Dictionary<string, int>();

                // Sort the string values for this dimension
                List<string> distinctSortedValues = distinctStringValues[textualDimension];

                // Check if it's actually a date TODO: FIX THIS TO BE STREAMLINED WITH DATE CHECKING
                // Only the first value is inspected; ParseExact throws if a later value does not
                // match the chosen format.
                string[] vals = distinctSortedValues[0].Split('/');
                if (vals.Length == 3 && vals[1].Length == 2 && vals[2].Length == 4)
                {
                    distinctSortedValues = distinctSortedValues.OrderBy(x =>
                    {
                        if (x.IndexOf('/') == 1)
                            return DateTime.ParseExact(x, "d/MM/yyyy", null);
                        else
                            return DateTime.ParseExact(x, "dd/MM/yyyy", null);
                    }).ToList();
                }
                else
                {
                    distinctSortedValues.Sort();
                }

                // Populate the dictionaries: the code of a value is its position in sorted order
                for (int i = 0; i < distinctSortedValues.Count; i++)
                {
                    textualDimensionsEntry[i] = distinctSortedValues[i];
                    textualDimensionsEntryReverse[distinctSortedValues[i]] = i;
                }

                // Add the dictionaries to the textual dimensions list
                textualDimensionsList[textualDimension] = textualDimensionsEntry;
                textualDimensionsListReverse[textualDimension] = textualDimensionsEntryReverse;

                // Get dimension index
                int index = dimensionData.FindIndex(d => d.Identifier == textualDimension);

                // Fill in the data array
                for (int i = 1; i < lines.Length; i++)
                {
                    string[] rowValues = lines[i].Split(split);
                    if (index >= rowValues.Length)
                    {
                        // Short row: the cell is missing, so the array keeps its default 0,
                        // as it does for missing cells of the other column types.
                        continue;
                    }

                    // The dictionary keys were registered from the cleaned text in the first pass,
                    // so the lookup has to clean the cell in the same way.
                    string value = cleanDataString(rowValues[index]);
                    dataArray[i - 1, index] = textualDimensionsEntryReverse[value];
                }
            }

            // Populate data structure
            for (int i = 0; i < DimensionCount; ++i)
            {
                dimensionData[i].setData(NormaliseCol(dataArray, metadataPreset, i), textualDimensionsList);
            }

            // Raise load event
            if (!isOnLoadNull())
            {
                raiseOnLoad();
            }
        }
        /// <summary>
        /// 
        /// </summary>
        /// <param name="data"></param>
        public void load(string data, DataMetadata metadataPreset)
        {
            dimensionData = new List<DimensionData>();
            textualDimensionsList = new Dictionary<string, Dictionary<int, string>>();
            textualDimensionsListReverse = new Dictionary<string, Dictionary<string, int>>();

            
            string[] lines = data.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
            if (loadHeaderImpl(lines))
            {
                float[,] dataArray = new float[lines.Length - 1, DimensionCount]; // ignore the first line of identifiers
                dataCount = dataArray.GetUpperBound(0) + 1;

                if (lines.Length > 1)
                {
                    //line reading
                    for (int i = 1; i < lines.Length; i++)
                    {
                        string[] values = lines[i].Split(split);

                        //dimension reading
                        for (int k = 0; k < values.Count(); k++)
                        {
                            string cleanedValue = cleanDataString(values[k]);
                            //1- get the corresponding type
                            if (k <= dimensionData.Count - 1) switch (dimensionData[k].MetaData.type)
                                {
                                    case DataType.Bool:
                                        {
                                            bool result = false;
                                            bool.TryParse(cleanedValue, out result);
                                            dataArray[i - 1, k] = Convert.ToSingle(result);
                                            break;
                                        }
                                    case DataType.Date:
                                        {
                                            string[] valH = cleanedValue.Split('\\');
                                            if (valH.Length == 2)
                                                dataArray[i - 1, k] = float.Parse(valH[0]) * 60f + float.Parse(valH[1]);
                                            else if (valH.Length == 3)
                                                dataArray[i - 1, k] = float.Parse(valH[0]) * 3600f + float.Parse(valH[1]) * 60f + float.Parse(valH[2]);
                                            else dataArray[i - 1, k] = 0f;
                                            break;
                                        }

                                    case DataType.Time:
                                        {
                                            string[] valH = cleanedValue.Split(':');
                                            if (valH.Length == 2)
                                                dataArray[i - 1, k] = float.Parse(valH[0]) * 60f + float.Parse(valH[1]);
                                            else if (valH.Length == 3)
                                                dataArray[i - 1, k] = float.Parse(valH[0]) * 3600f + float.Parse(valH[1]) * 60f + float.Parse(valH[2]);
                                            else dataArray[i - 1, k] = 0f;
                                            break;
                                        }

                                    case DataType.Int:
                                        {
                                            int result = 0;
                                            int.TryParse(cleanedValue, out result);
                                            dataArray[i - 1, k] = (float)result;
                                            if (k == 10)
                                            {
                                                Debug.LogError((float)result);
                                            }
                                            break;
                                        }
                                    case DataType.Float:
                                        {
                                            double result = 0.0f;

                                            // changed the parsing of floats to support comma and dot as decimal point
                                            // copied from: https://stackoverflow.com/a/19678636
                                            //double.TryParse(cleanedValue, out result);
                                            cleanedValue = cleanedValue.Replace(',', '.');
                                            double.TryParse(cleanedValue, NumberStyles.Any, CultureInfo.InvariantCulture, out result);
                                            dataArray[i - 1, k] = (float)result;
                                            if(k == 10)
                                            {
                                              //  Debug.LogError((float)result);
                                            }
                                            break;
                                        }
                                    case DataType.String:
                                        {
                                            //check if we have a dictionnary for this dimension
                                            if (textualDimensionsList.ContainsKey(dimensionData[k].Identifier))
                                            {
                                                //if encoded
                                                //get the dictionary
                                                int valueToEncode;
                                                Dictionary<string, int> dimensionDictionaryReverse = textualDimensionsListReverse[dimensionData[k].Identifier];
                                                Dictionary<int, string> dimensionDictionary = textualDimensionsList[dimensionData[k].Identifier];

                                                if (dimensionDictionaryReverse.ContainsKey(cleanedValue))
                                                {
                                                    valueToEncode = dimensionDictionaryReverse[cleanedValue];
                                                    dataArray[i - 1, k] = valueToEncode;
                                                }
                                                else
                                                {
                                                    //increment from the last added element
                                                    int lastEncodedValue = dimensionDictionaryReverse.Values.OrderBy(x => x).Last() + 1;

                                                    dimensionDictionaryReverse.Add(cleanedValue, lastEncodedValue);
                                                    dimensionDictionary.Add(lastEncodedValue, cleanedValue);
                                                    textualDimensionsListReverse[dimensionData[k].Identifier] = dimensionDictionaryReverse;
                                                    textualDimensionsList[dimensionData[k].Identifier] = dimensionDictionary;

                                                    dataArray[i - 1, k] = lastEncodedValue;
                                                }
                                            }
                                            else //if not create one and add the first value
                                            {
                                                Dictionary<int, string> newEntry = new Dictionary<int, string>();
                                                Dictionary<string, int> newEntryReverse = new Dictionary<string, int>();

                                                newEntry.Add(0, cleanedValue);
                                                newEntryReverse.Add(cleanedValue, 0);

                                                textualDimensionsList.Add(dimensionData[k].Identifier, newEntry);
                                                textualDimensionsListReverse.Add(dimensionData[k].Identifier, newEntryReverse);
                                            }
                                            ////lookup if already encoded
                                            //if (textualDimensionsReverse.ContainsKey(cleanedValue))
                                            //{
                                            //    dataArray[i - 1, k] = textualDimensionsReverse[cleanedValue];// textualDimensions.FirstOrDefault(x => x.Value == cleanedValue).Key;
                                            //}
                                            //else
                                            //{
                                            //    //new key
                                            //    textualPointer++;
                                            //    textualDimensions.Add((int)textualPointer, cleanedValue);
                                            //    textualDimensionsReverse.Add(cleanedValue, (int)textualPointer);
                                            //    dataArray[i - 1, k] = textualPointer;
                                            //}
                                            break;
                                        }
                                    default:
                                        {
                                            dataArray[i - 1, k] = 0f;
                                            break;
                                        }
                                }// end switch

                        } // end k
                    }
                }


                // Populate data structure
                //float[] output = new float[dataCount];
                for (int i = 0; i < DimensionCount; ++i)
                {
                    if(i == 10)
                    {
                       /* Debug.LogError(GetCol(dataArray, i));
                        foreach (var test in GetCol(dataArray, i))
                        {
                            Debug.LogError(test);
                        }*/
                       /* Debug.LogError(NormaliseCol(dataArray, metadataPreset, i));
                        foreach(var test in NormaliseCol(dataArray, metadataPreset, i))
                        {
                            Debug.LogError(test);
                        }*/
                    }
                    dimensionData[i].setData(NormaliseCol(dataArray, metadataPreset, i), textualDimensionsList);

                }

                // Raise load event
                if (!isOnLoadNull())
                {
                    raiseOnLoad();
                }
            }
        }