/// <summary>
/// Internal helper: rescales every column of the parsed data into the range 0..1
/// and stores each column's minimum, maximum and bin count in its dimension metadata.
/// </summary>
/// <param name="dataArray">Parsed values; the second index selects the column (dimension).</param>
/// <param name="metadataPreset">Optional per-column bin-count overrides; ignored when null.</param>
/// <returns>A new array with the same bounds as <paramref name="dataArray"/> holding the normalised values.</returns>
private float[,] normaliseArray(float[,] dataArray, DataMetadata metadataPreset)
{
    int rowTotal = dataArray.GetUpperBound(0) + 1;
    int columnTotal = dataArray.GetUpperBound(1) + 1;

    // Output buffer with the same extent as the input.
    float[,] normArray = new float[rowTotal, columnTotal];

    for (int column = 0; column < columnTotal; column++)
    {
        float[] rawValues = GetCol(dataArray, column);
        float lowest = rawValues.Min();
        float highest = rawValues.Max();

        // Record the observed range; the default bin count is the range width plus one.
        DataSource.DimensionData.Metadata columnMeta = dimensionData[column].MetaData;
        columnMeta.minValue = lowest;
        columnMeta.maxValue = highest;
        columnMeta.binCount = (int)(highest - lowest + 1);

        // A preset entry for this column overrides the computed bin count (last match wins).
        if (metadataPreset != null)
        {
            foreach (var preset in metadataPreset.BinSizePreset)
            {
                if (preset.index == column)
                {
                    columnMeta.binCount = preset.binCount;
                }
            }
        }

        dimensionData[column].setMetadata(columnMeta);

        // A column without a positive range maps to 0 to avoid NaNs or nonsensical normalisation.
        bool hasRange = lowest < highest;
        float[] scaled = new float[rawValues.Length];
        for (int row = 0; row < rawValues.Length; row++)
        {
            scaled[row] = hasRange
                ? normaliseValue(rawValues[row], lowest, highest, 0f, 1f)
                : 0f;
        }

        SetCol<float>(normArray, column, scaled);
    }

    return normArray;
}
/// <summary>
/// Normalises a single column of the parsed data into the range 0..1 and updates
/// that dimension's metadata (range, categories, category count, bin count).
/// </summary>
/// <param name="dataArray">Parsed values; the second index selects the column.</param>
/// <param name="metadataPreset">Optional per-column bin-count overrides; ignored when null.</param>
/// <param name="col">Index of the column to normalise.</param>
/// <returns>The column values after normalisation.</returns>
private float[] NormaliseCol(float[,] dataArray, DataMetadata metadataPreset, int col)
{
    float[] column = GetCol(dataArray, col);
    float lower = column.Min();
    float upper = column.Max();

    // A column with no distinct values gets an artificial range of +/- 1 around the value,
    // because downstream maths breaks (division by zero, etc.) on a zero-width range.
    // This is a known workaround that should be fixed properly in future.
    if (lower == upper)
    {
        lower -= 1.0f;
        upper += 1.0f;
    }

    DataSource.DimensionData.Metadata meta = dimensionData[col].MetaData;
    meta.minValue = lower;
    meta.maxValue = upper;
    meta.categories = column
        .Distinct()
        .Select(value => normaliseValue(value, lower, upper, 0.0f, 1.0f))
        .ToArray();
    meta.categoryCount = meta.categories.Count();
    meta.binCount = (int)(upper - lower + 1);

    // A preset entry for this column overrides the computed bin count (last match wins).
    if (metadataPreset != null)
    {
        foreach (var preset in metadataPreset.BinSizePreset)
        {
            if (preset.index == col)
            {
                meta.binCount = preset.binCount;
            }
        }
    }

    dimensionData[col].setMetadata(meta);

    // Fallback kept from the original: without a positive range every value maps to 0
    // to avoid NaNs or nonsensical normalisation.
    bool hasRange = lower < upper;
    for (int row = 0; row < column.Length; row++)
    {
        column[row] = hasRange
            ? normaliseValue(column[row], lower, upper, 0f, 1f)
            : 0f;
    }

    return column;
}
/// <summary>
/// Normalises a single column of the parsed data into the range 0..1 and updates
/// that dimension's metadata (range, categories, category count, bin count).
/// </summary>
/// <remarks>
/// When the column has no positive range (minimum equals maximum) both the returned
/// values and the category values are set to 0, so the two stay consistent and no
/// normalisation over a zero-width range is attempted.
/// </remarks>
/// <param name="dataArray">Parsed values; the second index selects the column.</param>
/// <param name="metadataPreset">Optional per-column bin-count overrides; ignored when null.</param>
/// <param name="col">Index of the column to normalise.</param>
/// <returns>The column values after normalisation.</returns>
private float[] NormaliseCol(float[,] dataArray, DataMetadata metadataPreset, int col)
{
    float[] result = GetCol(dataArray, col);
    float minDimension = result.Min();
    float maxDimension = result.Max();

    // Normalising over a zero-width range produces NaNs or nonsensical values,
    // so every use of normaliseValue below is guarded by this flag.
    bool hasRange = minDimension < maxDimension;

    // Materialise the distinct raw values once; they feed both categories and the count.
    float[] distinctValues = result.Distinct().ToArray();

    DataSource.DimensionData.Metadata metadata = dimensionData[col].MetaData;
    metadata.minValue = minDimension;
    metadata.maxValue = maxDimension;
    metadata.categories = distinctValues
        .Select(x => hasRange ? normaliseValue(x, minDimension, maxDimension, 0.0f, 1.0f) : 0f)
        .ToArray();
    metadata.categoryCount = distinctValues.Length;
    metadata.binCount = (int)(maxDimension - minDimension + 1);

    // A preset entry for this column overrides the computed bin count (last match wins).
    if (metadataPreset != null)
    {
        foreach (var binSizePreset in metadataPreset.BinSizePreset)
        {
            if (binSizePreset.index == col)
            {
                metadata.binCount = binSizePreset.binCount;
            }
        }
    }

    dimensionData[col].setMetadata(metadata);

    for (int j = 0; j < result.Length; j++)
    {
        result[j] = hasRange
            ? normaliseValue(result[j], minDimension, maxDimension, 0f, 1f)
            : 0f;
    }

    return result;
}
/// <summary>
/// Loads a delimited text data set: resets the dimension and text-dictionary state,
/// hands the lines to the header loader, converts every remaining line into one row of
/// numeric values according to each dimension's declared type, normalises each column
/// and finally raises the load event when it has subscribers.
/// </summary>
/// <remarks>
/// Numeric text is parsed with the invariant culture so that the result does not depend
/// on the machine's regional settings. Values that fail a Try-parse are stored as 0.
/// Rows may carry fewer or more values than there are dimensions; missing cells stay 0
/// and surplus cells are ignored.
/// </remarks>
/// <param name="data">Complete text of the data set; split into lines on CR/LF with empty lines dropped.</param>
/// <param name="metadataPreset">Preset forwarded to the column normalisation for every dimension.</param>
public void load(string data, DataMetadata metadataPreset)
{
    dimensionData = new List<DimensionData>();
    textualDimensionsList = new Dictionary<string, Dictionary<int, string>>();
    textualDimensionsListReverse = new Dictionary<string, Dictionary<string, int>>();

    string[] lines = data.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

    if (!loadHeaderImpl(lines))
    {
        return;
    }

    // One row per line after the first line of identifiers.
    float[,] dataArray = new float[lines.Length - 1, DimensionCount];
    dataCount = dataArray.GetLength(0);

    // Line reading (the loop body is skipped when only the header line exists).
    for (int i = 1; i < lines.Length; i++)
    {
        string[] values = lines[i].Split(split);

        // Dimension reading.
        for (int k = 0; k < values.Length; k++)
        {
            string cleanedValue = cleanDataString(values[k]);

            // Cells beyond the known dimensions are ignored.
            if (k > dimensionData.Count - 1)
            {
                continue;
            }

            switch (dimensionData[k].MetaData.type)
            {
                case DataType.Bool:
                    {
                        bool result = false;
                        bool.TryParse(cleanedValue, out result);
                        dataArray[i - 1, k] = Convert.ToSingle(result);
                        break;
                    }
                case DataType.Date:
                    {
                        // NOTE(review): the parts are separated by a backslash and combined like
                        // minutes/seconds or hours/minutes/seconds - confirm this is the intended date format.
                        string[] valH = cleanedValue.Split('\\');
                        if (valH.Length == 2)
                        {
                            dataArray[i - 1, k] = float.Parse(valH[0], CultureInfo.InvariantCulture) * 60f
                                + float.Parse(valH[1], CultureInfo.InvariantCulture);
                        }
                        else if (valH.Length == 3)
                        {
                            dataArray[i - 1, k] = float.Parse(valH[0], CultureInfo.InvariantCulture) * 3600f
                                + float.Parse(valH[1], CultureInfo.InvariantCulture) * 60f
                                + float.Parse(valH[2], CultureInfo.InvariantCulture);
                        }
                        else
                        {
                            dataArray[i - 1, k] = 0f;
                        }
                        break;
                    }
                case DataType.Time:
                    {
                        // Two parts are weighted 60:1, three parts 3600:60:1.
                        string[] valH = cleanedValue.Split(':');
                        if (valH.Length == 2)
                        {
                            dataArray[i - 1, k] = float.Parse(valH[0], CultureInfo.InvariantCulture) * 60f
                                + float.Parse(valH[1], CultureInfo.InvariantCulture);
                        }
                        else if (valH.Length == 3)
                        {
                            dataArray[i - 1, k] = float.Parse(valH[0], CultureInfo.InvariantCulture) * 3600f
                                + float.Parse(valH[1], CultureInfo.InvariantCulture) * 60f
                                + float.Parse(valH[2], CultureInfo.InvariantCulture);
                        }
                        else
                        {
                            dataArray[i - 1, k] = 0f;
                        }
                        break;
                    }
                case DataType.Int:
                    {
                        int result = 0;
                        int.TryParse(cleanedValue, NumberStyles.Integer, CultureInfo.InvariantCulture, out result);
                        dataArray[i - 1, k] = (float)result;
                        break;
                    }
                case DataType.Float:
                    {
                        double result = 0.0f;
                        double.TryParse(cleanedValue, NumberStyles.Any, CultureInfo.InvariantCulture, out result);
                        dataArray[i - 1, k] = (float)result;
                        break;
                    }
                case DataType.Graph:
                    {
                        // The cell holds '|'-separated integers; empty parts are skipped.
                        // The list is stored under the line index; the data array cell is left at 0.
                        List<int> localEdges = new List<int>();
                        foreach (string edge in cleanedValue.Split('|'))
                        {
                            if (edge.Length > 0)
                            {
                                localEdges.Add(int.Parse(edge, CultureInfo.InvariantCulture));
                            }
                        }
                        GraphEdges.Add(i, localEdges);
                        break;
                    }
                case DataType.String:
                    {
                        string identifier = dimensionData[k].Identifier;

                        // Fetch the two lookup tables for this dimension, creating them on first use.
                        Dictionary<int, string> codeToText;
                        Dictionary<string, int> textToCode;
                        if (textualDimensionsList.TryGetValue(identifier, out codeToText))
                        {
                            textToCode = textualDimensionsListReverse[identifier];
                        }
                        else
                        {
                            codeToText = new Dictionary<int, string>();
                            textToCode = new Dictionary<string, int>();
                            textualDimensionsList.Add(identifier, codeToText);
                            textualDimensionsListReverse.Add(identifier, textToCode);
                        }

                        // Codes are handed out densely from 0 in order of first appearance,
                        // so the next free code always equals the number of entries so far.
                        int code;
                        if (!textToCode.TryGetValue(cleanedValue, out code))
                        {
                            code = textToCode.Count;
                            textToCode.Add(cleanedValue, code);
                            codeToText.Add(code, cleanedValue);
                        }

                        dataArray[i - 1, k] = code;
                        break;
                    }
                default:
                    {
                        dataArray[i - 1, k] = 0f;
                        break;
                    }
            } // end switch
        } // end k
    }

    // TODO: SORT MULTIPLE VALUES/CRITERIA

    // Populate data structure
    for (int i = 0; i < DimensionCount; ++i)
    {
        dimensionData[i].setData(NormaliseCol(dataArray, metadataPreset, i), textualDimensionsList);
    }

    // Raise load event
    if (!isOnLoadNull())
    {
        raiseOnLoad();
    }
}
/// <summary>
/// Loads a delimited text data set: resets the dimension and text-dictionary state,
/// hands the lines to the header loader, converts every remaining line into one row of
/// numeric values according to each dimension's declared type, encodes string dimensions
/// by the rank of each distinct value in sorted order, normalises each column and finally
/// raises the load event when it has subscribers.
/// </summary>
/// <remarks>
/// Numeric and date text is parsed with the invariant culture so that the result does not
/// depend on the machine's regional settings. Values that fail a Try-parse are stored as 0.
/// Rows may carry fewer or more values than there are dimensions; missing cells stay 0
/// and surplus cells are ignored.
/// </remarks>
/// <param name="data">Complete text of the data set; split into lines on CR/LF with empty lines dropped.</param>
/// <param name="metadataPreset">Preset forwarded to the column normalisation for every dimension.</param>
public void load(string data, DataMetadata metadataPreset)
{
    dimensionData = new List<DimensionData>();
    textualDimensionsList = new Dictionary<string, Dictionary<int, string>>();
    textualDimensionsListReverse = new Dictionary<string, Dictionary<string, int>>();

    // key: dimension identifier, value: list of distinct (cleaned) values
    Dictionary<string, List<string>> distinctStringValues = new Dictionary<string, List<string>>();

    string[] lines = data.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

    if (!loadHeaderImpl(lines))
    {
        return;
    }

    // One row per line after the first line of identifiers.
    float[,] dataArray = new float[lines.Length - 1, DimensionCount];
    dataCount = dataArray.GetLength(0);

    // First pass: parse every non-string cell and collect the distinct string values.
    for (int i = 1; i < lines.Length; i++)
    {
        string[] values = lines[i].Split(split);

        // Dimension reading.
        for (int k = 0; k < values.Length; k++)
        {
            string cleanedValue = cleanDataString(values[k]);

            // Cells beyond the known dimensions are ignored.
            if (k > dimensionData.Count - 1)
            {
                continue;
            }

            switch (dimensionData[k].MetaData.type)
            {
                case DataType.Bool:
                    {
                        bool result = false;
                        bool.TryParse(cleanedValue, out result);
                        dataArray[i - 1, k] = Convert.ToSingle(result);
                        break;
                    }
                case DataType.Date:
                    {
                        // NOTE(review): the parts are separated by a backslash and combined like
                        // minutes/seconds or hours/minutes/seconds - confirm this is the intended date format.
                        string[] valH = cleanedValue.Split('\\');
                        if (valH.Length == 2)
                        {
                            dataArray[i - 1, k] = float.Parse(valH[0], System.Globalization.CultureInfo.InvariantCulture) * 60f
                                + float.Parse(valH[1], System.Globalization.CultureInfo.InvariantCulture);
                        }
                        else if (valH.Length == 3)
                        {
                            dataArray[i - 1, k] = float.Parse(valH[0], System.Globalization.CultureInfo.InvariantCulture) * 3600f
                                + float.Parse(valH[1], System.Globalization.CultureInfo.InvariantCulture) * 60f
                                + float.Parse(valH[2], System.Globalization.CultureInfo.InvariantCulture);
                        }
                        else
                        {
                            dataArray[i - 1, k] = 0f;
                        }
                        break;
                    }
                case DataType.Time:
                    {
                        // Two parts are weighted 60:1, three parts 3600:60:1.
                        string[] valH = cleanedValue.Split(':');
                        if (valH.Length == 2)
                        {
                            dataArray[i - 1, k] = float.Parse(valH[0], System.Globalization.CultureInfo.InvariantCulture) * 60f
                                + float.Parse(valH[1], System.Globalization.CultureInfo.InvariantCulture);
                        }
                        else if (valH.Length == 3)
                        {
                            dataArray[i - 1, k] = float.Parse(valH[0], System.Globalization.CultureInfo.InvariantCulture) * 3600f
                                + float.Parse(valH[1], System.Globalization.CultureInfo.InvariantCulture) * 60f
                                + float.Parse(valH[2], System.Globalization.CultureInfo.InvariantCulture);
                        }
                        else
                        {
                            dataArray[i - 1, k] = 0f;
                        }
                        break;
                    }
                case DataType.Int:
                    {
                        int result = 0;
                        int.TryParse(
                            cleanedValue,
                            System.Globalization.NumberStyles.Integer,
                            System.Globalization.CultureInfo.InvariantCulture,
                            out result);
                        dataArray[i - 1, k] = (float)result;
                        break;
                    }
                case DataType.Float:
                    {
                        // Same accepted syntax as the parameterless overload, but culture-independent.
                        double result = 0.0f;
                        double.TryParse(
                            cleanedValue,
                            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                            System.Globalization.CultureInfo.InvariantCulture,
                            out result);
                        dataArray[i - 1, k] = (float)result;
                        break;
                    }
                case DataType.String:
                    {
                        // Fetch (or create) the list of distinct values for this dimension.
                        string identifier = dimensionData[k].Identifier;
                        List<string> stringValues;
                        if (!distinctStringValues.TryGetValue(identifier, out stringValues))
                        {
                            stringValues = new List<string>();
                            distinctStringValues[identifier] = stringValues;
                        }

                        if (!stringValues.Contains(cleanedValue))
                        {
                            stringValues.Add(cleanedValue);
                        }
                        break;
                    }
                default:
                    {
                        dataArray[i - 1, k] = 0f;
                        break;
                    }
            } // end switch
        } // end k
    }

    // Second pass: build the text dictionaries and encode the string columns.
    foreach (string textualDimension in distinctStringValues.Keys)
    {
        // Dictionaries that will be added to the textualDimensionsLists.
        Dictionary<int, string> textualDimensionsEntry = new Dictionary<int, string>();
        Dictionary<string, int> textualDimensionsEntryReverse = new Dictionary<string, int>();

        List<string> distinctSortedValues = distinctStringValues[textualDimension];

        // Check if it's actually a date (judged from the first value only).
        // TODO: FIX THIS TO BE STREAMLINED WITH DATE CHECKING
        string[] vals = distinctSortedValues[0].Split('/');
        if (vals.Length == 3 && vals[1].Length == 2 && vals[2].Length == 4)
        {
            // The invariant culture makes '/' in the format match a literal slash;
            // with the current culture it would stand for that culture's date separator.
            distinctSortedValues = distinctSortedValues
                .OrderBy(x => DateTime.ParseExact(
                    x,
                    x.IndexOf('/') == 1 ? "d/MM/yyyy" : "dd/MM/yyyy",
                    System.Globalization.CultureInfo.InvariantCulture))
                .ToList();
        }
        else
        {
            distinctSortedValues.Sort();
        }

        // The code of a value is its position in the sorted list.
        for (int i = 0; i < distinctSortedValues.Count; i++)
        {
            textualDimensionsEntry[i] = distinctSortedValues[i];
            textualDimensionsEntryReverse[distinctSortedValues[i]] = i;
        }

        textualDimensionsList[textualDimension] = textualDimensionsEntry;
        textualDimensionsListReverse[textualDimension] = textualDimensionsEntryReverse;

        // Get dimension index
        int index = dimensionData.FindIndex(d => d.Identifier == textualDimension);

        // Fill in the data array
        for (int i = 1; i < lines.Length; i++)
        {
            string[] rowValues = lines[i].Split(split);

            // Rows too short to contain this column keep the default 0, as in the first pass.
            if (index >= rowValues.Length)
            {
                continue;
            }

            // The dictionary is keyed by cleaned values, so the lookup must clean the cell as well.
            string value = cleanDataString(rowValues[index]);
            dataArray[i - 1, index] = textualDimensionsEntryReverse[value];
        }
    }

    // Populate data structure
    for (int i = 0; i < DimensionCount; ++i)
    {
        dimensionData[i].setData(NormaliseCol(dataArray, metadataPreset, i), textualDimensionsList);
    }

    // Raise load event
    if (!isOnLoadNull())
    {
        raiseOnLoad();
    }
}
/// <summary>
/// Loads a delimited text data set: resets the dimension and text-dictionary state,
/// hands the lines to the header loader, converts every remaining line into one row of
/// numeric values according to each dimension's declared type, normalises each column
/// and finally raises the load event when it has subscribers.
/// </summary>
/// <remarks>
/// Numeric text is parsed with the invariant culture so that the result does not depend
/// on the machine's regional settings; Float cells additionally accept a comma as the
/// decimal point. Values that fail a Try-parse are stored as 0. Rows may carry fewer or
/// more values than there are dimensions; missing cells stay 0 and surplus cells are ignored.
/// </remarks>
/// <param name="data">Complete text of the data set; split into lines on CR/LF with empty lines dropped.</param>
/// <param name="metadataPreset">Preset forwarded to the column normalisation for every dimension.</param>
public void load(string data, DataMetadata metadataPreset)
{
    dimensionData = new List<DimensionData>();
    textualDimensionsList = new Dictionary<string, Dictionary<int, string>>();
    textualDimensionsListReverse = new Dictionary<string, Dictionary<string, int>>();

    string[] lines = data.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

    if (!loadHeaderImpl(lines))
    {
        return;
    }

    // One row per line after the first line of identifiers.
    float[,] dataArray = new float[lines.Length - 1, DimensionCount];
    dataCount = dataArray.GetLength(0);

    // Line reading (the loop body is skipped when only the header line exists).
    for (int i = 1; i < lines.Length; i++)
    {
        string[] values = lines[i].Split(split);

        // Dimension reading.
        for (int k = 0; k < values.Length; k++)
        {
            string cleanedValue = cleanDataString(values[k]);

            // Cells beyond the known dimensions are ignored.
            if (k > dimensionData.Count - 1)
            {
                continue;
            }

            switch (dimensionData[k].MetaData.type)
            {
                case DataType.Bool:
                    {
                        bool result = false;
                        bool.TryParse(cleanedValue, out result);
                        dataArray[i - 1, k] = Convert.ToSingle(result);
                        break;
                    }
                case DataType.Date:
                    {
                        // NOTE(review): the parts are separated by a backslash and combined like
                        // minutes/seconds or hours/minutes/seconds - confirm this is the intended date format.
                        string[] valH = cleanedValue.Split('\\');
                        if (valH.Length == 2)
                        {
                            dataArray[i - 1, k] = float.Parse(valH[0], CultureInfo.InvariantCulture) * 60f
                                + float.Parse(valH[1], CultureInfo.InvariantCulture);
                        }
                        else if (valH.Length == 3)
                        {
                            dataArray[i - 1, k] = float.Parse(valH[0], CultureInfo.InvariantCulture) * 3600f
                                + float.Parse(valH[1], CultureInfo.InvariantCulture) * 60f
                                + float.Parse(valH[2], CultureInfo.InvariantCulture);
                        }
                        else
                        {
                            dataArray[i - 1, k] = 0f;
                        }
                        break;
                    }
                case DataType.Time:
                    {
                        // Two parts are weighted 60:1, three parts 3600:60:1.
                        string[] valH = cleanedValue.Split(':');
                        if (valH.Length == 2)
                        {
                            dataArray[i - 1, k] = float.Parse(valH[0], CultureInfo.InvariantCulture) * 60f
                                + float.Parse(valH[1], CultureInfo.InvariantCulture);
                        }
                        else if (valH.Length == 3)
                        {
                            dataArray[i - 1, k] = float.Parse(valH[0], CultureInfo.InvariantCulture) * 3600f
                                + float.Parse(valH[1], CultureInfo.InvariantCulture) * 60f
                                + float.Parse(valH[2], CultureInfo.InvariantCulture);
                        }
                        else
                        {
                            dataArray[i - 1, k] = 0f;
                        }
                        break;
                    }
                case DataType.Int:
                    {
                        int result = 0;
                        int.TryParse(cleanedValue, NumberStyles.Integer, CultureInfo.InvariantCulture, out result);
                        dataArray[i - 1, k] = (float)result;
                        break;
                    }
                case DataType.Float:
                    {
                        // Support both comma and dot as the decimal point
                        // (approach taken from https://stackoverflow.com/a/19678636).
                        double result = 0.0f;
                        cleanedValue = cleanedValue.Replace(',', '.');
                        double.TryParse(cleanedValue, NumberStyles.Any, CultureInfo.InvariantCulture, out result);
                        dataArray[i - 1, k] = (float)result;
                        break;
                    }
                case DataType.String:
                    {
                        string identifier = dimensionData[k].Identifier;

                        // Fetch the two lookup tables for this dimension, creating them on first use.
                        Dictionary<int, string> codeToText;
                        Dictionary<string, int> textToCode;
                        if (textualDimensionsList.TryGetValue(identifier, out codeToText))
                        {
                            textToCode = textualDimensionsListReverse[identifier];
                        }
                        else
                        {
                            codeToText = new Dictionary<int, string>();
                            textToCode = new Dictionary<string, int>();
                            textualDimensionsList.Add(identifier, codeToText);
                            textualDimensionsListReverse.Add(identifier, textToCode);
                        }

                        // Codes are handed out densely from 0 in order of first appearance,
                        // so the next free code always equals the number of entries so far.
                        int code;
                        if (!textToCode.TryGetValue(cleanedValue, out code))
                        {
                            code = textToCode.Count;
                            textToCode.Add(cleanedValue, code);
                            codeToText.Add(code, cleanedValue);
                        }

                        dataArray[i - 1, k] = code;
                        break;
                    }
                default:
                    {
                        dataArray[i - 1, k] = 0f;
                        break;
                    }
            } // end switch
        } // end k
    }

    // Populate data structure
    for (int i = 0; i < DimensionCount; ++i)
    {
        dimensionData[i].setData(NormaliseCol(dataArray, metadataPreset, i), textualDimensionsList);
    }

    // Raise load event
    if (!isOnLoadNull())
    {
        raiseOnLoad();
    }
}