/// <summary>
/// Parses textual column descriptions and stores the result in <c>MetaData</c>.
/// </summary>
/// <remarks>
/// Each entry is processed as follows: everything up to and including the first ':' is
/// discarded, and the remainder is split on ';' (empty fields are removed, so an empty
/// field shifts the following ones to the left). The resulting fields are read
/// positionally: [0] name, [1] type, [2] param, [3] missing-value action.
/// The column Id and Index are both set to the zero-based position of the entry.
/// The encoding is set to OneHot when the type equals "category" (case-insensitive),
/// otherwise to None. <c>MetaData</c> is assigned only after every entry parsed successfully.
/// </remarks>
/// <param name="mData">Column description strings, one per column.</param>
/// <exception cref="ArgumentNullException"><paramref name="mData"/> is null.</exception>
/// <exception cref="Exception">An entry contains more than one ':' character.</exception>
/// <exception cref="FormatException">An entry has fewer than four non-empty ';'-separated fields.</exception>
public void InitMetaColumn(IEnumerable<string> mData)
{
    if (mData == null)
    {
        throw new ArgumentNullException(nameof(mData));
    }

    // Number of positional fields read below (name, type, param, missing value).
    const int requiredFieldCount = 4;

    var mcl = new List<MetaColumn>();
    int counter = 0;
    foreach (var c in mData)
    {
        // ':' is reserved as the prefix separator, so it may appear at most once per entry.
        if (c.Count(x => x == ':') > 1)
        {
            throw new Exception("Column data contains double point ':' which is reserved char. PLease remove double point from metadata.");
        }

        // IndexOf returns -1 when there is no ':', so "+ 1" keeps the whole string in that case.
        var strData = c.Substring(c.IndexOf(':') + 1);
        var colValues = strData.Split(new char[] { ';' }, StringSplitOptions.RemoveEmptyEntries);

        // Fail with a descriptive error instead of an IndexOutOfRangeException further down.
        if (colValues.Length < requiredFieldCount)
        {
            throw new FormatException(
                $"Column metadata '{c}' must contain at least {requiredFieldCount} ';' separated values (name;type;param;missing value) but contains {colValues.Length}.");
        }

        MetaColumn col = new MetaColumn();
        col.Name = colValues[0];
        col.Id = counter;
        col.Index = counter;
        col.MissingValue = colValues[3];
        col.Param = colValues[2];
        col.Type = colValues[1];

        // Category columns default to one-hot encoding; every other type gets no encoding.
        col.Encoding = col.Type.Equals("category", StringComparison.OrdinalIgnoreCase)
            ? CategoryEncoding.OneHot.ToString()
            : CategoryEncoding.None.ToString();

        counter++;
        mcl.Add(col);
    }

    MetaData = mcl.ToArray();
}
/// <summary>
/// Builds one <c>MetaColumn</c> description for every column in <c>m_trainData</c>.
/// </summary>
/// <returns>
/// An array with one entry per column, in the enumeration order of <c>m_trainData</c>.
/// </returns>
private MetaColumn[] GetMetadata()
{
    var descriptions = new List<MetaColumn>();

    foreach (var column in m_trainData)
    {
        // Only the properties listed below are populated; no ignore flag is copied.
        var description = new MetaColumn
        {
            Encoding = column.Encoding.Description(),
            Id = column.Id,
            Index = column.Index,
            MissingValue = column.MissingValue.ToString(),
            Name = column.Name,
            Param = column.GetVariableType(),
            Scale = column.Scaling.ToString(),
            Type = column.ColumnDataType.ToString()
        };

        descriptions.Add(description);
    }

    return descriptions.ToArray();
}
/// <summary>
/// Creates a <c>ColumnData</c> instance from the textual column properties.
/// </summary>
/// <remarks>
/// All text comparisons are case-insensitive (ordinal).
/// <list type="bullet">
/// <item>Type: matched against the descriptions of Numeric, Binary and Category; anything else (including null) yields Unknown.</item>
/// <item>Encoding: null yields None; otherwise matched against the description or the enum name of each encoding.</item>
/// <item>Param: the column is an output when it matches the description of ParameterType.Output or VariableType.Label.</item>
/// <item>Scale: null yields None; otherwise matched against the descriptions of MinMax, Gauss and None.</item>
/// <item>MissingValue: matched against the descriptions of Ignore, Average, Random, Mode, Max and Min.</item>
/// </list>
/// </remarks>
/// <param name="colProp">Textual description of the column.</param>
/// <returns>The configured column.</returns>
/// <exception cref="ArgumentNullException"><paramref name="colProp"/> is null.</exception>
/// <exception cref="Exception">The encoding, scaling or missing-value text is not recognized.</exception>
private ColumnData CreateColumn(MetaColumn colProp)
{
    if (colProp == null)
    {
        throw new ArgumentNullException(nameof(colProp));
    }

    //determine column type and the encoding for category column type
    ColumnType colType = ResolveColumnType(colProp.Type);
    CategoryEncoding colEncoding = ResolveCategoryEncoding(colProp.Encoding);

    //a column is an output when it is flagged either as output parameter or as label variable
    var isOutput = TextEquals(colProp.Param, ParameterType.Output.Description())
                   || TextEquals(colProp.Param, VariableType.Label.Description());

    ColumnData col = new ColumnData(isOutput, colEncoding);

    // The column name is passed from colProp because col.Name is not assigned yet at this point.
    col.SetNormalization(ResolveScaling(colProp.Scale, colProp.Name));
    col.MissingValue = ResolveMissingValue(colProp.MissingValue, colProp.Name);

    //set column name and type
    col.Name = colProp.Name;
    col.ColumnDataType = colType;
    col.Encoding = colEncoding;
    col.Id = colProp.Id;
    col.Index = colProp.Index;
    return col;
}

/// <summary>Null-safe, case-insensitive ordinal comparison; a null <paramref name="text"/> never matches.</summary>
private static bool TextEquals(string text, string candidate)
{
    return text != null && string.Equals(text, candidate, StringComparison.OrdinalIgnoreCase);
}

/// <summary>Maps the type text to a column type; unrecognized or null text yields Unknown.</summary>
private static ColumnType ResolveColumnType(string typeText)
{
    ColumnType[] candidates = { ColumnType.Numeric, ColumnType.Binary, ColumnType.Category };
    foreach (var candidate in candidates)
    {
        if (TextEquals(typeText, candidate.Description()))
        {
            return candidate;
        }
    }

    return ColumnType.Unknown;
}

/// <summary>Maps the encoding text (description or enum name) to an encoding; null yields None.</summary>
private static CategoryEncoding ResolveCategoryEncoding(string encodingText)
{
    if (encodingText == null)
    {
        return CategoryEncoding.None;
    }

    // Every encoding, Dummy1 included, is accepted by its description or by its enum name.
    CategoryEncoding[] candidates =
    {
        CategoryEncoding.Level,
        CategoryEncoding.OneHot,
        CategoryEncoding.Dummy1,
        CategoryEncoding.Dummy2,
        CategoryEncoding.Binary1,
        CategoryEncoding.Binary2,
        CategoryEncoding.None
    };

    foreach (var candidate in candidates)
    {
        if (TextEquals(encodingText, candidate.Description()) || TextEquals(encodingText, candidate.ToString()))
        {
            return candidate;
        }
    }

    throw new Exception("Unknown encoding !");
}

/// <summary>Maps the scaling text to a scaling; null yields None, unrecognized text throws.</summary>
private static Scaling ResolveScaling(string scaleText, string columnName)
{
    if (scaleText == null)
    {
        return Scaling.None;
    }

    Scaling[] candidates = { Scaling.MinMax, Scaling.Gauss, Scaling.None };
    foreach (var candidate in candidates)
    {
        if (TextEquals(scaleText, candidate.Description()))
        {
            return candidate;
        }
    }

    throw new Exception($"Unknown scaling for column '{columnName}'");
}

/// <summary>Maps the missing-value text to a missing-value action; unrecognized or null text throws.</summary>
private static MissingValue ResolveMissingValue(string missingValueText, string columnName)
{
    MissingValue[] candidates =
    {
        MissingValue.Ignore,
        MissingValue.Average,
        MissingValue.Random,
        MissingValue.Mode,
        MissingValue.Max,
        MissingValue.Min
    };

    foreach (var candidate in candidates)
    {
        if (TextEquals(missingValueText, candidate.Description()))
        {
            return candidate;
        }
    }

    throw new Exception($"Unknown missing value for column '{columnName}'");
}