/// <summary>
/// Applies <paramref name="regex"/> to every entry of string column <paramref name="col"/> and stores
/// the converted values either as a new column or in place of the original one.
/// </summary>
/// <param name="mdata">Table whose string column is processed.</param>
/// <param name="regex">Pattern applied to each entry (or to each semicolon-separated part).</param>
/// <param name="replacement">Replacement text; when null or empty, the first capture group of the match is taken instead.</param>
/// <param name="col">Index of the string column to process.</param>
/// <param name="keepColumns">If true the result is appended as a new column with the same name; otherwise the column is overwritten.</param>
/// <param name="semicolons">If true each entry is split on ';' and every part is converted separately.</param>
private static void ProcessColMatch(IDataWithAnnotationColumns mdata, Regex regex, string replacement, int col, bool keepColumns, bool semicolons) {
    int rowCount = mdata.RowCount;
    string[] rewritten = new string[rowCount];
    for (int r = 0; r < rowCount; r++) {
        string cell = mdata.StringColumns[col][r];
        string[] pieces;
        if (semicolons) {
            pieces = cell.Split(';');
        } else {
            pieces = new[] { cell };
        }
        // Convert each part on its own, then glue the parts back together with ';'.
        string[] converted = new string[pieces.Length];
        for (int p = 0; p < pieces.Length; p++) {
            converted[p] = Transform(pieces[p]);
        }
        rewritten[r] = string.Join(";", converted);
    }
    if (keepColumns) {
        mdata.AddStringColumn(mdata.StringColumnNames[col], null, rewritten);
    } else {
        mdata.StringColumns[col] = rewritten;
    }

    // Extracts capture group 1 when no replacement is given, otherwise substitutes all matches.
    string Transform(string input) {
        return string.IsNullOrEmpty(replacement)
            ? regex.Match(input).Groups[1].ToString()
            : regex.Replace(input, replacement);
    }
}
/// <summary>
/// Round-trip test: writes a table with one annotation column of every kind (two string columns)
/// to an in-memory stream, reads it back as matrix data and checks row/column counts and one cell.
/// </summary>
public void WriteDataWithAnnotationColumnsTest() {
    // Table under test, filled with annotation columns only.
    IDataWithAnnotationColumns source = PerseusFactory.CreateDataWithAnnotationColumns();
    source.AddStringColumn("strcol1", "this is stringcol1", new[] { "1", "2" });
    source.AddStringColumn("strcol2", "", new[] { "", "hallo" });
    source.AddNumericColumn("numcol", "", new[] { 1.0, 2.0 });
    source.AddMultiNumericColumn("multnumcol", "this is multnumcol", new[] { new[] { -2.0, 2.0 }, new double[] {} });
    source.AddCategoryColumn("catcol", "", new[] { new[] { "cat1", "cat1.1" }, new[] { "cat2", "cat1" } });

    // Serialize into memory and capture the text.
    string serialized;
    using (MemoryStream buffer = new MemoryStream())
    using (StreamWriter writer = new StreamWriter(buffer)) {
        PerseusUtils.WriteDataWithAnnotationColumns(source, writer);
        writer.Flush();
        serialized = Encoding.UTF8.GetString(buffer.ToArray());
    }

    // Parse the captured text back into a fresh matrix.
    IMatrixData parsed = PerseusFactory.CreateMatrixData();
    ProcessInfo info = new ProcessInfo(new Settings(), status => { }, progress => { }, 1, i => { });
    PerseusUtils.ReadMatrix(parsed, info,
        () => new StreamReader(new MemoryStream(Encoding.UTF8.GetBytes(serialized))),
        "matrix1", '\t');

    IDataWithAnnotationColumns roundTripped = parsed;
    Assert.AreEqual(2, roundTripped.RowCount);
    Assert.AreEqual(2, roundTripped.StringColumnCount);
    Assert.AreEqual(1, roundTripped.NumericColumnCount);
    Assert.AreEqual(1, roundTripped.CategoryColumnCount);
    Assert.AreEqual(1, roundTripped.MultiNumericColumnCount);
    int strcol2Index = roundTripped.StringColumnNames.FindIndex(col => col.Equals("strcol2"));
    Assert.AreEqual("hallo", roundTripped.StringColumns[strcol2Index][1]);
}
/// <summary>
/// Copies the string columns selected by the "Text columns" parameter from <paramref name="mdata2"/>
/// into <paramref name="result"/>, producing one row per row of <paramref name="mdata1"/>.
/// </summary>
/// <param name="mdata1">Table that determines the number of output rows.</param>
/// <param name="mdata2">Table the column values are taken from.</param>
/// <param name="parameters">Parameter set holding the "Text columns" selection.</param>
/// <param name="indexMap">For each row of <paramref name="mdata1"/>, the rows of <paramref name="mdata2"/> whose values are combined.</param>
/// <param name="result">Table that receives the new string columns.</param>
private static void AddStringColumns(IDataWithAnnotationColumns mdata1, IDataWithAnnotationColumns mdata2, Parameters parameters, IList<int[]> indexMap, IDataWithAnnotationColumns result) {
    int[] selected = parameters.GetParam<int[]>("Text columns").Value;
    int columnCount = selected.Length;
    string[] names = new string[columnCount];
    string[][] merged = new string[columnCount][];
    for (int c = 0; c < columnCount; c++) {
        int sourceIndex = selected[c];
        string[] source = mdata2.StringColumns[sourceIndex];
        names[c] = mdata2.StringColumnNames[sourceIndex];
        int rowCount = mdata1.RowCount;
        string[] target = new string[rowCount];
        merged[c] = target;
        for (int row = 0; row < rowCount; row++) {
            // Gather the non-empty entries of all mapped source rows.
            List<string> nonEmpty = new List<string>();
            foreach (int sourceRow in indexMap[row]) {
                string text = source[sourceRow];
                if (text.Length > 0) {
                    nonEmpty.Add(text);
                }
            }
            target[row] = nonEmpty.Count > 0 ? StringUtils.Concat(";", nonEmpty.ToArray()) : "";
        }
    }
    // Columns are attached only after all of them have been computed.
    for (int c = 0; c < columnCount; c++) {
        result.AddStringColumn(names[c], "", merged[c]);
    }
}
/// <summary>
/// Resolves the base identifiers of <paramref name="mdata"/>, runs <c>ProcessDataAddAnnotation</c> on them and,
/// if that succeeds, appends the resulting category, text and numeric columns to <paramref name="mdata"/>.
/// </summary>
/// <param name="mdata">Table that supplies the identifiers and receives the new columns.</param>
/// <param name="annotationProvider">Provider passed on to the identifier lookup and the annotation step.</param>
/// <param name="para">Parameter set passed on to both steps.</param>
/// <param name="processInfo">Process information passed on to the annotation step.</param>
public static void ProcessData(IDataWithAnnotationColumns mdata, IAnnotationProvider annotationProvider, Parameters para, ProcessInfo processInfo) {
    string[] identifiers = GetBaseIds(mdata, annotationProvider, para);
    bool annotated = ProcessDataAddAnnotation(mdata.RowCount, annotationProvider, para, identifiers, processInfo,
        out string[] columnNames, out int[] categoryNameInds, out int[] textNameInds, out int[] numericNameInds,
        out string[][][] categoryData, out string[][] textData, out double[][] numericData);
    if (annotated) {
        // Each *NameInds array maps a new column to its entry in columnNames.
        for (int c = 0; c < categoryData.Length; c++) {
            mdata.AddCategoryColumn(columnNames[categoryNameInds[c]], "", categoryData[c]);
        }
        for (int t = 0; t < textData.Length; t++) {
            mdata.AddStringColumn(columnNames[textNameInds[t]], "", textData[t]);
        }
        for (int n = 0; n < numericData.Length; n++) {
            mdata.AddNumericColumn(columnNames[numericNameInds[n]], "", numericData[n]);
        }
    }
}
/// <summary>
/// Combines the given rows of every annotation column into a single value and stores it
/// in the first of those rows. Does nothing when <paramref name="rowIdxs"/> is empty.
/// </summary>
/// <param name="mdata">Table whose columns are modified in place.</param>
/// <param name="rowIdxs">Rows to combine; the combined value is written to <c>rowIdxs[0]</c>.</param>
/// <param name="combineNumeric">Reduces the selected numeric values to one value.</param>
/// <param name="combineString">Reduces the selected string values to one value.</param>
/// <param name="combineCategory">Reduces the selected category entries to one entry.</param>
/// <param name="combineMultiNumeric">Reduces the selected multi-numeric entries to one entry.</param>
public static void CombineRows(this IDataWithAnnotationColumns mdata, List<int> rowIdxs, Func<double[], double> combineNumeric, Func<string[], string> combineString, Func<string[][], string[]> combineCategory, Func<double[][], double[]> combineMultiNumeric) {
    if (!rowIdxs.Any()) {
        return;
    }
    int target = rowIdxs[0];
    for (int n = 0; n < mdata.NumericColumnCount; n++) {
        double[] numeric = mdata.NumericColumns[n];
        numeric[target] = combineNumeric(ArrayUtils.SubArray(numeric, rowIdxs));
    }
    for (int s = 0; s < mdata.StringColumnCount; s++) {
        string[] text = mdata.StringColumns[s];
        text[target] = combineString(ArrayUtils.SubArray(text, rowIdxs));
    }
    for (int c = 0; c < mdata.CategoryColumnCount; c++) {
        // Category columns are fetched, modified and then written back explicitly.
        string[][] category = mdata.GetCategoryColumnAt(c);
        category[target] = combineCategory(ArrayUtils.SubArray(category, rowIdxs));
        mdata.SetCategoryColumnAt(category, c);
    }
    for (int m = 0; m < mdata.MultiNumericColumnCount; m++) {
        double[][] multi = mdata.MultiNumericColumns[m];
        multi[target] = combineMultiNumeric(ArrayUtils.SubArray(multi, rowIdxs));
    }
}
/// <summary>
/// Copies the category columns selected by the "Categorical columns" parameter from <paramref name="mdata2"/>
/// into <paramref name="result"/>, producing one row per row of <paramref name="mdata1"/>.
/// </summary>
/// <param name="mdata1">Table that determines the number of output rows.</param>
/// <param name="mdata2">Table the category entries are taken from.</param>
/// <param name="parameters">Parameter set holding the "Categorical columns" selection.</param>
/// <param name="indexMap">For each row of <paramref name="mdata1"/>, the rows of <paramref name="mdata2"/> whose entries are combined.</param>
/// <param name="result">Table that receives the new category columns.</param>
private static void AddCategoricalColumns(IDataWithAnnotationColumns mdata1, IDataWithAnnotationColumns mdata2, Parameters parameters, IList<int[]> indexMap, IDataWithAnnotationColumns result) {
    int[] selected = parameters.GetParam<int[]>("Categorical columns").Value;
    int columnCount = selected.Length;
    string[] names = new string[columnCount];
    string[][][] merged = new string[columnCount][][];
    for (int c = 0; c < columnCount; c++) {
        int sourceIndex = selected[c];
        string[][] source = mdata2.GetCategoryColumnAt(sourceIndex);
        names[c] = mdata2.CategoryColumnNames[sourceIndex];
        int rowCount = mdata1.RowCount;
        string[][] target = new string[rowCount][];
        merged[c] = target;
        for (int row = 0; row < rowCount; row++) {
            // Gather the non-empty category entries of all mapped source rows.
            List<string[]> nonEmpty = new List<string[]>();
            foreach (int sourceRow in indexMap[row]) {
                string[] terms = source[sourceRow];
                if (terms.Length > 0) {
                    nonEmpty.Add(terms);
                }
            }
            if (nonEmpty.Count == 0) {
                target[row] = new string[0];
            } else {
                target[row] = ArrayUtils.UniqueValues(ArrayUtils.Concat(nonEmpty.ToArray()));
            }
        }
    }
    // Columns are attached only after all of them have been computed.
    for (int c = 0; c < columnCount; c++) {
        result.AddCategoryColumn(names[c], "", merged[c]);
    }
}
/// <summary>
/// Reads a tab-separated matrix file into a temporary matrix and copies its
/// annotation columns into <paramref name="data"/>.
/// </summary>
/// <param name="data">Table that receives the annotation columns.</param>
/// <param name="file">File handed to <c>PerseusUtils.ReadMatrixFromFile</c>.</param>
/// <param name="processInfo">Process information handed to the reader.</param>
private static void ReadMatrixDataInto(IDataWithAnnotationColumns data, string file, ProcessInfo processInfo) {
    // The matrix is only a staging area; just its annotation columns are kept.
    var staging = PerseusFactory.CreateMatrixData();
    PerseusUtils.ReadMatrixFromFile(staging, processInfo, file, '\t');
    data.CopyAnnotationColumnsFrom(staging);
}
/// <summary>
/// Builds, for every row of <paramref name="mdata1"/>, the list of distinct rows of <paramref name="mdata2"/>
/// that share at least one identifier with it.
/// </summary>
/// <param name="mdata1">First table; one output entry is produced per row of its matching column.</param>
/// <param name="mdata2">Second table whose rows are looked up by identifier.</param>
/// <param name="parameters">Parameter set holding the matching columns and the "Use additional column pair" switch.</param>
/// <param name="separator">Separator used when identifiers are built from a column pair.</param>
/// <returns>For each row of the first table, the matching row indices of the second table.</returns>
private static int[][] GetIndexMap(IDataWithAnnotationColumns mdata1, IDataWithAnnotationColumns mdata2, Parameters parameters, string separator) {
    ParameterWithSubParams<bool> pairParam = parameters.GetParamWithSubParams<bool>("Use additional column pair");
    bool usePair = pairParam.Value;
    Dictionary<string, List<int>> rowsById;
    string[][] idsPerRow;
    if (usePair) {
        rowsById = GetIdToColsPair(mdata2, parameters, pairParam.GetSubParameters(), separator);
        idsPerRow = GetColumnPair(mdata1, parameters, pairParam.GetSubParameters(), separator);
    } else {
        rowsById = GetIdToColsSingle(mdata2, parameters);
        idsPerRow = GetColumnSplitBySemicolon(mdata1, parameters, "Matching column in matrix 1");
    }
    int[][] map = new int[idsPerRow.Length][];
    for (int row = 0; row < idsPerRow.Length; row++) {
        List<int> hits = new List<int>();
        foreach (string id in idsPerRow[row]) {
            List<int> rows;
            if (rowsById.TryGetValue(id, out rows)) {
                hits.AddRange(rows);
            }
        }
        // Several identifiers may point at the same row; keep each row once.
        map[row] = ArrayUtils.UniqueValues(hits.ToArray());
    }
    return map;
}
/// <summary>
/// Adds a category column, named after <paramref name="mdata2"/>, that holds "+" for every row
/// with at least one entry in <paramref name="indexMap"/> and an empty entry otherwise.
/// </summary>
/// <param name="result">Table that receives the indicator column.</param>
/// <param name="mdata2">Data object whose <c>Name</c> becomes the column name.</param>
/// <param name="indexMap">Per-row lists of matched indices.</param>
private static void AddIndicator(IDataWithAnnotationColumns result, IData mdata2, int[][] indexMap) {
    int rowCount = indexMap.Length;
    string[][] marks = new string[rowCount][];
    for (int row = 0; row < rowCount; row++) {
        if (indexMap[row].Length > 0) {
            marks[row] = new[] { "+" };
        } else {
            marks[row] = new string[0];
        }
    }
    result.AddCategoryColumn(mdata2.Name, "", marks);
}
/// <summary>
/// Collects all category columns of <paramref name="mdata"/> into a list, in column order.
/// </summary>
/// <param name="mdata">Table to read the category columns from.</param>
/// <returns>One entry per category column, as returned by <c>GetCategoryColumnAt</c>.</returns>
private static List<string[][]> GetCategoryColumns(IDataWithAnnotationColumns mdata) {
    List<string[][]> columns = new List<string[][]>();
    int index = 0;
    while (index < mdata.CategoryColumnCount) {
        columns.Add(mdata.GetCategoryColumnAt(index));
        index++;
    }
    return columns;
}
/// <summary>
/// Returns the string column that holds the identifiers used in the mapping.
/// </summary>
/// <param name="mdata">Table the identifier column is taken from.</param>
/// <param name="annotationProvider">Provider whose selected source supplies the name of the column parameter.</param>
/// <param name="para">Parameter set containing the "Source" selection and its sub-parameters.</param>
/// <returns>The string column of <paramref name="mdata"/> chosen for the selected source.</returns>
private static string[] GetBaseIds(IDataWithAnnotationColumns mdata, IAnnotationProvider annotationProvider, Parameters para) {
    ParameterWithSubParams<int> sourceParam = para.GetParamWithSubParams<int>("Source");
    int sourceIndex = sourceParam.Value;
    Parameters sourceSettings = sourceParam.GetSubParameters();
    // The column selector is stored under "<source id> column".
    string columnKey = annotationProvider.Sources[sourceIndex].id + " column";
    int idColumn = sourceSettings.GetParam<int>(columnKey).Value;
    return mdata.StringColumns[idColumn];
}
/// <summary>
/// Returns the string column that holds the identifiers for the annotation source
/// selected in the "Source" parameter.
/// </summary>
/// <param name="para">Parameter set containing the "Source" selection and its sub-parameters.</param>
/// <param name="mdata">Table the identifier column is taken from.</param>
/// <returns>The selected string column of <paramref name="mdata"/>.</returns>
private static string[] GetBaseIds(Parameters para, IDataWithAnnotationColumns mdata) {
    // Only the base names are needed here; the other outputs are required by the call signature.
    PerseusUtils.GetAvailableAnnots(out string[] baseNames, out AnnotType[][] types, out string[] files);
    ParameterWithSubParams<int> sourceParam = para.GetParamWithSubParams<int>("Source");
    int sourceIndex = sourceParam.Value;
    Parameters sourceSettings = sourceParam.GetSubParameters();
    // The column selector is stored under "<base name> column".
    string columnKey = baseNames[sourceIndex] + " column";
    int idColumn = sourceSettings.GetParam<int>(columnKey).Value;
    return mdata.StringColumns[idColumn];
}
/// <summary>
/// Reads the matching column and the additional column of matrix 1, both split on semicolons,
/// and combines them row by row via <c>Combine</c>.
/// </summary>
/// <param name="mdata1">Table both columns are read from.</param>
/// <param name="parameters">Parameter set holding "Matching column in matrix 1".</param>
/// <param name="subPar">Parameter set holding "Additional column in matrix 1".</param>
/// <param name="separator">Separator passed on to <c>Combine</c>.</param>
/// <returns>One combined identifier array per row.</returns>
private static string[][] GetColumnPair(IDataWithAnnotationColumns mdata1, Parameters parameters, Parameters subPar, string separator) {
    string[][] primary = GetColumnSplitBySemicolon(mdata1, parameters, "Matching column in matrix 1");
    string[][] additional = GetColumnSplitBySemicolon(mdata1, subPar, "Additional column in matrix 1");
    int rowCount = primary.Length;
    string[][] paired = new string[rowCount][];
    for (int row = 0; row < rowCount; row++) {
        paired[row] = Combine(primary[row], additional[row], separator);
    }
    return paired;
}
/// <summary>
/// Creates a default implementation of <see cref="INetworkInfo"/> from the given graph
/// and node/edge tables and indices.
/// </summary>
/// <remarks>
/// The implementation type is resolved by name at run time and instantiated through its constructor
/// taking the seven arguments in the order given here.
/// </remarks>
/// <exception cref="Exception">The implementation type could not be resolved.</exception>
public static INetworkInfo CreateNetworkInfo(IGraph graph, IDataWithAnnotationColumns nodeTable, Dictionary<INode, int> nodeIndex, IDataWithAnnotationColumns edgeTable, Dictionary<IEdge, int> edgeIndex, string name, Guid guid) {
    string networkInfoTypeName = Assembly.CreateQualifiedName("PerseusLibS", "PerseusLibS.Data.Network.NetworkInfo");
    Type type = Type.GetType(networkInfoTypeName);
    if (type == null) {
        throw new Exception($"Cannot load type {networkInfoTypeName}.");
    }
    // Constructor arguments, in the order expected by the implementation type.
    object[] constructorArgs = { graph, nodeTable, nodeIndex, edgeTable, edgeIndex, name, guid };
    object instance = Activator.CreateInstance(type, constructorArgs);
    return (INetworkInfo) instance;
}
/// <summary>
/// Checks that <c>UniqueValues</c> removes duplicate semicolon-separated entries in place:
/// "a;b" stays unchanged while "a;a" collapses to "a".
/// </summary>
public void UniqueValuesTest() {
    // Backing list returned by the mocked StringColumns property; it is inspected after the call.
    List<string[]> columns = new List<string[]> { new[] { "a;b", "a;a" } };
    Mock<IDataWithAnnotationColumns> mock = new Moq.Mock<IDataWithAnnotationColumns>();
    mock.Setup(data => data.StringColumns).Returns(columns);
    IDataWithAnnotationColumns table = mock.Object;

    table.UniqueValues(new[] { 0 });

    CollectionAssert.AreEqual(new[] { "a;b", "a" }, columns[0]);
}
/// <summary>
/// Reads the string column selected by parameter <paramref name="colName"/> and splits every
/// trimmed entry on ';', keeping each value once per row.
/// </summary>
/// <param name="mdata">Table the column is read from.</param>
/// <param name="parameters">Parameter set holding the column selection.</param>
/// <param name="colName">Name of the integer parameter that selects the column.</param>
/// <returns>Per row, the unique values of the entry; an entry that is empty after trimming is treated as having no values.</returns>
private static string[][] GetColumnSplitBySemicolon(IDataWithAnnotationColumns mdata, Parameters parameters, string colName) {
    int columnIndex = parameters.GetParam<int>(colName).Value;
    string[] column = mdata.StringColumns[columnIndex];
    string[][] split = new string[column.Length][];
    for (int row = 0; row < column.Length; row++) {
        string trimmed = column[row].Trim();
        string[] parts;
        if (trimmed.Length == 0) {
            parts = new string[0];
        } else {
            parts = trimmed.Split(';');
        }
        split[row] = ArrayUtils.UniqueValues(parts);
    }
    return split;
}
/// <summary>
/// Copies the main columns selected by the "Main columns" parameter from <paramref name="mdata2"/> into
/// <paramref name="result"/>, combining the values of all mapped rows per output row.
/// Does nothing when no main column is selected.
/// </summary>
/// <param name="mdata1">Table that determines the number of output rows.</param>
/// <param name="mdata2">Matrix the values, quality values and imputation flags are read from.</param>
/// <param name="parameters">Parameter set holding "Combine main values" and "Main columns".</param>
/// <param name="indexMap">For each row of <paramref name="mdata1"/>, the rows of <paramref name="mdata2"/> to combine.</param>
/// <param name="result">Matrix that receives the new main columns.</param>
private static void AddMainColumns(IDataWithAnnotationColumns mdata1, IMatrixData mdata2, Parameters parameters, IList<int[]> indexMap, IMatrixData result) {
    Func<double[], double> combine = GetAveraging(parameters.GetParam<int>("Combine main values").Value);
    int[] mainCols = parameters.GetParam<int[]>("Main columns").Value;
    if (mainCols.Length == 0) {
        return;
    }
    int rowCount = mdata1.RowCount;
    double[,] combinedValues = new double[rowCount, mainCols.Length];
    double[,] combinedQuality = new double[rowCount, mainCols.Length];
    bool[,] combinedImputed = new bool[rowCount, mainCols.Length];
    string[] names = new string[mainCols.Length];
    for (int c = 0; c < mainCols.Length; c++) {
        int sourceCol = mainCols[c];
        names[c] = mdata2.ColumnNames[sourceCol];
        for (int row = 0; row < rowCount; row++) {
            List<double> validValues = new List<double>();
            List<double> validQuality = new List<double>();
            List<bool> imputedFlags = new List<bool>();
            foreach (int sourceRow in indexMap[row]) {
                double value = mdata2.Values.Get(sourceRow, sourceCol);
                // Quality and imputation are only collected for cells with a finite value.
                if (double.IsNaN(value) || double.IsInfinity(value)) {
                    continue;
                }
                validValues.Add(value);
                if (mdata2.Quality.IsInitialized()) {
                    double quality = mdata2.Quality.Get(sourceRow, sourceCol);
                    if (!(double.IsNaN(quality) || double.IsInfinity(quality))) {
                        validQuality.Add(quality);
                    }
                }
                if (mdata2.IsImputed != null) {
                    imputedFlags.Add(mdata2.IsImputed[sourceRow, sourceCol]);
                }
            }
            // Rows without usable input get NaN (values, quality) or false (imputed).
            combinedValues[row, c] = validValues.Count > 0 ? combine(validValues.ToArray()) : double.NaN;
            combinedQuality[row, c] = validQuality.Count > 0 ? combine(validQuality.ToArray()) : double.NaN;
            combinedImputed[row, c] = imputedFlags.Count != 0 && AvImp(imputedFlags.ToArray());
        }
    }
    MakeNewNames(names, result.ColumnNames);
    AddMainColumns(result, names, combinedValues, combinedQuality, combinedImputed);
}
/// <summary>
/// Collects the lower-cased, semicolon-separated identifiers found in string column
/// <paramref name="baseCol"/> of <paramref name="mdata"/>.
/// </summary>
/// <param name="mdata">Table the column is read from.</param>
/// <param name="baseCol">Index of the string column holding the identifiers.</param>
/// <returns>Set of all identifiers in lower case; empty entries contribute nothing.</returns>
private static HashSet<string> GetAllIds(IDataWithAnnotationColumns mdata, int baseCol) {
    HashSet<string> ids = new HashSet<string>();
    foreach (string entry in mdata.StringColumns[baseCol]) {
        if (entry.Length == 0) {
            continue;
        }
        foreach (string id in entry.Split(';')) {
            // NOTE(review): ToLower() follows the current culture - confirm that is intended.
            ids.Add(id.ToLower());
        }
    }
    return ids;
}
/// <summary>
/// Removes duplicate semicolon-separated values from every entry of the given string columns, in place.
/// </summary>
/// <param name="mdata">Table whose string columns are modified.</param>
/// <param name="stringCols">Indices of the string columns to process.</param>
public static void UniqueValues(this IDataWithAnnotationColumns mdata, int[] stringCols) {
    foreach (int columnIndex in stringCols) {
        string[] column = mdata.StringColumns[columnIndex];
        for (int row = 0; row < column.Length; row++) {
            string entry = column[row];
            // Empty entries are left untouched.
            if (entry.Length != 0) {
                string[] distinct = ArrayUtils.UniqueValues(entry.Split(';'));
                column[row] = StringUtils.Concat(";", distinct);
            }
        }
    }
}
/// <summary>
/// Replaces every entry of string column <paramref name="col"/> by the first capture group of
/// <paramref name="regex"/> and stores the result as a new column or in place of the original.
/// </summary>
/// <param name="mdata">Table whose string column is processed.</param>
/// <param name="regex">Pattern whose capture group 1 is extracted.</param>
/// <param name="col">Index of the string column to process.</param>
/// <param name="keepColumns">If true the result is appended as a new column with the same name; otherwise the column is overwritten.</param>
/// <param name="semicolons">If true each entry is split on ';' and every part is matched separately.</param>
private static void ProcessCol(IDataWithAnnotationColumns mdata, Regex regex, int col, bool keepColumns, bool semicolons){
    int rowCount = mdata.RowCount;
    string[] extracted = new string[rowCount];
    for (int r = 0; r < rowCount; r++){
        string cell = mdata.StringColumns[col][r];
        string[] pieces;
        if (semicolons){
            pieces = cell.Split(';');
        } else{
            pieces = new[]{cell};
        }
        // Match each part on its own, then glue the captures back together with ';'.
        string[] captures = new string[pieces.Length];
        for (int p = 0; p < pieces.Length; p++){
            captures[p] = regex.Match(pieces[p]).Groups[1].ToString();
        }
        extracted[r] = string.Join(";", captures);
    }
    if (keepColumns){
        mdata.AddStringColumn(mdata.StringColumnNames[col], null, extracted);
    } else{
        mdata.StringColumns[col] = extracted;
    }
}
/// <summary>
/// Appends <paramref name="length"/> empty rows to every annotation column of the table.
/// </summary>
/// <remarks>
/// String columns are padded with empty strings, numeric columns with NaN, and
/// multi-numeric and category columns with empty arrays.
/// </remarks>
/// <param name="mdata">Table whose annotation columns are extended.</param>
/// <param name="length">Number of rows to append.</param>
public static void AddEmptyRows(this IDataWithAnnotationColumns mdata, int length) {
    for (int s = 0; s < mdata.StringColumnCount; s++) {
        var emptyText = Enumerable.Repeat(String.Empty, length);
        mdata.StringColumns[s] = mdata.StringColumns[s].Concat(emptyText).ToArray();
    }
    for (int n = 0; n < mdata.NumericColumnCount; n++) {
        var missingNumbers = Enumerable.Repeat(Double.NaN, length);
        mdata.NumericColumns[n] = mdata.NumericColumns[n].Concat(missingNumbers).ToArray();
    }
    for (int m = 0; m < mdata.MultiNumericColumnCount; m++) {
        // A separate empty array is created for every appended row.
        var emptyLists = Enumerable.Range(0, length).Select(_ => new double[0]);
        mdata.MultiNumericColumns[m] = mdata.MultiNumericColumns[m].Concat(emptyLists).ToArray();
    }
    for (int c = 0; c < mdata.CategoryColumnCount; c++) {
        string[][] current = mdata.GetCategoryColumnAt(c);
        // One empty array instance is repeated for all appended rows of this column.
        var emptyTerms = Enumerable.Repeat(new string[0], length);
        mdata.SetCategoryColumnAt(current.Concat(emptyTerms).ToArray(), c);
    }
}
/// <summary>
/// Builds a lookup from identifier to the rows of <paramref name="mdata2"/> whose
/// "Matching column in matrix 2" entry contains that identifier.
/// </summary>
/// <param name="mdata2">Table whose matching column is indexed.</param>
/// <param name="parameters">Parameter set holding "Matching column in matrix 2".</param>
/// <returns>Dictionary from identifier to the row indices it occurs in, in ascending row order.</returns>
private static Dictionary<string, List<int>> GetIdToColsSingle(IDataWithAnnotationColumns mdata2, Parameters parameters) {
    string[][] idsPerRow = GetColumnSplitBySemicolon(mdata2, parameters, "Matching column in matrix 2");
    Dictionary<string, List<int>> rowsById = new Dictionary<string, List<int>>();
    for (int row = 0; row < idsPerRow.Length; row++) {
        foreach (string id in idsPerRow[row]) {
            List<int> rows;
            if (!rowsById.TryGetValue(id, out rows)) {
                rows = new List<int>();
                rowsById.Add(id, rows);
            }
            rows.Add(row);
        }
    }
    return rowsById;
}
/// <summary>
/// Replaces every entry of string column <paramref name="col"/> by the first capture group of
/// <paramref name="regex"/> and stores the result as a new column or in place of the original.
/// </summary>
/// <param name="mdata">Table whose string column is processed.</param>
/// <param name="regex">Pattern whose capture group 1 is extracted.</param>
/// <param name="col">Index of the string column to process.</param>
/// <param name="keepColumns">If true the result is appended as a new column with the same name; otherwise the column is overwritten.</param>
/// <param name="semicolons">If true each entry is split on ';' and every part is matched separately.</param>
private static void ProcessCol(IDataWithAnnotationColumns mdata, Regex regex, int col, bool keepColumns, bool semicolons) {
    int rowCount = mdata.RowCount;
    string[] extracted = new string[rowCount];
    for (int r = 0; r < rowCount; r++) {
        string cell = mdata.StringColumns[col][r];
        string[] pieces;
        if (semicolons) {
            pieces = cell.Split(';');
        } else {
            pieces = new[] { cell };
        }
        // Match each part on its own, then glue the captures back together with ';'.
        string[] captures = new string[pieces.Length];
        for (int p = 0; p < pieces.Length; p++) {
            captures[p] = regex.Match(pieces[p]).Groups[1].ToString();
        }
        extracted[r] = string.Join(";", captures);
    }
    if (keepColumns) {
        mdata.AddStringColumn(mdata.StringColumnNames[col], null, extracted);
    } else {
        mdata.StringColumns[col] = extracted;
    }
}
/// <summary>
/// Builds a lookup from combined identifier to rows of <paramref name="mdata2"/>. The combined identifier
/// is every value of the matching column joined, via <paramref name="separator"/>, with every value of the
/// additional column of the same row.
/// </summary>
/// <param name="mdata2">Table whose two columns are indexed.</param>
/// <param name="parameters">Parameter set holding "Matching column in matrix 2".</param>
/// <param name="subPar">Parameter set holding "Additional column in matrix 2".</param>
/// <param name="separator">Text placed between the two identifier parts.</param>
/// <returns>Dictionary from combined identifier to the row indices it occurs in.</returns>
private static Dictionary<string, List<int>> GetIdToColsPair(IDataWithAnnotationColumns mdata2, Parameters parameters, Parameters subPar, string separator) {
    string[][] primary = GetColumnSplitBySemicolon(mdata2, parameters, "Matching column in matrix 2");
    string[][] additional = GetColumnSplitBySemicolon(mdata2, subPar, "Additional column in matrix 2");
    Dictionary<string, List<int>> rowsById = new Dictionary<string, List<int>>();
    for (int row = 0; row < primary.Length; row++) {
        foreach (string first in primary[row]) {
            foreach (string second in additional[row]) {
                string key = first + separator + second;
                List<int> rows;
                if (!rowsById.TryGetValue(key, out rows)) {
                    rows = new List<int>();
                    rowsById.Add(key, rows);
                }
                rows.Add(row);
            }
        }
    }
    return rowsById;
}
/// <summary>
/// Copies the numeric columns selected by the "Numerical columns" parameter from <paramref name="mdata2"/>
/// into <paramref name="result"/>, producing one row per row of <paramref name="mdata1"/>.
/// </summary>
/// <remarks>
/// When <c>GetAveraging</c> yields a combining function, the non-NaN values of the mapped rows are reduced
/// to one number (NaN if there are none) and numeric columns are added. When it yields null, all non-NaN
/// values are kept and multi-numeric columns are added instead.
/// </remarks>
/// <param name="mdata1">Table that determines the number of output rows.</param>
/// <param name="mdata2">Table the numeric values are taken from.</param>
/// <param name="parameters">Parameter set holding "Combine numerical values" and "Numerical columns".</param>
/// <param name="indexMap">For each row of <paramref name="mdata1"/>, the rows of <paramref name="mdata2"/> to combine.</param>
/// <param name="result">Table that receives the new columns.</param>
private static void AddNumericColumns(IDataWithAnnotationColumns mdata1, IDataWithAnnotationColumns mdata2, Parameters parameters, IList<int[]> indexMap, IDataWithAnnotationColumns result) {
    Func<double[], double> combine = GetAveraging(parameters.GetParam<int>("Combine numerical values").Value);
    int[] selected = parameters.GetParam<int[]>("Numerical columns").Value;
    int columnCount = selected.Length;
    string[] names = new string[columnCount];
    if (combine == null) {
        // No combining function: keep every matched value in a multi-numeric column.
        double[][][] multiColumns = new double[columnCount][][];
        for (int c = 0; c < columnCount; c++) {
            double[] source = mdata2.NumericColumns[selected[c]];
            names[c] = mdata2.NumericColumnNames[selected[c]];
            multiColumns[c] = new double[mdata1.RowCount][];
            for (int row = 0; row < mdata1.RowCount; row++) {
                multiColumns[c][row] = CollectNonNaNValues(source, indexMap[row]);
            }
        }
        for (int c = 0; c < columnCount; c++) {
            result.AddMultiNumericColumn(names[c], "", multiColumns[c]);
        }
        return;
    }
    double[][] numericColumns = new double[columnCount][];
    for (int c = 0; c < columnCount; c++) {
        double[] source = mdata2.NumericColumns[selected[c]];
        names[c] = mdata2.NumericColumnNames[selected[c]];
        numericColumns[c] = new double[mdata1.RowCount];
        for (int row = 0; row < mdata1.RowCount; row++) {
            double[] matched = CollectNonNaNValues(source, indexMap[row]);
            numericColumns[c][row] = matched.Length == 0 ? double.NaN : combine(matched);
        }
    }
    for (int c = 0; c < columnCount; c++) {
        result.AddNumericColumn(names[c], "", numericColumns[c]);
    }
}

/// <summary>
/// Returns the values of <paramref name="column"/> at the given <paramref name="rows"/>, skipping NaN entries.
/// </summary>
private static double[] CollectNonNaNValues(double[] column, int[] rows) {
    List<double> kept = new List<double>();
    foreach (int row in rows) {
        double value = column[row];
        if (!double.IsNaN(value)) {
            kept.Add(value);
        }
    }
    return kept.ToArray();
}
/// <summary>
/// Creates the instance and stores <paramref name="mdata"/> in the <c>mdata</c> member.
/// </summary>
/// <param name="mdata">Table with annotation columns kept by this instance.</param>
public RowNameInfo(IDataWithAnnotationColumns mdata) => this.mdata = mdata;
/// <summary>
/// Returns every category column of <paramref name="mdata"/>, in column order.
/// </summary>
/// <param name="mdata">Table to read the category columns from.</param>
/// <returns>List with one entry per category column, as returned by <c>GetCategoryColumnAt</c>.</returns>
private static List<string[][]> GetCategoryColumns(IDataWithAnnotationColumns mdata){
    List<string[][]> columns = new List<string[][]>();
    int next = 0;
    while (next < mdata.CategoryColumnCount){
        columns.Add(mdata.GetCategoryColumnAt(next));
        next++;
    }
    return columns;
}
/// <summary>
/// Unsafe shortcut for obtaining a string column by name, ignoring case.
/// </summary>
/// <remarks>
/// Names are compared with an ordinal, case-insensitive comparison, so the lookup does not depend on
/// the current thread culture. There is no check that the column exists: when no name matches,
/// <c>FindIndex</c> yields -1 and the subsequent indexing of <c>StringColumns</c> is expected to fail.
/// </remarks>
/// <param name="data">Table to read the column from.</param>
/// <param name="colname">Name of the wanted string column; letter case is ignored.</param>
/// <returns>The first string column whose name matches <paramref name="colname"/>.</returns>
private static string[] GetStringColumn(this IDataWithAnnotationColumns data, string colname) {
    int index = data.StringColumnNames.FindIndex(
        col => string.Equals(col, colname, System.StringComparison.OrdinalIgnoreCase));
    return data.StringColumns[index];
}
public static void AddAnnotationColumns(IDataWithAnnotationColumns result, IDataWithAnnotationColumns mdata2, int[][] indexMap, int[] copyTextColumns, (int[] copy, int combine) numeric, int[] copyCatColumns)
// Returns the column at a combined index: indices below the string-column count address
// string columns; the remaining indices address numeric columns, converted to text.
string[] GetColumn(IDataWithAnnotationColumns data, int index) {
    if (index < data.StringColumnCount) {
        return data.StringColumns[index];
    }
    double[] numbers = data.NumericColumns[index - data.StringColumnCount];
    return numbers.Select(value => Convert.ToString(value)).ToArray();
}