/// <summary>
/// Round-trip test: serializes annotation-column data with
/// <c>PerseusUtils.WriteDataWithAnnotationColumns</c>, parses the text back with
/// <c>PerseusUtils.ReadMatrix</c> (tab separated) and checks the row count, the
/// number of columns of each kind and one string cell.
/// </summary>
public void WriteDataWithAnnotationColumnsTest() {
    // main data
    IDataWithAnnotationColumns source = PerseusFactory.CreateDataWithAnnotationColumns();

    // annotation columns
    source.AddStringColumn("strcol1", "this is stringcol1", new string[] { "1", "2" });
    source.AddStringColumn("strcol2", "", new string[] { "", "hallo" });
    source.AddNumericColumn("numcol", "", new double[] { 1.0, 2.0 });
    source.AddMultiNumericColumn("multnumcol", "this is multnumcol",
        new double[][] { new double[] { -2.0, 2.0 }, new double[0] });
    source.AddCategoryColumn("catcol", "",
        new string[][] { new string[] { "cat1", "cat1.1" }, new string[] { "cat2", "cat1" } });

    // Serialize into memory and keep the produced text.
    string serialized;
    using (MemoryStream buffer = new MemoryStream()) {
        using (StreamWriter output = new StreamWriter(buffer)) {
            PerseusUtils.WriteDataWithAnnotationColumns(source, output);
            // Flush so the bytes reach the memory stream before it is copied.
            output.Flush();
            serialized = Encoding.UTF8.GetString(buffer.ToArray());
        }
    }

    // Parse the text again; the reader factory hands out a fresh reader over the same bytes on each call.
    IMatrixData parsed = PerseusFactory.CreateMatrixData();
    ProcessInfo info = new ProcessInfo(new Settings(), s => { }, p => { }, 1, e => { });
    PerseusUtils.ReadMatrix(parsed, info,
        () => new StreamReader(new MemoryStream(Encoding.UTF8.GetBytes(serialized))),
        "matrix1", '\t');

    // Only the annotation-column view of the parsed matrix is inspected.
    IDataWithAnnotationColumns roundTripped = parsed;
    Assert.AreEqual(2, roundTripped.RowCount);
    Assert.AreEqual(2, roundTripped.StringColumnCount);
    Assert.AreEqual(1, roundTripped.NumericColumnCount);
    Assert.AreEqual(1, roundTripped.CategoryColumnCount);
    Assert.AreEqual(1, roundTripped.MultiNumericColumnCount);

    int strcol2Index = roundTripped.StringColumnNames.FindIndex(col => col.Equals("strcol2"));
    Assert.AreEqual("hallo", roundTripped.StringColumns[strcol2Index][1]);
}
/// <summary>
/// Obtains annotation columns through <c>ProcessDataAddAnnotation</c> and appends them
/// to <paramref name="mdata"/>: category columns first, then text columns, then numeric
/// columns. Every column is added with an empty description.
/// </summary>
/// <param name="mdata">Data set that supplies the row count and receives the new columns.</param>
/// <param name="annotationProvider">Forwarded to <c>GetBaseIds</c> and <c>ProcessDataAddAnnotation</c>.</param>
/// <param name="para">Forwarded to <c>GetBaseIds</c> and <c>ProcessDataAddAnnotation</c>.</param>
/// <param name="processInfo">Forwarded to <c>ProcessDataAddAnnotation</c>.</param>
public static void ProcessData(IDataWithAnnotationColumns mdata, IAnnotationProvider annotationProvider,
    Parameters para, ProcessInfo processInfo) {
    string[] baseIds = GetBaseIds(mdata, annotationProvider, para);
    bool succeeded = ProcessDataAddAnnotation(mdata.RowCount, annotationProvider, para, baseIds, processInfo,
        out string[] columnNames,
        out int[] categoryNameIndices, out int[] textNameIndices, out int[] numericNameIndices,
        out string[][][] categoryColumns, out string[][] textColumns, out double[][] numericColumns);

    // Nothing is added to mdata when the lookup reports failure.
    if (succeeded) {
        // The k-th column of each kind takes its name from columnNames[<kind>NameIndices[k]].
        int k = 0;
        foreach (string[][] categoryColumn in categoryColumns) {
            mdata.AddCategoryColumn(columnNames[categoryNameIndices[k]], "", categoryColumn);
            k++;
        }

        k = 0;
        foreach (string[] textColumn in textColumns) {
            mdata.AddStringColumn(columnNames[textNameIndices[k]], "", textColumn);
            k++;
        }

        k = 0;
        foreach (double[] numericColumn in numericColumns) {
            mdata.AddNumericColumn(columnNames[numericNameIndices[k]], "", numericColumn);
            k++;
        }
    }
}
/// <summary>
/// Applies <paramref name="regex"/> to every cell of string column <paramref name="col"/>
/// and stores the transformed values either in a new column or in place.
/// </summary>
/// <param name="mdata">Data set whose string column is read and updated.</param>
/// <param name="regex">Pattern applied to each cell (or to each ';'-separated part of it).</param>
/// <param name="replacement">
/// Replacement pattern passed to <c>Regex.Replace</c>. When null or empty, the text of
/// capture group 1 of the first match is taken instead (empty when nothing matched).
/// </param>
/// <param name="col">Index of the string column to process.</param>
/// <param name="keepColumns">
/// If true, the result is appended as a new string column that reuses the source column's
/// name (with a null description); otherwise the source column is overwritten.
/// </param>
/// <param name="semicolons">
/// If true, each cell is split on ';', every part is processed separately and the parts
/// are re-joined with ';'. If false, the cell is processed as a whole.
/// </param>
private static void ProcessColMatch(IDataWithAnnotationColumns mdata, Regex regex, string replacement, int col,
    bool keepColumns, bool semicolons) {
    // The mode depends only on the replacement argument, so decide it once instead of once per part.
    bool extractGroup = string.IsNullOrEmpty(replacement);

    string[] values = new string[mdata.RowCount];
    for (int row = 0; row < values.Length; row++) {
        string fullString = mdata.StringColumns[col][row];
        // Both branches yield a fresh array, so it is safe to overwrite its elements.
        string[] parts = semicolons ? fullString.Split(';') : new[] { fullString };
        for (int i = 0; i < parts.Length; i++) {
            parts[i] = RegexMatchOrReplace(parts[i]);
        }
        // One join per cell instead of growing the string with '+=' for every part.
        values[row] = string.Join(";", parts);
    }

    if (keepColumns) {
        mdata.AddStringColumn(mdata.StringColumnNames[col], null, values);
    } else {
        mdata.StringColumns[col] = values;
    }

    string RegexMatchOrReplace(string input) {
        return extractGroup
            ? regex.Match(input).Groups[1].ToString()
            : regex.Replace(input, replacement);
    }
}
/// <summary>
/// Builds one new string column per index selected in the "Text columns" parameter and
/// appends them to <paramref name="result"/> with empty descriptions. For each row of
/// <paramref name="mdata1"/>, the non-empty values found in the selected
/// <paramref name="mdata2"/> column at the rows listed in <paramref name="indexMap"/>
/// are concatenated with ';'; a row without any non-empty value becomes "".
/// </summary>
/// <param name="mdata1">Supplies the number of rows of the new columns.</param>
/// <param name="mdata2">Supplies the source string columns and their names.</param>
/// <param name="parameters">Holds the "Text columns" selection (indices into mdata2's string columns).</param>
/// <param name="indexMap">For each row of mdata1, the row indices to read from mdata2.</param>
/// <param name="result">Data set that receives the new string columns.</param>
private static void AddStringColumns(IDataWithAnnotationColumns mdata1, IDataWithAnnotationColumns mdata2,
    Parameters parameters, IList<int[]> indexMap, IDataWithAnnotationColumns result) {
    int[] selected = parameters.GetParam<int[]>("Text columns").Value;
    int columnCount = selected.Length;
    string[][] mergedColumns = new string[columnCount][];
    string[] mergedNames = new string[columnCount];

    // Phase 1: compute every column completely before anything is appended to result.
    for (int c = 0; c < columnCount; c++) {
        string[] sourceColumn = mdata2.StringColumns[selected[c]];
        mergedNames[c] = mdata2.StringColumnNames[selected[c]];
        mergedColumns[c] = new string[mdata1.RowCount];
        for (int row = 0; row < mdata1.RowCount; row++) {
            List<string> nonEmpty = new List<string>();
            foreach (int sourceRow in indexMap[row]) {
                string cell = sourceColumn[sourceRow];
                if (cell.Length == 0) {
                    continue;
                }
                nonEmpty.Add(cell);
            }
            mergedColumns[c][row] = nonEmpty.Count > 0
                ? StringUtils.Concat(";", nonEmpty.ToArray())
                : "";
        }
    }

    // Phase 2: append the finished columns in selection order.
    for (int c = 0; c < columnCount; c++) {
        result.AddStringColumn(mergedNames[c], "", mergedColumns[c]);
    }
}
/// <summary>
/// Replaces every cell of string column <paramref name="col"/> with the text of capture
/// group 1 of the first match of <paramref name="regex"/> (empty when nothing matched).
/// </summary>
/// <param name="mdata">Data set whose string column is read and updated.</param>
/// <param name="regex">Pattern whose first capture group is extracted.</param>
/// <param name="col">Index of the string column to process.</param>
/// <param name="keepColumns">
/// If true, the result is appended as a new string column that reuses the source column's
/// name (with a null description); otherwise the source column is overwritten.
/// </param>
/// <param name="semicolons">
/// If true, each cell is split on ';', every part is matched separately and the extracted
/// texts are re-joined with ';'. If false, the cell is matched as a whole.
/// </param>
private static void ProcessCol(IDataWithAnnotationColumns mdata, Regex regex, int col, bool keepColumns,
    bool semicolons){
    string[] values = new string[mdata.RowCount];
    for (int row = 0; row < values.Length; row++){
        string fullString = mdata.StringColumns[col][row];
        // Both branches yield a fresh array, so its elements can be overwritten in place.
        string[] parts = semicolons ? fullString.Split(';') : new[]{fullString};
        for (int i = 0; i < parts.Length; i++){
            parts[i] = regex.Match(parts[i]).Groups[1].Value;
        }
        // Join once per cell rather than appending with '+=' for every part.
        values[row] = string.Join(";", parts);
    }
    if (keepColumns){
        mdata.AddStringColumn(mdata.StringColumnNames[col], null, values);
    } else{
        mdata.StringColumns[col] = values;
    }
}
/// <summary>
/// Extracts capture group 1 of the first <paramref name="regex"/> match from every cell
/// of string column <paramref name="col"/> (empty text when nothing matched) and stores
/// the results either as a new column or in place of the source column.
/// </summary>
/// <param name="mdata">Data set whose string column is read and updated.</param>
/// <param name="regex">Pattern whose first capture group is extracted.</param>
/// <param name="col">Index of the string column to process.</param>
/// <param name="keepColumns">
/// True to append the result as an additional string column carrying the source column's
/// name and a null description; false to overwrite the source column.
/// </param>
/// <param name="semicolons">
/// True to treat each cell as a ';'-separated list whose entries are matched one by one
/// and re-joined with ';'; false to match the whole cell once.
/// </param>
private static void ProcessCol(IDataWithAnnotationColumns mdata, Regex regex, int col, bool keepColumns,
    bool semicolons) {
    string[] values = new string[mdata.RowCount];
    for (int row = 0; row < values.Length; row++) {
        string fullString = mdata.StringColumns[col][row];
        // Split() and the array initializer both allocate, so the entries may be replaced in place.
        string[] entries = semicolons ? fullString.Split(';') : new[] { fullString };
        for (int i = 0; i < entries.Length; i++) {
            entries[i] = regex.Match(entries[i]).Groups[1].Value;
        }
        // A single join avoids re-copying the partial result for every entry.
        values[row] = string.Join(";", entries);
    }

    if (keepColumns) {
        mdata.AddStringColumn(mdata.StringColumnNames[col], null, values);
    } else {
        mdata.StringColumns[col] = values;
    }
}