/// <summary>
/// Replaces the content of <paramref name="mdata"/> with a table named "Summary".
/// The names of the selected statistics become the expression column names, and the
/// names of the selected input columns are stored in a string column called "Columns".
/// </summary>
/// <param name="mdata">Matrix that is read and then overwritten through <c>SetData</c>.</param>
/// <param name="param">Supplies the "Columns" and "Calculate" multi-choice selections.</param>
/// <param name="supplTables">Not touched by this method.</param>
/// <param name="documents">Not touched by this method.</param>
/// <param name="processInfo">Not touched by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int[] cols = param.GetMultiChoiceParam("Columns").Value;
    HashSet<int> requested = ArrayUtils.ToHashSet(param.GetMultiChoiceParam("Calculate").Value);

    // Indices into SummaryStatisticsRows.procs of the requested statistics, in ascending order.
    List<int> selected = new List<int>();
    for (int p = 0; p < SummaryStatisticsRows.procs.Length; p++) {
        if (requested.Contains(p)) {
            selected.Add(p);
        }
    }

    // One result array per selected statistic, one slot per selected column.
    List<double[]> statValues = new List<double[]>();
    for (int s = 0; s < selected.Count; s++) {
        statValues.Add(new double[cols.Length]);
    }
    for (int c = 0; c < cols.Length; c++) {
        double[] columnValues = GetColumn(cols[c], mdata);
        for (int s = 0; s < selected.Count; s++) {
            statValues[s][c] = SummaryStatisticsRows.procs[selected[s]].Item2(columnValues);
        }
    }

    List<string> statNames = new List<string>();
    foreach (int p in selected) {
        statNames.Add(SummaryStatisticsRows.procs[p].Item1);
    }

    float[,] exVals = GetExVals(statValues);
    string[] colNames = GetColNames(mdata, cols);
    // The former category/numeric rows are passed in the category/numeric column slots.
    mdata.SetData("Summary", statNames, exVals, new List<string>{"Columns"},
        new List<string[]>{colNames}, mdata.CategoryRowNames,
        TransformCategories(mdata, cols, mdata.ExpressionColumnCount), mdata.NumericRowNames,
        TransformNumeric(mdata.NumericRows, cols, mdata.ExpressionColumnCount), new List<string>(),
        new List<double[][]>());
}
/// <summary>
/// For every selected score column, orders the per-row indicator flags by that column's
/// values (via <c>GetOrder</c>) and hands them to <c>CalcCurve</c>, which appends to the
/// output columns. The matrix is then replaced by those output columns only; all
/// annotation columns are emptied.
/// </summary>
/// <param name="data">Matrix that is read and then overwritten through <c>SetData</c>.</param>
/// <param name="param">Supplies "Indicated are", "In column", "Indicator", "Scores",
/// "Large values are good" and "Display quantity".</param>
/// <param name="supplTables">Not touched by this method.</param>
/// <param name="documents">Not touched by this method.</param>
/// <param name="processInfo">Receives an error string when no score column or no display
/// quantity is selected; the matrix is left unchanged in that case.</param>
public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    bool falseAreIndicated = param.GetSingleChoiceParam("Indicated are").Value == 0;
    int catCol = param.GetSingleChoiceParam("In column").Value;
    string word = param.GetStringParam("Indicator").Value;
    int[] scoreColumns = param.GetMultiChoiceParam("Scores").Value;
    if (scoreColumns.Length == 0) {
        processInfo.ErrString = "Please specify at least one column with scores.";
        return;
    }
    bool largeIsGood = param.GetBoolParam("Large values are good").Value;
    int[] showColumns = param.GetMultiChoiceParam("Display quantity").Value;
    if (showColumns.Length == 0) {
        processInfo.ErrString = "Please select at least one quantity to display";
        return;
    }

    bool[] indicator = GetIndicatorColumn(falseAreIndicated, catCol, word, data);
    List<string> curveNames = new List<string>();
    List<float[]> curveColumns = new List<float[]>();
    for (int s = 0; s < scoreColumns.Length; s++) {
        int scoreColumn = scoreColumns[s];
        double[] scores;
        string scoreName;
        // Selection indices address the numeric columns first, then the expression columns.
        if (scoreColumn < data.NumericColumnCount) {
            scores = data.NumericColumns[scoreColumn];
            scoreName = data.NumericColumnNames[scoreColumn];
        } else {
            int expressionIndex = scoreColumn - data.NumericColumnCount;
            scores = ArrayUtils.ToDoubles(data.GetExpressionColumn(expressionIndex));
            scoreName = data.ExpressionColumnNames[expressionIndex];
        }
        int[] order = GetOrder(scores, largeIsGood);
        CalcCurve(ArrayUtils.SubArray(indicator, order), showColumns, scoreName, curveColumns, curveNames);
    }

    float[,] curveMatrix = ToMatrix(curveColumns);
    data.SetData(data.Name, curveNames, curveMatrix, new List<string>(), new List<string[]>(),
        new List<string>(), new List<string[][]>(), new List<string>(), new List<double[]>(),
        new List<string>(), new List<double[][]>());
}
/// <summary>
/// Transposes the expression matrix of <paramref name="mdata"/>. The entries of the chosen
/// string column become the new expression column names, and the former expression column
/// names are stored in a single string column called "Name". All other annotation
/// columns are emptied.
/// </summary>
/// <param name="mdata">Matrix that is read and then overwritten through <c>SetData</c>.</param>
/// <param name="param">Supplies the "New column names" single-choice selection.</param>
/// <param name="supplTables">Not touched by this method.</param>
/// <param name="processInfo">Not touched by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ProcessInfo processInfo) {
    int nameCol = param.GetSingleChoiceParam("New column names").Value;
    float[,] transposed = ArrayUtils.Transpose(mdata.ExpressionValues);
    // NOTE(review): nameCol is used as an index without a range check - confirm it can never be negative here.
    List<string> newColumnNames = new List<string>(mdata.StringColumns[nameCol]);
    string[] formerColumnNames = mdata.ExpressionColumnNames.ToArray();
    List<string> stringColumnNames = new List<string>{"Name"};
    List<string[]> stringColumns = new List<string[]>{formerColumnNames};
    mdata.SetData(mdata.Name, newColumnNames, transposed, stringColumnNames, stringColumns,
        new List<string>(), new List<string[][]>(), new List<string>(), new List<double[]>(),
        new List<string>(), new List<double[][]>());
}
/// <summary>
/// Transposes the expression values, the imputation flags and the quality values of
/// <paramref name="mdata"/> and rewrites the matrix through <c>SetData</c>.
/// New column names come from the chosen string column, made unique through
/// <c>GetNextAvailableName</c>. When no column is chosen (negative index) the names
/// "Column1", "Column2", ... are generated, one per row.
/// The former expression column names are stored in a string column called "Name".
/// </summary>
/// <param name="mdata">Matrix that is read and then overwritten.</param>
/// <param name="param">Supplies the "New column names" single-choice selection.</param>
/// <param name="supplTables">Not touched by this method.</param>
/// <param name="documents">Not touched by this method.</param>
/// <param name="processInfo">Not touched by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int nameCol = param.GetSingleChoiceParam("New column names").Value;
    List<string> colNames = new List<string>();
    if (nameCol < 0) {
        int rowCount = mdata.RowCount;
        for (int row = 1; row <= rowCount; row++) {
            colNames.Add("Column" + row);
        }
    } else {
        HashSet<string> taken = new HashSet<string>();
        string[] candidates = mdata.StringColumns[nameCol];
        for (int i = 0; i < candidates.Length; i++) {
            string unique = GetNextAvailableName(candidates[i], taken);
            taken.Add(unique);
            colNames.Add(unique);
        }
    }

    string[] formerColumnNames = mdata.ExpressionColumnNames.ToArray();
    // The same list instance is deliberately passed for both the 3rd and 4th argument,
    // exactly as before (presumably names and descriptions - verify against SetData).
    mdata.SetData(
        mdata.Name, mdata.Description, colNames, colNames,
        ArrayUtils.Transpose(mdata.ExpressionValues),
        ArrayUtils.Transpose(mdata.IsImputed),
        ArrayUtils.Transpose(mdata.QualityValues),
        mdata.QualityName, mdata.QualityBiggerIsBetter,
        new List<string>{"Name"}, new List<string>{"Name"}, new List<string[]>{formerColumnNames},
        mdata.CategoryRowNames, mdata.CategoryRowDescriptions, GetCategoryRows(mdata),
        mdata.NumericRowNames, mdata.NumericRowDescriptions, mdata.NumericRows,
        new List<string>(), new List<string>(), new List<double[][]>(),
        mdata.CategoryColumnNames, mdata.CategoryColumnDescriptions, GetCategoryColumns(mdata),
        mdata.NumericColumnNames, mdata.NumericColumnDescriptions, mdata.NumericColumns);
}
/// <summary>
/// Reads a tab-separated file into <paramref name="matrixData"/>.
/// The first line of the file is read and discarded (presumably the header row), lines
/// recognised by <c>TabSep.IsCommentLine</c> are skipped, and every other line supplies
/// one row. Fields that are missing or cannot be parsed become NaN (numbers), an empty
/// array (category / multi-numeric) or an empty string (text).
/// </summary>
/// <param name="colNames">Names of all columns of the file, addressed by the index lists.</param>
/// <param name="colDescriptions">Descriptions of all columns, or <c>null</c> when there are none.</param>
/// <param name="expressionColIndices">File column indices loaded as expression columns.</param>
/// <param name="catColIndices">File column indices loaded as category columns (';'-separated, sorted).</param>
/// <param name="numColIndices">File column indices loaded as numeric columns.</param>
/// <param name="textColIndices">File column indices loaded as string columns.</param>
/// <param name="multiNumColIndices">File column indices loaded as multi-numeric columns (';'-separated).</param>
/// <param name="filename">Path of the file; also used as matrix name and origin.</param>
/// <param name="matrixData">Receives the loaded data.</param>
/// <param name="annotationRows">Annotation rows, split into category and numeric rows by <c>SplitAnnotRows</c>.</param>
/// <param name="progress">Called for every line read after the first with a percentage derived from the row count.</param>
/// <param name="status">Called with "Reading data" at the start and "" at the end.</param>
private static void LoadData(IList<string> colNames, IList<string> colDescriptions,
    IList<int> expressionColIndices, IList<int> catColIndices, IList<int> numColIndices,
    IList<int> textColIndices, IList<int> multiNumColIndices, string filename, IMatrixData matrixData,
    IDictionary<string, string[]> annotationRows, Action<int> progress, Action<string> status) {
    Dictionary<string, string[]> catAnnotatRows;
    Dictionary<string, string[]> numAnnotatRows;
    status("Reading data");
    SplitAnnotRows(annotationRows, out catAnnotatRows, out numAnnotatRows);
    int nrows = TabSep.GetRowCount(filename, 0, commentPrefix, commentPrefixExceptions);

    // Pre-allocate one storage array per requested column.
    float[,] expressionValues = new float[nrows, expressionColIndices.Count];
    List<string[][]> categoryAnnotation = new List<string[][]>();
    for (int i = 0; i < catColIndices.Count; i++) {
        categoryAnnotation.Add(new string[nrows][]);
    }
    List<double[]> numericAnnotation = new List<double[]>();
    for (int i = 0; i < numColIndices.Count; i++) {
        numericAnnotation.Add(new double[nrows]);
    }
    List<double[][]> multiNumericAnnotation = new List<double[][]>();
    for (int i = 0; i < multiNumColIndices.Count; i++) {
        multiNumericAnnotation.Add(new double[nrows][]);
    }
    List<string[]> stringAnnotation = new List<string[]>();
    for (int i = 0; i < textColIndices.Count; i++) {
        stringAnnotation.Add(new string[nrows]);
    }

    // NOTE(review): the TryParse calls below use the current culture - confirm that is intended for data files.
    // The using block guarantees the file handle is released even when reading throws.
    using (StreamReader reader = new StreamReader(filename)) {
        reader.ReadLine();
        int count = 0;
        string line;
        while ((line = reader.ReadLine()) != null) {
            // Guard against an integer division by zero when no data rows were counted.
            if (nrows > 0) {
                progress((100*(count + 1))/nrows);
            }
            if (TabSep.IsCommentLine(line, commentPrefix, commentPrefixExceptions)) {
                continue;
            }
            string[] w = line.Split('\t');
            for (int i = 0; i < expressionColIndices.Count; i++) {
                int col = expressionColIndices[i];
                float parsed;
                if (col < w.Length && float.TryParse(StringUtils.RemoveWhitespace(w[col]), out parsed)) {
                    expressionValues[count, i] = parsed;
                } else {
                    expressionValues[count, i] = float.NaN;
                }
            }
            for (int i = 0; i < multiNumColIndices.Count; i++) {
                int col = multiNumColIndices[i];
                if (col >= w.Length) {
                    multiNumericAnnotation[i][count] = new double[0];
                    continue;
                }
                string[] parts = SplitQuotedList(w[col]);
                double[] values = new double[parts.Length];
                for (int j = 0; j < parts.Length; j++) {
                    double parsed;
                    values[j] = double.TryParse(parts[j], out parsed) ? parsed : double.NaN;
                }
                multiNumericAnnotation[i][count] = values;
            }
            for (int i = 0; i < catColIndices.Count; i++) {
                int col = catColIndices[i];
                if (col >= w.Length) {
                    categoryAnnotation[i][count] = new string[0];
                    continue;
                }
                string[] terms = SplitQuotedList(w[col]);
                Array.Sort(terms);
                categoryAnnotation[i][count] = terms;
            }
            for (int i = 0; i < numColIndices.Count; i++) {
                int col = numColIndices[i];
                double parsed;
                if (col < w.Length && double.TryParse(w[col].Trim(), out parsed)) {
                    numericAnnotation[i][count] = parsed;
                } else {
                    numericAnnotation[i][count] = double.NaN;
                }
            }
            for (int i = 0; i < textColIndices.Count; i++) {
                int col = textColIndices[i];
                stringAnnotation[i][count] = col >= w.Length
                    ? ""
                    : RemoveSplitWhitespace(RemoveQuotes(w[col].Trim()));
            }
            count++;
        }
    }

    string[] columnNames = ArrayUtils.SubArray(colNames, expressionColIndices);
    string[] catColnames = ArrayUtils.SubArray(colNames, catColIndices);
    string[] numColnames = ArrayUtils.SubArray(colNames, numColIndices);
    string[] multiNumColnames = ArrayUtils.SubArray(colNames, multiNumColIndices);
    string[] textColnames = ArrayUtils.SubArray(colNames, textColIndices);
    matrixData.SetData(filename, RemoveQuotes(columnNames), expressionValues, RemoveQuotes(textColnames),
        stringAnnotation, RemoveQuotes(catColnames), categoryAnnotation, RemoveQuotes(numColnames),
        numericAnnotation, RemoveQuotes(multiNumColnames), multiNumericAnnotation);

    if (colDescriptions != null) {
        string[] columnDesc = ArrayUtils.SubArray(colDescriptions, expressionColIndices);
        string[] catColDesc = ArrayUtils.SubArray(colDescriptions, catColIndices);
        string[] numColDesc = ArrayUtils.SubArray(colDescriptions, numColIndices);
        string[] multiNumColDesc = ArrayUtils.SubArray(colDescriptions, multiNumColIndices);
        string[] textColDesc = ArrayUtils.SubArray(colDescriptions, textColIndices);
        matrixData.ExpressionColumnDescriptions = new List<string>(columnDesc);
        matrixData.NumericColumnDescriptions = new List<string>(numColDesc);
        matrixData.CategoryColumnDescriptions = new List<string>(catColDesc);
        matrixData.StringColumnDescriptions = new List<string>(textColDesc);
        matrixData.MultiNumericColumnDescriptions = new List<string>(multiNumColDesc);
    }

    // Annotation rows are restricted to the expression columns; the key serves as name and description.
    foreach (string key in ArrayUtils.GetKeys(catAnnotatRows)) {
        string[] svals = ArrayUtils.SubArray(catAnnotatRows[key], expressionColIndices);
        string[][] cat = new string[svals.Length][];
        for (int i = 0; i < cat.Length; i++) {
            string s = svals[i].Trim();
            cat[i] = s.Length > 0 ? s.Split(';') : new string[0];
        }
        matrixData.AddCategoryRow(key, key, cat);
    }
    foreach (string key in ArrayUtils.GetKeys(numAnnotatRows)) {
        string[] svals = ArrayUtils.SubArray(numAnnotatRows[key], expressionColIndices);
        double[] num = new double[svals.Length];
        for (int i = 0; i < num.Length; i++) {
            // double.TryParse writes 0 to its out argument on failure, so the NaN fallback
            // has to be chosen from the return value instead of being pre-assigned.
            double parsed;
            num[i] = double.TryParse(svals[i].Trim(), out parsed) ? parsed : double.NaN;
        }
        matrixData.AddNumericRow(key, key, num);
    }
    matrixData.Origin = filename;
    status("");
}

/// <summary>
/// Trims a raw field, strips one pair of enclosing double quotes and then one pair of
/// enclosing single quotes, and splits what is left at ';'. An empty remainder yields an
/// empty array.
/// </summary>
private static string[] SplitQuotedList(string field) {
    string q = field.Trim();
    if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"') {
        q = q.Substring(1, q.Length - 2);
    }
    if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\'') {
        q = q.Substring(1, q.Length - 2);
    }
    return q.Length == 0 ? new string[0] : q.Split(';');
}
/// <summary>
/// Expands the selected multi-numeric and string columns so that every entry gets its own
/// row. A source row with n entries (as reported by <c>GetEntryCount</c>) is written n
/// times, or once when it has no entries. All other columns are repeated unchanged on each
/// copy. Afterwards the selected multi-numeric columns are converted with <c>Transform</c>
/// and appended to the numeric columns.
/// </summary>
/// <param name="mdata">Matrix that is read and then overwritten through <c>SetData</c>.</param>
/// <param name="param1">Supplies "Multi-numeric columns" and "String columns"; both
/// selection arrays are sorted in place.</param>
/// <param name="supplTables">Not touched by this method.</param>
/// <param name="processInfo">Receives an error string when nothing is selected or when
/// <c>GetEntryCount</c> reports an error; the matrix is left unchanged in that case.</param>
public void ProcessData(IMatrixData mdata, Parameters param1, ref IMatrixData[] supplTables,
    ProcessInfo processInfo) {
    int[] multiNumCols = param1.GetMultiChoiceParam("Multi-numeric columns").Value;
    Array.Sort(multiNumCols);
    int[] stringCols = param1.GetMultiChoiceParam("String columns").Value;
    Array.Sort(stringCols);
    if (multiNumCols.Length + stringCols.Length == 0) {
        processInfo.ErrString = "Please select some columns.";
        return;
    }
    HashSet<int> expandMultiNum = new HashSet<int>(multiNumCols);
    HashSet<int> expandString = new HashSet<int>(stringCols);

    // Allocate the output columns at their final (expanded) length.
    int newRowCount = GetNewRowCount(mdata, multiNumCols, stringCols);
    float[,] expVals = new float[newRowCount, mdata.ExpressionColumnCount];
    List<string[]> stringC = new List<string[]>();
    for (int k = 0; k < mdata.StringColumnCount; k++) {
        stringC.Add(new string[newRowCount]);
    }
    List<double[]> numC = new List<double[]>();
    for (int k = 0; k < mdata.NumericColumnCount; k++) {
        numC.Add(new double[newRowCount]);
    }
    List<string[][]> catC = new List<string[][]>();
    for (int k = 0; k < mdata.CategoryColumnCount; k++) {
        catC.Add(new string[newRowCount][]);
    }
    List<double[][]> multiNumC = new List<double[][]>();
    for (int k = 0; k < mdata.MultiNumericColumnCount; k++) {
        multiNumC.Add(new double[newRowCount][]);
    }

    int offset = 0; // index of the first output row that belongs to the current source row
    for (int row = 0; row < mdata.RowCount; row++) {
        string err;
        int entryCount = GetEntryCount(row, mdata, multiNumCols, stringCols, out err);
        if (err != null) {
            processInfo.ErrString = err;
            return;
        }
        bool empty = entryCount == 0;
        int copies = Math.Max(entryCount, 1);

        // Columns that are never expanded: repeat the source value on every copy.
        for (int k = 0; k < mdata.ExpressionColumnCount; k++) {
            float value = mdata[row, k];
            for (int j = 0; j < copies; j++) {
                expVals[offset + j, k] = value;
            }
        }
        for (int k = 0; k < mdata.NumericColumnCount; k++) {
            double value = mdata.NumericColumns[k][row];
            for (int j = 0; j < copies; j++) {
                numC[k][offset + j] = value;
            }
        }
        for (int k = 0; k < mdata.CategoryColumnCount; k++) {
            string[] value = mdata.CategoryColumns[k][row];
            for (int j = 0; j < copies; j++) {
                catC[k][offset + j] = value;
            }
        }

        for (int k = 0; k < mdata.MultiNumericColumnCount; k++) {
            if (!expandMultiNum.Contains(k)) {
                for (int j = 0; j < copies; j++) {
                    multiNumC[k][offset + j] = mdata.MultiNumericColumns[k][row];
                }
            } else if (empty) {
                multiNumC[k][offset] = new double[0];
            } else {
                // One single-element array per entry of the source cell.
                double[] entries = mdata.MultiNumericColumns[k][row];
                for (int j = 0; j < copies; j++) {
                    multiNumC[k][offset + j] = new[]{entries[j]};
                }
            }
        }

        for (int k = 0; k < mdata.StringColumnCount; k++) {
            if (!expandString.Contains(k)) {
                for (int j = 0; j < copies; j++) {
                    stringC[k][offset + j] = mdata.StringColumns[k][row];
                }
            } else if (empty) {
                stringC[k][offset] = "";
            } else {
                string[] entries = mdata.StringColumns[k][row].Split(';');
                for (int j = 0; j < copies; j++) {
                    stringC[k][offset + j] = entries[j];
                }
            }
        }
        offset += copies;
    }

    // Move the expanded multi-numeric columns over to the numeric columns.
    int[] multiNumComplement = ArrayUtils.Complement(multiNumCols, mdata.MultiNumericColumnCount);
    List<double[][]> expanded = ArrayUtils.SubList(multiNumC, multiNumCols);
    multiNumC = ArrayUtils.SubList(multiNumC, multiNumComplement);
    for (int t = 0; t < expanded.Count; t++) {
        numC.Add(Transform(expanded[t]));
    }

    mdata.SetData(mdata.Name, mdata.ExpressionColumnNames, expVals, mdata.StringColumnNames, stringC,
        mdata.CategoryColumnNames, catC,
        new List<string>(ArrayUtils.Concat(mdata.NumericColumnNames,
            ArrayUtils.SubList(mdata.MultiNumericColumnNames, multiNumCols))),
        numC,
        new List<string>(ArrayUtils.SubArray(mdata.MultiNumericColumnNames, multiNumComplement)),
        multiNumC);
}
/// <summary>
/// Fills <paramref name="data"/> with a table named "Count" that has one row per entry of
/// <paramref name="result"/> whose total count is at least <paramref name="minCount"/>.
/// The table has no expression columns, the category columns "Type" and "Name", and the
/// numeric columns "Count" and "Percentage of total" (relative to <c>data.RowCount</c>,
/// rounded to two decimals). When <paramref name="selection"/> is not null the columns
/// "Selection count" and "Selection percentage" (relative to the entry's total count,
/// rounded to one decimal) are added.
/// </summary>
/// <param name="result">Source of the per-entry type, name and counts.</param>
/// <param name="data">Matrix whose row count is read and whose content is then overwritten.</param>
/// <param name="minCount">Entries with a smaller total count are left out.</param>
/// <param name="selection">Only tested against null to decide whether the selection columns are written.</param>
private static void CreateMatrixData(CountingResult result, IMatrixData data, int minCount,
    IEnumerable selection) {
    bool hasSelection = selection != null;
    List<string[]> types = new List<string[]>();
    List<string[]> names = new List<string[]>();
    List<double> totals = new List<double>();
    List<double> totalPercents = new List<double>();
    List<double> selectionCounts = new List<double>();
    List<double> selectionPercents = new List<double>();

    for (int i = 0; i < result.Count; i++) {
        int total = result.GetTotalCountAt(i);
        if (total >= minCount) {
            types.Add(new[]{result.GetType1At(i)});
            names.Add(new[]{result.GetName1At(i)});
            totals.Add(total);
            totalPercents.Add(Math.Round(10000.0*total/data.RowCount)/100.0);
            if (hasSelection) {
                int selected = result.GetSelectCountAt(i);
                selectionCounts.Add(selected);
                selectionPercents.Add(Math.Round(1000.0*selected/total)/10.0);
            }
        }
    }

    // The output carries annotation columns only, hence zero expression columns.
    float[,] noExpression = new float[types.Count, 0];
    List<string> catColNames = new List<string>{"Type", "Name"};
    List<string[][]> catCols = new List<string[][]>();
    catCols.Add(types.ToArray());
    catCols.Add(names.ToArray());
    List<string> numColNames = new List<string>{"Count", "Percentage of total"};
    List<double[]> numCols = new List<double[]>();
    numCols.Add(totals.ToArray());
    numCols.Add(totalPercents.ToArray());
    if (hasSelection) {
        numCols.Add(selectionCounts.ToArray());
        numCols.Add(selectionPercents.ToArray());
        numColNames.Add("Selection count");
        numColNames.Add("Selection percentage");
    }

    data.SetData("Count", new List<string>(), noExpression, new List<string>(), new List<string[]>(),
        catColNames, catCols, numColNames, numCols, new List<string>(), new List<double[][]>());
}