/// <summary>
/// Gene enrichment analysis for both Basic PECA and PECA-N: loads the tab-separated
/// file <c>Goterms.txt</c> from <paramref name="workingDir"/> into a new matrix.
/// Every column is first loaded as a text column; columns whose name does not contain
/// "id", "name" or "members" are then handed to <c>StringToNumerical</c>.
/// </summary>
/// <param name="mdata">Only used as a factory for the returned matrix (via <c>CreateNewInstance</c>).</param>
/// <param name="workingDir">Directory that contains <c>Goterms.txt</c>.</param>
/// <param name="option">
/// 0 appends "_Degradation" to the matrix name, 1 appends "_Synthesis";
/// any other value leaves the name as "GSA".
/// </param>
/// <returns>The newly created matrix holding the contents of the file.</returns>
public static IMatrixData GetGOEnr(IMatrixData mdata, string workingDir, int option)
{
    const char separator = '\t';

    // Use the bare file name: a literal ".\" prefix is only a directory reference on
    // Windows; elsewhere Path.Combine would produce a file literally named ".\Goterms.txt".
    string filename = Path.Combine(workingDir, "Goterms.txt");

    string name = "GSA";
    if (option == 0)
    {
        name += "_Degradation";
    }
    else if (option == 1)
    {
        name += "_Synthesis";
    }

    IMatrixData mNew = (IMatrixData)mdata.CreateNewInstance();
    mNew.Clear();
    mNew.Name = name;
    mNew.AltName = name;

    string[] colNames = TabSep.GetColumnNames(filename, 0, PerseusUtils.commentPrefix,
        PerseusUtils.commentPrefixExceptions, null, separator);
    string[][] cols = TabSep.GetColumns(colNames, filename, 0, PerseusUtils.commentPrefix,
        PerseusUtils.commentPrefixExceptions, separator);
    int nrows = TabSep.GetRowCount(filename);

    // No main (expression) columns: the matrix consists of annotation columns only.
    mNew.Values.Init(nrows, 0);
    mNew.SetAnnotationColumns(
        new List<string>(colNames), new List<string>(colNames), new List<string[]>(cols),
        new List<string>(), new List<string>(), new List<string[][]>(),
        new List<string>(), new List<string>(), new List<double[]>(),
        new List<string>(), new List<string>(), new List<double[][]>());

    // The pattern matches a whole name only if "id", "name" or "members" occurs nowhere in it
    // (case-sensitive); such columns are converted to numeric.
    Regex numericReg = new Regex(@"^((?!id|name|members).)*$");
    List<int> numericList = new List<int>();
    for (int i = 0; i < colNames.Length; i++)
    {
        if (numericReg.IsMatch(colNames[i]))
        {
            numericList.Add(i);
        }
    }
    StringToNumerical(numericList, mNew);
    return mNew;
}
/// <summary>
/// Replaces the contents of <paramref name="data"/> with a table named "Count" built from
/// <paramref name="result"/>. Entries whose total count is below <paramref name="minCount"/>
/// are left out.
/// </summary>
/// <param name="result">Counting result providing type, name, total count and selection count per entry.</param>
/// <param name="data">Matrix that is cleared and refilled; its row count (before clearing) is the base of the total percentage.</param>
/// <param name="minCount">Minimum total count an entry needs in order to be reported.</param>
/// <param name="selection">When not null, the two selection columns are added as well; only tested for null here.</param>
private static void CreateMatrixData(CountingResult result, IMatrixData data, int minCount, IEnumerable selection)
{
    bool withSelection = selection != null;

    var typeCells = new List<string[]>();
    var nameCells = new List<string[]>();
    var totals = new List<double>();
    var totalPercentages = new List<double>();
    var selectionCounts = new List<double>();
    var selectionPercentages = new List<double>();

    for (int entry = 0; entry < result.Count; entry++)
    {
        int total = result.GetTotalCountAt(entry);
        if (total >= minCount)
        {
            typeCells.Add(new[] { result.GetType1At(entry) });
            nameCells.Add(new[] { result.GetName1At(entry) });
            totals.Add(total);
            // Percentage of all rows, kept to two decimals.
            double scaledTotal = Math.Round(10000.0 * total / data.RowCount);
            totalPercentages.Add(scaledTotal / 100.0);
            if (withSelection)
            {
                int selected = result.GetSelectCountAt(entry);
                selectionCounts.Add(selected);
                // Percentage of this entry's total, kept to one decimal.
                double scaledSelected = Math.Round(1000.0 * selected / total);
                selectionPercentages.Add(scaledSelected / 10.0);
            }
        }
    }

    var categoryNames = new List<string> { "Type", "Name" };
    var categoryColumns = new List<string[][]>();
    categoryColumns.Add(typeCells.ToArray());
    categoryColumns.Add(nameCells.ToArray());

    var numericNames = new List<string> { "Count", "Percentage of total" };
    var numericColumns = new List<double[]>();
    numericColumns.Add(totals.ToArray());
    numericColumns.Add(totalPercentages.ToArray());
    if (withSelection)
    {
        numericColumns.Add(selectionCounts.ToArray());
        numericColumns.Add(selectionPercentages.ToArray());
        numericNames.Add("Selection count");
        numericNames.Add("Selection percentage");
    }

    // One row per reported entry, zero main columns.
    double[,] emptyValues = new double[typeCells.Count, 0];

    data.Clear();
    data.Name = "Count";
    data.ColumnNames = new List<string>();
    data.Values.Set(emptyValues);
    data.SetAnnotationColumns(
        new List<string>(), new List<string[]>(),
        categoryNames, categoryColumns,
        numericNames, numericColumns,
        new List<string>(), new List<double[][]>());
}
/// <summary>
/// Replaces the matrix with a table named "Summary": for every column chosen in the
/// "Columns" parameter, each statistic chosen in the "Calculate" parameter is evaluated
/// (statistics come from <c>SummaryStatisticsRows.procs</c>). The new main columns are named
/// after the chosen statistics, and a text column "Columns" receives the result of
/// <c>GetColNames</c>.
/// </summary>
/// <param name="mdata">Matrix that is read, cleared and refilled in place.</param>
/// <param name="param">Supplies the "Columns" and "Calculate" index selections.</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo)
{
    int[] columnIndices = param.GetParam<int[]>("Columns").Value;
    HashSet<int> requested = ArrayUtils.ToHashSet(param.GetParam<int[]>("Calculate").Value);

    // Indices into SummaryStatisticsRows.procs that were requested, in ascending order.
    int procCount = SummaryStatisticsRows.procs.Length;
    List<int> chosen = new List<int>();
    for (int p = 0; p < procCount; p++)
    {
        if (requested.Contains(p))
        {
            chosen.Add(p);
        }
    }

    // One result vector per chosen statistic, with one slot per selected column.
    List<double[]> statValues = new List<double[]>();
    List<string> statNames = new List<string>();
    foreach (int p in chosen)
    {
        statValues.Add(new double[columnIndices.Length]);
        statNames.Add(SummaryStatisticsRows.procs[p].Item1);
    }

    for (int c = 0; c < columnIndices.Length; c++)
    {
        double[] columnData = GetColumn(columnIndices[c], mdata);
        for (int k = 0; k < chosen.Count; k++)
        {
            statValues[k][c] = SummaryStatisticsRows.procs[chosen[k]].Item2(columnData);
        }
    }

    double[,] summaryValues = GetExVals(statValues);
    string[] selectedColumnNames = GetColNames(mdata, columnIndices);

    // Capture everything derived from the row annotations before the matrix is cleared.
    var catRowNames = mdata.CategoryRowNames;
    var catRowsAsColumns = TransformCategories(mdata, columnIndices, mdata.ColumnCount);
    var numRowNames = mdata.NumericRowNames;
    var numRowsAsColumns = TransformNumeric(mdata.NumericRows, columnIndices, mdata.ColumnCount);

    mdata.Clear();
    mdata.Name = "Summary";
    mdata.ColumnNames = new List<string>(statNames);
    mdata.ColumnDescriptions = new List<string>(statNames);
    mdata.Values.Set(summaryValues);
    mdata.SetAnnotationColumns(
        new List<string> { "Columns" },
        new List<string[]> { selectedColumnNames },
        catRowNames, catRowsAsColumns,
        numRowNames, numRowsAsColumns,
        new List<string>(), new List<double[][]>());
}
/// <summary>
/// Runs an external script on the matrix: the matrix is written to a temporary file, the
/// executable selected by the <c>InterpreterLabel</c> parameter is started with the code file,
/// the extra command line arguments and the input/output/supplementary file paths, and on
/// success the matrix and the supplementary tables are read back from the output files.
/// </summary>
/// <param name="mdata">Matrix that is exported, cleared and then refilled from the output file.</param>
/// <param name="param">Supplies the executable, the code file and additional arguments.</param>
/// <param name="supplTables">Supplementary tables; created here (<c>NumSupplTables</c> entries) when null.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives the error string on failure; its status callback is passed to the process runner.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo)
{
    var remoteExe = param.GetParam<string>(InterpreterLabel).Value;
    if (string.IsNullOrWhiteSpace(remoteExe))
    {
        processInfo.ErrString = Resources.RemoteExeNotSpecified;
        return;
    }

    // Resolve the code file before any temporary file exists, so that a missing
    // script does not leave files behind.
    if (!TryGetCodeFile(param, out string codeFile))
    {
        processInfo.ErrString = $"Code file '{codeFile}' was not found";
        return;
    }

    if (supplTables == null)
    {
        supplTables = Enumerable.Range(0, NumSupplTables).Select(i => PerseusFactory.CreateMatrixData()).ToArray();
    }

    string inFile = null;
    string outFile = null;
    string[] suppFiles = new string[0];
    try
    {
        inFile = Path.GetTempFileName();
        PerseusUtils.WriteMatrixToFile(mdata, inFile, AdditionalMatrices);
        outFile = Path.GetTempFileName();
        suppFiles = supplTables.Select(i => Path.GetTempFileName()).ToArray();

        var commandLineArguments = GetCommandLineArguments(param);
        // NOTE(review): paths are passed unquoted, as before; a temp directory containing
        // spaces would presumably split the arguments - confirm how Utils.RunProcess parses them.
        var args = $"{codeFile} {commandLineArguments} {inFile} {outFile} {string.Join(" ", suppFiles)}";
        Debug.WriteLine($"executing > {remoteExe} {args}");
        if (Utils.RunProcess(remoteExe, args, processInfo.Status, out string processInfoErrString) != 0)
        {
            processInfo.ErrString = processInfoErrString;
            return;
        }

        mdata.Clear();
        PerseusUtils.ReadMatrixFromFile(mdata, processInfo, outFile, '\t');
        for (int i = 0; i < NumSupplTables; i++)
        {
            PerseusUtils.ReadMatrixFromFile(supplTables[i], processInfo, suppFiles[i], '\t');
        }
    }
    finally
    {
        // Path.GetTempFileName creates the file on disk; remove every file created above.
        // Cleanup is best effort and must not hide the actual outcome of the run.
        foreach (string tempFile in new[] { inFile, outFile }.Concat(suppFiles))
        {
            if (tempFile == null)
            {
                continue;
            }
            try
            {
                File.Delete(tempFile);
            }
            catch (IOException)
            {
            }
            catch (System.UnauthorizedAccessException)
            {
            }
        }
    }
}
/// <summary>
/// Loads the tab-separated output of fdr.exe (so only applicable to PECA CORE and N) into
/// <paramref name="mdata"/>. Modified from the LoadSplit function in
/// PerseusPluginLib/Load/UnstructuredTxtUpload.cs.
/// </summary>
/// <param name="mdata">Matrix that is cleared and refilled from the file.</param>
/// <param name="param">Not used by this method.</param>
/// <param name="dataParam">Provides the <paramref name="expSeries1"/> selection; its length times <paramref name="numOfSeries"/> is taken as the number of expression columns.</param>
/// <param name="filename">Path of the output file; its first line is rewritten by <c>ReplaceFirstLine</c>.</param>
/// <param name="geneName">Name passed to <c>ReplaceFirstLine</c> for the gene name column, which the file itself does not label.</param>
/// <param name="expSeries1">Name of the parameter in <paramref name="dataParam"/> holding the first expression series.</param>
/// <param name="numOfSeries">Number of expression series contained in the file.</param>
public static void GetOutput(IMatrixData mdata, Parameters param, Parameters dataParam, string filename, string geneName, string expSeries1 = "Expression Series 1", int numOfSeries = 2)
{
    const char separator = '\t';

    // The gene name column name is not included in the file, so it has to be put in first.
    ReplaceFirstLine(filename, geneName);

    string[] colNames = TabSep.GetColumnNames(filename, 0, PerseusUtils.commentPrefix,
        PerseusUtils.commentPrefixExceptions, null, separator);
    string[][] cols = TabSep.GetColumns(colNames, filename, 0, PerseusUtils.commentPrefix,
        PerseusUtils.commentPrefixExceptions, separator);
    int nrows = TabSep.GetRowCount(filename);

    mdata.Clear();
    mdata.Name = "PECA Analysis";
    // Everything is loaded as text columns first; conversion happens below.
    mdata.Values.Init(nrows, 0);
    mdata.SetAnnotationColumns(
        new List<string>(colNames), new List<string>(colNames), new List<string[]>(cols),
        new List<string>(), new List<string>(), new List<string[][]>(),
        new List<string>(), new List<string>(), new List<double[]>(),
        new List<string>(), new List<string>(), new List<double[][]>());

    // Be careful with changes of the number of time points in the future.
    int numOfExpCols = numOfSeries * dataParam.GetParam<int[]>(expSeries1).Value.Length;

    // The file is structured so that the first column is the name column, followed by the
    // expression columns and then the numeric ones. The numeric columns are converted
    // before the expression columns.
    int[] expList = Enumerable.Range(1, numOfExpCols).ToArray();
    int[] numericList = Enumerable.Range(numOfExpCols + 1, colNames.Length - numOfExpCols - 1).ToArray();
    StringToNumerical(numericList, mdata);
    StringToExpression(expList, mdata);
}
/// <summary>
/// An auxiliary method for testing the action of regular expressions.
/// Limited to a single column, which should be sufficient for this purpose.
/// Multiple rows are allowed to test the effect of one regex on several strings.
/// </summary>
/// <param name="regexStr">Regular expression handed to <c>ProcessTextColumns</c>.</param>
/// <param name="stringsInit">Initial cell values of the single text column.</param>
/// <param name="stringsExpect">Expected cell values after processing, one per initial value.</param>
private static void TestRegex(string regexStr, string[] stringsInit, string[] stringsExpect)
{
    const string name = "Test";
    IMatrixData[] supplTables = null;
    IDocumentData[] documents = null;
    List<string> stringColumnNames = new List<string> { "Column Name" };
    List<string[]> stringColumnsInit = new List<string[]> { stringsInit };
    List<string[]> stringColumnsExpect = new List<string[]> { stringsExpect };
    Parameters param = new Parameters(new Parameter[]
    {
        new MultiChoiceParam("Columns", new[] { 0 }) { Values = stringColumnNames },
        new StringParam("Regular expression", regexStr),
        new BoolParam("Keep original columns", false),
        new BoolParam("Strings separated by semicolons are independent", false)
    });

    IMatrixData mdata = PerseusFactory.CreateNewMatrixData();
    mdata.Clear();
    mdata.Name = name;
    mdata.SetAnnotationColumns(stringColumnNames, stringColumnsInit, mdata.CategoryColumnNames,
        new List<string[][]>(), mdata.NumericColumnNames, mdata.NumericColumns,
        mdata.MultiNumericColumnNames, mdata.MultiNumericColumns);

    var ptc = new ProcessTextColumns();
    ptc.ProcessData(mdata, param, ref supplTables, ref documents, null);

    const bool ignoreCase = false;
    for (int rowInd = 0; rowInd < stringColumnsInit[0].Length; rowInd++)
    {
        // Expected value goes first, so that a failure message reports the two sides correctly.
        Assert.AreEqual(stringColumnsExpect[0][rowInd], mdata.StringColumns[0][rowInd], ignoreCase);
    }
}
/// <summary>
/// An auxiliary method for testing the action of regular expressions.
/// Limited to a single column, which should be sufficient for this purpose.
/// Multiple rows are allowed to test the effect of one regex on several strings.
/// </summary>
/// <param name="regexStr">Regular expression assigned to the "Regular expression" parameter.</param>
/// <param name="stringsInit">Initial cell values of the single text column.</param>
/// <param name="stringsExpect">Expected cell values after processing, one per initial value.</param>
private static void TestRegex(string regexStr, string[] stringsInit, string[] stringsExpect)
{
    const string name = "Test";
    IMatrixData[] supplTables = null;
    IDocumentData[] documents = null;
    List<string> stringColumnNames = new List<string> { "Column Name" };
    List<string[]> stringColumnsInit = new List<string[]> { stringsInit };
    List<string[]> stringColumnsExpect = new List<string[]> { stringsExpect };

    ProcessTextColumns ptc = new ProcessTextColumns();
    IMatrixData mdata = PerseusFactory.CreateMatrixData();
    mdata.Clear();
    mdata.Name = name;
    mdata.SetAnnotationColumns(stringColumnNames, stringColumnsInit, mdata.CategoryColumnNames,
        new List<string[][]>(), mdata.NumericColumnNames, mdata.NumericColumns,
        mdata.MultiNumericColumnNames, mdata.MultiNumericColumns);

    // Start from the activity's own parameter set and override the values under test.
    string errorStr = string.Empty;
    Parameters param = ptc.GetParameters(mdata, ref errorStr);
    param.GetParam<int[]>("Columns").Value = new[] { 0 };
    param.GetParam<string>("Regular expression").Value = regexStr;
    param.GetParam<bool>("Keep original columns").Value = false;
    param.GetParam<bool>("Strings separated by semicolons are independent").Value = false;

    ptc.ProcessData(mdata, param, ref supplTables, ref documents, null);

    for (int rowInd = 0; rowInd < stringColumnsInit[0].Length; rowInd++)
    {
        // The caller-supplied strings are the expectation; the processed column is the actual
        // result. Passing them in this order keeps failure messages the right way round.
        string expected = stringColumnsExpect[0][rowInd];
        string actual = mdata.StringColumns[0][rowInd];
        // NOTE(review): the comparison ignores case - confirm that this leniency is intended.
        StringAssert.AreEqualIgnoringCase(expected, actual);
    }
}
/// <summary>
/// Runs an external script on the matrix: the matrix and the parameters are written to
/// temporary files, the executable selected by the <c>InterpreterLabel</c> parameter is started
/// with the code file and the three file paths, and on success the matrix is read back from
/// the output file.
/// </summary>
/// <param name="mdata">Matrix that is exported, cleared and then refilled from the output file.</param>
/// <param name="param">Supplies the executable and the code file; also serialized to the parameter file.</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives the error string on failure; its status callback is passed to the process runner.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo)
{
    var remoteExe = param.GetParam<string>(InterpreterLabel).Value;

    string inFile = null;
    string paramFile = null;
    string outFile = null;
    try
    {
        inFile = Path.GetTempFileName();
        PerseusUtils.WriteMatrixToFile(mdata, inFile, false);
        paramFile = Path.GetTempFileName();
        param.ToFile(paramFile);
        outFile = Path.GetTempFileName();

        var codeFile = GetCodeFile(param);
        var args = $"{codeFile} {paramFile} {inFile} {outFile}";
        Debug.WriteLine($"executing > {remoteExe} {args}");
        if (Utils.RunProcess(remoteExe, args, processInfo.Status, out string processInfoErrString) != 0)
        {
            processInfo.ErrString = processInfoErrString;
            return;
        }

        mdata.Clear();
        PerseusUtils.ReadMatrixFromFile(mdata, processInfo, outFile, '\t');
    }
    finally
    {
        // Path.GetTempFileName creates the file on disk; remove every file created above.
        // Cleanup is best effort and must not hide the actual outcome of the run.
        foreach (string tempFile in new[] { inFile, paramFile, outFile })
        {
            if (tempFile == null)
            {
                continue;
            }
            try
            {
                File.Delete(tempFile);
            }
            catch (IOException)
            {
            }
            catch (System.UnauthorizedAccessException)
            {
            }
        }
    }
}