/// <summary>
/// Adds zero-mean Gaussian noise with the configured standard deviation to the selected columns.
/// </summary>
/// <remarks>
/// Entries of the "Columns" parameter below <c>mdata.ColumnCount</c> address main matrix columns;
/// all other entries address numeric columns after subtracting <c>mdata.ColumnCount</c>.
/// Main columns are processed before numeric columns, which fixes the order of random draws.
/// </remarks>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    Random2 noiseSource = new Random2();
    double sigma = param.GetParam<double>("Standard deviation").Value;
    int[] selected = param.GetParam<int[]>("Columns").Value;
    List<int> mainColumns = new List<int>();
    List<int> numericColumns = new List<int>();
    foreach (int index in selected) {
        if (index >= mdata.ColumnCount) {
            // Index lies beyond the main matrix: translate it into a numeric-column index.
            numericColumns.Add(index - mdata.ColumnCount);
            continue;
        }
        mainColumns.Add(index);
    }
    foreach (int col in mainColumns) {
        for (int row = 0; row < mdata.RowCount; row++) {
            mdata.Values.Set(row, col, mdata.Values.Get(row, col) + (float) noiseSource.NextGaussian(0, sigma));
        }
    }
    foreach (int col in numericColumns) {
        for (int row = 0; row < mdata.RowCount; row++) {
            mdata.NumericColumns[col][row] += (float) noiseSource.NextGaussian(0, sigma);
        }
    }
}
/// <summary>
/// Reads the "Matrix access" choice and delegates to <c>UnitVectors</c>.
/// A parameter value of 0 selects row-wise processing; any other value selects the alternative.
/// </summary>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int accessChoice = param.GetParam<int>("Matrix access").Value;
    UnitVectors(accessChoice == 0, mdata);
}
/// <summary>
/// Filters columns based on their number (or percentage) of valid values.
/// </summary>
/// <remarks>
/// Only mode 0 of the "Mode" parameter is supported here: any other mode is rejected with an
/// error in <c>processInfo.ErrString</c> and the matrix is left untouched. The actual filtering is
/// delegated to <c>PerseusPluginUtils.NonzeroFilter1</c> with <c>rows == false</c>.
/// </remarks>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    const bool rows = false;
    bool percentage;
    int minValids = PerseusPluginUtils.GetMinValids(param, out percentage);
    ParameterWithSubParams<int> modeParam = param.GetParamWithSubParams<int>("Mode");
    int modeInd = modeParam.Value;
    if (modeInd != 0) {
        // Group-wise modes are not available for columns; keep the more specific message
        // when no grouping exists at all.
        processInfo.ErrString = mdata.CategoryRowNames.Count == 0
            ? "No grouping is defined."
            : "Group-wise filtering can only be applied to rows.";
        return;
    }
    FilteringMode filterMode;
    double threshold;
    double threshold2;
    PerseusPluginUtils.ReadValuesShouldBeParams(param, out filterMode, out threshold, out threshold2);
    PerseusPluginUtils.NonzeroFilter1(rows, minValids, percentage, mdata, param, threshold, threshold2, filterMode);
}
/// <summary>
/// Reads the access direction and the target interval bounds from the parameters and delegates
/// to <c>MapToInterval1</c>, passing along the thread count from <c>processInfo</c>.
/// </summary>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int accessChoice = param.GetParam<int>("Matrix access").Value;
    // Choice 0 means row-wise access.
    bool rowWise = accessChoice == 0;
    double lowerBound = param.GetParam<double>("Minimum").Value;
    double upperBound = param.GetParam<double>("Maximum").Value;
    int threadCount = processInfo.NumThreads;
    MapToInterval1(rowWise, mdata, lowerBound, upperBound, threadCount);
}
/// <summary>
/// Annotates each row with the modification types for which at least one known site is counted,
/// and adds one numeric "count" column per selected modification.
/// </summary>
/// <remarks>
/// For every selected modification the corresponding file is parsed, the sequence windows are
/// upper-cased and grouped by accession, and <c>CountSites</c> is evaluated per row. If the file
/// name for a modification cannot be resolved, an error is reported and no column is added.
/// </remarks>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    string[] modNames = param.GetParam<int[]>("Modifications").StringValue.Split(new[]{';'},
        StringSplitOptions.RemoveEmptyEntries);
    string[] accessionColumn = mdata.StringColumns[param.GetParam<int>("Uniprot column").Value];
    int rowCount = accessionColumn.Length;
    string[][] accessionsPerRow = new string[rowCount][];
    for (int row = 0; row < rowCount; row++) {
        if (accessionColumn[row].Length > 0) {
            accessionsPerRow[row] = accessionColumn[row].Split(';');
        } else {
            accessionsPerRow[row] = new string[0];
        }
    }
    double[][] siteCounts = new double[modNames.Length][];
    for (int m = 0; m < modNames.Length; m++) {
        string filename = PhosphoSitePlusParser.GetFilenameForMod(modNames[m]);
        if (filename == null) {
            processInfo.ErrString = "File does not exist.";
            return;
        }
        string[] seqWins;
        string[] accs;
        string[] pubmedLtp;
        string[] pubmedMs2;
        string[] cstMs2;
        string[] species;
        PhosphoSitePlusParser.ParseKnownMods(filename, out seqWins, out accs, out pubmedLtp, out pubmedMs2,
            out cstMs2, out species);
        for (int k = 0; k < seqWins.Length; k++) {
            seqWins[k] = seqWins[k].ToUpper();
        }
        // Distinct sequence windows known for each accession.
        Dictionary<string, HashSet<string>> windowsByAccession = new Dictionary<string, HashSet<string>>();
        for (int k = 0; k < accs.Length; k++) {
            HashSet<string> windows;
            if (!windowsByAccession.TryGetValue(accs[k], out windows)) {
                windows = new HashSet<string>();
                windowsByAccession.Add(accs[k], windows);
            }
            windows.Add(seqWins[k]);
        }
        double[] countsForMod = new double[rowCount];
        for (int row = 0; row < rowCount; row++) {
            countsForMod[row] = CountSites(accessionsPerRow[row], windowsByAccession);
        }
        siteCounts[m] = countsForMod;
    }
    string[][] knownMods = new string[rowCount][];
    for (int row = 0; row < knownMods.Length; row++) {
        List<string> present = new List<string>();
        for (int m = 0; m < modNames.Length; m++) {
            if (siteCounts[m][row] > 0) {
                present.Add(modNames[m]);
            }
        }
        present.Sort();
        knownMods[row] = present.ToArray();
    }
    mdata.AddCategoryColumn("Known modifications", "Known modifications", knownMods);
    for (int m = 0; m < modNames.Length; m++) {
        string title = modNames[m] + " count";
        mdata.AddNumericColumn(title, title, siteCounts[m]);
    }
}
/// <summary>
/// Rewrites every main column name by applying the configured regular expression replacement.
/// </summary>
/// <remarks>
/// The "Regex" parameter carries the pattern (item 1) and the replacement string (item 2).
/// </remarks>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    Tuple<Regex, string> rule = param.GetParam<Tuple<Regex, string>>("Regex").Value;
    Regex matcher = rule.Item1;
    string substitute = rule.Item2;
    int index = 0;
    while (index < mdata.ColumnCount) {
        string current = mdata.ColumnNames[index];
        mdata.ColumnNames[index] = matcher.Replace(current, substitute);
        index++;
    }
}
/// <summary>
/// Adds two text columns listing the kinase names and kinase accessions whose known substrate
/// sequence windows match the row's sequence window(s).
/// </summary>
/// <remarks>
/// Kinase-substrate records are grouped by substrate accession. For each row, every accession in
/// the "Uniprot column" is looked up, and a record counts as a hit when its upper-cased window,
/// with the first and last character removed and passed through <c>AddKnownSites.TransformIl</c>,
/// is found among the row's transformed windows by <c>AddKnownSites.Contains</c>.
/// Reports an error and adds nothing when the parser yields no sequence windows.
/// </remarks>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    string[] seqWins;
    string[] subAccs;
    string[] kinases;
    string[] kinAccs;
    string[] species;
    PhosphoSitePlusParser.ParseKinaseSubstrate(out seqWins, out subAccs, out kinases, out kinAccs, out species);
    if (seqWins == null) {
        processInfo.ErrString = "File does not exist.";
        return;
    }
    string[] accessionColumn = mdata.StringColumns[param.GetParam<int>("Uniprot column").Value];
    string[][] accessionsPerRow = new string[accessionColumn.Length][];
    for (int row = 0; row < accessionColumn.Length; row++) {
        if (accessionColumn[row].Length > 0) {
            accessionsPerRow[row] = accessionColumn[row].Split(';');
        } else {
            accessionsPerRow[row] = new string[0];
        }
    }
    string[] windowColumn = mdata.StringColumns[param.GetParam<int>("Sequence window").Value];
    // substrate accession -> (sequence window, kinase name, kinase accession)
    Dictionary<string, List<Tuple<string, string, string>>> recordsBySubstrate =
        new Dictionary<string, List<Tuple<string, string, string>>>();
    for (int k = 0; k < seqWins.Length; k++) {
        string substrate = subAccs[k];
        List<Tuple<string, string, string>> bucket;
        if (!recordsBySubstrate.TryGetValue(substrate, out bucket)) {
            bucket = new List<Tuple<string, string, string>>();
            recordsBySubstrate.Add(substrate, bucket);
        }
        bucket.Add(new Tuple<string, string, string>(seqWins[k], kinases[k], kinAccs[k]));
    }
    string[] kinaseNames = new string[accessionsPerRow.Length];
    string[] kinaseAccessions = new string[accessionsPerRow.Length];
    for (int row = 0; row < kinaseNames.Length; row++) {
        string[] rowWindows = AddKnownSites.TransformIl(windowColumn[row]).Split(';');
        HashSet<string> nameHits = new HashSet<string>();
        HashSet<string> accessionHits = new HashSet<string>();
        foreach (string accession in accessionsPerRow[row]) {
            List<Tuple<string, string, string>> records;
            if (!recordsBySubstrate.TryGetValue(accession, out records)) {
                continue;
            }
            foreach (Tuple<string, string, string> record in records) {
                string window = record.Item1;
                // Compare on the window without its first and last character.
                string core = AddKnownSites.TransformIl(window.ToUpper().Substring(1, window.Length - 2));
                if (!AddKnownSites.Contains(rowWindows, core)) {
                    continue;
                }
                nameHits.Add(record.Item2);
                accessionHits.Add(record.Item3);
            }
        }
        if (nameHits.Count > 0) {
            kinaseNames[row] = StringUtils.Concat(";", ArrayUtils.ToArray(nameHits));
        } else {
            kinaseNames[row] = "";
        }
        if (accessionHits.Count > 0) {
            kinaseAccessions[row] = StringUtils.Concat(";", ArrayUtils.ToArray(accessionHits));
        } else {
            kinaseAccessions[row] = "";
        }
    }
    mdata.AddStringColumn("PhosphoSitePlus kinase", "", kinaseNames);
    mdata.AddStringColumn("PhosphoSitePlus kinase uniprot", "", kinaseAccessions);
}
/// <summary>
/// Compiles the configured regular expression and hands each selected column to
/// <c>ProcessCol</c> together with the "keep original" and "semicolon" options.
/// </summary>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    // The pattern is compiled before the remaining parameters are read.
    Regex pattern = new Regex(param.GetParam<string>("Regular expression").Value);
    int[] selectedColumns = param.GetParam<int[]>("Columns").Value;
    bool keepOriginals = param.GetParam<bool>("Keep original columns").Value;
    bool splitOnSemicolons = param.GetParam<bool>("Strings separated by semicolons are independent").Value;
    for (int k = 0; k < selectedColumns.Length; k++) {
        ProcessCol(mdata, pattern, selectedColumns[k], keepOriginals, splitOnSemicolons);
    }
}
/// <summary>
/// Adds a categorical indicator column marking rows ("+") in which any selected categorical or
/// text column contains one of the search terms, compared case-insensitively via lower-casing.
/// </summary>
/// <remarks>
/// Search terms are lower-cased and trimmed first. With "Inverse" set, the rows that do NOT match
/// are marked instead. When no column name is given, the first raw search term is used as name.
/// Reports an error and adds nothing when no search term is supplied.
/// </remarks>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    string newColumnName = param.GetParam<string>("Name of new column").Value;
    int[] selected = param.GetParam<int[]>("Categories").Value;
    bool invert = param.GetParam<bool>("Inverse").Value;
    int[] categoryCols;
    int[] textCols;
    Split(selected, out categoryCols, out textCols, mdata.CategoryColumnCount);
    string[] rawTerms = param.GetParam<string[]>("Search terms").Value;
    if (rawTerms.Length == 0) {
        processInfo.ErrString = "Please specify one or more search terms.";
        return;
    }
    if (string.IsNullOrEmpty(newColumnName)) {
        newColumnName = rawTerms[0];
    }
    string[] terms = new string[rawTerms.Length];
    for (int t = 0; t < terms.Length; t++) {
        terms[t] = rawTerms[t].ToLower().Trim();
    }
    bool[] hits = new bool[mdata.RowCount];
    foreach (int col in categoryCols) {
        for (int row = 0; row < mdata.RowCount; row++) {
            foreach (string entry in mdata.GetCategoryColumnEntryAt(col, row)) {
                string lowered = entry.ToLower();
                foreach (string term in terms) {
                    if (lowered.Contains(term)) {
                        hits[row] = true;
                        break;
                    }
                }
            }
        }
    }
    foreach (int col in textCols) {
        string[] text = mdata.StringColumns[col];
        for (int row = 0; row < text.Length; row++) {
            string lowered = text[row].ToLower();
            foreach (string term in terms) {
                if (lowered.Contains(term)) {
                    hits[row] = true;
                    break;
                }
            }
        }
    }
    string[][] indicatorColumn = new string[hits.Length][];
    for (int row = 0; row < indicatorColumn.Length; row++) {
        // Inversion flips the mark: a row is flagged exactly when hit and invert differ.
        bool flagged = hits[row] != invert;
        if (flagged) {
            indicatorColumn[row] = new[]{"+"};
        } else {
            indicatorColumn[row] = new string[0];
        }
    }
    mdata.AddCategoryColumn(newColumnName, "", indicatorColumn);
}
/// <summary>
/// Returns a copy of the first input matrix in which one text column has been rewritten through
/// a lookup map built from the second input matrix.
/// </summary>
/// <param name="inputData">Element 0 is the matrix to edit; element 1 is the source for <c>GetMap</c>.</param>
/// <param name="parameters">Supplies "Column in matrix 1 to be edited" and whatever <c>GetMap</c> reads.</param>
/// <returns>The edited clone of the first matrix.</returns>
/// <remarks>
/// The edits are applied to the clone's column rather than to the input's column, so the returned
/// matrix is guaranteed to carry the changes and the input is not modified by this method
/// (provided <c>Clone</c> copies the text columns - NOTE(review): confirm the depth of Clone).
/// </remarks>
public IMatrixData ProcessData(IMatrixData[] inputData, Parameters parameters, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    IMatrixData mdata1 = inputData[0];
    Dictionary<string, string> map = GetMap(inputData[1], parameters);
    IMatrixData result = (IMatrixData) mdata1.Clone();
    int ind = parameters.GetParam<int>("Column in matrix 1 to be edited").Value;
    // Edit the column of the clone that is returned, not the column of the input matrix.
    string[] x = result.StringColumns[ind];
    for (int i = 0; i < x.Length; i++) {
        x[i] = Process(x[i], map);
    }
    return result;
}
/// <summary>
/// Transposes the matrix in place: the main values (and the imputation/quality matrices when
/// present) are transposed, former annotation columns become annotation rows, and former
/// annotation rows become annotation columns.
/// </summary>
/// <remarks>
/// New column names are taken from the text column selected by "New column names", made unique
/// through <c>StringUtils.GetNextAvailableName</c>; when that parameter is negative, the names
/// "Column1", "Column2", ... are generated, one per original row.
/// The former column names are stored as a single text column called "Name".
/// Statement order matters: all annotation-column lists are captured into locals BEFORE
/// <c>SetAnnotationColumns</c> overwrites them, and only afterwards passed to
/// <c>SetAnnotationRows</c>.
/// NOTE(review): the last three arguments of <c>SetAnnotationColumns</c> are empty lists, which
/// looks like the multi-numeric slot being cleared - confirm against the method's signature.
/// </remarks>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) { int nameCol = param.GetParam<int>("New column names").Value; List<string> colNames; if (nameCol >= 0){ HashSet<string> taken = new HashSet<string>(); colNames = new List<string>(); foreach (string n in mdata.StringColumns[nameCol]){ string n1 = StringUtils.GetNextAvailableName(n, taken); taken.Add(n1); colNames.Add(n1); } } else{ colNames = new List<string>(); for (int i = 0; i < mdata.RowCount; i++){ colNames.Add("Column" + (i + 1)); } } List<string> rowNames = mdata.ColumnNames; mdata.Values = mdata.Values.Transpose(); if (mdata.IsImputed != null){ mdata.IsImputed = mdata.IsImputed.Transpose(); } if (mdata.Quality != null){ mdata.Quality = mdata.Quality.Transpose(); } List<string> stringColumnNames = mdata.StringColumnNames; List<string> categoryColumnNames = mdata.CategoryColumnNames; List<string> numericColumnNames = mdata.NumericColumnNames; List<string> multiNumericColumnNames = mdata.MultiNumericColumnNames; List<string> stringColumnDescriptions = mdata.StringColumnDescriptions; List<string> categoryColumnDescriptions = mdata.CategoryColumnDescriptions; List<string> numericColumnDescriptions = mdata.NumericColumnDescriptions; List<string> multiNumericColumnDescriptions = mdata.MultiNumericColumnDescriptions; List<string[]> stringColumns = mdata.StringColumns; List<string[][]> categoryColumns = GetCategoryColumns(mdata); List<double[]> numericColumns = mdata.NumericColumns; List<double[][]> multiNumericColumns = mdata.MultiNumericColumns; mdata.SetAnnotationColumns(new List<string>(new[]{"Name"}), new List<string>(new[]{"Name"}), new List<string[]>(new[]{rowNames.ToArray()}), mdata.CategoryRowNames, mdata.CategoryRowDescriptions, GetCategoryRows(mdata), mdata.NumericRowNames, mdata.NumericRowDescriptions, mdata.NumericRows, new List<string>(), new List<string>(), new List<double[][]>()); mdata.ColumnNames = 
colNames; mdata.SetAnnotationRows(stringColumnNames, stringColumnDescriptions, stringColumns, categoryColumnNames, categoryColumnDescriptions, categoryColumns, numericColumnNames, numericColumnDescriptions, numericColumns, multiNumericColumnNames, multiNumericColumnDescriptions, multiNumericColumns); }
/// <summary>
/// Adds a categorical column that marks with "+" every row whose entry in the chosen text column
/// satisfies <c>Find</c> for the search word and the case / whole-word options.
/// </summary>
/// <remarks>The new column is named "Search: " followed by the name of the searched column.</remarks>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    string needle = param.GetParam<string>("Find what").Value;
    int textColumnIndex = param.GetParam<int>("Look in").Value;
    bool caseSensitive = param.GetParam<bool>("Match case").Value;
    bool wholeWord = param.GetParam<bool>("Match whole word").Value;
    string searchedName = mdata.StringColumnNames[textColumnIndex];
    string[] haystack = mdata.StringColumns[textColumnIndex];
    string[][] marks = new string[mdata.RowCount][];
    for (int row = 0; row < marks.Length; row++) {
        if (Find(haystack[row], needle, caseSensitive, wholeWord)) {
            marks[row] = new[]{"+"};
        } else {
            marks[row] = new string[0];
        }
    }
    string title = "Search: " + searchedName;
    mdata.AddCategoryColumn(title, title, marks);
}
/// <summary>
/// Restricts each kind of annotation row (numeric, multi-numeric, categorical, text) to the
/// indices selected in the corresponding parameter, together with its names and descriptions.
/// </summary>
/// <remarks>
/// For every kind, the row data is replaced first and the names and descriptions afterwards.
/// </remarks>
public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int[] keepNumeric = param.GetParam<int[]>("Numerical rows").Value;
    int[] keepMultiNumeric = param.GetParam<int[]>("Multi-numerical rows").Value;
    int[] keepCategory = param.GetParam<int[]>("Categorical rows").Value;
    int[] keepText = param.GetParam<int[]>("Text rows").Value;

    // Numeric rows.
    data.NumericRows = ArrayUtils.SubList(data.NumericRows, keepNumeric);
    data.NumericRowNames = ArrayUtils.SubList(data.NumericRowNames, keepNumeric);
    data.NumericRowDescriptions = ArrayUtils.SubList(data.NumericRowDescriptions, keepNumeric);

    // Multi-numeric rows.
    data.MultiNumericRows = ArrayUtils.SubList(data.MultiNumericRows, keepMultiNumeric);
    data.MultiNumericRowNames = ArrayUtils.SubList(data.MultiNumericRowNames, keepMultiNumeric);
    data.MultiNumericRowDescriptions = ArrayUtils.SubList(data.MultiNumericRowDescriptions, keepMultiNumeric);

    // Categorical rows are fetched through the plugin helper rather than sliced directly.
    data.CategoryRows = PerseusPluginUtils.GetCategoryRows(data, keepCategory);
    data.CategoryRowNames = ArrayUtils.SubList(data.CategoryRowNames, keepCategory);
    data.CategoryRowDescriptions = ArrayUtils.SubList(data.CategoryRowDescriptions, keepCategory);

    // Text rows.
    data.StringRows = ArrayUtils.SubList(data.StringRows, keepText);
    data.StringRowNames = ArrayUtils.SubList(data.StringRowNames, keepText);
    data.StringRowDescriptions = ArrayUtils.SubList(data.StringRowDescriptions, keepText);
}
/// <summary>
/// Adds a text column "Short sequence window" holding, for every row, the substring of the
/// selected sequence-window column defined by the 1-based "Start" and the "Length" parameters.
/// </summary>
/// <remarks>
/// Invalid input is reported through <c>processInfo.ErrString</c> without modifying the matrix:
/// a start below 1, a negative length, or a requested range exceeding the length of ANY row's
/// sequence. Every row is validated (not just the first), and a matrix without rows is handled
/// by adding an empty column.
/// </remarks>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int stringColumnIndx = param.GetParam<int>("Sequence window").Value;
    string[] win = mdata.StringColumns[stringColumnIndx];
    // The parameter is 1-based; Substring expects a 0-based offset.
    int start = param.GetParam<int>("Start").Value - 1;
    int length = param.GetParam<int>("Length").Value;
    if (start < 0) {
        processInfo.ErrString = "Start position cannot be smaller than 1.";
        return;
    }
    if (length < 0) {
        processInfo.ErrString = "Length cannot be negative.";
        return;
    }
    // Validate all rows up front so that no Substring call below can go out of range.
    foreach (string window in win) {
        if (start + length > window.Length) {
            processInfo.ErrString = "Start + length cannot exceed the total length of the sequence.";
            return;
        }
    }
    string[] shortenedMotifs = new string[win.Length];
    for (int i = 0; i < win.Length; ++i) {
        shortenedMotifs[i] = win[i].Substring(start, length);
    }
    mdata.AddStringColumn("Short sequence window", "", shortenedMotifs);
}
/// <summary>
/// Filters rows according to whether their entry in a categorical column contains any of the
/// selected terms.
/// </summary>
/// <remarks>
/// With "Mode" equal to 0, rows containing a selected term are dropped; otherwise only those rows
/// are retained. The surviving row indices are handed to <c>PerseusPluginUtils.FilterRows</c>.
/// Reports an error when no categorical column is available or no term is selected.
/// </remarks>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    ParameterWithSubParams<int> columnParam = param.GetParamWithSubParams<int>("Column");
    int categoryIndex = columnParam.Value;
    if (categoryIndex < 0) {
        processInfo.ErrString = "No categorical columns available.";
        return;
    }
    int[] termIndices = columnParam.GetSubParameters().GetParam<int[]>("Values").Value;
    if (termIndices.Length == 0) {
        processInfo.ErrString = "Please select at least one term for filtering.";
        return;
    }
    string[] allTerms = mdata.GetCategoryColumnValuesAt(categoryIndex);
    HashSet<string> chosenTerms = new HashSet<string>();
    foreach (int termIndex in termIndices) {
        chosenTerms.Add(allTerms[termIndex]);
    }
    bool dropMatching = param.GetParam<int>("Mode").Value == 0;
    List<int> keptRows = new List<int>();
    for (int row = 0; row < mdata.RowCount; row++) {
        bool containsTerm = false;
        foreach (string entry in mdata.GetCategoryColumnEntryAt(categoryIndex, row)) {
            if (chosenTerms.Contains(entry)) {
                containsTerm = true;
                break;
            }
        }
        // Keep non-matching rows in drop mode and matching rows in keep mode.
        if (containsTerm != dropMatching) {
            keptRows.Add(row);
        }
    }
    PerseusPluginUtils.FilterRows(mdata, param, keptRows.ToArray());
}
/// <summary>
/// Subtracts from every row the column-wise median profile of a reference set of rows.
/// </summary>
/// <remarks>
/// Reference rows are those whose entry in the indicator column satisfies <c>Contains</c> for the
/// given term. Each reference row is centred on its own mean (rounded to single precision) before
/// the per-column median over the finite values is taken; a column with no finite value gets NaN.
/// </remarks>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int indicatorIndex = param.GetParam<int>("Indicator column").Value;
    string[][] indicator = mdata.GetCategoryColumnAt(indicatorIndex);
    string term = param.GetParam<string>("Value").Value;
    List<int> referenceRows = new List<int>();
    for (int row = 0; row < indicator.Length; row++) {
        if (!Contains(indicator[row], term)) {
            continue;
        }
        referenceRows.Add(row);
    }
    double[][] centred = new double[referenceRows.Count][];
    for (int p = 0; p < centred.Length; p++) {
        double[] profile = ArrayUtils.ToDoubles(mdata.Values.GetRow(referenceRows[p]));
        // The mean is deliberately narrowed to float, as in the stored matrix values.
        float mean = (float) ArrayUtils.Mean(profile);
        for (int c = 0; c < profile.Length; c++) {
            profile[c] -= mean;
        }
        centred[p] = profile;
    }
    double[] medianProfile = new double[mdata.ColumnCount];
    for (int c = 0; c < medianProfile.Length; c++) {
        List<double> finite = new List<double>();
        foreach (double[] profile in centred) {
            double v = profile[c];
            if (!double.IsNaN(v) && !double.IsInfinity(v)) {
                finite.Add(v);
            }
        }
        if (finite.Count > 0) {
            medianProfile[c] = ArrayUtils.Median(finite);
        } else {
            medianProfile[c] = double.NaN;
        }
    }
    for (int row = 0; row < mdata.RowCount; row++) {
        for (int c = 0; c < mdata.ColumnCount; c++) {
            mdata.Values.Set(row, c, mdata.Values.Get(row, c)-(float) medianProfile[c]);
        }
    }
}
/// <summary>
/// Filters rows based on their number (or percentage) of valid values, either over the whole
/// row (mode 0) or group-wise using a categorical row as grouping.
/// </summary>
/// <remarks>
/// In a group-wise mode the grouping is read from the "Grouping" sub-parameter and the work is
/// delegated to <c>NonzeroFilterGroup</c>, with a flag that is true exactly for mode 2.
/// Reports an error when a group-wise mode is chosen but no categorical row exists.
/// </remarks>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    const bool rows = true;
    bool percentage;
    int minValids = PerseusPluginUtils.GetMinValids(param, out percentage);
    ParameterWithSubParams<int> modeParam = param.GetParamWithSubParams<int>("Mode");
    int mode = modeParam.Value;
    bool groupWise = mode != 0;
    if (groupWise && mdata.CategoryRowNames.Count == 0) {
        processInfo.ErrString = "No grouping is defined.";
        return;
    }
    FilteringMode filterMode;
    double threshold;
    double threshold2;
    PerseusPluginUtils.ReadValuesShouldBeParams(param, out filterMode, out threshold, out threshold2);
    if (!groupWise) {
        PerseusPluginUtils.NonzeroFilter1(rows, minValids, percentage, mdata, param, threshold, threshold2,
            filterMode);
        return;
    }
    int groupingIndex = modeParam.GetSubParameters().GetParam<int>("Grouping").Value;
    string[][] grouping = mdata.GetCategoryRowAt(groupingIndex);
    NonzeroFilterGroup(minValids, percentage, mdata, param, mode == 2, threshold, threshold2, filterMode,
        grouping);
}
/// <summary>
/// Filters rows by testing a text column against a search string with <c>Matches</c>.
/// </summary>
/// <remarks>
/// With "Mode" equal to 0 the matching rows are dropped; otherwise only matching rows are kept.
/// Reports an error and leaves the matrix untouched when the search string is null or empty.
/// </remarks>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int textColumnIndex = param.GetParam<int>("Column").Value;
    string needle = param.GetParam<string>("Search string").Value;
    if (string.IsNullOrEmpty(needle)) {
        processInfo.ErrString = "Please provide a search string";
        return;
    }
    bool dropMatching = param.GetParam<int>("Mode").Value == 0;
    bool caseSensitive = param.GetParam<bool>("Match case").Value;
    bool wholeWord = param.GetParam<bool>("Match whole word").Value;
    string[] entries = mdata.StringColumns[textColumnIndex];
    List<int> keptRows = new List<int>();
    for (int row = 0; row < entries.Length; row++) {
        bool isMatch = Matches(entries[row], needle, caseSensitive, wholeWord);
        // A row survives when it matches in keep mode or fails to match in drop mode.
        if (isMatch != dropMatching) {
            keptRows.Add(row);
        }
    }
    PerseusPluginUtils.FilterRows(mdata, param, keptRows.ToArray());
}
/// <summary>
/// Wraps the matrix in a <c>SelectRowsManuallyResult</c>; the parameters and process info are not used.
/// </summary>
public IAnalysisResult AnalyzeData(IMatrixData mdata, Parameters param, ProcessInfo processInfo) {
    SelectRowsManuallyResult selection = new SelectRowsManuallyResult(mdata);
    return selection;
}
/// <summary>
/// Loads a matrix from a delimited text file, deriving the column roles from the file's own
/// "Type" annotation row and applying no row filters.
/// </summary>
/// <remarks>
/// The file is opened twice with a pair of readers each time: once to count rows, once to load
/// the data. The file name itself is passed on as the origin of the data.
/// </remarks>
public static void ReadMatrixFromFile(IMatrixData mdata, ProcessInfo processInfo, string filename,
    char separator) {
    Dictionary<string, string[]> annotationRows = new Dictionary<string, string[]>();
    string[] columnNames = TabSep.GetColumnNames(filename, commentPrefix, commentPrefixExceptions,
        annotationRows, separator);
    string[] columnTypes = annotationRows["Type"];
    int[] eInds;
    int[] nInds;
    int[] cInds;
    int[] tInds;
    int[] mInds;
    ColumnIndices(columnTypes, out eInds, out nInds, out cInds, out tInds, out mInds);
    // No filters are applied in this overload.
    List<Tuple<Relation[], int[], bool>> noFilters = new List<Tuple<Relation[], int[], bool>>();
    int rowCount;
    using (StreamReader countReader = FileUtils.GetReader(filename)) {
        using (StreamReader countAuxReader = FileUtils.GetReader(filename)) {
            rowCount = GetRowCount(countReader, countAuxReader, eInds, noFilters, separator);
        }
    }
    using (StreamReader loadReader = FileUtils.GetReader(filename)) {
        using (StreamReader loadAuxReader = FileUtils.GetReader(filename)) {
            LoadMatrixData(annotationRows, eInds, cInds, nInds, tInds, mInds, processInfo, columnNames, mdata,
                loadReader, loadAuxReader, rowCount, filename, separator, false, noFilters);
        }
    }
}
/// <summary>
/// Loads a matrix from a delimited text file using caller-supplied column roles and row filters.
/// </summary>
/// <remarks>
/// The separator is a comma for names ending in ".csv" or ".csv.gz" (after lower-casing) and a
/// tab otherwise. Failures are reported through <c>processInfo.ErrString</c>: a missing file, a
/// failure to read the column names, or an error from <c>AddFilter</c>. The file is opened twice
/// with a pair of readers each time - once to count the rows, once to load them - and a garbage
/// collection is requested at the end.
/// </remarks>
public static void ReadMatrixFromFile(IMatrixData mdata, ProcessInfo processInfo, string filename, int[] eInds,
    int[] nInds, int[] cInds, int[] tInds, int[] mInds, Parameters[] mainFilterParameters,
    Parameters[] numericalFilterParameters, bool shortenExpressionColumnNames) {
    if (!File.Exists(filename)) {
        processInfo.ErrString = "File '" + filename + "' does not exist.";
        return;
    }
    string lowered = filename.ToLower();
    bool isCsv = lowered.EndsWith(".csv") || lowered.EndsWith(".csv.gz");
    char separator;
    if (isCsv) {
        separator = ',';
    } else {
        separator = '\t';
    }
    Dictionary<string, string[]> annotationRows = new Dictionary<string, string[]>();
    string[] columnNames;
    try {
        columnNames = TabSep.GetColumnNames(filename, commentPrefix, commentPrefixExceptions, annotationRows,
            separator);
    } catch (Exception) {
        processInfo.ErrString = "Could not open the file '" + filename +
            "'. It is probably opened in another program.";
        return;
    }
    List<Tuple<Relation[], int[], bool>> filters = new List<Tuple<Relation[], int[], bool>>();
    string filterError;
    // Filters on main columns first, then filters on numeric columns.
    foreach (Parameters filterParams in mainFilterParameters) {
        AddFilter(filters, filterParams, eInds, out filterError);
        if (filterError != null) {
            processInfo.ErrString = filterError;
            return;
        }
    }
    foreach (Parameters filterParams in numericalFilterParameters) {
        AddFilter(filters, filterParams, nInds, out filterError);
        if (filterError != null) {
            processInfo.ErrString = filterError;
            return;
        }
    }
    int rowCount;
    using (StreamReader countReader = FileUtils.GetReader(filename)) {
        using (StreamReader countAuxReader = FileUtils.GetReader(filename)) {
            rowCount = GetRowCount(countReader, countAuxReader, eInds, filters, separator);
        }
    }
    using (StreamReader loadReader = FileUtils.GetReader(filename)) {
        using (StreamReader loadAuxReader = FileUtils.GetReader(filename)) {
            // The file name doubles as the origin of the loaded data.
            LoadMatrixData(annotationRows, eInds, cInds, nInds, tInds, mInds, processInfo, columnNames, mdata,
                loadReader, loadAuxReader, rowCount, filename, separator, shortenExpressionColumnNames, filters);
        }
    }
    GC.Collect();
}
/// <summary>
/// Validates the column selection and forwards to the overload that performs the actual load.
/// </summary>
/// <remarks>
/// A "Description" annotation row, if present, is extracted and removed from
/// <paramref name="annotationRows"/> before forwarding. Loading is refused, with a message in
/// <c>processInfo.ErrString</c>, when the same column index is selected more than once or when
/// two selected columns share a name.
/// </remarks>
public static void LoadMatrixData(IDictionary<string, string[]> annotationRows, int[] eInds, int[] cInds,
    int[] nInds, int[] tInds, int[] mInds, ProcessInfo processInfo, IList<string> colNames, IMatrixData mdata,
    StreamReader reader, StreamReader auxReader, int nrows, string origin, char separator,
    bool shortenExpressionNames, List<Tuple<Relation[], int[], bool>> filters) {
    string[] descriptions;
    if (annotationRows.TryGetValue("Description", out descriptions)) {
        annotationRows.Remove("Description");
    }
    // After sorting, duplicates are adjacent.
    int[] selected = ArrayUtils.Concat(new[]{eInds, cInds, nInds, tInds, mInds});
    Array.Sort(selected);
    for (int k = 1; k < selected.Length; k++) {
        if (selected[k] == selected[k - 1]) {
            processInfo.ErrString = "Column '" + colNames[selected[k - 1]] + "' has been selected multiple times";
            return;
        }
    }
    string[] selectedNames = ArrayUtils.SubArray(colNames, selected);
    Array.Sort(selectedNames);
    for (int k = 1; k < selectedNames.Length; k++) {
        if (selectedNames[k].Equals(selectedNames[k - 1])) {
            processInfo.ErrString = "Column name '" + selectedNames[k - 1] + "' occurs multiple times.";
            return;
        }
    }
    LoadMatrixData(colNames, descriptions, eInds, cInds, nInds, tInds, mInds, origin, mdata, annotationRows,
        processInfo.Progress, processInfo.Status, separator, reader, auxReader, nrows, shortenExpressionNames,
        filters);
}
/// <summary>
/// Collapses rows that belong together according to an identifier column, replacing every
/// column by its per-group aggregate.
/// </summary>
/// <remarks>
/// Rows with identifiers are grouped by <c>ClusterRows</c>; rows without identifiers are appended
/// through <c>ProlongRowInds</c> only when "Keep rows without ID" is set. Expression columns use
/// the average type from "Average type for expression columns", each numeric column uses the
/// type from "Average type for " + its name, and the remaining column kinds are combined by the
/// matching <c>Average</c> overload.
/// </remarks>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    bool keepRowsWithoutId = param.GetBoolParam("Keep rows without ID").Value;
    AverageType expressionAverage =
        GetAverageType(param.GetSingleChoiceParam("Average type for expression columns").Value);
    string[] rawIds = mdata.StringColumns[param.GetSingleChoiceParam("ID column").Value];
    string[][] idsPerRow = SplitIds(rawIds);
    int[] withId;
    int[] withoutId;
    GetPresentAbsentIndices(idsPerRow, out withId, out withoutId);
    idsPerRow = ArrayUtils.SubArray(idsPerRow, withId);
    // Start with one singleton group per identified row; ClusterRows merges them.
    int[][] groups = new int[withId.Length][];
    for (int g = 0; g < groups.Length; g++) {
        groups[g] = new[]{withId[g]};
    }
    ClusterRows(ref groups, ref idsPerRow);
    if (keepRowsWithoutId) {
        groups = ProlongRowInds(groups, withoutId);
    }
    int groupCount = groups.Length;
    int expressionColumnCount = mdata.ExpressionColumnCount;
    float[,] mergedExpression = new float[groupCount,expressionColumnCount];
    for (int col = 0; col < expressionColumnCount; col++) {
        float[] source = mdata.GetExpressionColumn(col);
        for (int g = 0; g < groupCount; g++) {
            float[] members = ArrayUtils.SubArray(source, groups[g]);
            mergedExpression[g, col] = Average(members, expressionAverage);
        }
    }
    mdata.ExpressionValues = mergedExpression;
    for (int col = 0; col < mdata.NumericColumnCount; col++) {
        string columnName = mdata.NumericColumnNames[col];
        AverageType columnAverage =
            GetAverageType(param.GetSingleChoiceParam("Average type for " + columnName).Value);
        double[] source = mdata.NumericColumns[col];
        double[] merged = new double[groupCount];
        for (int g = 0; g < groupCount; g++) {
            double[] members = ArrayUtils.SubArray(source, groups[g]);
            merged[g] = Average(members, columnAverage);
        }
        mdata.NumericColumns[col] = merged;
    }
    for (int col = 0; col < mdata.CategoryColumnCount; col++) {
        string[][] source = mdata.GetCategoryColumnAt(col);
        string[][] merged = new string[groupCount][];
        for (int g = 0; g < groupCount; g++) {
            string[][] members = ArrayUtils.SubArray(source, groups[g]);
            merged[g] = Average(members);
        }
        mdata.SetCategoryColumnAt(merged,col);
    }
    for (int col = 0; col < mdata.StringColumnCount; col++) {
        string[] source = mdata.StringColumns[col];
        string[] merged = new string[groupCount];
        for (int g = 0; g < groupCount; g++) {
            string[] members = ArrayUtils.SubArray(source, groups[g]);
            merged[g] = Average(members);
        }
        mdata.StringColumns[col] = merged;
    }
    for (int col = 0; col < mdata.MultiNumericColumnCount; col++) {
        double[][] source = mdata.MultiNumericColumns[col];
        double[][] merged = new double[groupCount][];
        for (int g = 0; g < groupCount; g++) {
            double[][] members = ArrayUtils.SubArray(source, groups[g]);
            merged[g] = Average(members);
        }
        mdata.MultiNumericColumns[col] = merged;
    }
}
/// <summary>
/// Expands rows so that each entry of the selected multi-numeric columns and each
/// semicolon-separated entry of the selected text columns gets a row of its own.
/// </summary>
/// <remarks>
/// Reports "Please select some columns." when neither kind of column is selected, and forwards
/// any error string produced by <c>GetEntryCount</c> for a row; in both cases nothing is changed.
/// For every source row, <c>GetEntryCount</c> gives the number of output rows; a row with zero
/// entries still yields one output row, with an empty array / empty string in the selected
/// columns. Main values, numeric and categorical entries are repeated on every output row of
/// the source row; unselected multi-numeric and text entries are repeated as well, whereas
/// selected ones are split so that output row <c>count + j</c> receives entry <c>j</c>.
/// Afterwards the selected multi-numeric columns are converted with <c>Transform</c> and appended
/// to the numeric columns, and only the unselected ones remain multi-numeric.
/// NOTE(review): <c>mdata.ColumnNames = mdata.ColumnNames;</c> is a self-assignment; whether the
/// setter has a side effect that is relied upon cannot be seen from here - confirm before removing.
/// </remarks>
public void ProcessData(IMatrixData mdata, Parameters param1, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) { int[] multiNumCols = param1.GetParam<int[]>("Multi-numeric columns").Value; Array.Sort(multiNumCols); int[] stringCols = param1.GetParam<int[]>("Text columns").Value; Array.Sort(stringCols); HashSet<int> multinumCols2 = new HashSet<int>(multiNumCols); HashSet<int> stringCols2 = new HashSet<int>(stringCols); if (multiNumCols.Length + stringCols.Length == 0){ processInfo.ErrString = "Please select some columns."; return; } int rowCount = GetNewRowCount(mdata, multiNumCols, stringCols); float[,] expVals = new float[rowCount, mdata.ColumnCount]; List<string[]> stringC = new List<string[]>(); for (int i = 0; i < mdata.StringColumnCount; i++){ stringC.Add(new string[rowCount]); } List<double[]> numC = new List<double[]>(); for (int i = 0; i < mdata.NumericColumnCount; i++){ numC.Add(new double[rowCount]); } List<string[][]> catC = new List<string[][]>(); for (int i = 0; i < mdata.CategoryColumnCount; i++){ catC.Add(new string[rowCount][]); } List<double[][]> multiNumC = new List<double[][]>(); for (int i = 0; i < mdata.MultiNumericColumnCount; i++){ multiNumC.Add(new double[rowCount][]); } int count = 0; for (int i = 0; i < mdata.RowCount; i++){ string err; int entryCount = GetEntryCount(i, mdata, multiNumCols, stringCols, out err); if (err != null){ processInfo.ErrString = err; return; } bool empty = entryCount == 0; entryCount = Math.Max(entryCount, 1); for (int j = 0; j < entryCount; j++){ for (int k = 0; k < mdata.ColumnCount; k++){ expVals[count + j, k] = mdata.Values.Get(i, k); } for (int k = 0; k < mdata.NumericColumnCount; k++){ numC[k][count + j] = mdata.NumericColumns[k][i]; } for (int k = 0; k < mdata.CategoryColumnCount; k++){ catC[k][count + j] = mdata.GetCategoryColumnEntryAt(k, i); } } for (int k = 0; k < mdata.MultiNumericColumnCount; k++){ if (multinumCols2.Contains(k)){ if (empty){ multiNumC[k][count] = 
new double[0]; } else{ double[] vals = mdata.MultiNumericColumns[k][i]; for (int j = 0; j < entryCount; j++){ multiNumC[k][count + j] = new[]{vals[j]}; } } } else{ for (int j = 0; j < entryCount; j++){ multiNumC[k][count + j] = mdata.MultiNumericColumns[k][i]; } } } for (int k = 0; k < mdata.StringColumnCount; k++){ if (stringCols2.Contains(k)){ if (empty){ stringC[k][count] = ""; } else{ string[] vals = mdata.StringColumns[k][i].Split(';'); for (int j = 0; j < entryCount; j++){ stringC[k][count + j] = vals[j]; } } } else{ for (int j = 0; j < entryCount; j++){ stringC[k][count + j] = mdata.StringColumns[k][i]; } } } count += entryCount; } int[] multiNumComplement = ArrayUtils.Complement(multiNumCols, mdata.MultiNumericColumnCount); List<double[][]> toBeTransformed = ArrayUtils.SubList(multiNumC, multiNumCols); multiNumC = ArrayUtils.SubList(multiNumC, multiNumComplement); foreach (double[][] d in toBeTransformed){ numC.Add(Transform(d)); } mdata.ColumnNames = mdata.ColumnNames; mdata.Values.Set(expVals); mdata.SetAnnotationColumns(mdata.StringColumnNames, stringC, mdata.CategoryColumnNames, catC, new List<string>(ArrayUtils.Concat(mdata.NumericColumnNames, ArrayUtils.SubList(mdata.MultiNumericColumnNames, multiNumCols))), numC, new List<string>(ArrayUtils.SubArray(mdata.MultiNumericColumnNames, multiNumComplement)), multiNumC); }
/// <summary>
/// Parses a FASTA file and stores one <c>ProteinSequence</c> per header in <c>entries</c>, keyed
/// by accession.
/// </summary>
/// <param name="path">Path of the FASTA file to read.</param>
/// <param name="processInfo">Receives status messages and, on failure, the error string.</param>
/// <remarks>
/// Lines before the first header are ignored. The accession is the first capture group of
/// <c>regexUniprotAccession</c> when the header matches; otherwise it is the whole header after
/// the leading '&gt;', trimmed. Any exception while reading or storing is converted into a
/// generic message in <c>processInfo.ErrString</c>; entries added before the failure remain.
/// The reader is wrapped in a <c>using</c> block so the file handle is released even when
/// parsing fails part-way through.
/// </remarks>
public void ParseFile(string path, ProcessInfo processInfo) {
    processInfo.Status("Parsing " + path);
    string accession = "";
    int sequenceCounter = 0;
    StringBuilder sequence = new StringBuilder();
    ProteinSequence protein = new ProteinSequence();
    try {
        using (StreamReader file = new StreamReader(path)) {
            string line;
            while ((line = file.ReadLine()) != null) {
                if (sequenceCounter%500 == 0) {
                    processInfo.Status("Parsing " + path + ", " +
                        (int) ((float) file.BaseStream.Position/file.BaseStream.Length*100) + "%");
                }
                bool lineIsHeader = line.StartsWith(">");
                if (!lineIsHeader) {
                    // Sequence lines are collected only once a header has been seen.
                    if (sequenceCounter > 0) {
                        sequence.Append(line.Trim());
                    }
                    continue;
                }
                if (sequenceCounter > 0) {
                    // A new header completes the previous protein.
                    protein.SetSequence(sequence.ToString());
                    entries.Add(accession, protein);
                }
                protein = new ProteinSequence();
                sequenceCounter++;
                Match m = regexUniprotAccession.Match(line);
                // Fallback: take the entire header after the '>' as accession.
                accession = m.Success ? m.Groups[1].Value : line.Substring(1).Trim();
                protein.Accession = accession;
                protein.Header = line;
                sequence = new StringBuilder();
            }
        }
        // Store the last protein, provided the file contained at least one header.
        if (sequenceCounter > 0) {
            protein.SetSequence(sequence.ToString());
            entries.Add(accession, protein);
        }
    } catch (Exception) {
        processInfo.ErrString =
            "Something went wrong while parsing the fasta file.\nMake sure the path is correct and the " +
            "file is not opened in another application.\nMake sure the fasta file is valid.";
    }
}
/// <summary>
/// Replaces missing values in the selected columns via one of two Gaussian-based helpers.
/// </summary>
/// <remarks>
/// "Mode" equal to 1 calls <c>ReplaceMissingsByGaussianByColumn</c>; any other mode calls
/// <c>ReplaceMissingsByGaussianWholeMatrix</c>, whose non-null return value is reported as the
/// error string. Does nothing when no column is selected.
/// </remarks>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    double width = param.GetParam<double>("Width").Value;
    double downShift = param.GetParam<double>("Down shift").Value;
    bool perColumn = param.GetParam<int>("Mode").Value == 1;
    int[] selectedColumns = param.GetParam<int[]>("Columns").Value;
    if (selectedColumns.Length == 0) {
        return;
    }
    if (perColumn) {
        ReplaceMissingsByGaussianByColumn(width, downShift, mdata, selectedColumns);
        return;
    }
    string error = ReplaceMissingsByGaussianWholeMatrix(width, downShift, mdata, selectedColumns);
    if (error != null) {
        processInfo.ErrString = error;
    }
}
/// <summary>
/// Replaces the content of the matrix by curves computed from score columns and an indicator
/// derived from a categorical column.
/// </summary>
/// <remarks>
/// Score indices below <c>data.NumericColumnCount</c> address numeric columns; the remaining ones
/// address expression columns after subtracting that count. For each score column the rows are
/// ordered by <c>GetOrder</c> and <c>CalcCurve</c> appends the requested quantities. The result
/// replaces all data via <c>SetData</c>, with every annotation list empty.
/// Reports an error when no score column or no display quantity is selected.
/// </remarks>
public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    bool falseAreIndicated = param.GetSingleChoiceParam("Indicated are").Value == 0;
    int indicatorColumn = param.GetSingleChoiceParam("In column").Value;
    string indicatorWord = param.GetStringParam("Indicator").Value;
    int[] scoreColumns = param.GetMultiChoiceParam("Scores").Value;
    if (scoreColumns.Length == 0) {
        processInfo.ErrString = "Please specify at least one column with scores.";
        return;
    }
    bool largeIsGood = param.GetBoolParam("Large values are good").Value;
    int[] quantities = param.GetMultiChoiceParam("Display quantity").Value;
    if (quantities.Length == 0) {
        processInfo.ErrString = "Please select at least one quantity to display";
        return;
    }
    bool[] indicator = GetIndicatorColumn(falseAreIndicated, indicatorColumn, indicatorWord, data);
    List<string> curveNames = new List<string>();
    List<float[]> curves = new List<float[]>();
    foreach (int scoreColumn in scoreColumns) {
        double[] scores;
        string scoreName;
        if (scoreColumn < data.NumericColumnCount) {
            scores = data.NumericColumns[scoreColumn];
            scoreName = data.NumericColumnNames[scoreColumn];
        } else {
            // Indices past the numeric columns refer to expression columns.
            scores = ArrayUtils.ToDoubles(data.GetExpressionColumn(scoreColumn - data.NumericColumnCount));
            scoreName = data.ExpressionColumnNames[scoreColumn - data.NumericColumnCount];
        }
        int[] order = GetOrder(scores, largeIsGood);
        CalcCurve(ArrayUtils.SubArray(indicator, order), quantities, scoreName, curves, curveNames);
    }
    float[,] curveMatrix = ToMatrix(curves);
    data.SetData(data.Name, curveNames, curveMatrix, new List<string>(), new List<string[]>(),
        new List<string>(), new List<string[][]>(), new List<string>(), new List<double[]>(),
        new List<string>(), new List<double[][]>());
}
/// <summary>
/// Adds six category columns ("Regulatory site" and its function / process / protInteract /
/// otherInteract / notes companions) based on the entries returned by
/// <c>PhosphoSitePlusParser.ParseRegulatorySites</c>.
/// </summary>
/// <remarks>
/// A row is marked with "+" when one of its accessions (the "Uniprot column" value split at ';')
/// has a parsed entry whose sequence window - upper-cased, with the first and last character
/// removed and passed through <c>TransformIl</c> - is accepted by <c>Contains</c> against the
/// row's own ';'-separated, <c>TransformIl</c>-transformed "Sequence window" value. The
/// non-empty annotation strings of all matching entries are collected without duplicates.
/// Rows without a match get empty arrays in all six columns.
/// If the parser returns no sequence windows, "File does not exist." is reported and nothing is added.
/// </remarks>
/// <param name="mdata">Matrix whose string columns are read and to which the category columns are added.</param>
/// <param name="param">Source of the "Uniprot column" and "Sequence window" string-column indices.</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives the error string when the parser yields no data.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    string[] seqWins;
    string[] accs;
    string[] function;
    string[] process;
    string[] protInteract;
    string[] otherInteract;
    string[] notes;
    string[] species; // required by the parser signature; not used here
    PhosphoSitePlusParser.ParseRegulatorySites(out seqWins, out accs, out function, out process,
        out protInteract, out otherInteract, out notes, out species);
    if (seqWins == null) {
        processInfo.ErrString = "File does not exist.";
        return;
    }

    // Accessions per row; an empty cell yields no accessions rather than one empty accession.
    string[] up = mdata.StringColumns[param.GetParam<int>("Uniprot column").Value];
    string[][] uprot = new string[up.Length][];
    for (int i = 0; i < up.Length; i++) {
        uprot[i] = up[i].Length > 0 ? up[i].Split(';') : new string[0];
    }
    string[] win = mdata.StringColumns[param.GetParam<int>("Sequence window").Value];

    // Accession -> indices of all parsed entries carrying that accession.
    Dictionary<string, List<int>> map = new Dictionary<string, List<int>>();
    for (int i = 0; i < seqWins.Length; i++) {
        List<int> entries;
        if (!map.TryGetValue(accs[i], out entries)) {
            entries = new List<int>();
            map.Add(accs[i], entries);
        }
        entries.Add(i);
    }

    string[][] newCatCol = new string[uprot.Length][];
    string[][] function2 = new string[uprot.Length][];
    string[][] process2 = new string[uprot.Length][];
    string[][] protInteract2 = new string[uprot.Length][];
    string[][] otherInteract2 = new string[uprot.Length][];
    string[][] notes2 = new string[uprot.Length][];
    for (int i = 0; i < uprot.Length; i++) {
        string[] win1 = TransformIl(win[i]).Split(';');
        bool matched = false;
        HashSet<string> function1 = new HashSet<string>();
        HashSet<string> process1 = new HashSet<string>();
        HashSet<string> protInteract1 = new HashSet<string>();
        HashSet<string> otherInteract1 = new HashSet<string>();
        HashSet<string> notes1 = new HashSet<string>();
        foreach (string ux in uprot[i]) {
            List<int> hits;
            if (!map.TryGetValue(ux, out hits)) {
                continue;
            }
            foreach (int ind in hits) {
                string s = seqWins[ind];
                // Trimming the first and last character needs at least two characters;
                // shorter entries cannot be compared and are skipped instead of throwing.
                if (s.Length < 2) {
                    continue;
                }
                // Invariant upper-casing keeps residue letters stable under any current culture
                // (e.g. the Turkish 'i' -> 'İ' mapping would otherwise break the comparison).
                string core = TransformIl(s.ToUpperInvariant().Substring(1, s.Length - 2));
                if (!Contains(win1, core)) {
                    continue;
                }
                matched = true;
                if (function[ind].Length > 0) {
                    function1.Add(function[ind]);
                }
                if (process[ind].Length > 0) {
                    process1.Add(process[ind]);
                }
                if (protInteract[ind].Length > 0) {
                    protInteract1.Add(protInteract[ind]);
                }
                if (otherInteract[ind].Length > 0) {
                    otherInteract1.Add(otherInteract[ind]);
                }
                if (notes[ind].Length > 0) {
                    notes1.Add(notes[ind]);
                }
            }
        }
        if (matched) {
            newCatCol[i] = new[]{"+"};
            function2[i] = ArrayUtils.ToArray(function1);
            process2[i] = ArrayUtils.ToArray(process1);
            protInteract2[i] = ArrayUtils.ToArray(protInteract1);
            otherInteract2[i] = ArrayUtils.ToArray(otherInteract1);
            notes2[i] = ArrayUtils.ToArray(notes1);
        } else {
            newCatCol[i] = new string[0];
            function2[i] = new string[0];
            process2[i] = new string[0];
            protInteract2[i] = new string[0];
            otherInteract2[i] = new string[0];
            notes2[i] = new string[0];
        }
    }
    mdata.AddCategoryColumn("Regulatory site", "", newCatCol);
    mdata.AddCategoryColumn("Regulatory site function", "", function2);
    mdata.AddCategoryColumn("Regulatory site process", "", process2);
    mdata.AddCategoryColumn("Regulatory site protInteract", "", protInteract2);
    mdata.AddCategoryColumn("Regulatory site otherInteract", "", otherInteract2);
    mdata.AddCategoryColumn("Regulatory site notes", "", notes2);
}
/// <summary>
/// For every selected (x, y) column pair, estimates a density on a square grid and adds two
/// numeric columns: "Density_x_y" and "Excluded fraction_x_y".
/// </summary>
/// <remarks>
/// Per pair: valid value pairs are extracted with <c>GetValidPairs</c>, ranges come from
/// <c>DensityEstimation.CalcRanges</c>, and grid values from <c>DensityEstimation.GetValuesOnGrid</c>.
/// A "Distribution type" of 1, 2 or 3 applies the matching <c>MakeConditionalN</c> helper; any
/// other value leaves the grid as is. After <c>DivideByMaximum</c>, each row is assigned the
/// grid value (and the <c>CalcExcludedPercentage</c> value) at the grid cell closest to its
/// (x, y) position; rows with NaN in either coordinate get NaN in both new columns.
/// Errors (no columns, differing selection lengths, non-positive number of points) are
/// reported via <c>processInfo.ErrString</c> and nothing is added.
/// </remarks>
/// <param name="mdata">Matrix supplying the columns and receiving the new numeric columns.</param>
/// <param name="param">Source of the "x", "y", "Distribution type" and "Number of points" parameters.</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives the error string when the parameters are unusable.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int[] colIndx = param.GetParam<int[]>("x").Value;
    int[] colIndy = param.GetParam<int[]>("y").Value;
    if (colIndx.Length == 0) {
        processInfo.ErrString = "Please select some columns";
        return;
    }
    if (colIndx.Length != colIndy.Length) {
        processInfo.ErrString =
            "Please select the same number of columns in the boxes for the first and second columns.";
        return;
    }
    int typeInd = param.GetParam<int>("Distribution type").Value;
    int points = param.GetParam<int>("Number of points").Value;
    // The grid step divides by 'points' and the grid arrays are sized by it, so a
    // non-positive value cannot produce a usable grid.
    if (points <= 0) {
        processInfo.ErrString = "The number of points has to be positive.";
        return;
    }
    for (int k = 0; k < colIndx.Length; k++) {
        float[] xvals = GetColumn(mdata, colIndx[k]);
        float[] yvals = GetColumn(mdata, colIndy[k]);
        float[] xvals1;
        float[] yvals1;
        GetValidPairs(xvals, yvals, out xvals1, out yvals1);
        double xmin;
        double xmax;
        double ymin;
        double ymax;
        DensityEstimation.CalcRanges(xvals1, yvals1, out xmin, out xmax, out ymin, out ymax);
        float[,] values = DensityEstimation.GetValuesOnGrid(xvals1, xmin, (xmax - xmin)/points, points,
            yvals1, ymin, (ymax - ymin)/points, points);
        switch (typeInd) {
            case 1:
                MakeConditional1(values);
                break;
            case 2:
                MakeConditional2(values);
                break;
            case 3:
                MakeConditional3(values);
                break;
        }
        DensityEstimation.DivideByMaximum(values);

        // Grid coordinates used to look up the closest cell for each data point.
        double[] xmat = new double[points];
        for (int i = 0; i < points; i++) {
            xmat[i] = xmin + i*(xmax - xmin)/points;
        }
        double[] ymat = new double[points];
        for (int i = 0; i < points; i++) {
            ymat[i] = ymin + i*(ymax - ymin)/points;
        }
        float[,] percvalues = CalcExcludedPercentage(values);

        // Output columns span all rows of the original columns, not only the valid pairs.
        double[] dvals = new double[xvals.Length];
        double[] pvals = new double[xvals.Length];
        for (int i = 0; i < dvals.Length; i++) {
            double xx = xvals[i];
            double yy = yvals[i];
            if (double.IsNaN(xx) || double.IsNaN(yy)) {
                dvals[i] = double.NaN;
                pvals[i] = double.NaN;
                continue;
            }
            int xind = ArrayUtils.ClosestIndex(xmat, xx);
            int yind = ArrayUtils.ClosestIndex(ymat, yy);
            dvals[i] = values[xind, yind];
            pvals[i] = percvalues[xind, yind];
        }
        string xname = GetColumnName(mdata, colIndx[k]);
        string yname = GetColumnName(mdata, colIndy[k]);
        mdata.AddNumericColumn("Density_" + xname + "_" + yname,
            "Density of data points in the plane spanned by the columns " + xname + " and " + yname + ".", dvals);
        mdata.AddNumericColumn("Excluded fraction_" + xname + "_" + yname,
            "Percentage of points with a point density smaller than at this point in the plane spanned by the columns " +
            xname + " and " + yname + ".", pvals);
    }
}
/// <summary>
/// Abstract processing entry point that takes several input matrices and returns a single
/// <c>IMatrixData</c>. The implementation is supplied by derived classes.
/// </summary>
/// <param name="inputData">The input matrices.</param>
/// <param name="param">Parameter collection. NOTE(review): presumably queried by parameter name
/// as in the single-matrix <c>ProcessData</c> methods - confirm against implementers.</param>
/// <param name="supplTables">Passed by reference, so an implementation may replace the array.
/// NOTE(review): intended content is not visible here - confirm.</param>
/// <param name="documents">Passed by reference, so an implementation may replace the array.
/// NOTE(review): intended content is not visible here - confirm.</param>
/// <param name="processInfo">Processing context. NOTE(review): presumably errors are reported via
/// <c>ErrString</c>, as the single-matrix <c>ProcessData</c> methods do - confirm.</param>
/// <returns>The resulting matrix. NOTE(review): whether null is a permitted result is not
/// visible here - confirm.</returns>
public abstract IMatrixData ProcessData(IMatrixData[] inputData, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo);