// Checks that ReadMapping returns the gene name "HTR1A" for the UniProt id "P08908".
// The test is marked inconclusive up front because it needs the "conf" annotation folder.
public void ReadMappingTest() {
    Assert.Inconclusive("Should be moved to integration tests, using conf");
    //[DeploymentItem("conf", "conf")]
    string[] baseNames, files;
    var annots = PerseusUtils.GetAvailableAnnots(out baseNames, out files);
    // Ordinal, case-insensitive matching so the lookup does not depend on the current culture.
    var uniprotIndex = baseNames.ToList().FindIndex(
        name => string.Equals(name, "uniprot", System.StringComparison.OrdinalIgnoreCase));
    var selection = annots[uniprotIndex].ToList().FindIndex(
        name => string.Equals(name, "gene name", System.StringComparison.OrdinalIgnoreCase));
    var ids = new[] { "P08908" };
    // Read from the file that belongs to the UniProt entry found above instead of a
    // hard-coded position, so the test keeps working if the file order changes.
    var mapping = AddAnnotationToMatrix.ReadMapping(ids, files[uniprotIndex], new[] { selection });
    CollectionAssert.AreEqual(new[] { "HTR1A" }, mapping[ids[0]]);
}
/// <summary>
/// Returns the string column of <paramref name="mdata"/> that the user picked as the
/// identifier column for the annotation source selected under "Source".
/// </summary>
/// <param name="para">Parameters containing the "Source" choice and its sub-parameters.</param>
/// <param name="mdata">Data whose string columns are indexed by the chosen column number.</param>
/// <returns>The selected string column.</returns>
private static string[] GetBaseIds(Parameters para, IDataWithAnnotationColumns mdata) {
    string[] sourceNames;
    // The types and file names are required by the call signature but are not used here.
    AnnotType[][] ignoredTypes;
    string[] ignoredFiles;
    PerseusUtils.GetAvailableAnnots(out sourceNames, out ignoredTypes, out ignoredFiles);

    ParameterWithSubParams<int> source = para.GetParamWithSubParams<int>("Source");
    int chosenSource = source.Value;
    Parameters sourceSettings = source.GetSubParameters();

    // The sub-parameter is named after the selected source, e.g. "<base name> column".
    string columnParamName = sourceNames[chosenSource] + " column";
    int columnIndex = sourceSettings.GetParam<int>(columnParamName).Value;
    return mdata.StringColumns[columnIndex];
}
// Verifies the base names, file count and first annotation column names reported by
// PerseusUtils.GetAvailableAnnots. Marked inconclusive because it depends on the "conf" folder.
public void GetAvailableAnnotsTest() {
    // Result is unused; the call itself is kept so it still runs before the inconclusive mark.
    string[] discoveredFiles = PerseusUtils.GetAnnotFiles();
    Assert.Inconclusive("Should be moved to integration tests, using conf");
    //[DeploymentItem("conf", "conf")]
    string[] sourceNames;
    string[] sourceFiles;
    string[][] annotationNames = PerseusUtils.GetAvailableAnnots(out sourceNames, out sourceFiles);

    // All three parallel arrays must describe the same three sources.
    Assert.AreEqual(3, sourceFiles.Length);
    Assert.AreEqual(3, sourceNames.Length);
    Assert.AreEqual(3, annotationNames.Length);

    string[] expectedSourceNames = { "ENSG", "UniProt", "ENSG" };
    CollectionAssert.AreEqual(expectedSourceNames, sourceNames);

    string[] expectedFirstSourceColumns = { "Chromosome", "Base pair index", "Orientation" };
    CollectionAssert.AreEqual(expectedFirstSourceColumns, annotationNames[0]);
}
/// <summary>
/// Lists, for every annotation file, only those annotation columns whose type is
/// <c>AnnotType.Text</c>.
/// </summary>
/// <param name="baseNames">Base names as reported by <c>PerseusUtils.GetAvailableAnnots</c>.</param>
/// <param name="inds">Per file, the original column indices of the text-typed annotations.</param>
/// <param name="files">Annotation files as reported by <c>PerseusUtils.GetAvailableAnnots</c>.</param>
/// <returns>Per file, the names of the text-typed annotation columns.</returns>
private static string[][] GetAvailableTextAnnots(out string[] baseNames, out int[][] inds, out string[] files) {
    AnnotType[][] columnTypes;
    string[][] annotationNames = PerseusUtils.GetAvailableAnnots(out baseNames, out columnTypes, out files);
    int fileCount = files.Length;
    inds = new int[fileCount][];
    for (int fileIndex = 0; fileIndex < fileCount; fileIndex++) {
        List<int> textColumns = new List<int>();
        int column = 0;
        foreach (AnnotType columnType in columnTypes[fileIndex]) {
            if (columnType == AnnotType.Text) {
                textColumns.Add(column);
            }
            column++;
        }
        inds[fileIndex] = textColumns.ToArray();
        // Narrow the name list to the same columns so names and indices stay aligned.
        annotationNames[fileIndex] = ArrayUtils.SubArray(annotationNames[fileIndex], textColumns);
    }
    return annotationNames;
}
/// <summary>
/// Builds the parameters for choosing an annotation source file, the identifier column to
/// match against it, the annotations to add, and optional additional source files.
/// </summary>
/// <param name="mdata">Matrix whose string column names are offered as identifier columns.</param>
/// <param name="errorString">
/// Set to a message listing the files reported as bad by
/// <c>PerseusUtils.GetAvailableAnnots</c>; left untouched when there are none.
/// </param>
/// <returns>
/// A "Source" single choice (one sub-parameter set per file) plus an "Additional sources"
/// multi choice over the same files.
/// </returns>
public Parameters GetParameters(IMatrixData mdata, ref string errorString) {
    List<string> colChoice = mdata.StringColumnNames;
    string[] baseNames;
    string[] files;
    List<string> badFiles;
    string[][] annots = PerseusUtils.GetAvailableAnnots(out baseNames, out files, out badFiles);
    if (badFiles.Any()) {
        errorString = $"Could not load annotations from file(s): {string.Join(", ", badFiles)}";
    }
    // Preselect the first file whose name contains "perseusannot", if there is one.
    int selFile = 0;
    bool isMainAnnot = false;
    for (int i = 0; i < files.Length; i++) {
        // Ordinal ignore-case: file-name matching must not depend on the current culture.
        if (files[i].IndexOf("perseusannot", System.StringComparison.OrdinalIgnoreCase) >= 0) {
            selFile = i;
            isMainAnnot = true;
            break;
        }
    }
    Parameters[] subParams = new Parameters[files.Length];
    for (int i = 0; i < subParams.Length; i++) {
        int colInd;
        if (isMainAnnot && i == selFile) {
            // Prefer a "Majority protein IDs" column, then any "Protein IDs" column.
            colInd = FindColumnContaining(colChoice, "MAJORITY PROTEIN IDS");
            if (colInd < 0) {
                colInd = FindColumnContaining(colChoice, "PROTEIN IDS");
            }
        } else {
            colInd = FindColumnContaining(colChoice, baseNames[i]);
        }
        if (colInd < 0) {
            // No matching column name: fall back to the first column.
            colInd = 0;
        }
        subParams[i] = new Parameters(
            new SingleChoiceParam(baseNames[i] + " column") {
                Values = colChoice,
                Value = colInd,
                Help = "Specify here the column that contains the base identifiers which are going to be " +
                       "matched to the annotation."
            },
            new MultiChoiceParam("Annotations to be added") { Values = annots[i] });
    }
    return new Parameters(
        new SingleChoiceWithSubParams("Source", selFile) {
            Values = files,
            SubParams = subParams,
            ParamNameWidth = 136,
            TotalWidth = 735
        },
        new MultiChoiceParam("Additional sources") { Values = files });
}

/// <summary>
/// Returns the index of the first column name that contains <paramref name="text"/>,
/// compared ordinally and ignoring case, or -1 when no column name matches.
/// </summary>
private static int FindColumnContaining(List<string> columnNames, string text) {
    for (int j = 0; j < columnNames.Count; j++) {
        if (columnNames[j].IndexOf(text, System.StringComparison.OrdinalIgnoreCase) >= 0) {
            return j;
        }
    }
    return -1;
}
/// <summary>
/// Looks up the selected annotation columns for the identifiers of every row and collects
/// the results separately for categorical, text and numeric annotation columns.
/// </summary>
/// <param name="nrows">Number of rows; every output column gets this many entries.</param>
/// <param name="para">
/// Parameters providing "Source" (with the "Annotations to be added" sub-parameter) and
/// "Additional sources".
/// </param>
/// <param name="baseIds">Per-row identifier strings; several ids in one row are separated by ';'.</param>
/// <param name="processInfo">Receives <c>ErrString</c> when an additional source does not match.</param>
/// <param name="name">Names of the selected annotation columns (all column names of the source on failure).</param>
/// <param name="catColInds">Indices assigned to categorical columns by <c>SplitIds</c>.</param>
/// <param name="textColInds">Indices assigned to text columns by <c>SplitIds</c>.</param>
/// <param name="numColInds">Indices assigned to numeric columns by <c>SplitIds</c>.</param>
/// <param name="catCols">Per categorical column and row, the sorted distinct values.</param>
/// <param name="textCols">Per text column and row, the sorted distinct values joined via <c>StringUtils.Concat(";", ...)</c>.</param>
/// <param name="numCols">Per numeric column and row, <c>ArrayUtils.Median</c> of the collected values.</param>
/// <returns>
/// <c>false</c> when an additional source differs from the main source in column types or
/// column names (all index/column outputs are then empty arrays); otherwise <c>true</c>.
/// </returns>
public static bool ProcessDataAddAnnotation(int nrows, Parameters para, string[] baseIds, ProcessInfo processInfo,
    out string[] name, out int[] catColInds, out int[] textColInds, out int[] numColInds,
    out string[][][] catCols, out string[][] textCols, out double[][] numCols) {
    string[] baseNames;
    AnnotType[][] types;
    string[] files;
    string[][] names = PerseusUtils.GetAvailableAnnots(out baseNames, out types, out files);
    const bool deHyphenate = true;
    ParameterWithSubParams<int> spd = para.GetParamWithSubParams<int>("Source");
    int ind = spd.Value;
    Parameters param = spd.GetSubParameters();
    AnnotType[] type = types[ind];
    name = names[ind];
    int[] addtlSources = para.GetParam<int[]>("Additional sources").Value;
    // The main source may also be ticked under "Additional sources"; do not read it twice.
    addtlSources = ArrayUtils.Remove(addtlSources, ind);
    // Every additional source must have exactly the same column types and names as the main
    // one, because the same column selection is applied to all of them below.
    foreach (int addtlSource in addtlSources) {
        AnnotType[] type1 = types[addtlSource];
        string[] name1 = names[addtlSource];
        if (!ArrayUtils.EqualArrays(type, type1) || !ArrayUtils.EqualArrays(name, name1)) {
            return FailColumnStructureMismatch(processInfo, out catColInds, out textColInds, out numColInds,
                out catCols, out textCols, out numCols);
        }
    }
    int[] selection = param.GetParam<int[]>("Annotations to be added").Value;
    type = ArrayUtils.SubArray(type, selection);
    name = ArrayUtils.SubArray(name, selection);
    HashSet<string> allIds = GetAllIds(baseIds, deHyphenate);
    Dictionary<string, string[]> mapping = ReadMapping(allIds, files[ind], selection);
    foreach (int addtlSource in addtlSources) {
        Dictionary<string, string[]> mapping1 = ReadMapping(allIds, files[addtlSource], selection);
        // Earlier sources win: an additional source only contributes ids not mapped yet.
        foreach (KeyValuePair<string, string[]> entry in mapping1) {
            if (!mapping.ContainsKey(entry.Key)) {
                mapping.Add(entry.Key, entry.Value);
            }
        }
    }
    SplitIds(type, out textColInds, out catColInds, out numColInds);
    catCols = new string[catColInds.Length][][];
    for (int i = 0; i < catCols.Length; i++) {
        catCols[i] = new string[nrows][];
    }
    textCols = new string[textColInds.Length][];
    for (int i = 0; i < textCols.Length; i++) {
        textCols[i] = new string[nrows];
    }
    numCols = new double[numColInds.Length][];
    for (int i = 0; i < numCols.Length; i++) {
        numCols[i] = new double[nrows];
    }
    for (int i = 0; i < nrows; i++) {
        string[] ids = baseIds[i].Length > 0 ? baseIds[i].Split(';') : new string[0];
        // Per-row accumulators, one per output column of each kind.
        HashSet<string>[] catVals = new HashSet<string>[catCols.Length];
        for (int j = 0; j < catVals.Length; j++) {
            catVals[j] = new HashSet<string>();
        }
        HashSet<string>[] textVals = new HashSet<string>[textCols.Length];
        for (int j = 0; j < textVals.Length; j++) {
            textVals[j] = new HashSet<string>();
        }
        List<double>[] numVals = new List<double>[numCols.Length];
        for (int j = 0; j < numVals.Length; j++) {
            numVals[j] = new List<double>();
        }
        foreach (string id in ids) {
            string[] values;
            if (!mapping.TryGetValue(id, out values)) {
                // No exact hit: retry with the part of the id before its first '-'.
                int hyphen = id.IndexOf('-');
                if (hyphen < 0 || !mapping.TryGetValue(id.Substring(0, hyphen), out values)) {
                    continue;
                }
            }
            AddCatVals(ArrayUtils.SubArray(values, catColInds), catVals);
            AddTextVals(ArrayUtils.SubArray(values, textColInds), textVals);
            AddNumVals(ArrayUtils.SubArray(values, numColInds), numVals);
        }
        for (int j = 0; j < catVals.Length; j++) {
            string[] q = ArrayUtils.ToArray(catVals[j]);
            Array.Sort(q);
            catCols[j][i] = q;
        }
        for (int j = 0; j < textVals.Length; j++) {
            string[] q = ArrayUtils.ToArray(textVals[j]);
            Array.Sort(q);
            textCols[j][i] = StringUtils.Concat(";", q);
        }
        for (int j = 0; j < numVals.Length; j++) {
            numCols[j][i] = ArrayUtils.Median(numVals[j]);
        }
    }
    return true;
}

/// <summary>
/// Records the column-structure error on <paramref name="processInfo"/>, sets every output
/// to an empty array and returns <c>false</c>.
/// </summary>
private static bool FailColumnStructureMismatch(ProcessInfo processInfo, out int[] catColInds,
    out int[] textColInds, out int[] numColInds, out string[][][] catCols, out string[][] textCols,
    out double[][] numCols) {
    processInfo.ErrString = "Additional annotation file does not have the same column structure.";
    catColInds = new int[] {};
    textColInds = new int[] {};
    numColInds = new int[] {};
    catCols = new string[][][] {};
    textCols = new string[][] {};
    numCols = new double[][] {};
    return false;
}