Beispiel #1
0
        // Checks that ReadMapping resolves the id "P08908" to the single value "HTR1A" when the
        // "gene name" column of the "uniprot" annotation source is selected.
        // NOTE(review): Assert.Inconclusive is the first statement, so the rest of the body is
        // presumably never reached until the test is moved to the integration suite.
        public void ReadMappingTest()
        {
            Assert.Inconclusive("Should be moved to integration tests, using conf");
            //[DeploymentItem("conf", "conf")]
            string[] baseNames, files;
            var annots = PerseusUtils.GetAvailableAnnots(out baseNames, out files);

            // Ordinal, case-insensitive matching keeps the lookup independent of the machine's culture.
            var uniprotIndex = System.Array.FindIndex(baseNames,
                name => string.Equals(name, "uniprot", System.StringComparison.OrdinalIgnoreCase));
            Assert.IsTrue(uniprotIndex >= 0, "No annotation source with base name 'uniprot' was found.");

            var selection = System.Array.FindIndex(annots[uniprotIndex],
                name => string.Equals(name, "gene name", System.StringComparison.OrdinalIgnoreCase));
            Assert.IsTrue(selection >= 0, "The uniprot annotation source has no 'gene name' column.");

            var ids = new[] { "P08908" };
            // Read from the file that belongs to the source found above rather than a fixed position,
            // so the test does not depend on the order in which the annotation files are listed.
            var mapping = AddAnnotationToMatrix.ReadMapping(ids, files[uniprotIndex], new[] { selection });

            CollectionAssert.AreEqual(new[] { "HTR1A" }, mapping[ids[0]]);
        }
        /// <summary>
        /// Returns the string column of <paramref name="mdata"/> that is configured as the
        /// "&lt;base name&gt; column" sub-parameter of the currently selected "Source".
        /// </summary>
        /// <param name="para">Parameter set containing the "Source" choice and its sub-parameters.</param>
        /// <param name="mdata">Data whose <c>StringColumns</c> are indexed by the configured column number.</param>
        /// <returns>The selected entry of <c>mdata.StringColumns</c>.</returns>
        private static string[] GetBaseIds(Parameters para, IDataWithAnnotationColumns mdata)
        {
            // Only the base names are used here; the other two out values are required by the call.
            string[] sourceNames;
            AnnotType[][] unusedTypes;
            string[] unusedFiles;
            PerseusUtils.GetAvailableAnnots(out sourceNames, out unusedTypes, out unusedFiles);

            ParameterWithSubParams<int> source = para.GetParamWithSubParams<int>("Source");
            int selectedSource = source.Value;
            Parameters sourceSettings = source.GetSubParameters();

            // The sub-parameter is keyed by the base name of the selected source.
            int columnIndex = sourceSettings.GetParam<int>(sourceNames[selectedSource] + " column").Value;
            return mdata.StringColumns[columnIndex];
        }
 // Expects GetAvailableAnnots to report three annotation sources with the base names
 // ENSG / UniProt / ENSG, the first of which offers the columns
 // Chromosome / Base pair index / Orientation.
 // NOTE(review): Assert.Inconclusive runs right after GetAnnotFiles, so the checks below are
 // presumably not reached until the test is moved to the integration suite.
 public void GetAvailableAnnotsTest()
 {
     // The result is not inspected; the call is kept so it still runs before the test is cut short.
     PerseusUtils.GetAnnotFiles();
     Assert.Inconclusive("Should be moved to integration tests, using conf");
     //[DeploymentItem("conf", "conf")]
     string[] sourceNames, sourceFiles;
     var columnNames = PerseusUtils.GetAvailableAnnots(out sourceNames, out sourceFiles);

     const int expectedSources = 3;
     Assert.AreEqual(expectedSources, sourceFiles.Length);
     Assert.AreEqual(expectedSources, sourceNames.Length);
     Assert.AreEqual(expectedSources, columnNames.Length);

     var expectedNames = new[] { "ENSG", "UniProt", "ENSG" };
     CollectionAssert.AreEqual(expectedNames, sourceNames);

     var expectedFirstColumns = new[] { "Chromosome", "Base pair index", "Orientation" };
     CollectionAssert.AreEqual(expectedFirstColumns, columnNames[0]);
 }
 /// <summary>
 /// Restricts the annotations reported by <c>PerseusUtils.GetAvailableAnnots</c> to the columns
 /// whose type is <c>AnnotType.Text</c>.
 /// </summary>
 /// <param name="baseNames">Passed through unchanged from <c>GetAvailableAnnots</c>.</param>
 /// <param name="inds">Per file, the positions of the text-typed columns in that file's type list.</param>
 /// <param name="files">Passed through unchanged from <c>GetAvailableAnnots</c>.</param>
 /// <returns>
 /// Per file, the result of <c>ArrayUtils.SubArray</c> applied to the file's annotation names and
 /// its text-column positions (presumably the names at those positions).
 /// </returns>
 private static string[][] GetAvailableTextAnnots(out string[] baseNames, out int[][] inds, out string[] files)
 {
     AnnotType[][] columnTypes;
     string[][] columnNames = PerseusUtils.GetAvailableAnnots(out baseNames, out columnTypes, out files);

     int fileCount = files.Length;
     inds = new int[fileCount][];
     for (int file = 0; file < fileCount; file++)
     {
         AnnotType[] typesOfFile = columnTypes[file];
         List<int> textPositions = new List<int>();
         for (int column = 0; column < typesOfFile.Length; column++)
         {
             if (typesOfFile[column] != AnnotType.Text)
             {
                 continue;
             }
             textPositions.Add(column);
         }

         // The names array is narrowed in place, so the returned array is the one obtained above.
         columnNames[file] = ArrayUtils.SubArray(columnNames[file], textPositions);
         inds[file] = textPositions.ToArray();
     }
     return columnNames;
 }
        /// <summary>
        /// Builds the parameter set for adding annotations: a "Source" choice over the available
        /// annotation files (each with a base-identifier column choice and an "Annotations to be added"
        /// multi-choice) plus an "Additional sources" multi-choice over the same files.
        /// </summary>
        /// <param name="mdata">Matrix whose string column names are offered as base-identifier columns.</param>
        /// <param name="errorString">
        /// Set to a message listing the files reported as bad by <c>PerseusUtils.GetAvailableAnnots</c>;
        /// left untouched when there are none.
        /// </param>
        /// <returns>The assembled parameters.</returns>
        public Parameters GetParameters(IMatrixData mdata, ref string errorString)
        {
            List<string> colChoice = mdata.StringColumnNames;

            string[] baseNames;
            string[] files;
            List<string> badFiles;

            string[][] annots = PerseusUtils.GetAvailableAnnots(out baseNames, out files, out badFiles);
            if (badFiles.Any())
            {
                errorString = $"Could not load annotations from file(s): {string.Join(", ", badFiles)}";
            }

            // The first file whose path mentions "perseusannot" is treated as the main annotation
            // file and preselected; -1 means no such file exists.
            int mainAnnotFile = -1;
            for (int i = 0; i < files.Length; i++)
            {
                if (files[i].IndexOf("perseusannot", System.StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    mainAnnotFile = i;
                    break;
                }
            }
            int selFile = mainAnnotFile < 0 ? 0 : mainAnnotFile;

            Parameters[] subParams = new Parameters[files.Length];
            for (int i = 0; i < subParams.Length; i++)
            {
                int colInd;
                if (i == mainAnnotFile)
                {
                    // Prefer a "majority protein ids" column and fall back to any "protein ids" column.
                    colInd = IndexOfColumnContaining(colChoice, "MAJORITY PROTEIN IDS");
                    if (colInd < 0)
                    {
                        colInd = IndexOfColumnContaining(colChoice, "PROTEIN IDS");
                    }
                }
                else
                {
                    colInd = IndexOfColumnContaining(colChoice, baseNames[i]);
                }
                if (colInd < 0)
                {
                    // Nothing matched: default to the first column.
                    colInd = 0;
                }

                subParams[i] = new Parameters(
                    new SingleChoiceParam(baseNames[i] + " column")
                    {
                        Values = colChoice,
                        Value = colInd,
                        Help =
                            "Specify here the column that contains the base identifiers which are going to be " +
                            "matched to the annotation."
                    },
                    new MultiChoiceParam("Annotations to be added")
                    {
                        Values = annots[i]
                    });
            }

            return new Parameters(
                new SingleChoiceWithSubParams("Source", selFile)
                {
                    Values = files,
                    SubParams = subParams,
                    ParamNameWidth = 136,
                    TotalWidth = 735
                },
                new MultiChoiceParam("Additional sources")
                {
                    Values = files
                });
        }

        /// <summary>
        /// Returns the index of the first entry of <paramref name="names"/> that contains
        /// <paramref name="fragment"/>, or -1 when no entry does. The comparison is ordinal and
        /// case-insensitive, so the result does not depend on the current culture.
        /// </summary>
        private static int IndexOfColumnContaining(List<string> names, string fragment)
        {
            for (int j = 0; j < names.Count; j++)
            {
                if (names[j].IndexOf(fragment, System.StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    return j;
                }
            }
            return -1;
        }
        /// <summary>
        /// Looks up the selected annotations for every row's base identifiers and returns them
        /// split into categorical, text and numeric columns.
        /// </summary>
        /// <param name="nrows">Number of rows to fill; <paramref name="baseIds"/> is indexed from 0 to nrows - 1.</param>
        /// <param name="para">Parameters holding "Source" (with "Annotations to be added") and "Additional sources".</param>
        /// <param name="baseIds">Per row, the identifiers separated by ';'. Null or empty entries yield no identifiers.</param>
        /// <param name="processInfo">Receives the error message when an additional source does not match the main one.</param>
        /// <param name="name">Names of the selected annotations (all names of the main source on failure).</param>
        /// <param name="catColInds">Indices assigned to categorical columns by <c>SplitIds</c>; empty on failure.</param>
        /// <param name="textColInds">Indices assigned to text columns by <c>SplitIds</c>; empty on failure.</param>
        /// <param name="numColInds">Indices assigned to numeric columns by <c>SplitIds</c>; empty on failure.</param>
        /// <param name="catCols">Per categorical column and row, the sorted distinct values; empty on failure.</param>
        /// <param name="textCols">Per text column and row, the sorted distinct values passed through <c>StringUtils.Concat(";", ...)</c>; empty on failure.</param>
        /// <param name="numCols">Per numeric column and row, <c>ArrayUtils.Median</c> of the collected values; empty on failure.</param>
        /// <returns>
        /// <c>false</c> when an additional source differs from the main source in column types or
        /// names; <c>true</c> otherwise.
        /// </returns>
        public static bool ProcessDataAddAnnotation(int nrows, Parameters para, string[] baseIds, ProcessInfo processInfo,
                                                    out string[] name, out int[] catColInds, out int[] textColInds, out int[] numColInds, out string[][][] catCols,
                                                    out string[][] textCols, out double[][] numCols)
        {
            string[] baseNames;
            AnnotType[][] types;
            string[] files;
            string[][] names = PerseusUtils.GetAvailableAnnots(out baseNames, out types, out files);
            const bool deHyphenate = true;
            ParameterWithSubParams<int> spd = para.GetParamWithSubParams<int>("Source");
            int ind = spd.Value;
            Parameters param = spd.GetSubParameters();

            AnnotType[] type = types[ind];
            name = names[ind];
            int[] addtlSources = para.GetParam<int[]>("Additional sources").Value;
            addtlSources = ArrayUtils.Remove(addtlSources, ind);

            // Every additional source must have exactly the same column types and names as the main
            // one, because the same selection indices are applied to all of them below.
            foreach (int addtlSource in addtlSources)
            {
                if (ArrayUtils.EqualArrays(type, types[addtlSource]) &&
                    ArrayUtils.EqualArrays(name, names[addtlSource]))
                {
                    continue;
                }
                processInfo.ErrString = "Additional annotation file does not have the same column structure.";
                catColInds = new int[0];
                textColInds = new int[0];
                numColInds = new int[0];
                catCols = new string[0][][];
                textCols = new string[0][];
                numCols = new double[0][];
                return false;
            }

            int[] selection = param.GetParam<int[]>("Annotations to be added").Value;
            type = ArrayUtils.SubArray(type, selection);
            name = ArrayUtils.SubArray(name, selection);
            HashSet<string> allIds = GetAllIds(baseIds, deHyphenate);
            Dictionary<string, string[]> mapping = ReadMapping(allIds, files[ind], selection);

            // Entries from additional sources only fill gaps; keys already present keep their values.
            foreach (int addtlSource in addtlSources)
            {
                Dictionary<string, string[]> extra = ReadMapping(allIds, files[addtlSource], selection);
                foreach (KeyValuePair<string, string[]> entry in extra)
                {
                    if (!mapping.ContainsKey(entry.Key))
                    {
                        mapping.Add(entry.Key, entry.Value);
                    }
                }
            }

            SplitIds(type, out textColInds, out catColInds, out numColInds);
            catCols = new string[catColInds.Length][][];
            for (int i = 0; i < catCols.Length; i++)
            {
                catCols[i] = new string[nrows][];
            }
            textCols = new string[textColInds.Length][];
            for (int i = 0; i < textCols.Length; i++)
            {
                textCols[i] = new string[nrows];
            }
            numCols = new double[numColInds.Length][];
            for (int i = 0; i < numCols.Length; i++)
            {
                numCols[i] = new double[nrows];
            }

            for (int i = 0; i < nrows; i++)
            {
                string[] ids = string.IsNullOrEmpty(baseIds[i]) ? new string[0] : baseIds[i].Split(';');

                // Per-row accumulators, one per output column.
                HashSet<string>[] catVals = new HashSet<string>[catCols.Length];
                for (int j = 0; j < catVals.Length; j++)
                {
                    catVals[j] = new HashSet<string>();
                }
                HashSet<string>[] textVals = new HashSet<string>[textCols.Length];
                for (int j = 0; j < textVals.Length; j++)
                {
                    textVals[j] = new HashSet<string>();
                }
                List<double>[] numVals = new List<double>[numCols.Length];
                for (int j = 0; j < numVals.Length; j++)
                {
                    numVals[j] = new List<double>();
                }

                foreach (string id in ids)
                {
                    string[] values;
                    if (!mapping.TryGetValue(id, out values))
                    {
                        // Fall back to the part before the first '-' when the full id is unknown.
                        int hyphen = id.IndexOf('-');
                        if (hyphen < 0 || !mapping.TryGetValue(id.Substring(0, hyphen), out values))
                        {
                            continue;
                        }
                    }
                    AddCatVals(ArrayUtils.SubArray(values, catColInds), catVals);
                    AddTextVals(ArrayUtils.SubArray(values, textColInds), textVals);
                    AddNumVals(ArrayUtils.SubArray(values, numColInds), numVals);
                }

                for (int j = 0; j < catVals.Length; j++)
                {
                    string[] q = ArrayUtils.ToArray(catVals[j]);
                    Array.Sort(q);
                    catCols[j][i] = q;
                }
                for (int j = 0; j < textVals.Length; j++)
                {
                    string[] q = ArrayUtils.ToArray(textVals[j]);
                    Array.Sort(q);
                    textCols[j][i] = StringUtils.Concat(";", q);
                }
                for (int j = 0; j < numVals.Length; j++)
                {
                    numCols[j][i] = ArrayUtils.Median(numVals[j]);
                }
            }
            return true;
        }