コード例 #1
0
        /// <summary>
        /// Copies a Mascot CSV report, appending to each line the FASTA header text of the protein whose
        /// identifier is found in the line's third comma-separated column.
        /// </summary>
        /// <param name="csvMascotFile">Path of the Mascot CSV report to annotate.</param>
        /// <param name="fastaFile">Path of the FASTA file whose header lines (starting with "&gt;") are scanned.</param>
        /// <param name="csvFileOut">Path handed to the <c>vsCSVWriter</c> that receives the annotated lines.</param>
        /// <remarks>
        /// Any exception is caught and its message and stack trace are printed to the console; in that case
        /// <c>WriteToFile</c> is not called.
        /// </remarks>
        public static void AppendProteinDescriptionToMascotReport(string csvMascotFile, string fastaFile, string csvFileOut)
        {
            vsCSV       csvMascot = new vsCSV(csvMascotFile);
            vsCSVWriter writer    = new vsCSVWriter(csvFileOut);

            try
            {
                // Collect the identifiers to look up before the FASTA file is opened, so that
                // nothing that can throw sits between opening the stream and the using block.
                Dictionary<string, string> descriptionById = new Dictionary<string, string>();
                foreach (string line in csvMascot.LINES_LIST)
                {
                    string[] splits = line.Split(',');
                    if (splits.Length > 2)
                    {
                        descriptionById[splits[2]] = "";
                    }
                }

                FileStream fs;
                try
                {
                    fs = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.Read);
                }
                catch (System.Exception)
                {
                    // Retry with a more permissive share mode (e.g. the file is open for writing elsewhere).
                    fs = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
                }

                using (fs)
                using (StreamReader sr = new StreamReader(fs))
                {
                    string line;
                    while ((line = sr.ReadLine()) != null)
                    {
                        if (!line.StartsWith(">"))
                        {
                            continue;
                        }

                        // The identifier is the text between ">" and the first space.
                        string id = line.Substring(1).Split(' ')[0];
                        if (descriptionById.ContainsKey(id))
                        {
                            // Everything after ">" + identifier, including the separating space.
                            // When an identifier occurs in several headers the last one wins.
                            descriptionById[id] = line.Substring(id.Length + 1);
                        }
                    }
                }

                foreach (string line in csvMascot.LINES_LIST)
                {
                    string[] splits = line.Split(',');
                    string   description;
                    if (splits.Length > 2 && descriptionById.TryGetValue(splits[2], out description))
                    {
                        writer.AddLine(line + "," + description);
                    }
                    else
                    {
                        writer.AddLine(line);
                    }
                }
                writer.WriteToFile();
            }
            catch (System.Exception ex)
            {
                Console.WriteLine(ex.Message);
                Console.WriteLine(ex.StackTrace);
            }
        }
コード例 #2
0
ファイル: Sample.cs プロジェクト: olivierlizotte/Trinity.Main
        /*
        public static vsSDF LoadSDF(Sample project)
        {
            try
            {
                //if (project.m_vsSDF == null || string.Compare(project.m_vsSDF.m_name, project.sSDF) != 0)
                //{
                    vsSDF result = null;
                    result = vsSDF.Load(project.sSDF, null);
                    result.MZ_GAPS = Proteomics.Cluster.Scoring.MzTol(result.MAX_MZ_VALUE);
                    project.sSDF = result.m_name;//Change the name in case there was misspelling fixed by the SDF.Load method
                    //project.m_vsSDF = result;
                //}
                    return result;// project.m_vsSDF;
            }
            catch (Exception ex)
            {
                Sol.CONSOLE.OutputLine("Error jk302me :" + ex.Message + "    " + ex.StackTrace);
                return null;
            }
        }//*/
        
        /// <summary>
        /// Clears this collection and refills it with one <c>Sample</c> per data line of a project CSV file.
        /// </summary>
        /// <param name="project_file">Path of the comma-delimited project file; its first line is skipped as a header.</param>
        /// <param name="maxFractionSpreading">Value handed unchanged to every <c>Sample</c> constructor.</param>
        /// <remarks>
        /// A line that fails to parse is reported through <c>dbOptions.ConSole</c> and skipped; the
        /// remaining lines are still loaded.
        /// </remarks>
        private void loadProjectFile(string project_file, int maxFractionSpreading)
        {
            // 1-based number of the line being processed, used in error messages
            int lineNum = 0;
            try
            {
                this.Clear();

                vsCSV csvProject = new vsCSV(project_file);
                for (int index = 0; index < csvProject.LINES_LIST.Count; index++)
                {
                    try
                    {
                        string rawLine = csvProject.LINES_LIST[index];
                        lineNum++;

                        // the project file is comma delimited
                        string[] fields = rawLine.Split(',');

                        // the first line is the header
                        if (lineNum == 1)
                        {
                            continue;
                        }

                        // Column layout: condition, replicate, fraction, peptide map location, sdf file location[, name]
                        // ex. 1,1,1,c:\work\adhoc\Promix_070706\Promix_10_070706_Out_peptides.csv,c:\work\adhoc\Promix_070706\Promix_10_070706_Out.sdf
                        int    condition  = int.Parse(fields[0]);
                        int    replicate  = int.Parse(fields[1]);
                        int    fraction   = int.Parse(fields[2]);
                        string peptideMap = fields[3];
                        string sdf        = fields[4];
                        // the sixth column is optional
                        string nameCol    = fields.Length >= 6 ? fields[5] : "";

                        // Paths are deliberately kept in their original case (lower-casing breaks on Linux).
                        Sample sample = new Sample(condition, replicate, fraction, peptideMap, sdf, maxFractionSpreading, nameCol);
                        this.Add(sample);
                    }
                    catch (System.Exception ex)
                    {
                        dbOptions.ConSole.WriteLine("Error sfj4aau34 : LineNum = " + lineNum + "   " + ex.Message + "    " + ex.StackTrace);
                    }
                }
            }
            catch (Exception ex)
            {
                dbOptions.ConSole.WriteLine("Error sfj4u34 : " + ex.Message + "  \n  " + ex.StackTrace);
            }
        }
コード例 #3
0
        /// <summary>
        /// Writes to <paramref name="fastaFileOut"/> every FASTA record of <paramref name="fastaFileIn"/>
        /// whose header line contains at least one of the lines of <paramref name="csvFile"/>.
        /// </summary>
        /// <param name="fastaFileIn">Path of the FASTA file to filter.</param>
        /// <param name="fastaFileOut">Path of the filtered FASTA file; it is opened with <c>FileMode.CreateNew</c>.</param>
        /// <param name="csvFile">Path of the file whose lines are the identifiers to keep.</param>
        /// <remarks>
        /// A record is a header line (starting with "&gt;") plus the lines that follow it up to the next header.
        /// Lines before the first header are dropped. Any exception is caught and printed to the console.
        /// </remarks>
        public static void FilterFromIDs(string fastaFileIn, string fastaFileOut, string csvFile)
        {
            vsCSV csv = new vsCSV(csvFile);

            try
            {
                FileStream fs;
                try
                {
                    fs = new FileStream(fastaFileIn, FileMode.Open, FileAccess.Read, FileShare.Read);
                }
                catch (System.Exception)
                {
                    // Retry with a more permissive share mode.
                    fs = new FileStream(fastaFileIn, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
                }

                // Nested using blocks guarantee the input stream is released even when creating
                // the output file fails.
                using (fs)
                using (FileStream fsOut = new FileStream(fastaFileOut, FileMode.CreateNew))
                using (StreamWriter sw = new StreamWriter(fsOut))
                using (StreamReader sr = new StreamReader(fs))
                {
                    string line;
                    bool   ignore = true;
                    while ((line = sr.ReadLine()) != null)
                    {
                        if (line.StartsWith(">"))
                        {
                            ignore = true;
                            foreach (string id in csv.LINES_LIST)
                            {
                                // Skip blank identifiers: string.Contains("") is always true, so a
                                // single empty line in the id file would let every record through.
                                if (!string.IsNullOrEmpty(id) && line.Contains(id))
                                {
                                    ignore = false;
                                    break;
                                }
                            }
                        }
                        if (!ignore)
                        {
                            sw.WriteLine(line);
                        }
                    }
                }
            }
            catch (System.Exception ex)
            {
                Console.WriteLine(ex.Message);
                Console.WriteLine(ex.StackTrace);
            }
        }
コード例 #4
0
ファイル: Tests.cs プロジェクト: olivierlizotte/Trinity.Main
        /// <summary>
        /// Merges three hard-coded Mascot CSV exports into one comparison file with one row per distinct
        /// (column 1, column 33) key and four value columns (columns 13, 14, 18 and 26) per input file.
        /// </summary>
        /// <returns>Always <c>true</c>.</returns>
        /// <remarks>
        /// When a key occurs several times in the same input file, the last occurrence wins.
        /// Files in which a key is absent contribute four empty cells.
        /// </remarks>
        public static bool MascotCompare()
        {
            // Highest column index read from a line; lines with fewer columns are skipped.
            const int lastColumnUsed = 33;

            List<string> listMascotFiles = new List<string>();

            listMascotFiles.Add(@"N:\Thibault\Frederic Lamoliatte\Olivier\no MSed.csv");
            listMascotFiles.Add(@"N:\Thibault\Frederic Lamoliatte\Olivier\MSed1.csv");
            listMascotFiles.Add(@"N:\Thibault\Frederic Lamoliatte\Olivier\MSed2.csv");

            // key -> one cell group per input file (null when the key is absent from that file)
            Dictionary<string, string[]> dicOfPep = new Dictionary<string, string[]>();

            for (int i = 0; i < listMascotFiles.Count; i++)
            {
                vsCSV csv = new vsCSV(listMascotFiles[i]);
                foreach (string line in csv.LINES_LIST)
                {
                    string[] splits = line.Split(vsCSV._Generic_Separator);
                    // The previous guard (Length > 26) still let splits[33] throw on lines
                    // having between 27 and 33 columns.
                    if (splits.Length <= lastColumnUsed)
                    {
                        continue;
                    }

                    string   key = splits[1] + "," + splits[33];//raw+scan+seq+mod
                    string[] cells;
                    if (!dicOfPep.TryGetValue(key, out cells))
                    {
                        // Sized from the file list so both stay in sync if a file is added.
                        cells = new string[listMascotFiles.Count];
                        dicOfPep.Add(key, cells);
                    }

                    cells[i] = "," + splits[13] + "," + splits[14] + "," + splits[18] + "," + splits[26];
                }
            }

            vsCSVWriter writer = new vsCSVWriter(@"C:\_IRIC\DATA\Sumo\outputCompare.csv");

            foreach (KeyValuePair<string, string[]> entry in dicOfPep)
            {
                string str = entry.Key;
                foreach (string cellGroup in entry.Value)
                {
                    // Four empty cells keep the columns aligned for files lacking this key.
                    str += cellGroup != null ? cellGroup : ",,,,";
                }
                writer.AddLine(str);
            }
            writer.WriteToFile();

            return(true);
        }
コード例 #5
0
        /// <summary>
        /// For every FASTA header line and every CSV line whose second column occurs in that header,
        /// writes the CSV line followed by up to 13 characters of the header taken at offset
        /// (identifier length + 1).
        /// </summary>
        /// <param name="fastaFile">Path of the FASTA file whose header lines (starting with "&gt;") are scanned.</param>
        /// <param name="csvUbiFile">Path of the CSV file; its second column is the identifier searched in the headers.</param>
        /// <param name="csvFileOut">Path handed to the <c>vsCSVWriter</c> that receives the output lines.</param>
        /// <remarks>
        /// CSV lines that have no second column, or an empty one, are ignored. Output lines are ordered
        /// by FASTA header first, then by CSV line. Any exception is caught and printed to the console.
        /// </remarks>
        public static void AddIPIToUbiPredFile(string fastaFile, string csvUbiFile, string csvFileOut)
        {
            vsCSV       csv    = new vsCSV(csvUbiFile);
            vsCSVWriter writer = new vsCSVWriter(csvFileOut);

            try
            {
                // Split every CSV line once, instead of once per FASTA header.
                List<string> csvLines = new List<string>();
                List<string> ids      = new List<string>();
                foreach (string csvLine in csv.LINES_LIST)
                {
                    string[] splits = csvLine.Split(vsCSV._Generic_Separator);
                    // An empty identifier would match every header (Contains("") is always true).
                    if (splits.Length > 1 && splits[1].Length > 0)
                    {
                        csvLines.Add(csvLine);
                        ids.Add(splits[1]);
                    }
                }

                FileStream fs;
                try
                {
                    fs = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.Read);
                }
                catch (System.Exception)
                {
                    // Retry with a more permissive share mode.
                    fs = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
                }

                using (fs)
                using (StreamReader sr = new StreamReader(fs))
                {
                    string line;
                    while ((line = sr.ReadLine()) != null)
                    {
                        if (!line.StartsWith(">"))
                        {
                            continue;
                        }

                        for (int k = 0; k < ids.Count; k++)
                        {
                            if (!line.Contains(ids[k]))
                            {
                                continue;
                            }

                            // Clamp the slice so that a short header yields a shorter (possibly
                            // empty) suffix instead of aborting the whole run with an exception.
                            int    start  = ids[k].Length + 1;
                            int    length = Math.Min(13, line.Length - start);
                            string suffix = length > 0 ? line.Substring(start, length) : "";
                            writer.AddLine(csvLines[k] + "," + suffix);
                        }
                    }
                }
                writer.WriteToFile();
            }
            catch (System.Exception ex)
            {
                Console.WriteLine(ex.Message);
                Console.WriteLine(ex.StackTrace);
            }
        }
コード例 #6
0
ファイル: Tests.cs プロジェクト: olivierlizotte/Trinity.Main
        /// <summary>
        /// Reads a CSV file and groups the integer found in column 3 of each line under the key
        /// "column4|column3".
        /// </summary>
        /// <param name="filename">Path of the CSV file; every line, including the first, is parsed.</param>
        /// <returns>A dictionary from "column4|column3" to the list of parsed column-3 values.</returns>
        public static Dictionary <string, List <int> > ReadZNF(string filename)
        {
            vsCSV source = new vsCSV(filename);
            Dictionary <string, List <int> > sitesByKey = new Dictionary <string, List <int> >();

            foreach (string row in source.LINES_LIST)
            {
                string[] cells = row.Split(vsCSV._Generic_Separator);
                string   key   = cells[4] + "|" + cells[3];

                // Create the bucket the first time a key is met.
                List <int> sites;
                if (!sitesByKey.TryGetValue(key, out sites))
                {
                    sites = new List <int>();
                    sitesByKey.Add(key, sites);
                }
                sites.Add(int.Parse(cells[3]));
            }
            return sitesByKey;
        }
コード例 #7
0
        /// <summary>
        /// Loads MS/MS spectra from a text file made of one header line per spectrum followed by that
        /// spectrum's peak lines, and optionally loads the associated tracks.
        /// </summary>
        /// <param name="filenameMSMS">Path of the spectra file; its first line must equal <c>ProductSpectrum.TITLE</c>.</param>
        /// <param name="filenameTracks">Path of the tracks file; skipped when null or empty.</param>
        /// <param name="dbOptions">Options object whose <c>ConSole</c> receives parse-error messages.</param>
        /// <returns>
        /// The loaded spectra, or <c>null</c> when the file is empty or its first line is not the expected title.
        /// </returns>
        /// <remarks>
        /// A malformed peak line is reported and skipped. A malformed spectrum header line throws.
        /// </remarks>
        public static Spectra Import(string filenameMSMS, string filenameTracks, DBOptions dbOptions)
        {
            Spectra spectra = new Spectra();
            vsCSV   csv     = new vsCSV(filenameMSMS);

            if (csv.LINES_LIST.Count == 0 || csv.LINES_LIST[0].CompareTo(ProductSpectrum.TITLE) != 0)
            {
                return(null);
            }
            // NOTE(review): after the last peak line of a spectrum the loop increment below skips one
            // more line before the next header is read — presumably the file format has a separator
            // line there; confirm against the code that exports this file.
            for (int i = 1; i < csv.LINES_LIST.Count; i++)
            {
                string[] splits  = csv.LINES_LIST[i].Split(vsCSV._Generic_Separator);
                double   mz      = double.Parse(splits[3]);
                int      charge  = int.Parse(splits[5]);
                int      nbPeaks = int.Parse(splits[9]);
                GraphML_List <MsMsPeak> peaks = new GraphML_List <MsMsPeak>(nbPeaks);
                i++; // move from the header line to the first peak line
                for (int j = 0; j < nbPeaks; i++, j++)
                {
                    // A truncated file used to index past the end of the list, and the catch handler
                    // indexed the same position again, so the exception escaped the method.
                    if (i >= csv.LINES_LIST.Count)
                    {
                        dbOptions.ConSole.WriteLine("Error parsing spectrum : expected " + nbPeaks + " peak lines but the file ended after " + j);
                        break;
                    }

                    string peakLine = csv.LINES_LIST[i];
                    try
                    {
                        string[] splitPeaks = peakLine.Split('\t');
                        // The third tab-separated column is optional and defaults to 0.
                        int thirdColumn = splitPeaks.Length > 2 ? int.Parse(splitPeaks[2]) : 0;
                        peaks.Add(new MsMsPeak(double.Parse(splitPeaks[0]), double.Parse(splitPeaks[1]), thirdColumn));
                    }
                    catch (Exception)
                    {
                        dbOptions.ConSole.WriteLine("Error parsing line : " + peakLine);
                    }
                }
                spectra.AddMSMS(new ProductSpectrum(int.Parse(splits[0]), double.Parse(splits[1]), splits[2], mz, double.Parse(splits[4]), charge, Proteomics.Utilities.Numerics.MassFromMZ(mz, charge), peaks, double.Parse(splits[8]), double.Parse(splits[10]), double.Parse(splits[11])));
            }
            if (!string.IsNullOrEmpty(filenameTracks))
            {
                spectra.tracks = Tracks.Import(filenameTracks, dbOptions);
            }
            return(spectra);
        }
コード例 #8
0
        /// <summary>
        /// Copies a Mascot CSV report, appending to each line columns 2 and 3 of every UbiPred line
        /// whose column 4 contains the Mascot line's column 3 and whose column-2 index lies within
        /// the Mascot line's [column 16, column 17] range.
        /// </summary>
        /// <param name="csvUbiFile">Path of the UbiPred CSV file.</param>
        /// <param name="csvMascotFile">Path of the Mascot CSV report to annotate.</param>
        /// <param name="csvFileOut">Path handed to the <c>vsCSVWriter</c> that receives the annotated lines.</param>
        /// <remarks>
        /// Mascot lines starting with "Search", or with fewer than 18 columns, are copied unchanged.
        /// A parse error while annotating a line is printed to the console and the line is written
        /// with whatever was appended before the error.
        /// </remarks>
        public static void AppendUbiPredToMascotReport(string csvUbiFile, string csvMascotFile, string csvFileOut)
        {
            vsCSV       csvUbi    = new vsCSV(csvUbiFile);
            vsCSV       csvMascot = new vsCSV(csvMascotFile);
            vsCSVWriter writer    = new vsCSVWriter(csvFileOut);

            try
            {
                foreach (string lineMascot in csvMascot.LINES_LIST)
                {
                    string strToAppend = "";
                    try
                    {
                        string[] mSplits = lineMascot.Split(vsCSV._Generic_Separator);
                        // Column 17 is read below, so at least 18 columns are required. The previous
                        // guard (>= 17) made every 17-column line raise and log an exception.
                        if (mSplits.Length >= 18 && !lineMascot.StartsWith("Search"))
                        {
                            int indexStart = int.Parse(mSplits[16]);
                            int indexStop  = int.Parse(mSplits[17]);
                            foreach (string lineUbi in csvUbi.LINES_LIST)
                            {
                                string[] splits   = lineUbi.Split(vsCSV._Generic_Separator);
                                int      indexUbi = int.Parse(splits[2]);
                                if (splits[4].Contains(mSplits[3]) && indexUbi >= indexStart && indexUbi <= indexStop)
                                {
                                    strToAppend += "," + splits[2] + "," + splits[3];
                                }
                            }
                        }
                    }
                    catch (System.Exception ex)
                    {
                        Console.WriteLine(ex.Message);
                        Console.WriteLine(ex.StackTrace);
                    }
                    // The line is always written, annotated or not.
                    writer.AddLine(lineMascot + strToAppend);
                }
                writer.WriteToFile();
            }
            catch (System.Exception ex)
            {
                Console.WriteLine(ex.Message);
                Console.WriteLine(ex.StackTrace);
            }
        }
コード例 #9
0
ファイル: Tracks.cs プロジェクト: olivierlizotte/Trinity.Main
        /// <summary>
        /// Loads tracks from a CSV file whose first line must equal <c>Track.TITLE</c>.
        /// </summary>
        /// <param name="filename">Path of the tracks CSV file.</param>
        /// <param name="dbOptions">Options object whose <c>ConSole</c> receives parse-error messages.</param>
        /// <returns>
        /// The loaded tracks, or <c>null</c> when the file is empty or its first line is not the expected title.
        /// </returns>
        public static Tracks Import(string filename, DBOptions dbOptions)
        {
            vsCSV source = new vsCSV(filename);

            bool hasExpectedTitle = source.LINES_LIST.Count != 0 && source.LINES_LIST[0].CompareTo(Track.TITLE) == 0;
            if (!hasExpectedTitle)
            {
                return null;
            }

            Tracks result = new Tracks();
            int    row    = 1; // row 0 is the title line

            while (row < source.LINES_LIST.Count)
            {
                string rowText = source.LINES_LIST[row];
                try
                {
                    string[] cells = rowText.Split(vsCSV._Generic_Separator);
                    // Columns are parsed in the order AddTrack receives them: 0, 1, 3, 4, 2.
                    double column0 = double.Parse(cells[0]);
                    double column1 = double.Parse(cells[1]);
                    double column3 = double.Parse(cells[3]);
                    double column4 = double.Parse(cells[4]);
                    double column2 = double.Parse(cells[2]);
                    result.AddTrack(column0, column1, column3, column4, column2);
                }
                catch (Exception)
                {
                    // A malformed row is reported and skipped.
                    dbOptions.ConSole.WriteLine("Error parsing line : " + rowText);
                }
                row++;
            }
            return result;
        }
コード例 #10
0
ファイル: Tests.cs プロジェクト: olivierlizotte/Trinity.Main
        /// <summary>
        /// For every peptide of a hard-coded peptide CSV file (sequence in column 4), lists the entries of
        /// two hard-coded nucleotide FASTA databases whose translation, in any of three frame shifts and
        /// both directions, contains the peptide, and writes the peptide line followed by that
        /// ";"-terminated list to a hard-coded output CSV file.
        /// </summary>
        /// <remarks>
        /// Prints "Zut" to the console for each peptide that matches nothing. Exceptions are not caught.
        /// </remarks>
        public static void YangLiuPeptidesWithAllProteins()
        {
            vsCSV       csvPeptides = new vsCSV(@"G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\DEC18_2012\DMatton\Clustering_186716\Cluster_Intensity_peptides_NormP.csv");
            vsCSVWriter writer      = new vsCSVWriter(@"G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\DEC18_2012\DMatton\Clustering_186716\ProteinsPerPeptidesFromDatabases_AllReadingFrames.csv");

            NucleicAcid.InitHash();

            Dictionary <string, List <string> > protein1AAs = TranslateSixReadingFrames(@"G:\Thibault\Olivier\Databases\DMatton\Matton_Illumina_Anthesis_WithReverse.fasta");
            Dictionary <string, List <string> > protein2AAs = TranslateSixReadingFrames(@"G:\Thibault\Olivier\Databases\DMatton\mattond_20110418_WithReverse_EditedJuly2013.fasta");

            // The first line is copied as-is; the remaining lines are peptides.
            writer.AddLine(csvPeptides.LINES_LIST[0]);

            for (int i = 1; i < csvPeptides.LINES_LIST.Count; i++)
            {
                string[] splits = csvPeptides.LINES_LIST[i].Split(vsCSV._Generic_Separator);
                string   seq    = splits[4];

                StringBuilder sb = new StringBuilder();
                AppendMatchingEntries(sb, protein1AAs, seq);
                AppendMatchingEntries(sb, protein2AAs, seq);

                if (sb.Length == 0)
                {
                    Console.WriteLine("Zut");
                }
                writer.AddLine(csvPeptides.LINES_LIST[i] + "," + sb.ToString().Trim());
            }
            writer.WriteToFile();
        }

        /// <summary>
        /// Reads a FASTA file and translates every entry with frame shifts 0, 1 and 2, forward and
        /// backward, keyed by "description | Reading Frame N | Forward/Backward".
        /// </summary>
        /// <remarks>The stream is disposed once reading is done; it was previously left open.</remarks>
        private static Dictionary <string, List <string> > TranslateSixReadingFrames(string fastaPath)
        {
            Dictionary <string, List <string> > translations = new Dictionary <string, List <string> >();

            using (FileStream fasta = new FileStream(fastaPath, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                foreach (Protein prot in ProteinFastaReader.ReadProteins(fasta, false))
                {
                    for (int shift = 0; shift < 3; shift++)
                    {
                        // Dictionary.Add throws when two entries share the same description.
                        translations.Add(prot.Description + " | Reading Frame " + shift + " | Forward", NucleicAcid.ConvertNA3ToAAs(prot.BaseSequence, shift, false));
                        translations.Add(prot.Description + " | Reading Frame " + shift + " | Backward", NucleicAcid.ConvertNA3ToAAs(prot.BaseSequence, shift, true));
                    }
                }
            }
            return translations;
        }

        /// <summary>
        /// Appends "key;" to <paramref name="sb"/> for every entry having at least one translated
        /// sequence that contains <paramref name="seq"/>.
        /// </summary>
        private static void AppendMatchingEntries(StringBuilder sb, Dictionary <string, List <string> > translations, string seq)
        {
            foreach (KeyValuePair <string, List <string> > entry in translations)
            {
                foreach (string protSeq in entry.Value)
                {
                    if (protSeq.Contains(seq))
                    {
                        sb.Append(entry.Key + ";");
                        break; // one hit per entry is enough
                    }
                }
            }
        }
コード例 #11
0
ファイル: Tests.cs プロジェクト: olivierlizotte/Trinity.Main
        /// <summary>
        /// Aggregates the per-residue values of a hard-coded tab-separated matrix file (residue in
        /// column 3, value in column 5) and writes three hard-coded CSV reports: the annotated SUMO
        /// site lines joined with their matrix rows, the mean value per residue plus the mean of a
        /// random sample of lysine values, and a table listing all values per residue.
        /// </summary>
        /// <returns>Always <c>true</c>.</returns>
        /// <remarks>
        /// Matrix rows whose column 5 is not a number only register their residue key. A row with a
        /// numeric value whose residue is not one of "A".."Z" throws <c>KeyNotFoundException</c>.
        /// Exceptions are not caught.
        /// </remarks>
        public static bool LysineConservation()
        {
            List <string> zincIDs = GetZincProteinsENSP();
            Dictionary <string, List <int> > dicOfSites = ReadZNF(@"C:\_IRIC\DATA\Sumo\ZNF.csv");
            string csvMatrix          = @"C:\_IRIC\DATA\Sumo\matrix_human.tsv";
            string csvToAnnotate      = @"C:\_IRIC\DATA\Sumo\liste des sites SUMO.csv";
            string output             = @"C:\_IRIC\DATA\Sumo\outputL.csv";
            string outputConservation = @"C:\_IRIC\DATA\Sumo\outputConservation.csv";
            string outputAll          = @"C:\_IRIC\DATA\Sumo\outputConservationAll_b.csv";
            vsCSV  csvM = new vsCSV(csvMatrix);
            vsCSV  csvA = new vsCSV(csvToAnnotate);

            // Set lookup replaces a full scan of the id list for every lysine row.
            HashSet <string> zincIdSet = new HashSet <string>(zincIDs);

            // (column 0 + column 5) of each annotated site -> its line index in csvA
            Dictionary <string, int> dicOfAnnotates = new Dictionary <string, int>();

            for (int i = 1; i < csvA.LINES_LIST.Count; i++)
            {
                string[] items = csvA.LINES_LIST[i].Split(vsCSV._Generic_Separator);
                dicOfAnnotates.Add(items[0] + items[5], i);
            }

            vsCSVWriter writer = new vsCSVWriter(output);

            writer.AddLine(csvA.getFirstLine());

            // One column of raw values per letter A..Z plus three special groups.
            Dictionary <string, List <double> > dicOfAllAA = new Dictionary <string, List <double> >();
            vsCSVWriter   writerConsAll = new vsCSVWriter(outputAll);
            StringBuilder sb            = new StringBuilder();

            for (char aa = 'A'; aa <= 'Z'; aa++)
            {
                dicOfAllAA.Add(aa.ToString(), new List <double>());
                sb.Append(aa + ",");
            }
            sb.Append("SpecialK,ZincEveryWhere,ZincSumo");
            dicOfAllAA.Add("SpecialK", new List <double>());
            dicOfAllAA.Add("ZincEveryWhere", new List <double>());
            dicOfAllAA.Add("ZincSumo", new List <double>());
            writerConsAll.AddLine(sb.ToString());

            // NOTE(review): this header goes to the annotated-sites file, not to the conservation
            // file whose rows look like "key,number" — possibly meant for writerCons; confirm.
            writer.AddLine("AminoAcid,Some Number");
            Dictionary <string, double> dicOfAA       = new Dictionary <string, double>();
            Dictionary <string, int>    dicOfAANumber = new Dictionary <string, int>();

            int           nbK = 0;
            List <double> echantillonageNoZero = new List <double>();

            for (int j = 0; j < csvM.LINES_LIST.Count; j++)
            {
                string[] splitsJ = csvM.LINES_LIST[j].Split('\t');
                string   aa      = splitsJ[3];
                if (!dicOfAA.ContainsKey(aa))
                {
                    dicOfAANumber.Add(aa, 0);
                    dicOfAA.Add(aa, 0);
                }

                // Parse the value once; rows without a numeric value contribute nothing more.
                double value;
                if (!double.TryParse(splitsJ[5], out value))
                {
                    continue;
                }

                dicOfAA[aa] += value;
                dicOfAANumber[aa]++;
                dicOfAllAA[aa].Add(value);

                if (aa != "K")
                {
                    continue;
                }

                if (zincIdSet.Contains(splitsJ[1]))
                {
                    dicOfAllAA["ZincEveryWhere"].Add(value);
                }

                if (dicOfSites.ContainsKey(splitsJ[1] + "|" + splitsJ[2]))
                {
                    dicOfAllAA["ZincSumo"].Add(value);
                }

                // Pool of all lysine values, sampled below.
                echantillonageNoZero.Add(value);

                int annotatedLine;
                if (dicOfAnnotates.TryGetValue(splitsJ[1] + splitsJ[2], out annotatedLine))
                {
                    if (!dicOfAA.ContainsKey("SpecialK"))
                    {
                        dicOfAANumber.Add("SpecialK", 0);
                        dicOfAA.Add("SpecialK", 0);
                    }
                    dicOfAA["SpecialK"] += value;
                    dicOfAANumber["SpecialK"]++;
                    dicOfAllAA["SpecialK"].Add(value);
                    nbK++;
                    writer.AddLine(csvA.LINES_LIST[annotatedLine] + "," + csvM.LINES_LIST[j].Replace('\t', ','));
                }
            }
            writer.WriteToFile();

            vsCSVWriter writerCons = new vsCSVWriter(outputConservation);

            foreach (string key in dicOfAA.Keys)
            {
                // A key that never had a numeric value yields 0/0 = NaN.
                writerCons.AddLine(key + "," + dicOfAA[key] / (double)dicOfAANumber[key]);
            }

            // Mean of nbK lysine values drawn at random, with replacement, from the pool.
            double meanNoZero = 0;
            Random r          = new Random();

            for (int i = 0; i < nbK; i++)
            {
                // Random.Next(n) covers [0, n). The previous Floor(NextDouble() * (Count - 1))
                // could never select the last element of the pool.
                int index = r.Next(echantillonageNoZero.Count);
                meanNoZero += echantillonageNoZero[index];
            }
            writerCons.AddLine("Echantillonnage K," + meanNoZero / (double)nbK);
            writerCons.WriteToFile();

            // Dump the value lists column by column until every list is exhausted; the loop ends
            // after writing one row made only of empty cells.
            int  lineIndex = 0;
            bool keepGoing = true;

            while (keepGoing)
            {
                StringBuilder sb2 = new StringBuilder();
                keepGoing = false;
                foreach (List <double> values in dicOfAllAA.Values)
                {
                    if (lineIndex < values.Count)
                    {
                        sb2.Append(values[lineIndex] + ",");
                        keepGoing = true;
                    }
                    else
                    {
                        sb2.Append(",");
                    }
                }
                writerConsAll.AddLine(sb2.ToString());
                lineIndex++;
            }
            writerConsAll.WriteToFile();
            return(true);
        }
コード例 #12
0
        /// <summary>
        /// Loads a hard-coded CSV file of timed variables, interpolates and normalizes them, computes each
        /// variable's Pearson correlation with the "price" variable, and writes a correlation-weighted
        /// prediction per time point plus an export of all variables to hard-coded output files.
        /// </summary>
        /// <param name="console">Not used by this method; messages go to <c>Console</c>.</param>
        public static void Launch(IConSol console)
        {
            vsCSV csv = new vsCSV(@"C:\Users\caronlio\Downloads\Via.Science.Pre.Interview.Assignment.Data.2013.10.18.csv");
            Dictionary <DateTime, List <Variable> > byTime = new Dictionary <DateTime, List <Variable> >();
            Dictionary <string, List <Variable> >   byName = new Dictionary <string, List <Variable> >();

            // Index every data line (the first line is skipped) by time and by variable name.
            for (int row = 1; row < csv.LINES_LIST.Count; row++)
            {
                Variable parsed = new Variable(csv.LINES_LIST[row]);

                List <Variable> sameTime;
                if (!byTime.TryGetValue(parsed.time, out sameTime))
                {
                    sameTime = new List <Variable>();
                    byTime.Add(parsed.time, sameTime);
                }
                sameTime.Add(parsed);

                List <Variable> sameName;
                if (!byName.TryGetValue(parsed.name, out sameName))
                {
                    sameName = new List <Variable>();
                    byName.Add(parsed.name, sameName);
                }
                sameName.Add(parsed);
            }

            foreach (string variableName in byName.Keys)
            {
                InterpolateMissingValues(variableName, byTime, byName);
            }

            // Rebuild the by-name index from the by-time index.
            byName.Clear();
            foreach (List <Variable> atTime in byTime.Values)
            {
                foreach (Variable item in atTime)
                {
                    List <Variable> bucket;
                    if (!byName.TryGetValue(item.name, out bucket))
                    {
                        bucket = new List <Variable>();
                        byName.Add(item.name, bucket);
                    }
                    bucket.Add(item);
                }
            }

            // Compute normalized values
            NormalizeVariables(byName);

            // Correlate every variable (including "price" itself) with the "price" variable.
            List <double> prices = GetArrayofNormed("price", byTime, byName);
            Dictionary <string, double> correlationByName = new Dictionary <string, double>();

            foreach (string variableName in byName.Keys)
            {
                List <double> normed = GetArrayofNormed(variableName, byTime, byName);
                double        corr   = MathNet.Numerics.Statistics.Correlation.Pearson(prices, normed);
                if (variableName.CompareTo("price") == 0)
                {
                    Console.WriteLine("test");
                }
                correlationByName.Add(variableName, corr);
            }

            // Prediction: correlation-weighted sum of the normalized non-price values at each time.
            // NOTE(review): the header names three columns but each row holds only the prediction —
            // confirm whether time and price were meant to be written as well.
            vsCSVWriter output = new vsCSVWriter(@"C:\_IRIC\predictions.csv");

            output.AddLine("Time,Price,Prediction");
            foreach (KeyValuePair <DateTime, List <Variable> > timePoint in byTime)
            {
                double pred = 0;
                foreach (KeyValuePair <string, double> correlation in correlationByName)
                {
                    if (correlation.Key.CompareTo("price") == 0)
                    {
                        continue;
                    }

                    foreach (Variable v in timePoint.Value)
                    {
                        if (v.name.CompareTo(correlation.Key) == 0)
                        {
                            pred += correlation.Value * v.normValue;
                        }
                    }
                }
                pred *= 100000;
                output.AddLine(pred.ToString());
            }
            output.WriteToFile();

            // Export a csv of the variables, ordered by date
            ExportAllVariables(@"C:\_IRIC\assignOut.csv", byTime, byName, correlationByName);

            Console.WriteLine("Done!");
        }
コード例 #13
0
//            Proteomics.Utilities.Fasta.FastaRead.AppendProteinDescriptionToMascotReport(@"C:\Users\caronlio\Downloads\filtered peptides.csv",
//                                                                                        @"C:\_IRIC\DATA\Tariq\peptideDb-minOcc60_WithReverse.fasta",
//                                                                                        @"C:\Users\caronlio\Downloads\filtered peptides_WithProteinDescriptions.csv");
//
        /// <summary>
        /// Copies every line of a Mascot CSV report to <paramref name="csvFileOut"/>, appending to each
        /// data row the matching entries found in the hCKSAAP text file.
        /// </summary>
        /// <remarks>
        /// For each report row that has enough columns and does not start with "Search", columns 16 and 17
        /// are parsed as an inclusive integer range and column 3 is used as a lookup key
        /// (presumably peptide start/stop positions and a protein accession; confirm against the report format).
        /// The hCKSAAP file is scanned for the first ">IPI:" header line containing that key; within that
        /// section (up to the next line starting with ">"), every tab-separated row with more than 6 fields
        /// whose first field is an integer inside the range contributes ",field0,field5" to the output line.
        /// Failures on a single row are logged to the console and the row is written unchanged.
        /// </remarks>
        /// <param name="txtHCKSAAPFile">Path of the hCKSAAP result file (tab-separated sections introduced by ">" lines).</param>
        /// <param name="csvMascotFile">Path of the Mascot report to annotate.</param>
        /// <param name="csvFileOut">Path of the annotated report to write.</param>
        public static void AppendhCKSAAPToMascotReport(string txtHCKSAAPFile, string csvMascotFile, string csvFileOut)
        {
            // Columns of the Mascot report read by this method.
            const int keyColumn   = 3;
            const int startColumn = 16;
            const int stopColumn  = 17;

            vsCSV       csvUbi    = new vsCSV(txtHCKSAAPFile);
            vsCSV       csvMascot = new vsCSV(csvMascotFile);
            vsCSVWriter writer    = new vsCSVWriter(csvFileOut);

            try
            {
                foreach (string lineMascot in csvMascot.LINES_LIST)
                {
                    string strToAppend = "";
                    try
                    {
                        string[] mSplits = lineMascot.Split(vsCSV._Generic_Separator);

                        // The highest column read is stopColumn, so the row must have more than
                        // stopColumn fields. (The previous ">= 17" test let 17-column rows through
                        // and relied on the catch below to absorb the IndexOutOfRangeException.)
                        if (mSplits.Length > stopColumn && !lineMascot.StartsWith("Search"))
                        {
                            int  indexStart = int.Parse(mSplits[startColumn]);
                            int  indexStop  = int.Parse(mSplits[stopColumn]);
                            bool inIPI      = false;
                            foreach (string lineUbi in csvUbi.LINES_LIST)
                            {
                                if (lineUbi.StartsWith(">"))
                                {
                                    // A new header while inside the matching section means the section is over.
                                    if (inIPI)
                                    {
                                        break;
                                    }

                                    inIPI = lineUbi.StartsWith(">IPI:") && lineUbi.Contains(mSplits[keyColumn]);
                                }
                                if (inIPI)
                                {
                                    // The header line itself also reaches this point; it is skipped
                                    // because its first field does not parse as an integer.
                                    string[] splits = lineUbi.Split('\t');
                                    int      indexPos;
                                    if (splits.Length > 6 && int.TryParse(splits[0], out indexPos))
                                    {
                                        if (indexPos >= indexStart && indexPos <= indexStop)
                                        {
                                            strToAppend += "," + splits[0] + "," + splits[5];
                                        }
                                    }
                                }
                            }
                        }
                    }
                    catch (System.Exception ex)
                    {
                        // Best effort: a malformed row is reported and still copied to the output.
                        Console.WriteLine(ex.Message);
                        Console.WriteLine(ex.StackTrace);
                    }
                    writer.AddLine(lineMascot + strToAppend);
                }
                writer.WriteToFile();
            }
            catch (System.Exception ex)
            {
                Console.WriteLine(ex.Message);
                Console.WriteLine(ex.StackTrace);
            }
        }
コード例 #14
0
        /// <summary>
        /// Rewrites a Mascot report CSV, replacing the "Pep Elution Time" cell of every content row with a
        /// value looked up in the "*_RetentionTimes.csv" files found in <paramref name="folder"/>.
        /// </summary>
        /// <remarks>
        /// Rows are copied unchanged until a row has been seen after which the "Scan Number", "FileName" and
        /// "Pep Elution Time" column indices are all resolved and within the row's width; every following row is
        /// treated as content. For a content row, the '-'-separated integers of the scan-number cell are summed,
        /// the first retention-time file whose path contains the row's file name (without extension) is selected,
        /// and the second field of that file's line at the summed index becomes the new elution time
        /// (an empty string when no file matches). Content rows are re-joined with ",".
        /// </remarks>
        /// <param name="mascotReportCSVFile">Path of the Mascot report to read.</param>
        /// <param name="folder">Folder searched for "*_RetentionTimes.csv" files.</param>
        /// <param name="outputCSV">Path of the rewritten report.</param>
        public static void FromFolderWithRetentionTimeCSV(string mascotReportCSVFile, string folder, string outputCSV)
        {
            const string scanNumberHeader  = "Scan Number";
            const string fileNameHeader    = "FileName";
            const string elutionTimeHeader = "Pep Elution Time";

            // Load every retention-time table up front; rtTables[k] corresponds to rtFiles[k].
            string[] rtFiles  = Directory.GetFiles(folder, "*_RetentionTimes.csv");
            vsCSV[]  rtTables = System.Array.ConvertAll(rtFiles, path => new vsCSV(path));

            vsCSVWriter output = new vsCSVWriter(outputCSV);
            vsCSV       report = new vsCSV(mascotReportCSVFile);

            int  scanColumn    = -1;
            int  fileColumn    = -1;
            int  elutionColumn = -1;
            bool pastHeader    = false;

            foreach (string reportLine in report.LINES_LIST)
            {
                string   outLine = reportLine;
                string[] cells   = reportLine.Split(vsCSV._Generic_Separator);

                // Any row mentioning a header name (re)resolves the corresponding column.
                if (reportLine.Contains(scanNumberHeader))
                {
                    scanColumn = vsCSV.GetColumnIndex(cells, scanNumberHeader);
                }
                if (reportLine.Contains(fileNameHeader))
                {
                    fileColumn = vsCSV.GetColumnIndex(cells, fileNameHeader);
                }
                if (reportLine.Contains(elutionTimeHeader))
                {
                    elutionColumn = vsCSV.GetColumnIndex(cells, elutionTimeHeader);
                }

                if (pastHeader)
                {
                    // The scan cell may hold several '-'-separated numbers; their sum is the lookup index.
                    int scanSum = 0;
                    foreach (string part in cells[scanColumn].Split('-'))
                    {
                        scanSum += int.Parse(part);
                    }

                    string rawName = vsCSV.GetFileName_NoExtension(cells[fileColumn]);
                    int    match   = System.Array.FindIndex(rtFiles, path => path.Contains(rawName));

                    string elutionTime = "";
                    if (match >= 0)
                    {
                        elutionTime = rtTables[match].LINES_LIST[scanSum].Split(vsCSV._Generic_Separator)[1];
                    }

                    cells[elutionColumn] = elutionTime;
                    outLine = vsCSV.Concatenate(cells, ",");
                }

                // Content starts on the row after all three columns are known and fit in the current row.
                int lowestColumn  = System.Math.Min(scanColumn, System.Math.Min(fileColumn, elutionColumn));
                int highestColumn = System.Math.Max(scanColumn, System.Math.Max(fileColumn, elutionColumn));
                if (lowestColumn >= 0 && highestColumn < cells.Length)
                {
                    pastHeader = true;
                }

                output.AddLine(outLine);
            }

            output.WriteToFile();
        }