/// <summary>
/// Initialises this instance: builds the regex manager, loads the sequence database
/// (with optional variants), sorts each sequence's variants and prepares the output directory.
/// </summary>
/// <param name="RegexFile">File whose first line (as returned by <c>UnixCfg.ReadUnixLine</c>) is the regex.</param>
/// <param name="PssmFile">Optional PSSM file; null or empty means the regex manager is built without a PSSM.</param>
/// <param name="DatabaseFile">Sequence database. Loaded with <c>LoadFasta</c> when the path contains ".fasta", otherwise with <c>LoadXml</c>.</param>
/// <param name="VariantsFile">Optional variants file, only consulted for non-FASTA databases; null or empty means none.</param>
/// <param name="OutputDir">Output directory; it is created by this call.</param>
/// <param name="grouping">Passed through unchanged to the <c>WregexManager</c> constructor.</param>
/// <exception cref="ApplicationException">The regex file yields no line.</exception>
private void Create(string RegexFile, string PssmFile, string DatabaseFile, string VariantsFile, string OutputDir, bool grouping)
{
    // regex
    string line;
    UnixCfg rd = new UnixCfg(RegexFile);
    try
    {
        line = rd.ReadUnixLine();
    }
    finally
    {
        // Close the reader even when reading fails, and before the empty-file check throws.
        rd.Close();
    }

    if (line == null)
    {
        throw new ApplicationException("Empty regex");
    }

    if (!string.IsNullOrEmpty(PssmFile))
    {
        mRegex = new WregexManager(line, new PSSM(PssmFile), grouping);
    }
    else
    {
        mRegex = new WregexManager(line, grouping);
    }

    // Fasta
    mSeqs = new List<Fasta>();
    if (DatabaseFile.Contains(".fasta"))
    {
        // NOTE: this is a substring match anywhere in the path, not an extension check.
        LoadFasta(DatabaseFile);
    }
    else
    {
        SortedList<string, List<Variant>> list = null;
        if (!string.IsNullOrEmpty(VariantsFile))
        {
            list = LoadVariants(VariantsFile);
        }

        LoadXml(DatabaseFile, list);
    }

    // Sort Variants
    foreach (Fasta seq in mSeqs)
    {
        seq.mVariants.Sort();
    }

    mDataId = Path.GetFileNameWithoutExtension(DatabaseFile);
    mOutputDir = OutputDir;
    Directory.CreateDirectory(OutputDir);
}
/// <summary>
/// Reads FASTA-style records from <paramref name="path"/> and appends them to <c>mSeqs</c>.
/// </summary>
/// <remarks>
/// A line starting with '&gt;' opens a record; the text before the first '|' (minus the leading
/// '&gt;') is passed to the <c>Fasta</c> constructor. Within a record, lines starting with "NP_"
/// are parsed as a <c>Variant</c> and added to the record unless already present; every other
/// line is appended to the record's sequence.
/// </remarks>
/// <param name="path">Path of the file to read.</param>
/// <exception cref="ApplicationException">
/// The first line is not a header, or a record ends without any sequence text.
/// </exception>
private void LoadFasta(string path)
{
    char[] sep = new char[] { '|' };
    UnixCfg rd = new UnixCfg(path);
    try
    {
        string line = rd.ReadUnixLine();
        if (line == null || line.Length == 0 || line[0] != '>')
        {
            throw new ApplicationException("FASTA header not found");
        }

        Fasta f = new Fasta(Fasta.Type.Protein, line.Split(sep)[0].Substring(1), "");
        do
        {
            line = rd.ReadUnixLine();
            if (line == null || (line.Length > 0 && line[0] == '>')) // EOF or next element
            {
                if (f.mSequence.Length == 0)
                {
                    throw new ApplicationException("FASTA sequence not found");
                }

                mSeqs.Add(f);
                if (line != null)
                {
                    f = new Fasta(Fasta.Type.Protein, line.Split(sep)[0].Substring(1), "");
                }
            }
            else if (line.StartsWith("NP_", StringComparison.Ordinal)) // Variant
            {
                Variant v = new Variant(line);
                if (!f.mVariants.Contains(v))
                {
                    f.mVariants.Add(v);
                }
            }
            else // Sequence (an empty line lands here and appends nothing)
            {
                f.mSequence += line;
            }
        }
        while (line != null);
    }
    finally
    {
        // Release the reader on the error paths as well as on success.
        rd.Close();
    }
}
/// <summary>
/// Loads PSSM scores from a space/tab-delimited text file and appends the resulting
/// positions to <c>m_pssm</c>.
/// </summary>
/// <remarks>
/// Every row is "condition score score ...": the first token is the entry condition and the
/// following tokens are its scores at positions 0, 1, 2, ... The number of positions is taken
/// from the first row. Runs of separators and trailing whitespace are tolerated, and rows that
/// contain only whitespace are skipped. Scores are parsed with the invariant culture ('.' as
/// the decimal separator) so the result does not depend on the machine's locale.
/// A file that yields no lines leaves <c>m_pssm</c> untouched.
/// </remarks>
/// <param name="filename">Path of the PSSM file to read.</param>
public void Load(string filename)
{
    char[] sep = new char[] { ' ', '\t' };
    UnixCfg rd = new UnixCfg(filename);
    try
    {
        string line = rd.ReadUnixLine();
        if (line == null)
        {
            return;
        }

        // The first row fixes how many positions (score columns) the matrix has.
        string[] fields = line.Split(sep, System.StringSplitOptions.RemoveEmptyEntries);
        PSSMposition[] pos = new PSSMposition[System.Math.Max(fields.Length - 1, 0)];
        for (uint i = 0; i < pos.Length; i++)
        {
            pos[i].order = i;
            pos[i].entries = new List<PSSMentry>();
        }

        PSSMentry entry;
        while (line != null)
        {
            fields = line.Split(sep, System.StringSplitOptions.RemoveEmptyEntries);
            if (fields.Length > 0)
            {
                entry.condition = fields[0].Trim();
                for (uint i = 0; i < pos.Length; i++)
                {
                    entry.score = double.Parse(fields[i + 1], System.Globalization.CultureInfo.InvariantCulture);
                    pos[i].entries.Add(entry);
                }
            }

            line = rd.ReadUnixLine();
        }

        m_pssm.AddRange(pos);
    }
    finally
    {
        // Release the reader even when a row fails to parse.
        rd.Close();
    }
}