/// <summary>
/// Initialises the search state: builds the regex manager, loads the
/// sequence database, sorts the variants of every loaded sequence and
/// prepares the output directory.
/// </summary>
/// <param name="RegexFile">File whose first line (as returned by <c>UnixCfg.ReadUnixLine</c>) is the regex.</param>
/// <param name="PssmFile">PSSM file; when null or empty the regex manager is built without a PSSM.</param>
/// <param name="DatabaseFile">Sequence database. Loaded with <c>LoadFasta</c> when the path contains ".fasta", otherwise with <c>LoadXml</c>.</param>
/// <param name="VariantsFile">Variants file, only used for non-FASTA databases; null or empty means no variants.</param>
/// <param name="OutputDir">Output directory; it is created by this method.</param>
/// <param name="grouping">Passed through unchanged to the <c>WregexManager</c> constructor.</param>
/// <exception cref="ApplicationException">The regex file yielded no line.</exception>
private void Create( string RegexFile, string PssmFile, string DatabaseFile, string VariantsFile, string OutputDir, bool grouping )
{
    // Regex: only the first line returned by the reader is used.
    string line;
    UnixCfg rd = new UnixCfg( RegexFile );
    try
    {
        line = rd.ReadUnixLine();
    }
    finally
    {
        // Close the reader on every path, including the "empty regex" error below.
        rd.Close();
    }
    if( line == null )
        throw new ApplicationException( "Empty regex" );

    if( !string.IsNullOrEmpty(PssmFile) )
        mRegex = new WregexManager( line, new PSSM(PssmFile), grouping );
    else
        mRegex = new WregexManager( line, grouping );

    // Fasta
    mSeqs = new List<Fasta>();
    // NOTE(review): this is a substring match anywhere in the path, not an extension check.
    if( DatabaseFile.Contains(".fasta") )
        LoadFasta( DatabaseFile );
    else
    {
        SortedList<string,List<Variant>> list = null;
        if( !string.IsNullOrEmpty(VariantsFile) )
            list = LoadVariants( VariantsFile );
        LoadXml( DatabaseFile, list );
    }

    // Sort Variants
    foreach( Fasta seq in mSeqs )
        seq.mVariants.Sort();

    mDataId = Path.GetFileNameWithoutExtension( DatabaseFile );
    mOutputDir = OutputDir;
    Directory.CreateDirectory( OutputDir );
}
/// <summary>
/// Reads a FASTA file and appends one <c>Fasta</c> entry per record to <c>mSeqs</c>.
/// </summary>
/// <remarks>
/// A record starts with a line beginning with '&gt;'; the entry is created from the
/// text between that '&gt;' and the first '|'. Lines beginning with "NP_" are
/// parsed as <c>Variant</c>s and attached to the current record (duplicates are
/// skipped); every other line is appended to the record's sequence.
/// </remarks>
/// <param name="path">Path of the FASTA file.</param>
/// <exception cref="ApplicationException">
/// The first line is not a header, or a record has no sequence.
/// </exception>
private void LoadFasta( string path )
{
    //char[] sep = new char[]{'|',' ','\t'};
    char[] sep = new char[]{'|'};
    UnixCfg rd = new UnixCfg( path );
    try
    {
        string line = rd.ReadUnixLine();
        // Length check guards against an empty line, which would make line[0] throw.
        if( line == null || line.Length == 0 || line[0] != '>' )
            throw new ApplicationException( "FASTA header not found" );
        Fasta f = new Fasta( Fasta.Type.Protein, line.Split(sep)[0].Substring(1), "" );

        do
        {
            line = rd.ReadUnixLine();
            if( line == null || (line.Length > 0 && line[0] == '>') )
            {
                // EOF or next element: the record being built is complete.
                if( f.mSequence.Length == 0 )
                    throw new ApplicationException( "FASTA sequence not found" );
                mSeqs.Add( f );
                if( line != null )
                    f = new Fasta( Fasta.Type.Protein, line.Split(sep)[0].Substring(1), "" );
            }
            else if( line.StartsWith("NP_", StringComparison.Ordinal) )
            {
                // Variant
                Variant v = new Variant( line );
                if( !f.mVariants.Contains(v) )
                    f.mVariants.Add( v );
            }
            else
            {
                // Sequence
                f.mSequence += line;
            }
        } while( line != null );
    }
    finally
    {
        // Release the reader even when a format error is thrown above.
        rd.Close();
    }
}
/* Previous implementation (disabled), kept for reference.
public void Load( string filename ) {
    UnixCfg rd = new UnixCfg( filename );
    string line = rd.ReadUnixLine();
    char[] sep = new char[]{ ':' };
    string[] fields;
    while( line != null ) {
        PSSMposition pos;
        pos.order = uint.Parse( line.Replace("{","") );
        pos.entries = new List<PSSMentry>();
        line = rd.ReadUnixLine();
        while( line != null && !line.Contains("}") ) {
            PSSMentry entry;
            fields = line.Split( sep );
            entry.condition = fields[0].Trim();
            entry.score = double.Parse( fields[1] );
            pos.entries.Add( entry );
            line = rd.ReadUnixLine();
        }
        if( line != null )
            line = rd.ReadUnixLine();
        m_pssm.Add( pos );
    }
    rd.Close();
}*/

/// <summary>
/// Loads a PSSM from a space/tab separated matrix file and appends its
/// positions to <c>m_pssm</c>.
/// </summary>
/// <remarks>
/// Every line is one row: the first field is the entry condition and field
/// <c>i+1</c> is the score of that condition at position <c>i</c>. The number of
/// positions is taken from the first line; extra fields on later lines are
/// ignored. Scores are parsed with the invariant culture so the result does
/// not depend on the machine's regional settings. An empty file adds nothing.
/// </remarks>
/// <param name="filename">Path of the PSSM file.</param>
/// <exception cref="ApplicationException">A row has fewer scores than the first row.</exception>
/// <exception cref="FormatException">A score field is not a valid number.</exception>
public void Load( string filename )
{
    char[] sep = new char[]{ ' ', '\t' };
    PSSMposition[] pos = null;
    UnixCfg rd = new UnixCfg( filename );
    try
    {
        string line = rd.ReadUnixLine();
        if( line == null )
            return; // empty file: nothing to load

        // The first row fixes the number of positions (all columns but the condition).
        string[] fields = line.Split( sep );
        pos = new PSSMposition[fields.Length-1];
        for( int i = 0; i < pos.Length; i++ )
        {
            pos[i].order = (uint)i;
            pos[i].entries = new List<PSSMentry>();
        }

        PSSMentry entry;
        do
        {
            fields = line.Split( sep );
            if( fields.Length <= pos.Length )
                throw new ApplicationException( "PSSM row has fewer scores than the first row: " + line );
            entry.condition = fields[0].Trim();
            for( int i = 0; i < pos.Length; i++ )
            {
                entry.score = double.Parse( fields[i+1], System.Globalization.CultureInfo.InvariantCulture );
                pos[i].entries.Add( entry );
            }
            line = rd.ReadUnixLine();
        } while( line != null );
    }
    finally
    {
        // Release the reader even when a row fails to parse.
        rd.Close();
    }

    // Only publish the positions once the whole file has been read successfully.
    m_pssm.AddRange( pos );
}