/// <summary>
/// Reads the standard-protein FASTA file located relative to the test directory
/// and checks that exactly 43 sequences are returned.
/// </summary>
public void TestReadFromFasta()
{
    var format = new FastaFormat();
    string fastaPath = TestContext.CurrentContext.TestDirectory + "/../../../data//Standard_Protein_FIT_060222.noredundant.fasta";

    List<Sequence> sequences = SequenceUtils.Read(format, fastaPath);

    Assert.AreEqual(43, sequences.Count);
}
/// <summary>
/// Fills the reference allele of each SNP from a genome FASTA file.
/// </summary>
/// <remarks>
/// The SNPs are grouped by <c>Chrom</c>. For every sequence read from the file, the sequence
/// name is converted with <c>HumanChromosomeToInt</c>; the SNPs in the matching group get
/// <c>RefChar</c> set to the upper-cased character at index <c>Position - 1</c> of the sequence
/// (i.e. <c>Position</c> is used as a 1-based coordinate). No range check is performed on
/// <c>Position</c>.
/// </remarks>
/// <param name="snpItems">SNPs whose <c>RefChar</c> is filled in.</param>
/// <param name="fastaFile">Path of the genome FASTA file to read.</param>
/// <param name="progress">Receives progress messages; a <c>ConsoleProgressCallback</c> is created when this is null.</param>
public static void FillReferenceAlleleFromFasta(this IEnumerable<SNPItem> snpItems, string fastaFile, IProgressCallback progress = null)
{
    if (progress == null)
    {
        progress = new ConsoleProgressCallback();
    }

    var snpsByChromosome = snpItems.ToGroupDictionary(m => m.Chrom);

    progress.SetMessage("Filling reference allele from {0} file ...", fastaFile);
    using (var reader = new StreamReader(fastaFile))
    {
        var ff = new FastaFormat();
        Sequence seq;
        while ((seq = ff.ReadSequence(reader)) != null)
        {
            progress.SetMessage("chromosome " + seq.Name + " ...");
            var chr = HumanChromosomeToInt(seq.Name);

            // Single lookup instead of ContainsKey followed by the indexer.
            List<SNPItem> snps;
            if (!snpsByChromosome.TryGetValue(chr, out snps))
            {
                continue;
            }

            // Read the sequence text once per chromosome rather than once per SNP.
            var bases = seq.SeqString;
            foreach (var snp in snps)
            {
                snp.RefChar = char.ToUpper(bases[snp.Position - 1]);
            }
        }
    }

    progress.SetMessage("Filling reference allele finished.");
}
/// <summary>
/// Computes the relative frequency of every character found in the sequences of a FASTA file.
/// </summary>
/// <param name="fastaFile">Path of the FASTA file to read.</param>
/// <returns>
/// A map from character to its count divided by the total number of characters over all
/// sequences. The map is empty when no sequence characters were read.
/// </returns>
public static Dictionary<char, double> GetDatabaseComposition(string fastaFile)
{
    var result = new Dictionary<char, double>();

    using (var sr = new StreamReader(fastaFile))
    {
        var ff = new FastaFormat();
        Sequence seq;
        while ((seq = ff.ReadSequence(sr)) != null)
        {
            foreach (char c in seq.SeqString)
            {
                // TryGetValue leaves count at 0 for an unseen character, so one lookup
                // replaces the ContainsKey + indexer pair.
                double count;
                result.TryGetValue(c, out count);
                result[c] = count + 1;
            }
        }
    }

    // Convert counts to fractions. Keys are snapshotted because the dictionary
    // is modified while iterating.
    double total = result.Values.Sum();
    foreach (char c in result.Keys.ToArray())
    {
        result[c] = result[c] / total;
    }

    return result;
}
/// <summary>
/// Reads <c>test.fasta</c> record by record and checks the name and sequence of the two
/// entries, then checks that a further read returns null.
/// </summary>
public void TestReadSequence()
{
    FastaFormat ff = new FastaFormat();

    // Dispose the reader so the file handle is released even when an assertion fails.
    using (StreamReader sr = new StreamReader("../../../data/test.fasta"))
    {
        Sequence test1 = ff.ReadSequence(sr);
        Assert.AreEqual("test1", test1.Name);
        Assert.AreEqual("AAAAA", test1.SeqString);

        Sequence test2 = ff.ReadSequence(sr);
        Assert.AreEqual("test2", test2.Name);
        Assert.AreEqual("BBBBB", test2.SeqString);

        Sequence test3 = ff.ReadSequence(sr);
        Assert.IsNull(test3);
    }
}
/// <summary>
/// Reads <c>test.fasta</c> (located relative to the test directory) record by record and
/// checks the name and sequence of the two entries, then checks that a further read returns null.
/// </summary>
public void TestReadSequence()
{
    FastaFormat ff = new FastaFormat();

    // Dispose the reader so the file handle is released even when an assertion fails.
    using (StreamReader sr = new StreamReader(TestContext.CurrentContext.TestDirectory + "/../../../data//test.fasta"))
    {
        Sequence test1 = ff.ReadSequence(sr);
        Assert.AreEqual("test1", test1.Name);
        Assert.AreEqual("AAAAA", test1.SeqString);

        Sequence test2 = ff.ReadSequence(sr);
        Assert.AreEqual("test2", test2.Name);
        Assert.AreEqual("BBBBB", test2.SeqString);

        Sequence test3 = ff.ReadSequence(sr);
        Assert.IsNull(test3);
    }
}
/// <summary>
/// Builds a lookup from access number to sequence for every entry of a FASTA database.
/// </summary>
/// <param name="database">Path of the FASTA file to read.</param>
/// <param name="acParser">Parser applied to each sequence name to obtain its access number.</param>
/// <returns>
/// Map from parsed access number to sequence. When several entries yield the same access
/// number, the entry read last is kept.
/// </returns>
public static Dictionary<string, Sequence> GetAccessNumberMap(string database, IStringParser<string> acParser)
{
    var sequencesByAccess = new Dictionary<string, Sequence>();

    using (var reader = new StreamReader(database))
    {
        var format = new FastaFormat();
        for (Sequence entry = format.ReadSequence(reader); entry != null; entry = format.ReadSequence(reader))
        {
            string accessNumber = acParser.GetValue(entry.Name);
            sequencesByAccess[accessNumber] = entry;
        }
    }

    return sequencesByAccess;
}
/// <summary>
/// Reads a FASTA database and indexes its sequences by access number.
/// </summary>
/// <param name="database">Path of the FASTA file to read.</param>
/// <param name="acParser">Extracts the access number from a sequence name.</param>
/// <returns>
/// Dictionary keyed by access number; a later entry with the same key replaces an earlier one.
/// </returns>
public static Dictionary<string, Sequence> GetAccessNumberMap(string database, IStringParser<string> acParser)
{
    var map = new Dictionary<string, Sequence>();

    using (var input = new StreamReader(database))
    {
        var fasta = new FastaFormat();
        while (true)
        {
            Sequence current = fasta.ReadSequence(input);
            if (current == null)
            {
                break;
            }

            string key = acParser.GetValue(current.Name);
            map[key] = current;
        }
    }

    return map;
}
/// <summary>
/// Reads every sequence of a FASTA file and writes its reversed counterpart to the output,
/// optionally preceded by the sequence itself.
/// </summary>
/// <remarks>
/// For each sequence, in this order: progress is updated; when <paramref name="isContaminant"/>
/// is set the reference is prefixed with "CON_" unless it already starts with it; the sequence
/// is written when the <c>combined</c> field is set; <c>builder.Build</c> is applied when the
/// <c>pseudoAminoacid</c> field is set; <paramref name="index"/> is incremented and the result
/// of <c>SequenceUtils.GetReversedSequence</c> is written.
/// NOTE(review): <c>builder.Build</c> runs after the forward sequence has been written, so any
/// change it makes would only affect the reversed entry. Confirm this is intended.
/// </remarks>
/// <param name="index">Running counter passed to the reversed-sequence builder; incremented once per sequence.</param>
/// <param name="sw">Writer receiving the output sequences.</param>
/// <param name="fastaFile">Path of the FASTA file to read.</param>
/// <param name="isContaminant">Whether the references should carry the "CON_" prefix.</param>
private void ProcessFile(ref int index, StreamWriter sw, string fastaFile, bool isContaminant)
{
    FastaFormat ff = new FastaFormat();
    using (StreamReader sr = new StreamReader(fastaFile))
    {
        Progress.SetRange(0, sr.BaseStream.Length);

        Sequence seq;
        while ((seq = ff.ReadSequence(sr)) != null)
        {
            Progress.SetPosition(StreamUtils.GetCharpos(sr));

            // The prefix is an identifier tag, so compare ordinally rather than by current culture.
            if (isContaminant && !seq.Reference.StartsWith("CON_", System.StringComparison.Ordinal))
            {
                seq.Reference = "CON_" + seq.Reference;
            }

            if (combined)
            {
                ff.WriteSequence(sw, seq);
            }

            if (pseudoAminoacid)
            {
                builder.Build(seq);
            }

            index++;
            Sequence reversedSeq = SequenceUtils.GetReversedSequence(seq.SeqString, index);
            ff.WriteSequence(sw, reversedSeq);
        }
    }
}
/// <summary>
/// Chooses an access number parser by inspecting the sequence references of a FASTA database.
/// </summary>
/// <remarks>
/// Sequences are read in order until one has a reference starting with "IPI" or "gi|";
/// if none matches, the whole file is read and the default parser is returned.
/// </remarks>
/// <param name="databaseFileName">Path of the FASTA file to inspect.</param>
/// <returns>
/// A <c>NoExceptionAccessNumberParser</c> wrapping an <c>IPIAccessNumberParser</c> or an
/// <c>NRAccessNumberParser</c> for the first matching reference, otherwise
/// <c>DefaultAccessNumberParser.GetInstance()</c>.
/// </returns>
public static IAccessNumberParser GuessParser(string databaseFileName)
{
    FastaFormat ff = new FastaFormat();
    using (StreamReader sr = new StreamReader(databaseFileName))
    {
        Sequence seq;
        while ((seq = ff.ReadSequence(sr)) != null)
        {
            // Database prefixes are fixed identifiers: compare ordinally, not by current culture.
            if (seq.Reference.StartsWith("IPI", System.StringComparison.Ordinal))
            {
                return new NoExceptionAccessNumberParser(new IPIAccessNumberParser());
            }

            if (seq.Reference.StartsWith("gi|", System.StringComparison.Ordinal))
            {
                return new NoExceptionAccessNumberParser(new NRAccessNumberParser());
            }
        }
    }

    return DefaultAccessNumberParser.GetInstance();
}
/// <summary>
/// Guesses which access number parser fits a FASTA database from its sequence references.
/// </summary>
/// <remarks>
/// The first sequence whose reference starts with "IPI" or "gi|" decides the result. When no
/// sequence matches, the file is read to the end and the default parser is returned.
/// </remarks>
/// <param name="databaseFileName">Path of the FASTA file to inspect.</param>
/// <returns>
/// A <c>NoExceptionAccessNumberParser</c> around an <c>IPIAccessNumberParser</c> ("IPI") or an
/// <c>NRAccessNumberParser</c> ("gi|"); otherwise <c>DefaultAccessNumberParser.GetInstance()</c>.
/// </returns>
public static IAccessNumberParser GuessParser(string databaseFileName)
{
    FastaFormat ff = new FastaFormat();
    using (StreamReader sr = new StreamReader(databaseFileName))
    {
        Sequence seq;
        while ((seq = ff.ReadSequence(sr)) != null)
        {
            string reference = seq.Reference;

            // Ordinal comparison: these prefixes are machine identifiers, not linguistic text.
            if (reference.StartsWith("IPI", System.StringComparison.Ordinal))
            {
                return new NoExceptionAccessNumberParser(new IPIAccessNumberParser());
            }

            if (reference.StartsWith("gi|", System.StringComparison.Ordinal))
            {
                return new NoExceptionAccessNumberParser(new NRAccessNumberParser());
            }
        }
    }

    return DefaultAccessNumberParser.GetInstance();
}
/// <summary>
/// Loads the target coverage regions, attaches their genomic sequence from the genome FASTA
/// file and assigns a gene symbol to each region.
/// </summary>
/// <remarks>
/// Regions are loaded from XML when <c>options.TargetFile</c> ends with ".xml", otherwise from BED.
/// Each FASTA sequence is matched to regions by the part of its name after "chr".
/// </remarks>
/// <param name="options">Supplies the target file, genome FASTA file and refgene file paths.</param>
/// <param name="progress">Receives progress messages.</param>
/// <param name="removeRegionWithoutSequence">When true, regions that received no sequence are dropped from the result.</param>
/// <returns>The coverage regions with sequence and gene symbol filled in.</returns>
public static List<CoverageRegion> GetTargetCoverageRegion(ITargetBuilderOptions options, IProgressCallback progress, bool removeRegionWithoutSequence = true)
{
    List<CoverageRegion> regions;
    if (options.TargetFile.EndsWith(".xml"))
    {
        regions = GetTargetCoverageRegionFromXml(options, progress);
    }
    else
    {
        regions = GetTargetCoverageRegionFromBed(options, progress);
    }

    var regionsBySeqname = regions.ToGroupDictionary(m => m.Seqname);

    progress.SetMessage("Filling sequence from {0}...", options.GenomeFastaFile);
    using (var reader = new StreamReader(options.GenomeFastaFile))
    {
        var format = new FastaFormat();
        for (Sequence chromosome = format.ReadSequence(reader); chromosome != null; chromosome = format.ReadSequence(reader))
        {
            progress.SetMessage("Processing chromosome {0} ...", chromosome.Reference);

            var seqname = chromosome.Name.StringAfter("chr");
            List<CoverageRegion> hits;
            if (!regionsBySeqname.TryGetValue(seqname, out hits))
            {
                continue;
            }

            foreach (var hit in hits)
            {
                // Start is used as a 1-based coordinate into the chromosome sequence.
                hit.Sequence = chromosome.SeqString.Substring((int)(hit.Start - 1), (int)hit.Length);

                // NOTE(review): the reverse complement is stored only for '+' strand regions;
                // confirm that this strand condition is the intended one.
                if (hit.Strand == '+')
                {
                    hit.ReverseComplementedSequence = SequenceUtils.GetReverseComplementedSequence(hit.Sequence);
                }
            }
        }
    }

    if (removeRegionWithoutSequence)
    {
        regions.RemoveAll(r => string.IsNullOrEmpty(r.Sequence));
    }

    progress.SetMessage("Filling sequence finished.");

    // The gene key is the region name up to "_utr3"; unknown genes get an empty symbol.
    var geneSymbols = new MapReader(1, 12).ReadFromFile(options.RefgeneFile);
    foreach (var item in regions)
    {
        var gene = item.Name.StringBefore("_utr3");
        if (geneSymbols.ContainsKey(gene))
        {
            item.GeneSymbol = geneSymbols[gene];
        }
        else
        {
            item.GeneSymbol = string.Empty;
        }
    }

    return regions;
}
/// <summary>
/// Reads the standard-protein FASTA file through <c>SequenceUtils.Read</c> and checks that
/// exactly 43 sequences are returned.
/// </summary>
public void TestReadFromFasta()
{
    const string fastaPath = @"../../../data/Standard_Protein_FIT_060222.noredundant.fasta";
    var format = new FastaFormat();

    List<Sequence> sequences = SequenceUtils.Read(format, fastaPath);

    Assert.AreEqual(43, sequences.Count);
}
/// <summary>
/// Calculates the character composition of all sequences in a FASTA file.
/// </summary>
/// <param name="fastaFile">Path of the FASTA file to read.</param>
/// <returns>
/// For each character that occurs in any sequence, its occurrence count divided by the total
/// character count. An empty dictionary is returned when no characters were read.
/// </returns>
public static Dictionary<char, double> GetDatabaseComposition(string fastaFile)
{
    Dictionary<char, double> result = new Dictionary<char, double>();

    using (StreamReader sr = new StreamReader(fastaFile))
    {
        FastaFormat ff = new FastaFormat();
        Sequence seq;
        while ((seq = ff.ReadSequence(sr)) != null)
        {
            foreach (char c in seq.SeqString)
            {
                // One lookup per character: a missing key leaves occurrences at 0.
                double occurrences;
                result.TryGetValue(c, out occurrences);
                result[c] = occurrences + 1;
            }
        }
    }

    // Normalise counts to fractions; iterate over a key snapshot because values are replaced.
    double total = result.Values.Sum();
    foreach (char c in result.Keys.ToArray())
    {
        result[c] = result[c] / total;
    }

    return result;
}