Example #1
0
        /// <summary>
        /// Translates this entry's sequence and wraps the result in a new NamedSequence
        /// that carries over the current Name and Protein.
        /// </summary>
        /// <param name="dashAsMissing">Forwarded unchanged to the string-based Translate overload.</param>
        /// <returns>A new NamedSequence holding the translated sequence.</returns>
        public NamedSequence Translate(bool dashAsMissing)
        {
            // Translate first so a failure surfaces before the new instance is built.
            string translated = Translate(Sequence, dashAsMissing);

            return new NamedSequence(Name, Protein, translated);
        }
Example #2
0
        /// <summary>
        /// Reads name/sequence pairs from tab-delimited text.
        /// </summary>
        /// <param name="reader">Source text. When NoHeader is set, OUTPUT_HEADER is prepended to the
        /// full remaining content of this reader before parsing.</param>
        /// <returns>One NamedSequence per data row, in the order the rows are produced.</returns>
        public override List <NamedSequence> Parse(TextReader reader)
        {
            if (NoHeader)
            {
                // Headerless input gets the standard header line prepended so both cases share one code path.
                reader = new StringReader(OUTPUT_HEADER + "\n" + reader.ReadToEnd());
            }

            List <NamedSequence> parsed  = new List <NamedSequence>();
            string[]             columns = null;

            foreach (Dictionary <string, string> record in SpecialFunctions.TabFileTable(reader, "", true, false, true))
            {
                if (columns != null)
                {
                    // Data row: look up both fields by the column names captured from the first row.
                    string rawName = record[columns[0]];
                    string rawSeq  = record[columns[1]];

                    NamedSequence entry = NamedSequence.Parse(rawName);
                    entry.Sequence = RemoveWhiteSpace(rawSeq);
                    parsed.Add(entry);
                    continue;
                }

                // First row: its "" entry is split on tabs to obtain the two column names.
                string headerLine = record[""];
                columns = headerLine.Split('\t');
                Helper.CheckCondition(columns.Length == 2, "Header does not conform to tab file format: " + headerLine);
            }

            return parsed;
        }
Example #3
0
        /// <summary>
        /// Reads sequences from text in which each record starts with a line beginning with '&gt;'
        /// and is followed by one or more lines of sequence data.
        /// </summary>
        /// <param name="reader">Source text, read line by line until exhausted.</param>
        /// <returns>
        /// One NamedSequence per header line, in file order. The list is empty when the input
        /// contains no header line (including completely empty input).
        /// </returns>
        /// <remarks>
        /// The header text after '&gt;' is trimmed and handed to NamedSequence.Parse. Sequence lines are
        /// trimmed and concatenated. Any lines that appear before the first header are discarded.
        /// </remarks>
        public override List <NamedSequence> Parse(TextReader reader)
        {
            List <NamedSequence> seqs     = new List <NamedSequence>();
            NamedSequence        current  = null;
            StringBuilder        sequence = new StringBuilder();
            string               line;

            while ((line = reader.ReadLine()) != null)
            {
                // Ordinal check on the first character; avoids culture-sensitive prefix matching.
                bool isHeader = line.Length > 0 && line[0] == '>';

                if (!isHeader)
                {
                    sequence.Append(line.Trim());
                    continue;
                }

                // A new header closes the record that was being accumulated, if any.
                if (current != null)
                {
                    current.Sequence = sequence.ToString();
                    seqs.Add(current);
                }

                current = NamedSequence.Parse(line.Substring(1).Trim());
                // Start a fresh buffer, pre-sized to the previous record's length.
                sequence = new StringBuilder(sequence.Length);
            }

            // Flush the final record. With no header in the input there is nothing to flush;
            // previously this dereferenced null and threw NullReferenceException.
            if (current != null)
            {
                current.Sequence = sequence.ToString();
                seqs.Add(current);
            }

            return seqs;
        }
Example #4
0
        /// <summary>
        /// Builds a NamedSequence from a textual argument list.
        /// </summary>
        /// <param name="constructorArgs">Argument text; it is wrapped in parentheses and handed to ConstructorArguments.</param>
        /// <returns>A NamedSequence with Protein and Name populated; other members are left at their defaults.</returns>
        public static NamedSequence Parse(string constructorArgs)
        {
            ConstructorArguments args = new ConstructorArguments("(" + constructorArgs + ")");

            // The optional "Protein" argument is extracted before the positional name is read.
            NamedSequence parsed = new NamedSequence
            {
                Protein = args.ExtractOptional <string>("Protein", null),
                Name    = args.ExtractNext <string>("name")
            };

            return parsed;
        }
Example #5
0
        /// <summary>
        /// Reads sequences from Phylip-style text: a header line with the number of sequences and the
        /// sequence length, followed by one line per sequence (blank lines are skipped).
        /// </summary>
        /// <param name="reader">Source text, read line by line until exhausted.</param>
        /// <returns>The parsed sequences, in file order.</returns>
        /// <exception cref="FormatException">
        /// The input is empty, the first line is rejected by ConformsToFileFormat, a sequence does not have
        /// the declared length, or the number of sequences read differs from the declared count.
        /// </exception>
        /// <remarks>
        /// On each sequence line the name ends at the first space or tab found within the first 50 characters,
        /// but never before column 10; everything after that point, with whitespace removed, is the sequence.
        /// </remarks>
        public override List <NamedSequence> Parse(TextReader reader)
        {
            // ReadLine returns null on empty input; report that as a format problem rather than a NullReferenceException.
            string firstLine = reader.ReadLine();
            string line      = firstLine == null ? null : firstLine.Trim();

            if (line == null || !ConformsToFileFormat(line))
            {
                throw new FormatException("File does not conform to Phylip format. First line must be two integers specifying number of sequences and sequence length.");
            }

            string[] fields = Regex.Split(line, @"\s+");

            int lineCount, lineLength;

            try
            {
                lineCount  = int.Parse(fields[0]);
                lineLength = int.Parse(fields[1]);
            }
            catch (FormatException)
            {
                Console.WriteLine("Error parsing Phylip header: {0} split to {1} and {2}", line, fields[0], fields[1]);
                // Bare rethrow keeps the original stack trace ("throw e;" would reset it).
                throw;
            }

            List <NamedSequence> seqs           = new List <NamedSequence>(lineCount);
            char[]               nameSeparators = new char[] { ' ', '\t' };

            while ((line = reader.ReadLine()) != null)
            {
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                int separator = line.IndexOfAny(nameSeparators, 0, Math.Min(50, line.Length));
                // Names occupy at least 10 columns. Clamp to the line length so a short line yields an
                // empty sequence (and the length check below) instead of an ArgumentOutOfRangeException.
                int endOfName = Math.Min(line.Length, Math.Max(10, separator));

                string name = line.Substring(0, endOfName).Trim();
                string seq  = RemoveWhiteSpace(line.Substring(endOfName));

                NamedSequence sequence = NamedSequence.Parse(name);
                sequence.Sequence = seq;
                if (sequence.Sequence.Length != lineLength)
                {
                    throw new FormatException(string.Format("Expected {0} to be {1} chars long, but its {2} long.",
                                                            sequence.Name, lineLength, sequence.Sequence.Length));
                }

                seqs.Add(sequence);
            }

            if (seqs.Count != lineCount)
            {
                throw new FormatException(string.Format("Expected {0} sequences. Read {1}.", lineCount, seqs.Count));
            }

            return seqs;
        }
Example #6
0
        /// <summary>
        /// Two NamedSequence instances are equal when both their Name and their Sequence match.
        /// </summary>
        /// <param name="obj">Object to compare against; anything that is not a NamedSequence compares unequal.</param>
        /// <returns>true when <paramref name="obj"/> is a NamedSequence with the same Name and Sequence.</returns>
        public override bool Equals(object obj)
        {
            NamedSequence that = obj as NamedSequence;

            if (that == null)
            {
                // Covers both a null argument and an argument of a different type.
                return false;
            }

            bool sameName = that.Name == this.Name;

            return sameName && that.Sequence == this.Sequence;
        }
Example #7
0
        /// <summary>
        /// Computes a consensus across the given sequences and returns it as a NamedSequence named "consensus".
        /// </summary>
        /// <param name="seqs">
        /// Sequences to summarise. Must contain at least one element: the first entry is inspected to decide
        /// whether all entries are treated as DNA or as amino-acid sequences.
        /// </param>
        /// <returns>A NamedSequence named "consensus" whose sequence is the string form of the result of GetAaSeqConsensus.</returns>
        public static NamedSequence GetConsensus(List <NamedSequence> seqs)
        {
            List <AASeq> aaSeqs = new List <AASeq>();
            // The first sequence decides the alphabet used to parse every sequence.
            bool         isDna  = seqs[0].IsDna();

            foreach (NamedSequence seq in seqs)
            {
                AASeq aaSeq = isDna ? DnaSeq.GetInstance(seq.Sequence, MixtureSemantics.Uncertainty) : AASeq.GetInstance(seq.Sequence, MixtureSemantics.Uncertainty);

                aaSeqs.Add(aaSeq);
            }

            AASeq consensusAaSeq = GetAaSeqConsensus(aaSeqs);

            return new NamedSequence("consensus", consensusAaSeq.ToString());
        }
Example #8
0
        /// <summary>
        /// Translates each nucleotide sequence in <paramref name="seqs"/> using the requested reading frame.
        /// </summary>
        /// <param name="seqs">Sequences to translate.</param>
        /// <param name="nucToAaDashAsMissing">Forwarded unchanged to NamedSequence.Translate.</param>
        /// <param name="readingFrameToTranslate">1-based reading frame, 1 through 6. Translation starts at offset
        /// readingFrameToTranslate-1. For frames 4-6 the input is first passed through NucSeqReverseComplement and
        /// translated using frame rf-3; each translated result is then reversed with ReverseSequence() so that it
        /// reads in the same order as the original string. Trailing nucleotides that do not fill a complete codon
        /// are cut off, and a warning is written to standard error.</param>
        /// <returns>One translated NamedSequence per input, in input order, carrying the input's Name and Protein.</returns>
        public static List <NamedSequence> Translate(List <NamedSequence> seqs, bool nucToAaDashAsMissing = true, int readingFrameToTranslate = 1)
        {
            Helper.CheckCondition(readingFrameToTranslate > 0 && readingFrameToTranslate <= 6, "readingFrameToTranslate must be between 1 and 6. " + readingFrameToTranslate + " is not valid.");
            bool isAntisense = false;

            if (readingFrameToTranslate > 3)
            {
                seqs = NucSeqReverseComplement(seqs);
                readingFrameToTranslate -= 3;
                isAntisense              = true;
            }

            List <NamedSequence> result = new List <NamedSequence>(seqs.Count);
            // Length of the previously processed sequence; used to warn once per run of same-length inputs.
            int lastLen = -1;

            foreach (NamedSequence seq in seqs)
            {
                // Clamp the frame offset so a sequence shorter than the offset translates as empty instead of throwing.
                int    start          = Math.Min(readingFrameToTranslate - 1, seq.Sequence.Length);
                string seqToTranslate = seq.Sequence.Substring(start);
                int    remainder      = seqToTranslate.Length % 3;

                if (seqToTranslate.Length != lastLen)
                {
                    lastLen = seqToTranslate.Length;
                    if (remainder != 0)
                    {
                        Console.Error.WriteLine("WARNING: Sequence of length {0} is not divisible by 3. Cutting off the end for translation.", lastLen);
                    }
                }

                if (remainder != 0)
                {
                    // Drop the incomplete trailing codon.
                    seqToTranslate = seqToTranslate.Substring(0, seqToTranslate.Length - remainder);
                }

                NamedSequence translatedSeq = new NamedSequence(seq.Name, seq.Protein, NamedSequence.Translate(seqToTranslate, nucToAaDashAsMissing));
                result.Add(isAntisense ? translatedSeq.ReverseSequence() : translatedSeq);
            }

            return result;
        }