예제 #1
0
        public static NamedSequence GetConsensus(List <NamedSequence> seqs)
        {
            int len = -1;
            //StringBuilder consensusSeq = new StringBuilder();
            List <AASeq> aaSeqs = new List <AASeq>();
            bool         isDna  = seqs[0].IsDna();

            foreach (NamedSequence seq in seqs)
            {
                AASeq aaSeq = isDna ? DnaSeq.GetInstance(seq.Sequence, MixtureSemantics.Uncertainty) : AASeq.GetInstance(seq.Sequence, MixtureSemantics.Uncertainty);

                len = Math.Max(len, aaSeq.Count);
                //if (len < 0)
                //{
                //    len = aaSeq.Count;
                //}
                //else
                //{
                //    Helper.CheckCondition(len == aaSeq.Count, String.Format("Sequence {0} is a different length from previous sequences", seq.Name));
                //}
                aaSeqs.Add(aaSeq);
            }

            //for (int i = 0; i < len; i++)
            //{
            //    Dictionary<string, int> charToCount = new Dictionary<string, int>();
            //    KeyValuePair<string, int> currentConsensus = new KeyValuePair<string, int>("z", -1);
            //    foreach (AASeq aaSeq in aaSeqs)
            //    {
            //        if (i < aaSeq.Count)
            //        {
            //            string residue = aaSeq.SubSeqAA0Pos(i, 1).ToString();
            //            charToCount[residue] = SpecialFunctions.GetValueOrDefault(charToCount, residue) + 1;
            //            if (charToCount[residue] > currentConsensus.Value)
            //            {
            //                currentConsensus = new KeyValuePair<string, int>(residue, charToCount[residue]);
            //            }
            //        }
            //    }
            //    consensusSeq.Append(currentConsensus.Key);
            //}
            AASeq         consensusAaSeq = GetAaSeqConsensus(aaSeqs);
            NamedSequence consensus      = new NamedSequence("consensus", consensusAaSeq.ToString());

            return(consensus);
        }
예제 #2
0
        new static public AASeq GetInstance(string dnaSeqAsString, MixtureSemantics mixtureSemantics, int offset)
        {
            DnaSeq dnaSeq = new DnaSeq(dnaSeqAsString, mixtureSemantics, offset);

            return(dnaSeq);
        }
예제 #3
0
        public void WriteAsTable(List <NamedSequence> sequences, TextWriter writer)
        {
            CaseIdToAASeq cidToAASeq = CaseIdToAASeq.GetInstance();
            bool          isDna      = sequences[0].IsDna();

            foreach (NamedSequence seq in sequences)
            {
                cidToAASeq.Add(seq.Name,
                               isDna ?
                               DnaSeq.GetInstance(seq.Sequence, MixtureSemantics) :
                               AASeq.GetInstance(seq.Sequence, MixtureSemantics));
            }

            List <string> header = new List <string>(sequences.Count + 1);

            header.Add("Var");
            header.AddRange(sequences.Select(seq => seq.Name));

            writer.WriteLine(header.StringJoin("\t"));

            int maxLen = cidToAASeq.Dictionary.Values.Select(aaSeq => aaSeq.Count).Max();

            for (int pos0 = 0; pos0 < maxLen; pos0++)
            {
                foreach (char aa in cidToAASeq.EveryAminoAcid(pos0))
                {
                    string        merAndPos        = (pos0 + 1) + "@" + aa;
                    int?[]        values           = new int?[sequences.Count];
                    HashSet <int> nonMissingValues = new HashSet <int>();
                    for (int pidIdx = 0; pidIdx < sequences.Count; pidIdx++)
                    {
                        int?       value;
                        Set <char> observedAAs = cidToAASeq.Dictionary[sequences[pidIdx].Name][pos0];
                        if (observedAAs.Contains('?') || observedAAs.Count == 0 ||
                            (observedAAs.Count > 1 && MixtureSemantics == MixtureSemantics.Uncertainty && observedAAs.Contains(aa)))
                        {
                            value = null;
                        }
                        else if (observedAAs.Contains(aa) && (MixtureSemantics != MixtureSemantics.Pure || observedAAs.Count == 1))
                        {
                            value = 1;
                        }
                        else
                        {
                            value = 0;
                        }

                        values[pidIdx] = value;
                        if (value != null)
                        {
                            nonMissingValues.Add((int)value);
                        }
                    }
                    if (nonMissingValues.Count > 1 || (KeepOneValueVariables && nonMissingValues.Count == 1 && nonMissingValues.First() == 1))
                    {
                        writer.WriteLine(Helper.CreateTabString(merAndPos, values.Select(v => v.HasValue ? v.ToString() : MissingStatistics.GetInstance().ToString()).StringJoin("\t")));
                    }
                }
            }


            writer.Flush();
        }