Beispiel #1
0
        //	/*
        //	1189MB	MEPVDPNLEPWNHPGSQPKTPCTNCYCKHCSYHCLVCFQTKGLGISYGRK
        //	J112MA	MEPVDPNLEPWNHPGSQPITACNKCYCKYCSYHCLVCFQTKGLGISYGRK
        //	1157M3M   MEPVDPNLEPWNHPGSQPKTPCNKCYCKHCSYHCLVCFQTKGLGISYGRK
        //	1195MB	MEPVDPNLEPWNHPGSQPKTPCNKCYCKYCSYHCLVCFQTKGLGISYGRK
        //	 */
        /// <summary>
        /// Builds a <see cref="CaseIdToAASeq"/> from tabular text, adding one entry per row.
        /// </summary>
        /// <param name="textReader">Source of the table; handed to <c>SpecialFunctions.TabFileTable</c>
        /// together with the column header "cid" TAB "aaSeq".</param>
        /// <param name="mixtureSemantics">Passed through unchanged to <c>AASeq.GetInstance</c> for every row.</param>
        /// <param name="offset">Passed through unchanged to <c>AASeq.GetInstance</c> for every row.</param>
        /// <returns>A new collection holding one sequence per row, keyed by the row's "cid" value.</returns>
        static public CaseIdToAASeq GetInstance(TextReader textReader, MixtureSemantics mixtureSemantics, int offset)
        {
            const string caseIdColumn = "cid";
            const string aaSeqColumn  = "aaSeq";
            const string header       = caseIdColumn + "\t" + aaSeqColumn;

            CaseIdToAASeq result = CaseIdToAASeq.GetInstance();

            // NOTE(review): the trailing 'false' presumably tells TabFileTable that the input
            // carries no header line of its own -- confirm against SpecialFunctions.
            foreach (Dictionary <string, string> row in SpecialFunctions.TabFileTable(textReader, header, false))
            {
                result.Add(row[caseIdColumn], AASeq.GetInstance(row[aaSeqColumn], mixtureSemantics, offset));
            }

            return result;
        }
Beispiel #2
0
        /// <summary>
        /// Writes a tab-delimited table of indicator variables: one row per
        /// (1-based position, character) pair and one column per input sequence.
        /// </summary>
        /// <remarks>
        /// The first line is "Var" followed by every sequence name. Each following row is
        /// labelled "position@character" and holds, per sequence, 1, 0, or the text of
        /// <c>MissingStatistics.GetInstance()</c> when the observation is treated as missing.
        /// A row is emitted only if its non-missing cells take more than one distinct value,
        /// or, when <c>KeepOneValueVariables</c> is set, if all of its non-missing cells are 1.
        /// An empty <paramref name="sequences"/> list produces just the header line.
        /// </remarks>
        /// <param name="sequences">Sequences to tabulate. The first element decides whether all
        /// of them are parsed as DNA or as amino-acid sequences.</param>
        /// <param name="writer">Destination for the table; flushed before returning.</param>
        public void WriteAsTable(List <NamedSequence> sequences, TextWriter writer)
        {
            CaseIdToAASeq cidToAASeq = CaseIdToAASeq.GetInstance();

            // An empty list has no first element to probe; the flag is never read in that case.
            bool isDna = sequences.Count > 0 && sequences[0].IsDna();

            foreach (NamedSequence seq in sequences)
            {
                cidToAASeq.Add(seq.Name,
                               isDna ?
                               DnaSeq.GetInstance(seq.Sequence, MixtureSemantics) :
                               AASeq.GetInstance(seq.Sequence, MixtureSemantics));
            }

            List <string> header = new List <string>(sequences.Count + 1);

            header.Add("Var");
            header.AddRange(sequences.Select(seq => seq.Name));

            writer.WriteLine(header.StringJoin("\t"));

            // Max() throws on an empty source, so fall back to zero positions when nothing was added.
            int maxLen = sequences.Count == 0
                ? 0
                : cidToAASeq.Dictionary.Values.Select(aaSeq => aaSeq.Count).Max();

            for (int pos0 = 0; pos0 < maxLen; pos0++)
            {
                foreach (char aa in cidToAASeq.EveryAminoAcid(pos0))
                {
                    // Row label uses a 1-based position, e.g. "1@M".
                    string        merAndPos        = (pos0 + 1) + "@" + aa;
                    int?[]        values           = new int?[sequences.Count];
                    HashSet <int> nonMissingValues = new HashSet <int>();

                    for (int pidIdx = 0; pidIdx < sequences.Count; pidIdx++)
                    {
                        Set <char> observedAAs = cidToAASeq.Dictionary[sequences[pidIdx].Name][pos0];
                        int?       value       = GetIndicatorValue(observedAAs, aa);

                        values[pidIdx] = value;
                        if (value.HasValue)
                        {
                            nonMissingValues.Add(value.Value);
                        }
                    }

                    // Skip rows that carry no contrast: only missing cells, or a single constant value
                    // (a constant 1 is kept when KeepOneValueVariables is set).
                    bool hasVariation  = nonMissingValues.Count > 1;
                    bool isConstantOne = nonMissingValues.Count == 1 && nonMissingValues.First() == 1;
                    if (hasVariation || (KeepOneValueVariables && isConstantOne))
                    {
                        writer.WriteLine(Helper.CreateTabString(merAndPos, values.Select(v => v.HasValue ? v.ToString() : MissingStatistics.GetInstance().ToString()).StringJoin("\t")));
                    }
                }
            }

            writer.Flush();
        }

        /// <summary>
        /// Classifies one observation against a target character: 1 (present), 0 (absent) or null (missing).
        /// </summary>
        /// <param name="observedAAs">Characters observed for one sequence at one position.</param>
        /// <param name="aa">The character the current table row is about.</param>
        /// <returns>
        /// Null when the set contains '?', is empty, or holds several characters including
        /// <paramref name="aa"/> under <c>MixtureSemantics.Uncertainty</c>; 1 when the set contains
        /// <paramref name="aa"/> and either the semantics is not <c>MixtureSemantics.Pure</c> or
        /// <paramref name="aa"/> is the only character; otherwise 0.
        /// </returns>
        private int? GetIndicatorValue(Set <char> observedAAs, char aa)
        {
            if (observedAAs.Contains('?') || observedAAs.Count == 0)
            {
                return null;
            }

            bool containsTarget = observedAAs.Contains(aa);

            // Under Uncertainty semantics, a mixture that includes the target cannot be called either way.
            if (observedAAs.Count > 1 && MixtureSemantics == MixtureSemantics.Uncertainty && containsTarget)
            {
                return null;
            }

            // Under Pure semantics, only an unmixed observation of the target counts as present.
            if (containsTarget && (MixtureSemantics != MixtureSemantics.Pure || observedAAs.Count == 1))
            {
                return 1;
            }

            return 0;
        }