/// <summary>
/// Counts, for every non-ambiguous mer produced by <c>SubSeqEnumeration(merLength)</c> over all cases,
/// how often that mer starts at each original position.
/// </summary>
/// <param name="merLength">Length passed to <c>SubSeqEnumeration</c> for every case's sequence.</param>
/// <param name="textWriterForWarnings">Receives one warning line each time a mer string repeats within a single case.</param>
/// <param name="caseToCompressedAASeq">Case id mapped to the sequence that is scanned.</param>
/// <returns>
/// Mer string mapped to (value of <c>mer.OriginalAA1Position(0)</c> mapped to number of occurrences).
/// NOTE(review): the method name says "AA0" but the inner key is whatever OriginalAA1Position returns.
/// </returns>
private static Dictionary <string, Dictionary <string, int> > CreateMerStringToOriginalAA0PositionToCount(int merLength, TextWriter textWriterForWarnings, Dictionary <string, AASeq> caseToCompressedAASeq)
        {
            Dictionary <string, Dictionary <string, int> > merToPositionCounts = new Dictionary <string, Dictionary <string, int> >();

            foreach (KeyValuePair <string, AASeq> caseIdAndAASeq in caseToCompressedAASeq)
            {
                string caseId = caseIdAndAASeq.Key;
                AASeq  caseSequence = caseIdAndAASeq.Value;

                // Tracks the mer strings already met in this one case; reset for every case.
                Set <string> mersSeenInCase = new Set <string>();

                foreach (AASeq mer in caseSequence.SubSeqEnumeration(merLength))
                {
                    if (!mer.Ambiguious)
                    {
                        string merString = mer.ToString();

                        // A repeat is only reported; it is still counted below.
                        if (mersSeenInCase.Contains(merString))
                        {
                            textWriterForWarnings.WriteLine("Warning: Mer '{0}' appears again in case '{1}'", merString, caseId);
                        }
                        mersSeenInCase.AddNewOrOld(merString);

                        string startPosition = mer.OriginalAA1Position(0);

                        // Presumably GetValueOrDefault stores a fresh inner dictionary when the mer is new
                        // (otherwise the count written on the next line would be lost) - defined outside this view.
                        Dictionary <string, int> positionToCount = SpecialFunctions.GetValueOrDefault(merToPositionCounts, merString);
                        positionToCount[startPosition] = 1 + SpecialFunctions.GetValueOrDefault(positionToCount, startPosition);
                    }
                }
            }
            return merToPositionCounts;
        }
Beispiel #2
0
        /// <summary>
        /// Builds a new AASeq from <paramref name="aaSeqIn"/> that leaves out every position equal to
        /// <c>Delete</c> and stops copying at the first position equal to <c>Stop</c> (the stop itself is
        /// not copied). Each kept position carries along its original position label.
        /// </summary>
        /// <param name="caseId">Only used in the warning text.</param>
        /// <param name="aaSeqIn">Sequence to copy from; it is not modified here.</param>
        /// <param name="errorStream">Receives a warning when a stop is found anywhere but the last index.</param>
        /// <returns>The newly created, compressed sequence with the same Mixture setting as the input.</returns>
        static public AASeq GetCompressedInstance(string caseId, AASeq aaSeqIn, TextWriter errorStream)
        {
            AASeq compressed = new AASeq(aaSeqIn.Mixture);

            compressed.Sequence = new List <Set <char> >();
            compressed._originalAA1PositionTableOrNull = new List <string>();

            for (int position = 0; position < aaSeqIn.Count; position++)
            {
                Set <char> residues = aaSeqIn[position];
                string     positionLabel = aaSeqIn.OriginalAA1Position(position);

                bool isDelete = residues.Equals(Delete); //!!!const
                if (!isDelete)
                {
                    if (residues.Equals(Stop)) //!!!const
                    {
                        // A stop ends the copy wherever it occurs; it is only worth a warning when
                        // something follows it in the input.
                        bool stopIsAtEnd = (position == aaSeqIn.Count - 1);
                        if (!stopIsAtEnd)
                        {
                            errorStream.WriteLine("Warning: The sequence for case id '{0}' contains a '*' before the last position", caseId);
                        }
                        break;
                    }

                    compressed.Sequence.Add(residues);
                    compressed._originalAA1PositionTableOrNull.Add(positionLabel);
                }
            }
            return compressed;
        }
Beispiel #3
0
        /// <summary>
        /// Creates an AASeq with the given mixture flag whose Sequence is the result of
        /// <c>CreateSequence(aaSeqAsString)</c>. No original-position table is assigned here.
        /// </summary>
        /// <param name="aaSeqAsString">Text handed unchanged to <c>CreateSequence</c>.</param>
        /// <param name="mixture">Passed to the AASeq constructor.</param>
        /// <returns>The new instance.</returns>
        static public AASeq GetInstance(string aaSeqAsString, bool mixture)
        {
            AASeq parsed = new AASeq(mixture);
            parsed.Sequence = CreateSequence(aaSeqAsString);

            return parsed;
        }
Beispiel #4
0
 /// <summary>
 /// Lazily yields every window of <paramref name="merLength"/> consecutive positions, starting at
 /// index 0 and moving the start up by one until the window would run past the end of Sequence.
 /// </summary>
 /// <param name="merLength">Window length passed to <c>SubSeqAA0Pos</c>.</param>
 /// <returns>One AASeq per start index, in increasing start order; nothing when Sequence is shorter than the window.</returns>
 internal IEnumerable <AASeq> SubSeqEnumeration(int merLength)
 {
     int startIndex = 0;

     // The bound is re-read on every step, exactly as a for-loop condition would be.
     while (startIndex <= Sequence.Count - merLength)
     {
         yield return SubSeqAA0Pos(startIndex, merLength);
         ++startIndex;
     }
 }
Beispiel #5
0
        /// <summary>
        /// Creates an AASeq from text and attaches a caller-supplied table of original position labels.
        /// </summary>
        /// <param name="aaSeqAsString">Text handed unchanged to <c>CreateSequence</c>.</param>
        /// <param name="originalAA1PositionTable">
        /// One label per position. The list is stored by reference, not copied.
        /// </param>
        /// <param name="mixture">Passed to the AASeq constructor.</param>
        /// <returns>The new instance.</returns>
        /// <remarks>
        /// <c>SpecialFunctions.CheckCondition</c> is invoked with a false condition when the table length
        /// differs from the sequence length (presumably it throws; it is defined outside this view).
        /// </remarks>
        static public AASeq GetInstance(string aaSeqAsString, List <string> originalAA1PositionTable, bool mixture)
        {
            AASeq parsed = new AASeq(mixture);
            parsed.Sequence = CreateSequence(aaSeqAsString);

            bool tableMatchesSequence = (parsed.Count == originalAA1PositionTable.Count);
            SpecialFunctions.CheckCondition(tableMatchesSequence, "aaSeq and position table must be same length");

            parsed._originalAA1PositionTableOrNull = originalAA1PositionTable;
            return parsed;
        }
Beispiel #6
0
 /// <summary>
 /// Attempts to extract the window of <paramref name="merLength"/> positions that starts at the
 /// zero-based index <paramref name="aa0Pos"/>.
 /// </summary>
 /// <param name="aa0Pos">Zero-based start index of the window.</param>
 /// <param name="merLength">Number of positions in the window; zero is allowed, negative is rejected.</param>
 /// <param name="aaSeq">The extracted window on success; null when the window does not fit.</param>
 /// <returns>True when the whole window lies inside Sequence, false otherwise.</returns>
 public bool TrySubSeqAA0Pos(int aa0Pos, int merLength, out AASeq aaSeq)
 {
     // A negative length can never describe a valid window, so it is reported as a failed try
     // instead of being handed on to SubSeqAA0Pos.
     // The upper bound is tested as "merLength > Count - aa0Pos" rather than
     // "aa0Pos + merLength > Count": with aa0Pos >= 0 the subtraction cannot overflow, whereas the
     // addition can wrap around to a negative number for very large arguments and slip past the check.
     if (aa0Pos < 0 || merLength < 0 || merLength > this.Sequence.Count - aa0Pos)
     {
         aaSeq = null;
         return(false);
     }
     aaSeq = SubSeqAA0Pos(aa0Pos, merLength);
     return(true);
 }
        //public void CreateSparseFile(string outputFileName, bool keepOneValueVariables)
        //{
        //    CreateSparseFile(outputFileName, keepOneValueVariables);
        //}

        /// <summary>
        /// Produces a new dictionary holding, for every case in <c>_caseIdToAASeq</c>, the result of
        /// <c>AASeq.GetCompressedInstance</c> for that case. The stored sequences are not modified here.
        /// </summary>
        /// <param name="textWriter">Forwarded to <c>GetCompressedInstance</c> for its warnings.</param>
        /// <returns>Case id mapped to its compressed sequence.</returns>
        private Dictionary <string, AASeq> RemoveDeletesAndStopsFromData(TextWriter textWriter)
        {
            Dictionary <string, AASeq> caseIdToCompressed = new Dictionary <string, AASeq>();

            foreach (KeyValuePair <string, AASeq> entry in _caseIdToAASeq)
            {
                string caseId = entry.Key;
                AASeq  compressed = AASeq.GetCompressedInstance(caseId, entry.Value, textWriter);

                caseIdToCompressed.Add(caseId, compressed);
            }
            return caseIdToCompressed;
        }
 /// <summary>
 /// Registers a sequence under a case id that must not have been added before.
 /// </summary>
 /// <param name="caseId">Key for the new entry; a repeated id fails the CheckCondition call.</param>
 /// <param name="aaSeq">Sequence to store.</param>
 /// <remarks>
 /// The first sequence added fixes SequenceLength. A later sequence with a different Count only
 /// produces a console warning: it is still stored and SequenceLength keeps its earlier value.
 /// </remarks>
 public void Add(string caseId, AASeq aaSeq)
 {
     bool caseIdIsNew = !_caseIdToAASeq.ContainsKey(caseId);
     SpecialFunctions.CheckCondition(caseIdIsNew, string.Format("caseId {0} appears more than once", caseId));

     if (SequenceLength == null)
     {
         // Nothing recorded yet: this sequence defines the reference length.
         SequenceLength = aaSeq.Count;
     }
     else if (SequenceLength != aaSeq.Count)
     {
         Console.WriteLine("Warning: Not all amino acid sequences are of the same length");
     }

     _caseIdToAASeq.Add(caseId, aaSeq);
 }
Beispiel #9
0
        /// <summary>
        /// Content equality: true when <paramref name="obj"/> is an AASeq whose Sequence has the same
        /// number of positions and an equal set at every position.
        /// </summary>
        /// <param name="obj">Object to compare with; null or a non-AASeq yields false.</param>
        /// <returns>True when both sequences hold equal sets position by position.</returns>
        /// <remarks>
        /// Comparing the two Sequence lists with == only tests whether they are the same list object,
        /// so two separately built sequences with identical content were never equal. The lists are now
        /// walked element by element, using the sets' own Equals (the same call this class already uses
        /// to compare a set against Delete and Stop).
        /// NOTE(review): GetHashCode is not visible from here - confirm it is derived from the sequence
        /// content so that equal instances also hash alike.
        /// </remarks>
        public override bool Equals(object obj)
        {
            AASeq other = obj as AASeq;

            // ReferenceEquals keeps the null test independent of any == operator defined on AASeq.
            if (object.ReferenceEquals(other, null))
            {
                return false;
            }

            // Same list object (this also covers both being null): trivially equal.
            if (object.ReferenceEquals(Sequence, other.Sequence))
            {
                return true;
            }
            if (object.ReferenceEquals(Sequence, null) || object.ReferenceEquals(other.Sequence, null))
            {
                return false;
            }
            if (Sequence.Count != other.Sequence.Count)
            {
                return false;
            }

            for (int index = 0; index < Sequence.Count; ++index)
            {
                // The static object.Equals tolerates null entries on either side.
                if (!object.Equals(Sequence[index], other.Sequence[index]))
                {
                    return false;
                }
            }
            return true;
        }
        //    /*
        //    1189MB    MEPVDPNLEPWNHPGSQPKTPCTNCYCKHCSYHCLVCFQTKGLGISYGRK
        //    J112MA    MEPVDPNLEPWNHPGSQPITACNKCYCKYCSYHCLVCFQTKGLGISYGRK
        //    1157M3M   MEPVDPNLEPWNHPGSQPKTPCNKCYCKHCSYHCLVCFQTKGLGISYGRK
        //    1195MB    MEPVDPNLEPWNHPGSQPKTPCNKCYCKYCSYHCLVCFQTKGLGISYGRK
        //     */
        /// <summary>
        /// Reads rows from <paramref name="textReader"/> via <c>SpecialFunctions.TabFileTable</c> using the
        /// header "cid\taaSeq" and adds one AASeq per row, keyed by the row's "cid" value.
        /// </summary>
        /// <param name="textReader">Source of the tab-delimited table.</param>
        /// <param name="mixture">Forwarded to <c>AASeq.GetInstance</c> for every row.</param>
        /// <returns>A new CaseIdToAASeq filled with one entry per row.</returns>
        static public CaseIdToAASeq GetInstance(TextReader textReader, bool mixture)
        {
            CaseIdToAASeq loaded = CaseIdToAASeq.GetInstance();

            // NOTE(review): the meaning of the trailing 'false' argument is defined by TabFileTable,
            // which is outside this view.
            foreach (Dictionary <string, string> row in SpecialFunctions.TabFileTable(textReader, "cid\taaSeq", false))
            {
                // Column names are still literals here (the original marks them "!!!const").
                loaded.Add(row["cid"], AASeq.GetInstance(row["aaSeq"], mixture));
            }

            return loaded;
        }
Beispiel #11
0
        /// <summary>
        /// Returns a new AASeq covering <paramref name="merLength"/> positions of this sequence starting
        /// at the zero-based index <paramref name="aa0Pos"/>, with the same Mixture setting and with the
        /// original position label of every covered position.
        /// </summary>
        /// <param name="aa0Pos">Zero-based start index.</param>
        /// <param name="merLength">Number of positions to take.</param>
        /// <returns>The new sub-sequence; its Sequence is whatever <c>SpecialFunctions.SubList</c> returns.</returns>
        public AASeq SubSeqAA0Pos(int aa0Pos, int merLength)
        {
            List <Set <char> > window = SpecialFunctions.SubList(Sequence, aa0Pos, merLength);

            AASeq mer = new AASeq(Mixture);
            mer.Sequence = window;

            // The result always gets an explicit label table, filled from this instance's labels.
            List <string> labels = new List <string>();
            mer._originalAA1PositionTableOrNull = labels;

            int endExclusive = aa0Pos + merLength;
            for (int aa0 = aa0Pos; aa0 < endExclusive; aa0++)
            {
                labels.Add(OriginalAA1Position(aa0));
            }
            return mer;
        }
        /// <summary>
        /// Asks every case's sequence whether it contains the given mer and collects the definite answers.
        /// </summary>
        /// <param name="merAsString">The mer; also used to build the regex passed to <c>ContainsMer</c>.</param>
        /// <param name="caseToCompressedAASeq">Case id mapped to the sequence to test.</param>
        /// <param name="valueToNonZeroCount">
        /// Set to a new dictionary counting how many cases answered true and how many answered false.
        /// </param>
        /// <returns>
        /// Case id mapped to the answer, for those cases where <c>ContainsMer</c> returned a non-null value;
        /// cases with a null answer appear in neither result.
        /// </returns>
        private Dictionary <string, bool> FindMerValues(string merAsString, Dictionary <string, AASeq> caseToCompressedAASeq, out Dictionary <bool, int> valueToNonZeroCount)
        {
            // One regex is built up front and shared by all cases.
            Regex merAsRegex = AASeq.CreateMerRegex(merAsString);

            Dictionary <string, bool> caseIdToAnswer = new Dictionary <string, bool>();
            valueToNonZeroCount = new Dictionary <bool, int>();

            foreach (KeyValuePair <string, AASeq> entry in caseToCompressedAASeq)
            {
                bool? answer = entry.Value.ContainsMer(merAsString, merAsRegex);
                if (!answer.HasValue)
                {
                    // No definite answer for this case: leave it out entirely.
                    continue;
                }

                bool containsMer = answer.Value;
                caseIdToAnswer.Add(entry.Key, containsMer);
                valueToNonZeroCount[containsMer] = 1 + SpecialFunctions.GetValueOrDefault(valueToNonZeroCount, containsMer);
            }
            return caseIdToAnswer;
        }
        /// <summary>
        /// Lazily produces one line per (position, amino acid, case) combination that has a definite
        /// value. Each line is built by <c>SpecialFunctions.CreateTabString</c> from a variable name of
        /// the form "aa@position", the case id, and 1 or 0.
        /// </summary>
        /// <param name="keepOneValueVariables">
        /// When false, positions at which <c>EveryAminoAcid</c> reports a single amino acid are skipped,
        /// and so are variables whose cases do not show both a true and a false value.
        /// </param>
        /// <returns>The lines, position by position, amino acid by amino acid; empty when no case was added.</returns>
        public IEnumerable <string> SparseLineEnumeration(bool keepOneValueVariables)
        {
            if (_caseIdToAASeq.Count == 0)
            {
                Debug.Assert(SequenceLength == null); // real assert
                yield break;
            }
            SpecialFunctions.CheckCondition(SequenceLength != null, "This converter to sparse assumes all sequences have the same length");

            for (int aa0Pos = 0; aa0Pos < (int)SequenceLength; ++aa0Pos)
            {
                Set <char> aminoAcidsAtPosition = EveryAminoAcid(aa0Pos);
                if (!keepOneValueVariables && aminoAcidsAtPosition.Count == 1)
                {
                    continue;
                }

                // Label of this position; taken from the first case visited and then required to be
                // the same for every other case.
                string posName = null;

                foreach (char aa in aminoAcidsAtPosition)
                {
                    Set <bool> valuesSeen = Set <bool> .GetInstance();
                    Dictionary <string, bool> caseToVal = new Dictionary <string, bool>();

                    foreach (KeyValuePair <string, AASeq> caseIdAndAASeq in _caseIdToAASeq)
                    {
                        string     caseId = caseIdAndAASeq.Key;
                        AASeq      aaSeq = caseIdAndAASeq.Value;
                        Set <char> strainAASet = aaSeq[aa0Pos];

                        string labelInThisCase = aaSeq.OriginalAA1Position(aa0Pos);
                        if (posName == null)
                        {
                            posName = labelInThisCase;
                        }
                        else
                        {
                            SpecialFunctions.CheckCondition(posName == labelInThisCase);
                        }

                        // Classification of this case for the amino acid 'aa':
                        //   set equals AASeq.Any                     -> missing (no entry)
                        //   set does not contain aa                  -> false
                        //   set contains aa and nothing else         -> true
                        //   set contains aa plus others, Mixture     -> false
                        //   set contains aa plus others, not Mixture -> missing (no entry)
                        if (strainAASet.Equals(AASeq.Any))
                        {
                            continue;
                        }

                        bool val;
                        if (!strainAASet.Contains(aa))
                        {
                            val = false;
                        }
                        else if (strainAASet.Count <= 1)
                        {
                            val = true;
                        }
                        else if (aaSeq.Mixture)
                        {
                            val = false;
                        }
                        else
                        {
                            continue;
                        }

                        caseToVal.Add(caseId, val);
                        valuesSeen.AddNewOrOld(val);
                    }

                    SpecialFunctions.CheckCondition(posName != null);

                    // Unless one-valued variables are wanted, only emit a variable that separates the
                    // cases, i.e. both true and false were recorded.
                    if (!keepOneValueVariables && valuesSeen.Count != 2)
                    {
                        continue;
                    }

                    // The name is the same for every case of this variable: amino acid first, then position.
                    string variableName = string.Format("{1}@{0}", posName, aa);
                    foreach (KeyValuePair <string, bool> caseIdAndVal in caseToVal)
                    {
                        yield return SpecialFunctions.CreateTabString(
                                         variableName, caseIdAndVal.Key, caseIdAndVal.Value ? 1 : 0);
                    }
                }
            }
        }
Beispiel #14
0
 /// <summary>
 /// One-based variant of <c>TrySubSeqAA0Pos</c>: converts <paramref name="aa1Pos"/> to a zero-based
 /// index by subtracting one and delegates.
 /// </summary>
 /// <param name="aa1Pos">One-based start position of the window.</param>
 /// <param name="merLength">Number of positions in the window.</param>
 /// <param name="aaSeq">Set by <c>TrySubSeqAA0Pos</c>.</param>
 /// <returns>The result of <c>TrySubSeqAA0Pos</c>.</returns>
 public bool TrySubSeqAA1Pos(int aa1Pos, int merLength, out AASeq aaSeq)
 {
     int aa0Pos = aa1Pos - 1;

     return TrySubSeqAA0Pos(aa0Pos, merLength, out aaSeq);
 }