Esempio n. 1
0
        /// <summary>
        /// Lazily produces sparse-format lines, one per (variable, case) pair. Each variable is
        /// named "protein@mer"; the value is 1 when ContainsMer reports the mer for that case
        /// and protein, 0 when it reports its absence, and the case is skipped when it reports null.
        /// </summary>
        /// <param name="keepOneValueVariables">
        /// When false, a variable is emitted only if both 0 and 1 were observed across the cases.
        /// </param>
        /// <param name="merLength">Length handed to EveryUnambiguousStopFreeMer to select the mers.</param>
        /// <returns>Lines built by Helper.CreateTabString(variableName, caseId, 0 or 1).</returns>
        public IEnumerable<string> SparseLineMerEnumeration(bool keepOneValueVariables, int merLength)
        {
            if (_caseIdToAASeq.Count == 0)
            {
                Debug.Assert(SequenceLengthOrNull == null); // real assert
                yield break;
            }
            Helper.CheckCondition(SequenceLengthOrNull != null, "This converter to sparse assumes all sequences have the same length");

            Dictionary<string, AASeq> compressedSeqByCase = RemoveDeletesAndStopsFromData(false, Console.Error);

            foreach (string mer in EveryUnambiguousStopFreeMer(merLength, compressedSeqByCase))
            {
                // TODO: similar code exists elsewhere; consider sharing it.
                Regex merPattern = AASeq.CreateMerRegex(mer);

                foreach (string protein in EveryProtein())
                {
                    Set<bool> observedValues = Set<bool>.GetInstance();
                    Dictionary<string, bool> valueByCase = new Dictionary<string, bool>();

                    foreach (KeyValuePair<string, AASeq> caseAndSeq in compressedSeqByCase)
                    {
                        AASeq aaSeq = caseAndSeq.Value;
                        Helper.CheckCondition(aaSeq.MixtureSemantics == MixtureSemantics.Uncertainty, "Code does not expect Mixture semantics");

                        bool? containsOrNull = aaSeq.ContainsMer(mer, merPattern, protein);
                        if (!containsOrNull.HasValue)
                        {
                            // No answer for this case: leave it out of the output entirely.
                            continue;
                        }

                        bool contains = containsOrNull.Value;
                        valueByCase.Add(caseAndSeq.Key, contains);
                        observedValues.AddNewOrOld(contains);
                    }

                    // A variable with a single observed value is only kept on request.
                    if (!keepOneValueVariables && observedValues.Count != 2)
                    {
                        continue;
                    }

                    string variableName = protein + "@" + mer;
                    foreach (KeyValuePair<string, bool> caseAndValue in valueByCase)
                    {
                        yield return Helper.CreateTabString(
                            variableName, caseAndValue.Key, caseAndValue.Value ? 1 : 0);
                    }
                }
            }
        }
Esempio n. 2
0
        //!!!same logic is elsewhere. Look for common heading
        /// <summary>
        /// Reads every row of <paramref name="datareader"/> into a peptide -> (cid -> magnitude) table,
        /// using the "peptide", "cid" and "magnitude" columns.
        /// </summary>
        /// <param name="datareader">Open reader positioned before the first row.</param>
        /// <param name="cidsInReactTable">Receives every non-empty (trimmed) cid encountered.</param>
        /// <returns>The table keyed by trimmed peptide, then by trimmed cid.</returns>
        /// <remarks>
        /// Rows whose trimmed cid is empty are skipped but still counted in the record total
        /// written to the console.
        /// </remarks>
        private static Dictionary<string, Dictionary<string, double>> LoadReactTableUnfiltered(DbDataReader datareader, out Set<string> cidsInReactTable)
        {
            cidsInReactTable = Set<string>.GetInstance();

            Dictionary<string, Dictionary<string, double>> peptideToCidToMagnitude =
                new Dictionary<string, Dictionary<string, double>>();

            int peptideOrdinal = datareader.GetOrdinal("peptide");
            int cidOrdinal = datareader.GetOrdinal("cid");
            int magnitudeOrdinal = datareader.GetOrdinal("magnitude");

            int recordsRead = 0;
            while (datareader.Read())
            {
                recordsRead++;

                string cid = datareader.GetString(cidOrdinal).Trim();
                if (cid.Length != 0)
                {
                    cidsInReactTable.AddNewOrOld(cid);

                    string peptide = datareader.GetString(peptideOrdinal).Trim();
                    double magnitude = datareader.GetDouble(magnitudeOrdinal);

                    // NOTE(review): GetValueOrDefault presumably registers a fresh inner dictionary
                    // for an unseen peptide; otherwise this Add would be lost - confirm.
                    SpecialFunctions.GetValueOrDefault(peptideToCidToMagnitude, peptide).Add(cid, magnitude);
                }
            }

            Console.WriteLine("{0}: number of records read: {1}", "React table", recordsRead);

            return peptideToCidToMagnitude;
        }
        /* From [Microsoft Research]:
         *
         *  - I’ve changed two things wrt prior corrections.  First, I’m computing relative frequencies
         *    across length per HLA rather than per supertype (there was too much variation within
         *    supertype).  Second, the formula that I gave you last was not quite right in that it did not
         *    take into account the denominator of the prior odds term.  Given p_kh, the uncorrected
         *    probability of being an epitope according to the classifier for peptide of length k and
         *    HLA h, the correction is as follows:
         *
         *  log odds  := ln (p_kh/(1-p_kh))
         *  log odds := log odds + ln(  [relFreq_kh/0.25 * (1/100)] / [1 – relFreq_kh/0.25 * (1/100)] )
         *  pk_corrected = exp(log odds) / (1 + exp(log odds))
         *
         *  (Technical notes: In training, we are assuming a prior of 1/100 for each hla and k.
         *   In the data, the prior over hla is not uniform (e.g., there is lots of A02), but we think
         *   this is sampling bias.  That is, we think the prior on being an epitope is roughly
         *   uniform for each hla.  But, the data is fairly unbiased wrt prior on epitope of length
         *   k reacting, given HLA.  That is, biologists were looking at particular HLAs, but they
         *   then found the optimal length for the epitope, giving an unbiased view of which lengths
         *   react with which HLAs.  Thus, for every HLA, we should correct the prior as a function
         *   of length.  We used to correct by supertype, but I’m seeing too much variation within
         *   a given supertype.  To help with smoothing, I’m using a Dirichlet(1,1,1,1) prior.
         *   Dividing each relFreq by 0.25 in the above formula guarantees that the overall prior is
         *   still 1/100.)
         *
         *
         *  From: [Microsoft Research]
         *  Sent: Thursday, July 27, 2006 4:25 PM
         *
         *
         *      As we discussed, I would like to write out the weight of evidence for the epitope rather
         *      than its posterior probability.  This is logOdds minus the prior (which is implicitly 1/100
         *      in our training data).
         *
         *      The formula for weight of evidence is (assuming 4 values of K, and 99 negatives per positive)
         *
         *          priorLogOddsOfThisLengthAndHla = LogOdds((relFreq/.25) * .01);
         *          originalLogOdds = LogOdds(originalP);
         *          correctedLogOdds = originalLogOdds + priorLogOddsOfThisLengthAndHla;
         *          weightofEvidence = correctedLogOdds – LogOdds(0.01);
         *
         */
        /// <summary>
        /// Rebuilds KToHlaToPriorLogOdds, _hlaSet and _supertypeMap from the smoothed per-HLA
        /// length counts returned by CreateHlaToLengthToLengthToSmoothedCount.
        /// </summary>
        /// <remarks>
        /// For each HLA, AddToHlaToPriorLogOdds is called once per length from
        /// MerLength.firstLength to MerLength.lastLength (inclusive), then the HLA is added to
        /// the supertype map. AssertThatEveryKHasEveryHla is run at the end.
        /// </remarks>
        private void CreateKToHlaToPriorLogOdds()
        {
            KToHlaToPriorLogOdds = new Dictionary<int, Dictionary<Hla, double>>();
            _hlaSet = new Set<Hla>();

            // NOTE(review): this local is never read below; the call is kept in case
            // GetFactory has side effects (e.g. validation or caching) - confirm.
            HlaFactory hlaFactory = HlaFactory.GetFactory("MixedWithB15AndA68");

            _supertypeMap = new Dictionary<string, Set<Hla>>();

            Dictionary<Hla, Dictionary<int, int>> smoothedCountsByHla = CreateHlaToLengthToLengthToSmoothedCount();

            foreach (KeyValuePair<Hla, Dictionary<int, int>> hlaAndCounts in smoothedCountsByHla)
            {
                Hla hla = hlaAndCounts.Key;
                Dictionary<int, int> lengthToSmoothedCount = hlaAndCounts.Value;

                _hlaSet.AddNewOrOld(hla);

                int smoothedTotal = ComputeSmoothedTotal(lengthToSmoothedCount);

                int k = (int)MerLength.firstLength;
                while (k <= (int)MerLength.lastLength)
                {
                    AddToHlaToPriorLogOdds(hla, lengthToSmoothedCount, smoothedTotal, k);
                    ++k;
                }

                AddToSupertypeMap(hla);
            }

            AssertThatEveryKHasEveryHla();
        }
Esempio n. 4
0
        /// <summary>
        /// Counts, for every unambiguous mer of the given length, how many times it starts at
        /// each original position across all cases.
        /// </summary>
        /// <param name="merLength">Length passed to AASeq.SubSeqEnumeration.</param>
        /// <param name="textWriterForWarnings">Receives a warning whenever a mer repeats within one case.</param>
        /// <param name="caseToCompressedAASeq">Sequences to scan, keyed by case id.</param>
        /// <returns>mer string -> (value of OriginalAA1Position(0) -> count).</returns>
        /// <remarks>
        /// The position key is whatever mer.OriginalAA1Position(0) returns, even though the
        /// method name says "AA0Position".
        /// </remarks>
        private static Dictionary<string, Dictionary<string, int>> CreateMerStringToOriginalAA0PositionToCount(int merLength, TextWriter textWriterForWarnings, Dictionary<string, AASeq> caseToCompressedAASeq)
        {
            Dictionary<string, Dictionary<string, int>> countsByMer = new Dictionary<string, Dictionary<string, int>>();

            foreach (KeyValuePair<string, AASeq> caseAndSeq in caseToCompressedAASeq)
            {
                // Tracks the mers already met in this case so repeats can be reported.
                Set<string> mersSeenInCase = new Set<string>();

                foreach (AASeq mer in caseAndSeq.Value.SubSeqEnumeration(merLength))
                {
                    if (!mer.Ambiguous)
                    {
                        string merString = mer.ToString();
                        if (mersSeenInCase.Contains(merString))
                        {
                            textWriterForWarnings.WriteLine("Warning: Mer '{0}' appears again in case '{1}'", merString, caseAndSeq.Key);
                        }
                        mersSeenInCase.AddNewOrOld(merString);

                        string startPosition = mer.OriginalAA1Position(0);

                        // NOTE(review): both GetValueOrDefault calls are project extension methods;
                        // the outer one presumably inserts a new inner dictionary when missing - confirm.
                        Dictionary<string, int> countsByPosition = countsByMer.GetValueOrDefault(merString);
                        int previousCount = countsByPosition.GetValueOrDefault(startPosition);
                        countsByPosition[startPosition] = 1 + previousCount;
                    }
                }
            }

            return countsByMer;
        }
        /// <summary>
        /// Files <paramref name="hla"/> under its supertype in _supertypeMap, unless
        /// SetSupertypeAny reports the supertype as "unknown" or "none".
        /// </summary>
        /// <param name="hla">The HLA to register.</param>
        private void AddToSupertypeMap(Hla hla)
        {
            string supertype = SetSupertypeAny(hla, HasBlanks);

            // "unknown" is a misnomer (it should be "none" or null), but it is left unchanged
            // because existing models already rely on it.
            if (supertype == "unknown" || supertype == "none")
            {
                return;
            }

            Set<Hla> members = SpecialFunctions.GetValueOrDefault(_supertypeMap, supertype);
            members.AddNewOrOld(hla);
        }
Esempio n. 6
0
        //internal static TrueCollection GetInstanceX(IEnumerable<string> hlaCollection, Random random)
        //{
        //    TrueCollection aTrueCollection = new TrueCollection();
        //    foreach (string hla in hlaCollection)
        //    {
        //        if (random.Next(2) == 0)
        //        {
        //            aTrueCollection.Add(hla);
        //        }
        //    }
        //    return aTrueCollection;
        //}

        /// <summary>
        /// Copies every Hla enumerated by this collection into a newly created set.
        /// </summary>
        /// <returns>A new Set&lt;Hla&gt; holding each Hla yielded by enumerating this instance.</returns>
        public Set<Hla> CreateHlaAssignmentAsSet()
        {
            Set<Hla> result = Set<Hla>.GetInstance();
            foreach (Hla member in this)
            {
                result.AddNewOrOld(member);
            }

            return result;
        }
Esempio n. 7
0
        //public bool IsUsingOriginalPositions()
        //{
        //    return _originalAA0PositionTableOrNull == null;
        //}

        /// <summary>
        /// Collects the distinct protein names found in the original-position table, where the
        /// protein name is the text before the first '@' of each entry (or the whole entry when
        /// it has no '@').
        /// </summary>
        /// <returns>A set of protein names, exposed as an enumerable.</returns>
        /// <remarks>
        /// NOTE(review): the field name ends in "OrNull", yet it is enumerated without a null
        /// check - confirm callers only reach this when the table is present.
        /// </remarks>
        internal IEnumerable<string> EveryProtein()
        {
            Set<string> proteins = Set<string>.GetInstance();

            foreach (string position in _originalAA1PositionTableOrNull)
            {
                int separatorIndex = position.IndexOf('@');
                string protein = separatorIndex < 0 ? position : position.Substring(0, separatorIndex);
                proteins.AddNewOrOld(protein);
            }

            return proteins;
        }
Esempio n. 8
0
        /// <summary>
        /// Loads PatientList from the tab-delimited file named by PatientFileName: each row's
        /// "pid" becomes a key whose value is the set of ground HLAs built from the
        /// a1, a2, b1, b2, c1 and c2 columns.
        /// </summary>
        private void ReadPatientTable()
        {
            Qmrr.HlaFactory factory = Qmrr.HlaFactory.GetFactory("noConstraint");

            PatientList = new Dictionary<string, Set<Hla>>();

            // TODO: the header and the column names below should become shared constants.
            string[] hlaColumns = new string[] { "a1", "a2", "b1", "b2", "c1", "c2" };

            foreach (Dictionary<string, string> row in SpecialFunctions.TabFileTable(PatientFileName, "pid	a1	a2	b1	b2	c1	c2", false))
            {
                Set<Hla> patientHlas = new Set<Hla>();
                for (int i = 0; i < hlaColumns.Length; ++i)
                {
                    patientHlas.AddNewOrOld(factory.GetGroundInstance(row[hlaColumns[i]]));
                }

                PatientList.Add(row["pid"], patientHlas);
            }
        }
Esempio n. 9
0
        /// <summary>
        /// Sets SwitchableHlasOfRespondingPatients to the distinct HLAs carried by every patient
        /// that appears both in PatientToAnyReaction and in PatientList.
        /// </summary>
        /// <remarks>
        /// Patients listed in PatientToAnyReaction but absent from PatientList are ignored.
        /// </remarks>
        private void CreateSwitchableHlasWithRespondingPatients()
        {
            Set<Hla> respondingHlas = Set<Hla>.GetInstance();

            foreach (string patient in PatientToAnyReaction.Keys)
            {
                // Single lookup instead of ContainsKey followed by the indexer.
                Set<Hla> patientHlas;
                if (!PatientList.TryGetValue(patient, out patientHlas))
                {
                    continue;
                }

                foreach (Hla hla in patientHlas)
                {
                    // AddNewOrOld accepts members that are already present (it is called
                    // unguarded throughout this code), so no prior Contains check is needed.
                    respondingHlas.AddNewOrOld(hla);
                }
            }

            SwitchableHlasOfRespondingPatients = new List<Hla>(respondingHlas);
        }
        /// <summary>
        /// Builds a patient -> (hla -> has-it) table. An (hla, patient) pair is recorded only
        /// when all of that patient's rows with a non-null HasHla answer agree on it.
        /// </summary>
        /// <param name="hlaResolution">Resolution forwarded to HasHla.</param>
        /// <param name="expandedTable">Rows to scan; each must contain a "patient" column when HasHla gives an answer.</param>
        /// <param name="header">Not used by this method.</param>
        /// <param name="hlaList">The HLAs to evaluate.</param>
        /// <returns>The table of unambiguous answers; patients with conflicting answers for an HLA have no entry for it.</returns>
        /// <remarks>
        /// When HasHla returns null for a row, the row must either lack a "weight" column or
        /// carry a weight that parses to 1; this is enforced through SpecialFunctions.CheckCondition.
        /// </remarks>
        public static Dictionary<string, Dictionary<Hla, bool>> CreatePatientToHlaToYesNoDontKnow(HlaResolution hlaResolution, List<Dictionary<string, string>> expandedTable, string header, IEnumerable<Hla> hlaList)
        {
            Dictionary<string, Dictionary<Hla, bool>> answersByPatient = new Dictionary<string, Dictionary<Hla, bool>>();

            foreach (Hla hla in hlaList)
            {
                // First pass: gather every distinct answer seen for each patient.
                Dictionary<string, Set<bool>> observedByPatient = new Dictionary<string, Set<bool>>();

                foreach (Dictionary<string, string> row in expandedTable)
                {
                    bool? hasHlaOrNull = HasHla(hla, row, hlaResolution);
                    if (!hasHlaOrNull.HasValue)
                    {
                        SpecialFunctions.CheckCondition(!row.ContainsKey("weight") || double.Parse(row["weight"]) == 1);
                        continue;
                    }

                    string patient = row["patient"];
                    Set<bool> observed = SpecialFunctions.GetValueOrDefault(observedByPatient, patient);
                    observed.AddNewOrOld(hasHlaOrNull.Value);
                }

                // Second pass: keep only the patients whose answers were unanimous.
                foreach (KeyValuePair<string, Set<bool>> patientAndObserved in observedByPatient)
                {
                    Set<bool> observed = patientAndObserved.Value;
                    if (observed.Count != 1)
                    {
                        // Ambiguous data for this patient and HLA: skip it.
                        continue;
                    }

                    foreach (bool hasHla in observed)
                    {
                        Dictionary<Hla, bool> hlaToAnswer = SpecialFunctions.GetValueOrDefault(answersByPatient, patientAndObserved.Key);
                        hlaToAnswer.Add(hla, hasHla);
                    }
                }
            }

            return answersByPatient;
        }
        //!!!this could be made faster by keeping track of patients with no abstract hlas
        /// <summary>
        /// Returns the subset of <paramref name="pidToHlaSetAll"/> whose patients do not hold an
        /// abstract HLA that generalizes one of the possible causes. The possible causes are the
        /// union of <paramref name="bestHlaSetSoFar"/> and <paramref name="knownHlaSet"/> plus
        /// <paramref name="hla"/>.
        /// </summary>
        /// <param name="pidToHlaSetAll">All patients with their HLA sets.</param>
        /// <param name="bestHlaSetSoFar">Best HLA set found so far.</param>
        /// <param name="hla">Additional candidate HLA.</param>
        /// <param name="knownHlaSet">Already known HLAs.</param>
        /// <returns>A new dictionary sharing the kept patients' HLA set instances.</returns>
        /// <remarks>
        /// Example from the original notes: with B23 as best-so-far/known and a patient holding
        /// B15??, candidate B25 keeps the patient while candidate B1511 excludes the patient.
        /// </remarks>
        private Dictionary<string, Set<Hla>> CreatePidToHlaSetCustom(Dictionary<string, Set<Hla>> pidToHlaSetAll, Set<Hla> bestHlaSetSoFar, Hla hla, Set<Hla> knownHlaSet)
        {
            Set<Hla> candidateCauses = bestHlaSetSoFar.Union(knownHlaSet);
            candidateCauses.AddNewOrOld(hla);

#if DEBUG
            foreach (Hla candidate in candidateCauses)
            {
                Debug.Assert(candidate.IsGround); // real assert
            }
#endif

            Dictionary<string, Set<Hla>> keptPatients = new Dictionary<string, Set<Hla>>();

            foreach (KeyValuePair<string, Set<Hla>> pidAndHlas in pidToHlaSetAll)
            {
                if (ThisPatientContainsAnAbstractHlaThatGeneralizesAPossibleCause(pidAndHlas.Value, candidateCauses))
                {
                    // Excluded: an abstract HLA of this patient could stand for a candidate cause.
                    continue;
                }

                keptPatients.Add(pidAndHlas.Key, pidAndHlas.Value);
            }

            return keptPatients;
        }
Esempio n. 12
0
        /// <summary>
        /// Rebuilds PeptideToFitUniverse from the file "&lt;dataset&gt;supertypefit.txt", whose lines
        /// must each hold exactly two tab-separated fields: a peptide and an HLA name.
        /// </summary>
        /// <param name="dataset">Prefix prepended to "supertypefit.txt" to form the file name.</param>
        internal void SetPeptideToFitUniverse(string dataset)
        {
            Qmrr.HlaFactory factory = Qmrr.HlaFactory.GetFactory("noConstraint");

            PeptideToFitUniverse = new Dictionary<string, Set<Hla>>();
            string filename = dataset + "supertypefit.txt";

            // TODO: it would be nicer to read this as a tab table, drop redundant lines and
            // check that the HLAs are of the right form.
            using (StreamReader reader = File.OpenText(filename))
            {
                for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                {
                    string[] fields = line.Split('\t');
                    SpecialFunctions.CheckCondition(fields.Length == 2);

                    string peptide = fields[0];
                    Hla hla = factory.GetGroundInstance(fields[1]);

                    SpecialFunctions.GetValueOrDefault(PeptideToFitUniverse, peptide).AddNewOrOld(hla);
                }
            }
        }
Esempio n. 13
0
        /// <summary>
        /// Selects the HLAs in HlaUniverse whose 2x2 table (has HLA x reacts to peptide), tallied
        /// over the given patients, yields a Fisher exact test p-value at or below the cut-off.
        /// </summary>
        /// <param name="pValueCutOff">Inclusive upper bound on the p-value for an HLA to be kept.</param>
        /// <param name="pidToHlaSet">Patients and their HLA sets.</param>
        /// <param name="peptide">Key into ReactTableUnfiltered; a patient "reacts" when its pid is a key of that entry.</param>
        /// <returns>A new set with the selected HLAs.</returns>
        internal Set<Hla> CreateUnivariateHlaSet(double pValueCutOff, Dictionary<string, Set<Hla>> pidToHlaSet, string peptide)
        {
            Set<Hla> selectedHlas = Set<Hla>.GetInstance();

            foreach (Hla hla in HlaUniverse)
            {
                // counts[hasHla, reacts]; array elements start at zero.
                int[,] counts = new int[2, 2];

                foreach (KeyValuePair<string, Set<Hla>> pidAndHlas in pidToHlaSet)
                {
                    int hasHlaIndex = pidAndHlas.Value.Contains(hla) ? 1 : 0;
                    int reactsIndex = ReactTableUnfiltered[peptide].ContainsKey(pidAndHlas.Key) ? 1 : 0;
                    counts[hasHlaIndex, reactsIndex] += 1;
                }

                if (SpecialFunctions.FisherExactTest(counts) <= pValueCutOff)
                {
                    selectedHlas.AddNewOrOld(hla);
                }
            }

            return selectedHlas;
        }
Esempio n. 14
0
        /// <summary>
        /// Gathers the ground HLAs of every patient that appears both in
        /// <paramref name="pidToHlaSet"/> and in the react table entry for
        /// <paramref name="peptide"/>, then removes the HLAs returned by KnownTable(peptide).
        /// </summary>
        /// <param name="pidToHlaSet">Patients and their HLA sets.</param>
        /// <param name="peptide">Key into ReactTableUnfiltered and argument to KnownTable.</param>
        /// <returns>The result of subtracting the known HLAs from the collected ground HLAs.</returns>
        protected Set<Hla> HlaSetFromReactingPatients(Dictionary<string, Set<Hla>> pidToHlaSet, string peptide)
        {
            Dictionary<string, double> reactValueByPid = ReactTableUnfiltered[peptide];

            Set<string> pidsWithHlas = Set<string>.GetInstance(pidToHlaSet.Keys);
            Set<string> pidsWithReactions = Set<string>.GetInstance(reactValueByPid.Keys);
            Set<string> sharedPids = pidsWithReactions.Intersection(pidsWithHlas);

            Set<Hla> collectedHlas = Set<Hla>.GetInstance();
            foreach (string pid in sharedPids)
            {
                foreach (Hla hla in pidToHlaSet[pid])
                {
                    if (!hla.IsGround)
                    {
                        continue;
                    }

                    collectedHlas.AddNewOrOld(hla);
                }
            }

            Set<Hla> knownHlas = KnownTable(peptide);
            return collectedHlas.Subtract(knownHlas);
        }
Esempio n. 15
0
        /// <summary>
        /// Scans every file in the current directory matching <paramref name="inputFilePattern"/>,
        /// and sorts the accepted rows into real rows (null index -1) and null-distribution
        /// p-values (any other null index).
        /// </summary>
        /// <param name="inputFilePattern">Search pattern given to Directory.GetFiles.</param>
        /// <param name="keepTest">Filter applied to each row after the row-index tabulator accepts it.</param>
        /// <param name="maxPValue">Rows are collected only when their p-value is at or below this bound.</param>
        /// <param name="auditRowIndexValues">Forwarded to RowIndexTabulator.GetInstance.</param>
        /// <param name="realRowCollectionToSort">Receives rows whose null index is -1.</param>
        /// <param name="nullValueCollectionToBeSorted">Receives the p-values of rows whose null index is not -1.</param>
        /// <param name="headerSoFar">Set from the first file that yields a row if null; later differing headers only produce a console warning.</param>
        /// <returns>Every null index found on an accepted row, regardless of its p-value.</returns>
        private static Set<int> CreateTabulateReportInternal(
            string inputFilePattern,
            KeepTest<Dictionary<string, string>> keepTest,
            double maxPValue,
            bool auditRowIndexValues,
            ref List<Dictionary<string, string>> realRowCollectionToSort,
            ref List<double> nullValueCollectionToBeSorted,
            ref string headerSoFar)
        {
            Set<int> nullIndexesSeen = Set<int>.GetInstance();

            // TODO: very similar code exists elsewhere.
            RowIndexTabulator tabulator = RowIndexTabulator.GetInstance(auditRowIndexValues);

            string[] matchingFiles = Directory.GetFiles(Directory.GetCurrentDirectory(), inputFilePattern);
            foreach (string fileName in matchingFiles)
            {
                Debug.WriteLine(fileName);

                string headerOnFile;
                bool headerChecked = false;
                foreach (Dictionary<string, string> row in SpecialFunctions.TabFileTable(fileName, /*includeWholeLine*/ true, out headerOnFile))
                {
                    // The header comparison runs once per file, on its first row.
                    if (!headerChecked)
                    {
                        headerChecked = true;
                        if (headerSoFar == null)
                        {
                            headerSoFar = headerOnFile;
                        }
                        else if (headerSoFar != headerOnFile)
                        {
                            Console.WriteLine("Warning: The header for file {0} is different from the 1st file read in", fileName);
                        }
                    }

                    if (!tabulator.TryAdd(row, fileName) || !keepTest.Test(row))
                    {
                        continue;
                    }

                    SpecialFunctions.CheckCondition(row.ContainsKey(NullIndexColumnName), string.Format(@"When tabulating a ""{0}"" column is required. (File ""{1}"")", NullIndexColumnName, fileName));

                    int nullIndex = int.Parse(row[NullIndexColumnName]);
                    nullIndexesSeen.AddNewOrOld(nullIndex);

                    double pValue = AccessPValueFromPhylotreeRow(row);

                    // Written as "<=" on purpose: a NaN p-value fails the test and is dropped.
                    if (pValue <= maxPValue)
                    {
                        bool isRealRow = nullIndex == -1;
                        if (isRealRow)
                        {
                            realRowCollectionToSort.Add(row);
                        }
                        else
                        {
                            nullValueCollectionToBeSorted.Add(pValue);
                        }
                    }
                }
            }

            tabulator.CheckIsComplete(inputFilePattern);

            return nullIndexesSeen;
        }
Esempio n. 16
0
        /// <summary>
        /// Lazily produces sparse-format lines, one per (variable, case) pair, where each variable
        /// is "position@aminoAcid" and the value is 1 or 0.
        /// </summary>
        /// <param name="keepOneValueVariables">
        /// When false, positions with a single amino acid and variables with a single observed
        /// value are omitted.
        /// </param>
        /// <returns>Lines built by Helper.CreateTabString(variableName, caseId, 0 or 1).</returns>
        /// <remarks>
        /// Per case and amino acid:
        ///   missing - the site equals AASeq.Any, or it is a multi-valued site containing the
        ///             amino acid under MixtureSemantics.Uncertainty;
        ///   1       - the site is exactly the amino acid, or a multi-valued site containing it
        ///             under MixtureSemantics.Any;
        ///   0       - the site lacks the amino acid, or it is a multi-valued site containing it
        ///             under MixtureSemantics.Pure.
        /// Cases whose sequence is too short to have the position are skipped.
        /// </remarks>
        public IEnumerable<string> SparseLineEnumeration(bool keepOneValueVariables)
        {
            if (_caseIdToAASeq.Count == 0)
            {
                Debug.Assert(SequenceLengthOrNull == null); // real assert
                yield break;
            }
            Helper.CheckCondition(SequenceLengthOrNull != null, "This converter to sparse assumes all sequences have the same length");

            for (int aa0Pos = 0; aa0Pos < (int)SequenceLengthOrNull; ++aa0Pos)
            {
                Set<char> aminoAcidsAtPosition = EveryAminoAcid(aa0Pos);
                if (!keepOneValueVariables && aminoAcidsAtPosition.Count == 1)
                {
                    continue;
                }

                // Shared by every amino acid at this position; all sequences must agree on it.
                string posName = null;

                foreach (char aa in aminoAcidsAtPosition)
                {
                    Set<bool> observedValues = Set<bool>.GetInstance();
                    Dictionary<string, bool> valueByCase = new Dictionary<string, bool>();

                    foreach (KeyValuePair<string, AASeq> caseAndSeq in _caseIdToAASeq)
                    {
                        AASeq aaSeq = caseAndSeq.Value;
                        if (aa0Pos >= aaSeq.Count)
                        {
                            continue;
                        }

                        Set<char> strainAASet = aaSeq[aa0Pos];

                        if (posName != null)
                        {
                            Helper.CheckCondition(posName == aaSeq.OriginalAA1Position(aa0Pos));
                        }
                        else
                        {
                            posName = aaSeq.OriginalAA1Position(aa0Pos);
                        }

                        // null means "missing": nothing is recorded for this case.
                        bool? valueOrNull = null;
                        if (strainAASet.Equals(AASeq.Any))
                        {
                            // missing
                        }
                        else if (!strainAASet.Contains(aa))
                        {
                            valueOrNull = false;
                        }
                        else if (strainAASet.Count <= 1)
                        {
                            valueOrNull = true;
                        }
                        else
                        {
                            switch (aaSeq.MixtureSemantics)
                            {
                            case MixtureSemantics.Pure:
                                valueOrNull = false;
                                break;

                            case MixtureSemantics.Uncertainty:
                                // missing
                                break;

                            case MixtureSemantics.Any:
                                valueOrNull = true;
                                break;

                            default:
                                Helper.CheckCondition(false, "Unknown mixturesemantics " + aaSeq.MixtureSemantics.ToString());
                                break;
                            }
                        }

                        if (valueOrNull.HasValue)
                        {
                            valueByCase.Add(caseAndSeq.Key, valueOrNull.Value);
                            observedValues.AddNewOrOld(valueOrNull.Value);
                        }
                    }

                    Helper.CheckCondition(posName != null);

                    if (!keepOneValueVariables && observedValues.Count != 2)
                    {
                        continue;
                    }

                    string variableName = string.Format("{0}@{1}", posName, aa);
                    foreach (KeyValuePair<string, bool> caseAndValue in valueByCase)
                    {
                        yield return Helper.CreateTabString(
                            variableName, caseAndValue.Key, caseAndValue.Value ? 1 : 0);
                    }
                }
            }
        }
        /// <summary>
        /// Lazily produces sparse-format lines, one per (variable, case) pair, where each variable
        /// is "aminoAcid@position" and the value is 1 or 0.
        /// </summary>
        /// <param name="keepOneValueVariables">
        /// When false, positions with a single amino acid and variables with a single observed
        /// value are omitted.
        /// </param>
        /// <returns>Lines built by SpecialFunctions.CreateTabString(variableName, caseId, 0 or 1).</returns>
        /// <remarks>
        /// Per case and amino acid:
        ///   missing - the site equals AASeq.Any, or it is a multi-valued site containing the
        ///             amino acid while aaSeq.Mixture is false;
        ///   1       - the site is exactly the amino acid;
        ///   0       - the site lacks the amino acid, or it is a multi-valued site containing it
        ///             while aaSeq.Mixture is true.
        /// </remarks>
        public IEnumerable<string> SparseLineEnumeration(bool keepOneValueVariables)
        {
            if (_caseIdToAASeq.Count == 0)
            {
                Debug.Assert(SequenceLength == null); // real assert
                yield break;
            }
            SpecialFunctions.CheckCondition(SequenceLength != null, "This converter to sparse assumes all sequences have the same length");

            for (int aa0Pos = 0; aa0Pos < (int)SequenceLength; ++aa0Pos)
            {
                Set<char> aminoAcidsAtPosition = EveryAminoAcid(aa0Pos);
                if (!keepOneValueVariables && aminoAcidsAtPosition.Count == 1)
                {
                    continue;
                }

                // Shared by every amino acid at this position; all sequences must agree on it.
                string posName = null;

                foreach (char aa in aminoAcidsAtPosition)
                {
                    Set<bool> observedValues = Set<bool>.GetInstance();
                    Dictionary<string, bool> valueByCase = new Dictionary<string, bool>();

                    foreach (KeyValuePair<string, AASeq> caseAndSeq in _caseIdToAASeq)
                    {
                        AASeq aaSeq = caseAndSeq.Value;
                        Set<char> strainAASet = aaSeq[aa0Pos];

                        if (posName != null)
                        {
                            SpecialFunctions.CheckCondition(posName == aaSeq.OriginalAA1Position(aa0Pos));
                        }
                        else
                        {
                            posName = aaSeq.OriginalAA1Position(aa0Pos);
                        }

                        // null means "missing": nothing is recorded for this case.
                        bool? valueOrNull = null;
                        if (strainAASet.Equals(AASeq.Any))
                        {
                            // missing
                        }
                        else if (!strainAASet.Contains(aa))
                        {
                            valueOrNull = false;
                        }
                        else if (strainAASet.Count <= 1)
                        {
                            valueOrNull = true;
                        }
                        else if (aaSeq.Mixture)
                        {
                            valueOrNull = false;
                        }

                        if (valueOrNull.HasValue)
                        {
                            valueByCase.Add(caseAndSeq.Key, valueOrNull.Value);
                            observedValues.AddNewOrOld(valueOrNull.Value);
                        }
                    }

                    SpecialFunctions.CheckCondition(posName != null);

                    if (!keepOneValueVariables && observedValues.Count != 2)
                    {
                        continue;
                    }

                    // Amino acid first, then position.
                    string variableName = string.Format("{1}@{0}", posName, aa);
                    foreach (KeyValuePair<string, bool> caseAndValue in valueByCase)
                    {
                        yield return SpecialFunctions.CreateTabString(
                            variableName, caseAndValue.Key, caseAndValue.Value ? 1 : 0);
                    }
                }
            }
        }