예제 #1
0
        /// <summary>
        /// Looks up two sequences by accession id, builds a Smith-Waterman alignment over
        /// them with the simple score provider, and checks that the p-value calculator
        /// (limited to 10 permutations) yields a non-null result, which is then printed.
        /// </summary>
        public async Task PValueMainline()
        {
            SequenceMetadata sequenceToMatch = await FastALookupCient.LookupByAccessionIdAsync("Q10574");
            SequenceMetadata target          = await FastALookupCient.LookupByAccessionIdAsync("P15172");

            var aligner = new SmithWatermanImplementation(
                sequenceTomatch: sequenceToMatch,
                targetSequence: target,
                scoreProvider: new SimpleScoreProvider(),
                gapOpenPenality: 1);

            var calculator = new PValueCalculator<SmithWatermanImplementation>(
                alignmentImpl: aligner,
                permutationLimit: 10);

            string pValue = await calculator.CalculatePValueAsync();

            Assert.IsNotNull(pValue);

            Console.WriteLine("--Calculated P value--");
            Console.WriteLine(pValue);
        }
예제 #2
0
        /// <summary>
        /// Forwards an increase request to the sequence provider attached to this instance.
        /// </summary>
        /// <param name="sequence">The sequence metadata handed through to the provider.</param>
        /// <param name="data">The data object handed through to the provider.</param>
        /// <returns>The value returned by the provider's <c>Increase</c> call.</returns>
        /// <exception cref="InvalidOperationException">Thrown when <c>Sequence</c> is null.</exception>
        internal long Increase(SequenceMetadata sequence, object data)
        {
            var provider = this.Sequence;

            if (provider == null)
            {
                throw new InvalidOperationException($"Missing required sequence of the '{this.Name}' DataAccess.");
            }

            var dataSequence = (DataSequence)provider;
            return dataSequence.Increase(sequence, data);
        }
예제 #3
0
            /// <summary>
            /// Builds the key for <paramref name="sequence"/> and increments the underlying
            /// sequence by the interval, starting from the seed, declared on the metadata.
            /// </summary>
            /// <param name="sequence">The sequence metadata; must not be null.</param>
            /// <param name="data">The data object used when building the sequence key.</param>
            /// <returns>The value returned by the underlying <c>Increment</c> call.</returns>
            /// <exception cref="ArgumentNullException">Thrown when <paramref name="sequence"/> is null.</exception>
            public long Increase(SequenceMetadata sequence, object data)
            {
                if (sequence == null)
                {
                    throw new ArgumentNullException(nameof(sequence));
                }

                var sequenceKey = this.GetSequenceKey(sequence, data);

                return _sequence.Increment(sequenceKey, sequence.Interval, sequence.Seed);
            }
예제 #4
0
 /// <summary>
 /// Initializes a new SmithWatermanImplementation. All arguments are handed to the
 /// base constructor, after which the substitution matrix is initialized.
 /// </summary>
 /// <param name="sequenceTomatch">Sequence to match.</param>
 /// <param name="targetSequence">Sequence to match against.</param>
 /// <param name="scoreProvider">Score provider forwarded to the base class.</param>
 /// <param name="gapOpenPenality">Gap-open penalty forwarded to the base class.</param>
 public SmithWatermanImplementation(SequenceMetadata sequenceTomatch,
                                    SequenceMetadata targetSequence,
                                    AlignmentScoreProviderBase scoreProvider,
                                    int gapOpenPenality)
     : base(sequenceTomatch, targetSequence, scoreProvider, gapOpenPenality)
 {
     IntializeSubstitutionMatrix();
 }
예제 #5
0
        /// <summary>
        /// Parses the textual sequence definition and stores the resulting metadata in
        /// <c>_sequence</c>. A null, empty or whitespace-only value leaves the field untouched.
        /// </summary>
        /// <param name="sequence">The text handed to <c>SequenceMetadata.Parse</c>.</param>
        internal void SetSequence(string sequence)
        {
            if (!string.IsNullOrWhiteSpace(sequence))
            {
                _sequence = SequenceMetadata.Parse(
                    sequence,
                    (name, seed, interval, references) =>
                        new SequenceMetadata(this, name, GetSeed(seed), interval, references));
            }
        }
예제 #6
0
        /// <summary>
        /// Finds the first sequence of the given actor whose name equals
        /// <paramref name="sequenceName"/> and returns its id.
        /// </summary>
        /// <param name="actorId">Actor id passed to <c>GetSequencesByActorId</c>.</param>
        /// <param name="sequenceName">Sequence name to look for.</param>
        /// <returns>The matching sequence id, or 0 when no sequence matches.</returns>
        public static short GetSequenceIdBySequenceName(string actorId, string sequenceName)
        {
            List<SequenceMetadata> candidates = GetSequencesByActorId(actorId);
            SequenceMetadata match = candidates.FirstOrDefault(candidate => candidate.SequenceName == sequenceName);

            // 0 is the "not found" result.
            return match != default ? match.SequenceId : (short)0;
        }
예제 #7
0
        /// <summary>
        /// Creates a new <typeparamref name="T"/> for the two given sequences, reusing the
        /// score provider and gap-open penalty of the configured alignment implementation,
        /// runs it and returns the alignment score wrapped in a completed task.
        /// </summary>
        /// <param name="sequenceToMatch">First constructor argument of the created algorithm.</param>
        /// <param name="targetSequence">Second constructor argument of the created algorithm.</param>
        /// <returns>A completed task holding the alignment score.</returns>
        Task<int> FindScoreAsync(SequenceMetadata sequenceToMatch, SequenceMetadata targetSequence)
        {
            object[] constructorArguments =
            {
                sequenceToMatch,
                targetSequence,
                this.alignmentImpl.ScoreProvider,
                this.alignmentImpl.GapOpenPenality
            };

            var aligner = (T)Activator.CreateInstance(typeof(T), constructorArguments);
            AlignmentImplementationResults outcome = aligner.FindOptimalAlignment();
            int score = outcome.AlignmentScore;

            return Task.FromResult<int>(score);
        }
예제 #8
0
        /// <summary>
        /// Registers with the message mediator and binds <c>_metadata</c> to the
        /// SequenceMetadata stored on the sequence, creating and attaching an empty one
        /// when the sequence does not carry any yet.
        /// </summary>
        private void Initialize()
        {
            // The registration result is not consulted here.
            RegisterWithMessageMediator();

            if (!_sequence.Metadata.ContainsKey(SequenceMetadata.SequenceMetadataLabel))
            {
                // Nothing attached yet: add fresh metadata to the sequence.
                _metadata = new SequenceMetadata();
                _sequence.Metadata.Add(SequenceMetadata.SequenceMetadataLabel, _metadata);
                return;
            }

            _metadata = (SequenceMetadata)_sequence.Metadata[SequenceMetadata.SequenceMetadataLabel];
        }
예제 #9
0
        /// <summary>
        /// Scans <c>Sequences</c> in order for the first entry whose name matches
        /// <paramref name="sequenceName"/> (ordinal, case-insensitive).
        /// TODO: create lookup table to make this faster?
        /// </summary>
        /// <param name="sequenceName">Name to search for.</param>
        /// <param name="foundSequence">The matching sequence, or null when none matches.</param>
        /// <returns><c>true</c> when a match was found; otherwise <c>false</c>.</returns>
        public bool TryGetSequence(string sequenceName, out SequenceMetadata foundSequence)
        {
            foundSequence = null;

            foreach (SequenceMetadata candidate in Sequences)
            {
                if (!string.Equals(candidate.Name, sequenceName, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                foundSequence = candidate;
                return true;
            }

            return false;
        }
예제 #10
0
        /// <summary>
        /// Resolves the database identifiers needed for the current alignment: the next
        /// sequence and alignment ids, the alignment's sequence type id, a taxonomy id per
        /// sequence, the extent type ids, and a location id per sequence. A
        /// <c>SequenceMappingData</c> is attached to every sequence's metadata under the
        /// <c>rCADMappingData</c> key.
        /// </summary>
        /// <returns>
        /// <c>true</c> when every step succeeded (and <c>MappedSuccessfully</c> was set);
        /// <c>false</c> when <c>MappedAlignment</c> is null or any step threw.
        /// </returns>
        public bool Map()
        {
            if (MappedAlignment == null)
            {
                return(false);
            }

            // Declared outside the try block so the failure path can still release the connection.
            rCADDataContext dc = null;
            try
            {
                dc          = CreateDataContext();
                NextSeqID   = dc.NextSeqIDs.Select(row => row.SeqID).First();
                AlignmentID = dc.NextAlnIDs.Select(row => row.AlnID).First();
                NextAlnID   = AlignmentID + 1;

                AlignmentSeqTypeID = dc.SequenceTypes.Where(row => row.MoleculeType.Equals(_alignment.MoleculeType) && row.GeneName.Equals(_alignment.GeneName) &&
                                                            row.GeneType.Equals(_alignment.GeneType)).First().SeqTypeID;

                // Sequence id -> taxonomy id, for every sequence whose scientific name is known to the database.
                var seqToTaxID = (from seq in _alignment.Sequences
                                  join taxonomyNameRow in dc.TaxonomyNames
                                  on ((SequenceMetadata)seq.Metadata[SequenceMetadata.SequenceMetadataLabel]).ScientificName equals taxonomyNameRow.ScientificName
                                  select new { seq.ID, taxonomyNameRow.TaxID }).ToDictionary(match => match.ID, match => match.TaxID);

                int rootTaxID = (from taxname in dc.TaxonomyNames
                                 where taxname.ScientificName.Equals("root")
                                 select taxname.TaxID).First();

                ExtentTypeIDs = (from bar in dc.SecondaryStructureExtentTypes
                                 select new { bar.ExtentTypeID, bar.ExtentType }).ToDictionary(match => match.ExtentType, match => match.ExtentTypeID);

                foreach (var sequence in _alignment.Sequences)
                {
                    SequenceMetadata    metadata = (SequenceMetadata)sequence.Metadata[SequenceMetadata.SequenceMetadataLabel];
                    SequenceMappingData data     = new SequenceMappingData();
                    data.SeqID      = NextSeqID;
                    data.TaxID      = seqToTaxID.ContainsKey(sequence.ID) ? seqToTaxID[sequence.ID] : rootTaxID; //The sequence is mapped to the root of the Taxonomy tree if we don't have mapping info.
                    data.LocationID = dc.CellLocationInfos.Where(row => row.Description.Equals(metadata.LocationDescription)).First().LocationID;
                    sequence.Metadata.Add(rCADMappingData, data);
                    NextSeqID++;
                }
                dc.Connection.Close();
                MappedSuccessfully = true;
                return(true);
            }
            catch
            {
                // Any failure is reported to the caller as "not mapped". The connection is
                // released on this path as well so a failed mapping does not leave it open.
                if (dc != null)
                {
                    try
                    {
                        dc.Connection.Close();
                    }
                    catch
                    {
                        // Best effort only: the method already reports failure via its return value.
                    }
                }

                return(false);
            }
        }
예제 #11
0
        /// <summary>
        /// Aligns two looked-up sequences with Smith-Waterman using the BLOSUM62 score
        /// provider and a gap-open penalty of 4, prints the alignment and its score, then
        /// computes a p-value with a single permutation and prints it together with the
        /// pretty-printed alignment and score matrix.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the method name mentions P17542, but the accession ids actually
        /// looked up are P10085 and P15172 - confirm which pair is intended.
        /// </remarks>
        public async Task SmithWatermanP15172ToP17542WithBlosum62ScoringScheme()
        {
            SequenceMetadata item1 = await
                                     FastALookupCient.LookupByAccessionIdAsync("P10085");

            SequenceMetadata item2 = await
                                     FastALookupCient.LookupByAccessionIdAsync("P15172");

            var localAlignmentImpl = new SmithWatermanImplementation(
                sequenceTomatch: item1,
                targetSequence: item2,
                scoreProvider: new Blosum62ScoreProvider(),
                gapOpenPenality: 4);

            AlignmentImplementationResults result
                = localAlignmentImpl.FindOptimalAlignment();

            // Checked before the first dereference; asserting later could never fail.
            Assert.IsNotNull(result);

            Console.WriteLine("--Optimal Alignment--");
            Console.WriteLine(result.TargetSequenceAlignment);
            Console.WriteLine(result.SearchSequenceAlignment);

            Console.WriteLine("--Optimal Score--");
            Console.WriteLine(result.AlignmentScore);

            var pvalueCalculator = new PValueCalculator <SmithWatermanImplementation>
                                   (
                alignmentImpl: localAlignmentImpl,
                permutationLimit: 1
                                   );

            string pValue = await pvalueCalculator.CalculatePValueAsync();

            // The p-value is the value produced by this step, so it is the one to verify.
            Assert.IsNotNull(pValue);

            Console.WriteLine("--Calculated P value--");
            Console.WriteLine(pValue);

            Console.WriteLine(result.PrettyPrint());
            Console.WriteLine(result.PrettyPrintScoreMatrix());
        }
예제 #12
0
        /// <summary>
        /// Builds animation metadata from every XML resource whose name starts with
        /// "anikeytext". Each child of the document element yields one
        /// <c>AnimationMetadata</c>; its "seq" children become sequences and every child
        /// of a "seq" node becomes a key.
        /// </summary>
        /// <returns>The list of parsed animations, in document order.</returns>
        protected override List <AnimationMetadata> Parse()
        {
            // Attribute values are machine-written, so they are parsed independently of the
            // current culture (a comma-decimal culture would otherwise misread "1.5").
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            List <AnimationMetadata> animations = new List <AnimationMetadata>();

            foreach (PackFileEntry entry in Resources.XmlFiles)
            {
                // Resource names are identifiers: compare ordinally, not linguistically.
                if (!entry.Name.StartsWith("anikeytext", System.StringComparison.Ordinal))
                {
                    continue;
                }

                XmlDocument document = Resources.XmlMemFile.GetDocument(entry.FileHeader);
                foreach (XmlNode animationNode in document.DocumentElement.ChildNodes)
                {
                    AnimationMetadata metadata = new AnimationMetadata();

                    // Only "kfm" nodes carry the actor id; other nodes keep the default.
                    if (animationNode.Name == "kfm")
                    {
                        metadata.ActorId = animationNode.Attributes["name"].Value;
                    }
                    foreach (XmlNode sequenceNode in animationNode)
                    {
                        if (sequenceNode.Name != "seq")
                        {
                            continue;
                        }

                        SequenceMetadata sequence = new SequenceMetadata();
                        sequence.SequenceId   = short.Parse(sequenceNode.Attributes["id"].Value, invariant);
                        sequence.SequenceName = sequenceNode.Attributes["name"].Value;
                        foreach (XmlNode keyNode in sequenceNode)
                        {
                            KeyMetadata key = new KeyMetadata();
                            key.KeyName = keyNode.Attributes["name"].Value;
                            key.KeyTime = float.Parse(keyNode.Attributes["time"].Value, invariant);
                            sequence.Keys.Add(key);
                        }
                        metadata.Sequence.Add(sequence);
                    }
                    animations.Add(metadata);
                }
            }
            return(animations);
        }
예제 #13
0
            /// <summary>
            /// Resolves a textual sequence key to its sequence metadata and returns the
            /// storage key for it. The entity name and property name are separated by the
            /// last ':' or '.'; an optional '@' suffix carries reference values separated
            /// by ',', '|' or '-'.
            /// </summary>
            /// <param name="key">The textual sequence key.</param>
            /// <param name="sequence">Receives the sequence metadata of the resolved property.</param>
            /// <returns>The key produced by the metadata-based <c>GetSequenceKey</c> overload.</returns>
            /// <exception cref="ArgumentNullException">Thrown when <paramref name="key"/> is null or empty.</exception>
            /// <exception cref="ArgumentException">
            /// Thrown when the key has no separator, or the entity, property or sequence cannot be resolved.
            /// </exception>
            private string GetSequenceKey(string key, out SequenceMetadata sequence)
            {
                sequence = null;

                if (string.IsNullOrEmpty(key))
                {
                    throw new ArgumentNullException(nameof(key));
                }

                var    index   = key.LastIndexOfAny(new[] { ':', '.', '@' });
                var    nameEnd = key.Length; // exclusive end of the property name within the key
                object data    = null;

                if (index > 0 && key[index] == '@')
                {
                    data = key.Substring(index + 1).Split(',', '|', '-');

                    // The property name stops at the '@'; everything after it is reference data
                    // and must not be part of the name used for the property lookup.
                    nameEnd = index;
                    index   = key.LastIndexOfAny(new[] { ':', '.' }, index);
                }

                if (index < 0)
                {
                    throw new ArgumentException("Invalid sequence key, the sequence key must separate the entity name and property name with a colon or a dot.");
                }

                var entityName   = key.Substring(0, index);
                var propertyName = key.Substring(index + 1, nameEnd - index - 1);

                if (!_provider.Metadata.Entities.TryGet(entityName, out var entity))
                {
                    throw new ArgumentException($"The '{entityName}' entity specified in the sequence key does not exist.");
                }

                if (!entity.Properties.TryGet(propertyName, out var found) || found.IsComplex)
                {
                    throw new ArgumentException($"The '{propertyName}' property specified in the sequence key does not exist or is not a simplex property.");
                }

                sequence = ((IEntitySimplexPropertyMetadata)found).Sequence;

                if (sequence == null)
                {
                    throw new ArgumentException($"The '{found.Name}' property specified in the sequence key is undefined.");
                }

                return(this.GetSequenceKey(sequence, data));
            }
예제 #14
0
        /// <summary>
        /// Parses a FASTA-style record held in a string and checks the accession id,
        /// the description and the start of the sequence of the parsed item.
        /// </summary>
        public void FastAParserMainline()
        {
            // The header and the residue lines sit at column 0 because they are part of the
            // verbatim string; indenting them would change the parsed input.
            string testString = @"sp|P15172|MYOD1_HUMAN Myoblast determination protein 1 OS=Homo sapiens GN=MYOD1 PE=1 SV=3
MELLSPPLRDVDLTAPDGSLCSFATTDDFYDDPCFDSPDLRFFEDLDPRLMHVGALLKPE
EHSHFPAAVHPAPGAREDEHVRAPSGHHQAGRCLLWACKACKRKTTNADRRKAATMRERR
RLSKVNEAFETLKRCTSSNPNQRLPKVEILRNAIRYIEGLQALLRDQDAAPPGAAAAFYA
PGPLPPGRGGEHYSGDSDASSPRSNCSDGMMDYSGPPSGARRRNCYEGAYYNEAPSEPRP
GKSAAVSSLDCLSSIVERISTESPAAPALLLADVPSESPPRRQEAAAPSEGESSGDPTQS
PDAAPQCPAGANPNPIYQVL";

            SequenceMetadata item = FastAParser.ParseString(testString);

            Assert.AreEqual("P15172", item.AccessionId, "AccessionId mismatch");
            Assert.AreEqual(
                "MYOD1_HUMAN Myoblast determination protein 1 OS=Homo sapiens GN=MYOD1 PE=1 SV=3",
                item.Description,
                "Description mismatch");

            // The expected text is the first two residue lines joined without a line break.
            Assert.IsTrue(
                item.Sequence.Contains("MELLSPPLRDVDLTAPDGSLCSFATTDDFYDDPCFDSPDLRFFEDLDPRLMHVGALLKPEEHSHFPAAVHPAPGAREDEHVRAPSGHHQAGRCLLWACKACKRKTTNADRRKAATMRERR"),
                "Sequence is wrong");
        }
예제 #15
0
        /// <summary>
        /// Runs Viterbi training ten times over the looked-up genomic sequence using the
        /// GC-patch parameters and prints every iteration: the first nine with five hits
        /// each, the last one with all hits.
        /// </summary>
        public async Task ViterbiGCPatchTenRuns()
        {
            const int executionCount = 10;

            var gcPatchParameters  = new GCPatchParameters();
            SequenceMetadata item1 = await
                                     FastALookupCient.LookupByAccessionIdAsync("GCF_000091665.1_ASM9166v1_genomic");

            var viterbigcPatch           = new ViterbiImpl(gcPatchParameters, input: item1.Sequence);
            List <ViterbiResult> results = viterbigcPatch.ExecuteViterbiAndTrain(executionCount: executionCount);

            // Checked before the list is indexed; asserting afterwards could never fail.
            Assert.IsNotNull(results);

            int lastIndex = executionCount - 1;

            for (int i = 0; i < lastIndex; i++)
            {
                Console.WriteLine("Iteration {0}", i + 1);
                Console.WriteLine("---------------------------------------------------------------------------");
                Console.WriteLine(results[i].PrettyPrint(interestedStateIndex: 1, numberOfHits: 5));
            }

            // The final iteration is printed with all of its hits.
            Console.WriteLine("Iteration {0}", executionCount);
            Console.WriteLine("---------------------------------------------------------------------------");
            Console.WriteLine(results[lastIndex].PrettyPrintAllHits(interestedStateIndex: 1));
        }
예제 #16
0
        /// <summary>
        /// Aligns two looked-up sequences with Smith-Waterman using the simple score
        /// provider and a gap-open penalty of 1, prints the result, and pins the expected
        /// score and both aligned strings.
        /// </summary>
        public async Task SmithWatermanImplementationMainlineWithSimpleScoringScheme()
        {
            SequenceMetadata item1 = await
                                     FastALookupCient.LookupByAccessionIdAsync("Q10574");

            SequenceMetadata item2 = await
                                     FastALookupCient.LookupByAccessionIdAsync("P15172");

            var localAlignmentImpl = new SmithWatermanImplementation(
                sequenceTomatch: item1,
                targetSequence: item2,
                scoreProvider: new SimpleScoreProvider(),
                gapOpenPenality: 1);

            AlignmentImplementationResults result
                = localAlignmentImpl.FindOptimalAlignment();

            Console.WriteLine("--Optimal Alignment--");
            Console.WriteLine(result.TargetSequenceAlignment);
            Console.WriteLine(result.SearchSequenceAlignment);

            Console.WriteLine("--Optimal Score--");
            Console.WriteLine(result.AlignmentScore);

            Assert.AreEqual(expected: 19,
                            actual: result.AlignmentScore,
                            message: "Mismatching alignment scores");

            Assert.AreEqual(
                expected: "VE-IL-RNA-IRY-I-E-GL-QA-LL-RDQD",
                actual: result.TargetSequenceAlignment,
                message: "Mismatching target alignment sequence");

            // Failure message names the search alignment so it can be told apart from the
            // target alignment check above.
            Assert.AreEqual(
                expected: "-FE-TL-QMA-QKY-I-E-CL-SQ-IL-KQD",
                actual: result.SearchSequenceAlignment,
                message: "Mismatching search alignment sequence");
        }
        /// <summary>
        /// Initializes the state shared by alignment algorithms: the two sequences, the
        /// score provider and the gap-open penalty.
        /// </summary>
        /// <param name="sequenceTomatch">Sequence to match; must not be null.</param>
        /// <param name="targetSequence">Sequence to match against; must not be null.</param>
        /// <param name="scoreProvider">Score provider; must not be null.</param>
        /// <param name="gapOpenPenality">Gap-open penalty stored on the instance.</param>
        /// <exception cref="ArgumentNullException">Thrown when any of the reference arguments is null.</exception>
        public AlignmentImplementationBase(SequenceMetadata sequenceTomatch,
                                           SequenceMetadata targetSequence,
                                           AlignmentScoreProviderBase scoreProvider,
                                           int gapOpenPenality)
        {
            // Guards run in parameter order, so the first null argument is the one reported.
            if (sequenceTomatch == null) { throw new ArgumentNullException("sequenceTomatch"); }
            if (targetSequence == null)  { throw new ArgumentNullException("targetSequence"); }
            if (scoreProvider == null)   { throw new ArgumentNullException("scoreProvider"); }

            TargetSequence  = targetSequence;
            SequenceToMatch = sequenceTomatch;
            ScoreProvider   = scoreProvider;
            GapOpenPenality = gapOpenPenality;
        }
예제 #18
0
        /// <summary>
        /// Runs Viterbi training ten times over the looked-up "DiceRoll" sequence using
        /// the dice-roll parameters and prints every iteration together with its state
        /// transition representation: the first nine with five hits each, the last one
        /// with all hits.
        /// </summary>
        public async Task ViterbiDiceRollTenRuns()
        {
            const int executionCount = 10;

            var diceRollParams     = new DiceRollParameters();
            SequenceMetadata item1 = await
                                     FastALookupCient.LookupByAccessionIdAsync("DiceRoll");

            var viterbiDiceRoll          = new ViterbiImpl(diceRollParams, input: item1.Sequence);
            List <ViterbiResult> results = viterbiDiceRoll.ExecuteViterbiAndTrain(executionCount: executionCount);

            // Checked before the list is indexed; asserting afterwards could never fail.
            Assert.IsNotNull(results);

            int lastIndex = executionCount - 1;

            for (int i = 0; i < lastIndex; i++)
            {
                Console.WriteLine("Iteration {0}", i + 1);
                Console.WriteLine("---------------------------------------------------------------------------");
                Console.WriteLine(results[i].PrettyPrint(interestedStateIndex: 1, numberOfHits: 5));
                Console.WriteLine(results[i].StateTransitionRepresentaton);
            }

            // The final iteration is printed with all of its hits.
            Console.WriteLine("Iteration {0}", executionCount);
            Console.WriteLine("---------------------------------------------------------------------------");
            Console.WriteLine(results[lastIndex].PrettyPrintAllHits(interestedStateIndex: 1));
            Console.WriteLine(results[lastIndex].StateTransitionRepresentaton);
        }
예제 #19
0
        /// <summary>
        /// Populates the genome metadata from an XML file. The genome name is read from
        /// the "sequenceSizes" element and one <c>SequenceMetadata</c> is appended to
        /// <c>Sequences</c> per "chromosome" element; <c>Length</c> and <c>KnownBases</c>
        /// are reset and then accumulated over the chromosomes read.
        /// </summary>
        /// <param name="inputFilename">Path of the XML file; FASTA file names are resolved relative to its directory.</param>
        public void Deserialize(string inputFilename)
        {
            // open the XML file
            inputFilename = Path.GetFullPath(inputFilename);
            string directory = Path.GetDirectoryName(inputFilename);

            // Numeric attributes are machine-written, so parse them culture-independently.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            Length     = 0;
            KnownBases = 0; // initial
            int refIndex = 0;
            IGenomesReferencePath iGenomesReference = IGenomesReferencePath.GetReferenceFromFastaPath(directory);

            // use StreamReader to avoid URI parsing of filename that will cause problems with
            // certain characters in the path (#).
            // The StreamReader gets its own using: an XmlReader created with default settings
            // does not close the reader it wraps, so the file would otherwise stay open.
            using (var fileReader = new StreamReader(inputFilename))
            using (var xmlReader = XmlReader.Create(fileReader))
            {
                while (xmlReader.Read())
                {
                    // only element nodes carry the attributes of interest
                    if (xmlReader.NodeType != XmlNodeType.Element)
                    {
                        continue;
                    }

                    // retrieve the genome variables
                    if (xmlReader.Name == "sequenceSizes")
                    {
                        Name = xmlReader.GetAttribute("genomeName");
                        if (iGenomesReference != null && string.IsNullOrEmpty(Name))
                        {
                            Name = iGenomesReference.ToString();
                        }
                    }

                    // retrieve the chromosome variables
                    if (xmlReader.Name == "chromosome")
                    {
                        SequenceMetadata refSeq = new SequenceMetadata
                        {
                            FastaPath = Path.Combine(directory, xmlReader.GetAttribute("fileName")),
                            Name      = xmlReader.GetAttribute("contigName"),
                            Index     = refIndex++,
                            Length    = long.Parse(xmlReader.GetAttribute("totalBases"), invariant),
                            Type      = ParseSequenceType(xmlReader.GetAttribute("type"))
                        };
                        Length += refSeq.Length;

                        refSeq.Build   = xmlReader.GetAttribute("build");
                        refSeq.Species = xmlReader.GetAttribute("species");

                        // update species and build from fasta path if in iGenomes format
                        if (iGenomesReference != null)
                        {
                            if (string.IsNullOrEmpty(refSeq.Build))
                            {
                                refSeq.Build = iGenomesReference.Build;
                            }
                            if (string.IsNullOrEmpty(refSeq.Species))
                            {
                                refSeq.Species = iGenomesReference.Species;
                            }
                        }

                        // the remaining attributes are optional and only applied when present
                        string isCircular = xmlReader.GetAttribute("isCircular");
                        if (!string.IsNullOrEmpty(isCircular))
                        {
                            refSeq.IsCircular = (isCircular == "true");
                        }

                        string ploidy = xmlReader.GetAttribute("ploidy");
                        if (!string.IsNullOrEmpty(ploidy))
                        {
                            refSeq.Ploidy = int.Parse(ploidy, invariant);
                        }

                        string md5 = xmlReader.GetAttribute("md5");
                        if (!string.IsNullOrEmpty(md5))
                        {
                            refSeq.Checksum = md5;
                        }

                        string knownBases = xmlReader.GetAttribute("knownBases");
                        if (!string.IsNullOrEmpty(knownBases))
                        {
                            refSeq.KnownBases = long.Parse(knownBases, invariant);
                            KnownBases       += refSeq.KnownBases;
                        }

                        Sequences.Add(refSeq);
                    }
                }
            }
        }
예제 #20
0
            /// <summary>
            /// Builds the storage key of a sequence: the key prefix, the entity name and the
            /// property name, followed - when the sequence declares references - by ':' and
            /// the trimmed reference values joined with '-'.
            /// </summary>
            /// <param name="sequence">The sequence metadata the key is built for.</param>
            /// <param name="data">
            /// Source of the reference values: an <c>IEntity</c>, a generic or classic dictionary
            /// (looked up by reference name), a scalar array (read by reference position), a
            /// single scalar (used for every reference), or any other object (read by member name).
            /// Only required when the sequence declares references.
            /// </param>
            /// <returns>The sequence key.</returns>
            /// <exception cref="InvalidOperationException">
            /// Thrown when references are declared but <paramref name="data"/> is null, or a
            /// reference value is missing or null.
            /// </exception>
            private string GetSequenceKey(SequenceMetadata sequence, object data)
            {
                var key = SEQUENCE_KEY + sequence.Property.Entity.Name + "." + sequence.Property.Name;

                if (sequence.References == null || sequence.References.Length == 0)
                {
                    return(key);
                }

                if (data == null)
                {
                    throw new InvalidOperationException($"Missing required references data for the '{sequence.Name}' sequence.");
                }

                var index = 0;

                foreach (var reference in sequence.References)
                {
                    // Scoped to the iteration so a value resolved for a previous reference
                    // can never leak into this one.
                    object value = null;

                    switch (data)
                    {
                    case IEntity entity:
                        if (!entity.TryGetValue(reference.Name, out value))
                        {
                            value = null;
                        }

                        break;

                    case IDictionary <string, object> genericDictionary:
                        if (!genericDictionary.TryGetValue(reference.Name, out value))
                        {
                            value = null;
                        }

                        break;

                    case IDictionary classicDictionary:
                        // Read the entry itself; checking for the key alone leaves no value to append.
                        if (classicDictionary.Contains(reference.Name))
                        {
                            value = classicDictionary[reference.Name];
                        }

                        break;

                    default:
                        if (Zongsoft.Common.TypeExtension.IsScalarType(data.GetType()))
                        {
                            // Array data is positional (one element per reference);
                            // a single scalar is used for every reference.
                            value = data.GetType().IsArray ? ((Array)data).GetValue(index) : data.ToString();
                        }
                        else
                        {
                            // Keep the member value that was read so it can be appended below.
                            value = Reflection.Reflector.GetValue(data, reference.Name);
                        }

                        break;
                    }

                    if (value == null)
                    {
                        throw new InvalidOperationException($"The required '{reference.Name}' reference of sequence is not included in the data.");
                    }

                    // ':' introduces the first reference value, '-' separates the following ones.
                    key += (index++ == 0 ? ":" : "-") + value.ToString().Trim();
                }

                return(key);
            }
예제 #21
0
        /// <summary>
        /// Runs a local alignment of two small hard-coded sequences, then aligns every
        /// ordered pair of the listed protein accession ids, writing scores and alignments
        /// to both the console and the output file. Empirical p-values are computed for
        /// the hard-coded pair and for P15172 against Q10574 and O95363. Finally the
        /// upper triangle of the score matrix is printed and the program waits for a line
        /// of input.
        /// </summary>
        /// <param name="args">Command line arguments; not used.</param>
        static void Main(string[] args)
        {
            string outputLocation      = @"c:\temp\output_jeeshn.txt";
            var    proteinAccessionIds = new string[]
            {
                "P15172",
                "P17542",
                "P10085",
                "P16075",
                "P13904",
                "Q90477",
                "Q8IU24",
                "P22816",
                "Q10574",
                "O95363"
            };

            // Input data for a simple sequence
            var item1 = new SequenceMetadata
            {
                AccessionId = "X1",
                Sequence    = "deadly"
            };

            var item2 = new SequenceMetadata
            {
                AccessionId = "X2",
                Sequence    = "ddgearlyk"
            };

            using (var fileStream = File.Open(outputLocation, FileMode.Create))
            using (var streamWriter = new StreamWriter(fileStream))
            {
                // Run the local alignment .
                var localAlignmentImpl = new SmithWatermanImplementation(
                    sequenceTomatch: item1,
                    targetSequence: item2,
                    scoreProvider: new Blosum62ScoreProvider(),
                    gapOpenPenality: 4);

                AlignmentImplementationResults result
                    = localAlignmentImpl.FindOptimalAlignment();

                // "Capture" output
                WriteAlignmentSummary(item1, item2, result, streamWriter);

                WriteToConsoleAndFile("Score Matrix", streamWriter);
                WriteToConsoleAndFile(result.PrettyPrintScoreMatrix(), streamWriter);

                // P-value calculation
                WriteEmpiricalPValue(localAlignmentImpl, streamWriter);

                // Sized from the id list so adding or removing ids cannot overrun the matrix.
                int proteinCount   = proteinAccessionIds.Length;
                int[,] scoreMatrix = new int[proteinCount, proteinCount];

                for (int i = 0; i < proteinCount; i++)
                {
                    // The first sequence only depends on i, so it is looked up once per row.
                    // Main is synchronous, hence the blocking wait on the task.
                    item1 = FastALookupCient.LookupByAccessionIdAsync(proteinAccessionIds[i]).Result;

                    for (int j = 0; j < proteinCount; j++)
                    {
                        // no need to compare same sequences
                        if (i == j)
                        {
                            continue;
                        }

                        item2 = FastALookupCient.LookupByAccessionIdAsync(proteinAccessionIds[j]).Result;

                        localAlignmentImpl = new SmithWatermanImplementation(
                            sequenceTomatch: item1,
                            targetSequence: item2,
                            scoreProvider: new Blosum62ScoreProvider(),
                            gapOpenPenality: 4);

                        result = localAlignmentImpl.FindOptimalAlignment();

                        // "Capture" output
                        WriteAlignmentSummary(item1, item2, result, streamWriter);

                        scoreMatrix[i, j] = result.AlignmentScore;

                        // p-values are only computed for these two pairs
                        if (item1.AccessionId == "P15172" &&
                            (item2.AccessionId == "Q10574" || item2.AccessionId == "O95363"))
                        {
                            WriteEmpiricalPValue(localAlignmentImpl, streamWriter);
                        }
                    }
                }

                // Only the upper triangle (j >= i) shows computed scores; the rest prints as 0.
                var stringBuilder = new StringBuilder();
                for (int i = 0; i <= scoreMatrix.GetUpperBound(0); i++)
                {
                    for (int j = 0; j <= scoreMatrix.GetUpperBound(1); j++)
                    {
                        if (j >= i)
                        {
                            stringBuilder.Append(scoreMatrix[i, j].ToString().PadRight(6, ' '));
                        }
                        else
                        {
                            stringBuilder.Append("0".PadRight(6, ' '));
                        }
                    }
                    stringBuilder.AppendLine();
                }

                WriteToConsoleAndFile("Protein scoring matrix", streamWriter);
                WriteToConsoleAndFile(stringBuilder.ToString(), streamWriter);

                streamWriter.Flush();
                fileStream.Flush();
            }
            Console.ReadLine();
        }

        // Writes the "<id> vs <id>" header, the alignment score and the pretty-printed alignment.
        private static void WriteAlignmentSummary(
            SequenceMetadata item1,
            SequenceMetadata item2,
            AlignmentImplementationResults result,
            StreamWriter streamWriter)
        {
            WriteToConsoleAndFile(
                String.Format("{0} vs {1}", item1.AccessionId, item2.AccessionId), streamWriter);
            WriteToConsoleAndFile("Alignment Score", streamWriter);
            WriteToConsoleAndFile(result.AlignmentScore.ToString(), streamWriter);

            WriteToConsoleAndFile("Alignment", streamWriter);
            WriteToConsoleAndFile(result.PrettyPrint(), streamWriter);
        }

        // Computes the p-value of the given alignment with 999 permutations and writes it,
        // followed by an empty line.
        private static void WriteEmpiricalPValue(
            SmithWatermanImplementation alignmentImpl,
            StreamWriter streamWriter)
        {
            var pvalueCalculator = new PValueCalculator <SmithWatermanImplementation>
                                   (
                alignmentImpl: alignmentImpl,
                permutationLimit: 999
                                   );

            //Print p values
            string pValue = pvalueCalculator.CalculatePValueAsync().Result;
            WriteToConsoleAndFile("Empirical p-value", streamWriter);
            WriteToConsoleAndFile(pValue, streamWriter);
            WriteToConsoleAndFile(String.Empty, streamWriter);
        }
예제 #22
0
        /// <summary>
        /// Writes the delimited import files for the alignment held in <c>_data</c>:
        /// the alignment row, its column mapping, and, for every sequence in the mapped
        /// alignment, the sequence, accession, per-position and secondary-structure rows.
        /// Fields in every row are separated by a tab, a pipe and a tab.
        /// </summary>
        /// <remarks>
        /// All eight output files are created up front and are wrapped in <c>using</c>
        /// statements so that they are flushed and closed even if an exception is thrown
        /// part-way through (for example a failed metadata cast or a missing extent type key).
        /// A warning is printed to the console when a sequence's stored length does not
        /// match the number of non-gap positions actually observed.
        /// </remarks>
        private void CreateAlignmentImportFiles()
        {
            // Seven-field row layout shared by every secondary-structure extent record:
            // SeqID, AlignmentID, extent id, ordinal within the extent, start, end, extent type id.
            const string extentRowFormat = "{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}\t|\t{5}\t|\t{6}";

            // Files are created in the same order as before; stacked using statements guarantee
            // that every writer opened so far is disposed if a later step throws.
            using (StreamWriter alignmentcolumn = File.CreateText(_alignmentColumnFile))
            using (StreamWriter alignment = File.CreateText(_alignmentFile))
            using (StreamWriter alignmentsequence = File.CreateText(_alignmentSequenceDataFile))
            using (StreamWriter alignmentdata = File.CreateText(_alignmentDataFile))
            using (StreamWriter sequencemain = File.CreateText(_sequenceMainFile))
            using (StreamWriter sequenceaccession = File.CreateText(_sequenceAccessionFile))
            using (StreamWriter secondarystructurebasepairs = File.CreateText(_secondaryStructureBasePairsFile))
            using (StreamWriter secondarystructureextents = File.CreateText(_secondaryStructureExtentsFile))
            {
                //rCAD.Alignment: Entry in the alignment table for the new alignment
                alignment.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}", _data.AlignmentID, _data.AlignmentSeqTypeID, _data.MappedAlignment.LogicalName, _data.MappedAlignment.Columns + 1);
                // Finished with this file; release it early (the enclosing using makes the later dispose a no-op).
                alignment.Close();

                //rCAD.AlignmentColumn: We have a 1 to 1 mapping of the logical and physical column numbers at the start
                for (int i = 0; i < _data.MappedAlignment.Columns; i++)
                {
                    alignmentcolumn.WriteLine("{0}\t|\t{1}\t|\t{2}", _data.AlignmentID, i + 1, i + 1);
                }
                alignmentcolumn.Close();

                //rCAD.AlignmentSequence, rCAD.AlignmentData, rCAD.SequenceMain, rCAD.SequenceAccession written on a per-sequence basis
                //We will do duplicate checking inside the database.
                foreach (var sequence in _data.MappedAlignment.Sequences)
                {
                    SequenceMetadata    metadata        = (SequenceMetadata)sequence.Metadata[SequenceMetadata.SequenceMetadataLabel];
                    SequenceMappingData mappingMetadata = (SequenceMappingData)sequence.Metadata[Mapper.rCADMappingData];
                    int seqLengthMetadata = metadata.SequenceLength;
                    int firstNtColNum     = -1; // 1-based column of the first non-gap position, -1 if none seen
                    int lastNtColNumber   = -1; // 1-based column of the last non-gap position, -1 if none seen
                    int sequenceIndex     = 1;  // 1-based index of the next non-gap position within the sequence

                    for (int i = 0; i < sequence.Count; i++)
                    {
                        if (sequence[i].IsGap)
                        {
                            continue; //We are only actually writing the non-gap positions.
                        }

                        alignmentdata.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}", mappingMetadata.SeqID, _data.AlignmentID, i + 1, sequence[i].Symbol, sequenceIndex);
                        sequenceIndex++;

                        if (firstNtColNum < 0)
                        {
                            firstNtColNum = i + 1;
                        }

                        // The last column number is simply the last column with data we've seen.
                        // It is 1-based (i + 1), matching firstNtColNum; firstNtColNum is always set
                        // by this point, so no additional guard is needed.
                        lastNtColNumber = i + 1;
                    }

                    if ((sequenceIndex - 1) != seqLengthMetadata)
                    {
                        Console.WriteLine("Warning: Existing metadata for SeqLength ({0}) does not match number of observed nt ({1}) for {2}", seqLengthMetadata, sequenceIndex - 1, sequence.ID);
                    }

                    sequencemain.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}\t|\t\t|\t",
                                           mappingMetadata.SeqID, mappingMetadata.TaxID, mappingMetadata.LocationID, _data.AlignmentSeqTypeID, seqLengthMetadata);
                    alignmentsequence.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}", mappingMetadata.SeqID, _data.AlignmentID, metadata.AlignmentRowName, firstNtColNum, lastNtColNumber);

                    foreach (var gbentry in metadata.Accessions)
                    {
                        sequenceaccession.WriteLine("{0}\t|\t{1}\t|\t{2}", mappingMetadata.SeqID, gbentry.Accession, gbentry.Version);
                    }

                    // Secondary-structure rows are only written when a structure model with at least one pair exists.
                    if (metadata.StructureModel == null || metadata.StructureModel.Pairs.Count() <= 0)
                    {
                        continue;
                    }

                    foreach (int fivePrime in metadata.StructureModel.Pairs.Keys)
                    {
                        secondarystructurebasepairs.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}", mappingMetadata.SeqID, _data.AlignmentID, fivePrime,
                                                              metadata.StructureModel.Pairs[fivePrime]);
                    }

                    // extentID numbers the extents of this sequence consecutively across all extent types.
                    int extentID = 1;

                    // Two-sided extents (helices, internal loops, pseudoknot helices) emit one row per side,
                    // with ordinals 1 and 2 sharing the same extent id.
                    foreach (var helix in metadata.StructureModel.Helices)
                    {
                        secondarystructureextents.WriteLine(extentRowFormat, mappingMetadata.SeqID, _data.AlignmentID,
                                                            extentID, 1, helix.FivePrimeStart, helix.FivePrimeEnd, _data.ExtentTypeIDs["Helix"]);
                        secondarystructureextents.WriteLine(extentRowFormat, mappingMetadata.SeqID, _data.AlignmentID,
                                                            extentID, 2, helix.ThreePrimeStart, helix.ThreePrimeEnd, _data.ExtentTypeIDs["Helix"]);
                        extentID++;
                    }

                    foreach (var hairpinloop in metadata.StructureModel.Hairpins)
                    {
                        secondarystructureextents.WriteLine(extentRowFormat, mappingMetadata.SeqID, _data.AlignmentID,
                                                            extentID, 1, hairpinloop.Loop.LoopStart, hairpinloop.Loop.LoopEnd, _data.ExtentTypeIDs["Hairpin Loop"]);
                        extentID++;
                    }

                    foreach (var internalloop in metadata.StructureModel.Internals)
                    {
                        secondarystructureextents.WriteLine(extentRowFormat, mappingMetadata.SeqID, _data.AlignmentID,
                                                            extentID, 1, internalloop.FivePrimeLoop.LoopStart, internalloop.FivePrimeLoop.LoopEnd, _data.ExtentTypeIDs["Internal Loop"]);
                        secondarystructureextents.WriteLine(extentRowFormat, mappingMetadata.SeqID, _data.AlignmentID,
                                                            extentID, 2, internalloop.ThreePrimeLoop.LoopStart, internalloop.ThreePrimeLoop.LoopEnd, _data.ExtentTypeIDs["Internal Loop"]);
                        extentID++;
                    }

                    foreach (var bulgeloop in metadata.StructureModel.Bulges)
                    {
                        secondarystructureextents.WriteLine(extentRowFormat, mappingMetadata.SeqID, _data.AlignmentID,
                                                            extentID, 1, bulgeloop.Bulge.LoopStart, bulgeloop.Bulge.LoopEnd, _data.ExtentTypeIDs["Bulge Loop"]);
                        extentID++;
                    }

                    // A multistem loop emits one row per segment, numbered from 1, all under one extent id.
                    foreach (var multistemloop in metadata.StructureModel.Stems)
                    {
                        int stemordinal = 1;
                        foreach (var stem in multistemloop.Segments)
                        {
                            secondarystructureextents.WriteLine(extentRowFormat, mappingMetadata.SeqID, _data.AlignmentID,
                                                                extentID, stemordinal, stem.LoopStart, stem.LoopEnd, _data.ExtentTypeIDs["Multistem Loop"]);
                            stemordinal++;
                        }
                        extentID++;
                    }

                    foreach (var free in metadata.StructureModel.Strands)
                    {
                        secondarystructureextents.WriteLine(extentRowFormat, mappingMetadata.SeqID, _data.AlignmentID,
                                                            extentID, 1, free.LoopStart, free.LoopEnd, _data.ExtentTypeIDs["Free"]);
                        extentID++;
                    }

                    foreach (var tail in metadata.StructureModel.Tails)
                    {
                        secondarystructureextents.WriteLine(extentRowFormat, mappingMetadata.SeqID, _data.AlignmentID,
                                                            extentID, 1, tail.LoopStart, tail.LoopEnd, _data.ExtentTypeIDs["Tail"]);
                        extentID++;
                    }

                    foreach (var knot in metadata.StructureModel.KnottedHelices)
                    {
                        secondarystructureextents.WriteLine(extentRowFormat, mappingMetadata.SeqID, _data.AlignmentID,
                                                            extentID, 1, knot.FivePrimeStart, knot.FivePrimeEnd, _data.ExtentTypeIDs["Pseudoknot Helix"]);
                        secondarystructureextents.WriteLine(extentRowFormat, mappingMetadata.SeqID, _data.AlignmentID,
                                                            extentID, 2, knot.ThreePrimeStart, knot.ThreePrimeEnd, _data.ExtentTypeIDs["Pseudoknot Helix"]);
                        extentID++;
                    }
                }

                // The remaining writers are flushed and closed when the using statements dispose them.
            }
        }