// Aligns Q10574 against P15172 with the simple score provider (gap open penalty 1),
// runs the p-value calculator with a permutation limit of 10 and checks that a
// p-value string comes back.
public async Task PValueMainline()
{
    SequenceMetadata searchSequence = await FastALookupCient.LookupByAccessionIdAsync("Q10574");
    SequenceMetadata targetSequence = await FastALookupCient.LookupByAccessionIdAsync("P15172");

    var aligner = new SmithWatermanImplementation(
        sequenceTomatch: searchSequence,
        targetSequence: targetSequence,
        scoreProvider: new SimpleScoreProvider(),
        gapOpenPenality: 1);

    var calculator = new PValueCalculator<SmithWatermanImplementation>(
        alignmentImpl: aligner,
        permutationLimit: 10);

    string pValue = await calculator.CalculatePValueAsync();

    Assert.IsNotNull(pValue);
    Console.WriteLine("--Calculated P value--");
    Console.WriteLine(pValue);
}
/// <summary>
/// Forwards the increase request to the sequence attached to this data access.
/// </summary>
/// <param name="sequence">The sequence metadata passed through to the underlying sequence.</param>
/// <param name="data">The data passed through to the underlying sequence.</param>
/// <returns>The value returned by the underlying <c>DataSequence.Increase</c> call.</returns>
/// <exception cref="InvalidOperationException">This data access has no sequence.</exception>
internal long Increase(SequenceMetadata sequence, object data)
{
    var provider = this.Sequence;

    if (provider != null)
    {
        return ((DataSequence)provider).Increase(sequence, data);
    }

    throw new InvalidOperationException($"Missing required sequence of the '{this.Name}' DataAccess.");
}
/// <summary>
/// Increments the counter identified by the key built from <paramref name="sequence"/>
/// and <paramref name="data"/>, using the interval and seed of the sequence.
/// </summary>
/// <param name="sequence">The sequence metadata; must not be null.</param>
/// <param name="data">The data used to build the sequence key.</param>
/// <returns>The value returned by the underlying <c>Increment</c> call.</returns>
/// <exception cref="ArgumentNullException"><paramref name="sequence"/> is null.</exception>
public long Increase(SequenceMetadata sequence, object data)
{
    if (sequence == null)
    {
        throw new ArgumentNullException(nameof(sequence));
    }

    var sequenceKey = this.GetSequenceKey(sequence, data);

    return _sequence.Increment(sequenceKey, sequence.Interval, sequence.Seed);
}
/// <summary>
/// Creates a new instance of SmithWatermanImplementation.
/// All arguments are forwarded to the base constructor, after which
/// <c>IntializeSubstitutionMatrix</c> is called.
/// </summary>
/// <param name="sequenceTomatch">Sequence to match</param>
/// <param name="targetSequence">Sequence to match against.</param>
/// <param name="scoreProvider">Score provider forwarded to the base constructor.</param>
/// <param name="gapOpenPenality">Gap open penalty forwarded to the base constructor.</param>
public SmithWatermanImplementation(
    SequenceMetadata sequenceTomatch,
    SequenceMetadata targetSequence,
    AlignmentScoreProviderBase scoreProvider,
    int gapOpenPenality)
    : base(sequenceTomatch, targetSequence, scoreProvider, gapOpenPenality)
{
    this.IntializeSubstitutionMatrix();
}
/// <summary>
/// Parses the textual sequence definition and stores the result in <c>_sequence</c>.
/// A null, empty or whitespace-only definition leaves the current value untouched.
/// </summary>
/// <param name="sequence">The textual sequence definition.</param>
internal void SetSequence(string sequence)
{
    if (!string.IsNullOrWhiteSpace(sequence))
    {
        _sequence = SequenceMetadata.Parse(
            sequence,
            (name, seed, interval, references) => new SequenceMetadata(this, name, GetSeed(seed), interval, references));
    }
}
/// <summary>
/// Looks up the id of the named sequence among the sequences of the given actor.
/// </summary>
/// <param name="actorId">Actor whose sequences are searched.</param>
/// <param name="sequenceName">Exact sequence name to look for.</param>
/// <returns>The id of the first sequence with that name, or 0 when there is none.</returns>
public static short GetSequenceIdBySequenceName(string actorId, string sequenceName)
{
    SequenceMetadata match = GetSequencesByActorId(actorId)
        .FirstOrDefault(candidate => candidate.SequenceName == sequenceName);

    if (match == default)
    {
        return 0;
    }

    return match.SequenceId;
}
/// <summary>
/// Builds a fresh <typeparamref name="T"/> for the two sequences, reusing the score
/// provider and gap open penalty of the configured alignment, and returns its
/// optimal alignment score.
/// </summary>
/// <param name="sequenceToMatch">First constructor argument of the new algorithm instance.</param>
/// <param name="targetSequence">Second constructor argument of the new algorithm instance.</param>
/// <returns>An already-completed task carrying the alignment score.</returns>
Task<int> FindScoreAsync(SequenceMetadata sequenceToMatch, SequenceMetadata targetSequence)
{
    object[] constructorArguments =
    {
        sequenceToMatch,
        targetSequence,
        this.alignmentImpl.ScoreProvider,
        this.alignmentImpl.GapOpenPenality
    };

    var algorithm = (T)Activator.CreateInstance(typeof(T), constructorArguments);
    int score = algorithm.FindOptimalAlignment().AlignmentScore;

    return Task.FromResult(score);
}
// Registers with the message mediator, then binds _metadata to the SequenceMetadata
// stored on the sequence under SequenceMetadata.SequenceMetadataLabel, creating and
// attaching an empty one when the sequence has none yet.
private void Initialize()
{
    // The registration result is not used here.
    RegisterWithMessageMediator();

    if (!_sequence.Metadata.ContainsKey(SequenceMetadata.SequenceMetadataLabel))
    {
        // We'll add metadata to the sequence.
        _metadata = new SequenceMetadata();
        _sequence.Metadata.Add(SequenceMetadata.SequenceMetadataLabel, _metadata);
        return;
    }

    _metadata = (SequenceMetadata)_sequence.Metadata[SequenceMetadata.SequenceMetadataLabel];
}
/// <summary>
/// Walks the reference sequence list looking for a sequence whose name matches
/// <paramref name="sequenceName"/> (ordinal, case-insensitive).
/// TODO: create lookup table to make this faster?
/// </summary>
/// <param name="sequenceName">Name to search for.</param>
/// <param name="foundSequence">The first matching sequence, or null when none matches.</param>
/// <returns>true when a match was found; otherwise false.</returns>
public bool TryGetSequence(string sequenceName, out SequenceMetadata foundSequence)
{
    foundSequence = null;

    foreach (SequenceMetadata candidate in Sequences)
    {
        if (!string.Equals(candidate.Name, sequenceName, StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        foundSequence = candidate;
        break;
    }

    return foundSequence != null;
}
/// <summary>
/// Looks up the database identifiers needed for the alignment (next sequence id,
/// alignment id, sequence type id, taxonomy ids, extent type ids, location ids) and
/// attaches a <c>SequenceMappingData</c> entry to every sequence of the alignment.
/// </summary>
/// <returns>
/// true when every lookup succeeded; false when <c>MappedAlignment</c> is null or
/// any step threw.
/// </returns>
public bool Map()
{
    if (MappedAlignment == null)
    {
        return false;
    }

    try
    {
        rCADDataContext dc = CreateDataContext();

        try
        {
            NextSeqID = dc.NextSeqIDs.Select(row => row.SeqID).First();
            AlignmentID = dc.NextAlnIDs.Select(row => row.AlnID).First();
            NextAlnID = AlignmentID + 1;
            AlignmentSeqTypeID = dc.SequenceTypes.Where(row => row.MoleculeType.Equals(_alignment.MoleculeType) &&
                                                               row.GeneName.Equals(_alignment.GeneName) &&
                                                               row.GeneType.Equals(_alignment.GeneType)).First().SeqTypeID;

            // Sequence ID -> TaxID for every sequence whose scientific name is in the taxonomy table.
            var seqToTaxID = (from seq in _alignment.Sequences
                              join taxonomyNameRow in dc.TaxonomyNames
                              on((SequenceMetadata)seq.Metadata[SequenceMetadata.SequenceMetadataLabel]).ScientificName equals taxonomyNameRow.ScientificName
                              select new { seq.ID, taxonomyNameRow.TaxID }).ToDictionary(match => match.ID, match => match.TaxID);

            int rootTaxID = (from taxname in dc.TaxonomyNames
                             where taxname.ScientificName.Equals("root")
                             select taxname.TaxID).First();

            ExtentTypeIDs = (from bar in dc.SecondaryStructureExtentTypes
                             select new { bar.ExtentTypeID, bar.ExtentType }).ToDictionary(match => match.ExtentType, match => match.ExtentTypeID);

            foreach (var sequence in _alignment.Sequences)
            {
                SequenceMetadata metadata = (SequenceMetadata)sequence.Metadata[SequenceMetadata.SequenceMetadataLabel];
                SequenceMappingData data = new SequenceMappingData();
                data.SeqID = NextSeqID;
                data.TaxID = seqToTaxID.ContainsKey(sequence.ID) ? seqToTaxID[sequence.ID] : rootTaxID; //The sequence is mapped to the root of the Taxonomy tree if we don't have mapping info.
                data.LocationID = dc.CellLocationInfos.Where(row => row.Description.Equals(metadata.LocationDescription)).First().LocationID;
                sequence.Metadata.Add(rCADMappingData, data);
                NextSeqID++;
            }
        }
        finally
        {
            // Close the connection on every path; previously a failing lookup skipped
            // the Close call and left the connection open.
            dc.Connection.Close();
        }

        MappedSuccessfully = true;
        return true;
    }
    catch
    {
        // Any failure (including a failing Close) is reported to the caller as "not mapped".
        return false;
    }
}
// Aligns two looked-up proteins with the BLOSUM62 score provider (gap open penalty 4),
// prints the alignment and score, then computes a p-value with a permutation limit of 1.
// NOTE(review): the method name mentions P15172 and P17542, but the accession ids looked
// up below are P10085 and P15172 - confirm which pair is intended.
public async Task SmithWatermanP15172ToP17542WithBlosum62ScoringScheme()
{
    SequenceMetadata item1 = await FastALookupCient.LookupByAccessionIdAsync("P10085");
    SequenceMetadata item2 = await FastALookupCient.LookupByAccessionIdAsync("P15172");

    var localAlignmentImpl = new SmithWatermanImplementation(
        sequenceTomatch: item1,
        targetSequence: item2,
        scoreProvider: new Blosum62ScoreProvider(),
        gapOpenPenality: 4);

    AlignmentImplementationResults result = localAlignmentImpl.FindOptimalAlignment();

    // Check the result before it is dereferenced; asserting it at the end of the
    // test (as before) could never fail.
    Assert.IsNotNull(result);

    Console.WriteLine("--Optimal Alignment--");
    Console.WriteLine(result.TargetSequenceAlignment);
    Console.WriteLine(result.SearchSequenceAlignment);
    Console.WriteLine("--Optimal Score--");
    Console.WriteLine(result.AlignmentScore);

    var pvalueCalculator = new PValueCalculator<SmithWatermanImplementation>(
        alignmentImpl: localAlignmentImpl,
        permutationLimit: 1);

    string pValue = await pvalueCalculator.CalculatePValueAsync();

    // The p-value is what this step produces, so that is what gets asserted.
    Assert.IsNotNull(pValue);

    Console.WriteLine("--Calculated P value--");
    Console.WriteLine(pValue);
    Console.WriteLine(result.PrettyPrint());
    Console.WriteLine(result.PrettyPrintScoreMatrix());
}
/// <summary>
/// Reads every XML resource whose name starts with "anikeytext" and converts each child
/// of its document element into an <c>AnimationMetadata</c> holding the "seq" children
/// and their keys.
/// </summary>
/// <returns>One entry per child node of each matching document's root element.</returns>
protected override List<AnimationMetadata> Parse()
{
    List<AnimationMetadata> animations = new List<AnimationMetadata>();

    foreach (PackFileEntry entry in Resources.XmlFiles)
    {
        // Resource names are identifiers, so compare ordinally rather than by culture.
        if (!entry.Name.StartsWith("anikeytext", StringComparison.Ordinal))
        {
            continue;
        }

        XmlDocument document = Resources.XmlMemFile.GetDocument(entry.FileHeader);

        foreach (XmlNode animationNode in document.DocumentElement.ChildNodes)
        {
            AnimationMetadata metadata = new AnimationMetadata();

            // Only "kfm" nodes carry the actor id; other nodes keep the default.
            if (animationNode.Name == "kfm")
            {
                metadata.ActorId = animationNode.Attributes["name"].Value;
            }

            foreach (XmlNode sequenceNode in animationNode)
            {
                if (sequenceNode.Name != "seq")
                {
                    continue;
                }

                SequenceMetadata sequence = new SequenceMetadata();
                // Numeric attributes are machine-written data: parse them with the invariant
                // culture so the result does not depend on the machine's regional settings
                // (e.g. "0.5" would be rejected or misread where ',' is the decimal separator).
                sequence.SequenceId = short.Parse(
                    sequenceNode.Attributes["id"].Value,
                    System.Globalization.CultureInfo.InvariantCulture);
                sequence.SequenceName = sequenceNode.Attributes["name"].Value;

                foreach (XmlNode keyNode in sequenceNode)
                {
                    KeyMetadata key = new KeyMetadata();
                    key.KeyName = keyNode.Attributes["name"].Value;
                    key.KeyTime = float.Parse(
                        keyNode.Attributes["time"].Value,
                        System.Globalization.CultureInfo.InvariantCulture);
                    sequence.Keys.Add(key);
                }

                metadata.Sequence.Add(sequence);
            }

            animations.Add(metadata);
        }
    }

    return animations;
}
/// <summary>
/// Resolves a textual sequence key of the form <c>Entity.Property</c> or
/// <c>Entity:Property</c>, optionally followed by <c>@</c> and reference values separated
/// by ',', '|' or '-', to the sequence of that property and builds the final sequence key.
/// </summary>
/// <param name="key">The textual sequence key.</param>
/// <param name="sequence">Receives the sequence of the resolved property.</param>
/// <returns>The key produced by the (sequence, data) overload.</returns>
/// <exception cref="ArgumentNullException"><paramref name="key"/> is null or empty.</exception>
/// <exception cref="ArgumentException">
/// The key has no entity/property separator, the entity or property does not exist, the
/// property is complex, or the property has no sequence.
/// </exception>
private string GetSequenceKey(string key, out SequenceMetadata sequence)
{
    sequence = null;

    if (string.IsNullOrEmpty(key))
    {
        throw new ArgumentNullException(nameof(key));
    }

    var index = key.LastIndexOfAny(new[] { ':', '.', '@' });

    // Exclusive end of the property name; moves to the '@' when reference data follows.
    var nameEnd = key.Length;
    object data = null;

    if (index > 0 && key[index] == '@')
    {
        data = key.Substring(index + 1).Split(',', '|', '-');
        nameEnd = index;
        index = key.LastIndexOfAny(new[] { ':', '.' }, index);
    }

    if (index < 0)
    {
        throw new ArgumentException($"Invalid sequence key, the sequence key must separate the entity name and property name with a colon or a dot.");
    }

    var entityName = key.Substring(0, index);

    // Stop at nameEnd so the "@data" suffix is not treated as part of the property name;
    // previously "Entity.Prop@1,2" looked up a property literally named "Prop@1,2".
    var propertyName = key.Substring(index + 1, nameEnd - index - 1);

    if (!_provider.Metadata.Entities.TryGet(entityName, out var entity))
    {
        throw new ArgumentException($"The '{entityName}' entity specified in the sequence key does not exist.");
    }

    if (!entity.Properties.TryGet(propertyName, out var found) || found.IsComplex)
    {
        throw new ArgumentException($"The '{propertyName}' property specified in the sequence key does not exist or is not a simplex property.");
    }

    sequence = ((IEntitySimplexPropertyMetadata)found).Sequence;

    if (sequence == null)
    {
        throw new ArgumentException($"The '{found.Name}' property specified in the sequence key is undefined.");
    }

    return this.GetSequenceKey(sequence, data);
}
// Parses a single FASTA record for P15172 and checks the accession id, the description
// and the start of the residue sequence.
public void FastAParserMainline()
{
    // NOTE(review): this literal sits on one physical line here; the assertions below expect
    // the header and the residue lines to be told apart - confirm the line breaks inside
    // the literal match the real test data.
    string testString = @"sp|P15172|MYOD1_HUMAN Myoblast determination protein 1 OS=H**o sapiens GN=MYOD1 PE=1 SV=3 MELLSPPLRDVDLTAPDGSLCSFATTDDFYDDPCFDSPDLRFFEDLDPRLMHVGALLKPE EHSHFPAAVHPAPGAREDEHVRAPSGHHQAGRCLLWACKACKRKTTNADRRKAATMRERR RLSKVNEAFETLKRCTSSNPNQRLPKVEILRNAIRYIEGLQALLRDQDAAPPGAAAAFYA PGPLPPGRGGEHYSGDSDASSPRSNCSDGMMDYSGPPSGARRRNCYEGAYYNEAPSEPRP GKSAAVSSLDCLSSIVERISTESPAAPALLLADVPSESPPRRQEAAAPSEGESSGDPTQS PDAAPQCPAGANPNPIYQVL";

    SequenceMetadata item = FastAParser.ParseString(testString);

    Assert.AreEqual("P15172", item.AccessionId, "AccessionId mismatch");

    // The failure message now names the field actually being compared.
    Assert.AreEqual(
        "MYOD1_HUMAN Myoblast determination protein 1 OS=H**o sapiens GN=MYOD1 PE=1 SV=3",
        item.Description,
        "Description mismatch");

    Assert.IsTrue(
        item.Sequence.Contains("MELLSPPLRDVDLTAPDGSLCSFATTDDFYDDPCFDSPDLRFFEDLDPRLMHVGALLKPEEHSHFPAAVHPAPGAREDEHVRAPSGHHQAGRCLLWACKACKRKTTNADRRKAATMRERR"),
        "Sequence is wrong");
}
// Runs ten Viterbi-and-train iterations with the GC patch parameters over the looked-up
// genome sequence and prints the hits of each iteration (top 5 for the first nine, all
// hits for the tenth).
public async Task ViterbiGCPatchTenRuns()
{
    var gcPatchParameters = new GCPatchParameters();
    SequenceMetadata item1 = await FastALookupCient.LookupByAccessionIdAsync("GCF_000091665.1_ASM9166v1_genomic");

    var viterbigcPatch = new ViterbiImpl(gcPatchParameters, input: item1.Sequence);
    List<ViterbiResult> results = viterbigcPatch.ExecuteViterbiAndTrain(executionCount: 10);

    // Assert before the list is indexed; at the end of the test it could never fail.
    Assert.IsNotNull(results);

    for (int i = 0; i < 9; i++)
    {
        Console.WriteLine("Iteration {0}", i + 1);
        Console.WriteLine("---------------------------------------------------------------------------");
        Console.WriteLine(results[i].PrettyPrint(interestedStateIndex: 1, numberOfHits: 5));
    }

    Console.WriteLine("Iteration {0}", 10);
    Console.WriteLine("---------------------------------------------------------------------------");
    Console.WriteLine(results[9].PrettyPrintAllHits(interestedStateIndex: 1));
}
// Aligns Q10574 against P15172 with the simple score provider (gap open penalty 1) and
// pins the expected score and both aligned strings.
public async Task SmithWatermanImplementationMainlineWithSimpleScoringScheme()
{
    SequenceMetadata item1 = await FastALookupCient.LookupByAccessionIdAsync("Q10574");
    SequenceMetadata item2 = await FastALookupCient.LookupByAccessionIdAsync("P15172");

    var localAlignmentImpl = new SmithWatermanImplementation(
        sequenceTomatch: item1,
        targetSequence: item2,
        scoreProvider: new SimpleScoreProvider(),
        gapOpenPenality: 1);

    AlignmentImplementationResults result = localAlignmentImpl.FindOptimalAlignment();

    Console.WriteLine("--Optimal Alignment--");
    Console.WriteLine(result.TargetSequenceAlignment);
    Console.WriteLine(result.SearchSequenceAlignment);
    Console.WriteLine("--Optimal Score--");
    Console.WriteLine(result.AlignmentScore);

    Assert.AreEqual(expected: 19, actual: result.AlignmentScore, message: "Mismatching alignment scores");

    Assert.AreEqual(
        expected: "VE-IL-RNA-IRY-I-E-GL-QA-LL-RDQD",
        actual: result.TargetSequenceAlignment,
        message: "Mismatching target alignment sequence");

    // This assertion checks the search alignment, so its message says so (it used to
    // repeat the target-alignment message, which made failures ambiguous).
    Assert.AreEqual(
        expected: "-FE-TL-QMA-QKY-I-E-CL-SQ-IL-KQD",
        actual: result.SearchSequenceAlignment,
        message: "Mismatching search alignment sequence");
}
/// <summary>
/// Initializes the alignment algorithm with the two sequences, the score provider and
/// the gap open penalty it will work with.
/// </summary>
/// <param name="sequenceTomatch">Sequence stored as <c>SequenceToMatch</c>; must not be null.</param>
/// <param name="targetSequence">Sequence stored as <c>TargetSequence</c>; must not be null.</param>
/// <param name="scoreProvider">Provider stored as <c>ScoreProvider</c>; must not be null.</param>
/// <param name="gapOpenPenality">Value stored as <c>GapOpenPenality</c>.</param>
/// <exception cref="ArgumentNullException">Any of the three reference arguments is null.</exception>
public AlignmentImplementationBase(
    SequenceMetadata sequenceTomatch,
    SequenceMetadata targetSequence,
    AlignmentScoreProviderBase scoreProvider,
    int gapOpenPenality)
{
    // Arguments are validated in declaration order, before any state is assigned.
    if (sequenceTomatch == null)
    {
        throw new ArgumentNullException(nameof(sequenceTomatch));
    }

    if (targetSequence == null)
    {
        throw new ArgumentNullException(nameof(targetSequence));
    }

    if (scoreProvider == null)
    {
        throw new ArgumentNullException(nameof(scoreProvider));
    }

    this.TargetSequence = targetSequence;
    this.SequenceToMatch = sequenceTomatch;
    this.ScoreProvider = scoreProvider;
    this.GapOpenPenality = gapOpenPenality;
}
// Runs ten Viterbi-and-train iterations with the dice roll parameters over the looked-up
// "DiceRoll" sequence and prints the hits and the state transition representation of each
// iteration (top 5 hits for the first nine, all hits for the tenth).
public async Task ViterbiDiceRollTenRuns()
{
    var diceRollParams = new DiceRollParameters();
    SequenceMetadata item1 = await FastALookupCient.LookupByAccessionIdAsync("DiceRoll");

    var viterbiDiceRoll = new ViterbiImpl(diceRollParams, input: item1.Sequence);
    List<ViterbiResult> results = viterbiDiceRoll.ExecuteViterbiAndTrain(executionCount: 10);

    // Assert before the list is indexed; at the end of the test it could never fail.
    Assert.IsNotNull(results);

    for (int i = 0; i < 9; i++)
    {
        Console.WriteLine("Iteration {0}", i + 1);
        Console.WriteLine("---------------------------------------------------------------------------");
        Console.WriteLine(results[i].PrettyPrint(interestedStateIndex: 1, numberOfHits: 5));
        Console.WriteLine(results[i].StateTransitionRepresentaton);
    }

    Console.WriteLine("Iteration {0}", 10);
    Console.WriteLine("---------------------------------------------------------------------------");
    Console.WriteLine(results[9].PrettyPrintAllHits(interestedStateIndex: 1));
    Console.WriteLine(results[9].StateTransitionRepresentaton);
}
/// <summary>
/// Populates the genome metadata from an XML file
/// </summary>
/// <remarks>
/// Resets <c>Length</c> and <c>KnownBases</c>, takes the genome name from the
/// "sequenceSizes" element and appends one <c>SequenceMetadata</c> to <c>Sequences</c>
/// for every "chromosome" element.
/// </remarks>
/// <param name="inputFilename">Path of the XML file; it is converted to a full path before use.</param>
public void Deserialize(string inputFilename)
{
    // open the XML file
    inputFilename = Path.GetFullPath(inputFilename);
    string directory = Path.GetDirectoryName(inputFilename);
    Length = 0;
    KnownBases = 0; // initial
    int refIndex = 0;
    IGenomesReferencePath iGenomesReference = IGenomesReferencePath.GetReferenceFromFastaPath(directory);

    // use StreamReader to avoid URI parsing of filename that will cause problems with
    // certain characters in the path (#).
    // The StreamReader gets its own using: an XmlReader created over a TextReader does not
    // close that reader by default, so disposing only the XmlReader left the file open.
    using (var streamReader = new StreamReader(inputFilename))
    using (var xmlReader = XmlReader.Create(streamReader))
    {
        while (xmlReader.Read())
        {
            // only element nodes carry the attributes read below
            if (xmlReader.NodeType != XmlNodeType.Element)
            {
                continue;
            }

            // retrieve the genome variables
            if (xmlReader.Name == "sequenceSizes")
            {
                Name = xmlReader.GetAttribute("genomeName");
                if (iGenomesReference != null && string.IsNullOrEmpty(Name))
                {
                    Name = iGenomesReference.ToString();
                }
            }

            // retrieve the chromosome variables
            if (xmlReader.Name == "chromosome")
            {
                SequenceMetadata refSeq = new SequenceMetadata
                {
                    FastaPath = Path.Combine(directory, xmlReader.GetAttribute("fileName")),
                    Name = xmlReader.GetAttribute("contigName"),
                    Index = refIndex++,
                    Length = long.Parse(xmlReader.GetAttribute("totalBases")),
                    Type = ParseSequenceType(xmlReader.GetAttribute("type"))
                };
                Length += refSeq.Length;
                refSeq.Build = xmlReader.GetAttribute("build");
                refSeq.Species = xmlReader.GetAttribute("species");

                // update species and build from fasta path if in iGenomes format
                if (iGenomesReference != null)
                {
                    if (string.IsNullOrEmpty(refSeq.Build))
                    {
                        refSeq.Build = iGenomesReference.Build;
                    }

                    if (string.IsNullOrEmpty(refSeq.Species))
                    {
                        refSeq.Species = iGenomesReference.Species;
                    }
                }

                // the remaining attributes are optional: only applied when present and non-empty
                string isCircular = xmlReader.GetAttribute("isCircular");
                if (!string.IsNullOrEmpty(isCircular))
                {
                    refSeq.IsCircular = (isCircular == "true");
                }

                string ploidy = xmlReader.GetAttribute("ploidy");
                if (!string.IsNullOrEmpty(ploidy))
                {
                    refSeq.Ploidy = int.Parse(ploidy);
                }

                string md5 = xmlReader.GetAttribute("md5");
                if (!string.IsNullOrEmpty(md5))
                {
                    refSeq.Checksum = md5;
                }

                string knownBases = xmlReader.GetAttribute("knownBases");
                if (!string.IsNullOrEmpty(knownBases))
                {
                    refSeq.KnownBases = long.Parse(knownBases);
                    KnownBases += refSeq.KnownBases;
                }

                Sequences.Add(refSeq);
            }
        }
    }
}
/// <summary>
/// Builds the storage key of a sequence: the key prefix, the entity and property names
/// and, when the sequence declares references, the reference values read from
/// <paramref name="data"/> (the first introduced by ':', the following ones by '-').
/// </summary>
/// <param name="sequence">The sequence whose key is built.</param>
/// <param name="data">
/// Source of the reference values: an <c>IEntity</c>, a generic or non-generic dictionary,
/// a scalar value or array, or any other object read by member name.
/// </param>
/// <returns>The sequence key.</returns>
/// <exception cref="InvalidOperationException">
/// The sequence has references but <paramref name="data"/> is null or does not provide a
/// non-null value for one of them.
/// </exception>
private string GetSequenceKey(SequenceMetadata sequence, object data)
{
    var key = SEQUENCE_KEY + sequence.Property.Entity.Name + "." + sequence.Property.Name;

    if (sequence.References == null || sequence.References.Length == 0)
    {
        return key;
    }

    if (data == null)
    {
        throw new InvalidOperationException($"Missing required references data for the '{sequence.Name}' sequence.");
    }

    var index = 0;

    foreach (var reference in sequence.References)
    {
        // Scoped per reference so a value can never leak from one reference to the next.
        object value = null;

        switch (data)
        {
            case IEntity entity:
                if (!entity.TryGetValue(reference.Name, out value) || value == null)
                {
                    throw new InvalidOperationException($"The required '{reference.Name}' reference of sequence is not included in the data.");
                }

                break;
            case IDictionary<string, object> genericDictionary:
                if (!genericDictionary.TryGetValue(reference.Name, out value) || value == null)
                {
                    throw new InvalidOperationException($"The required '{reference.Name}' reference of sequence is not included in the data.");
                }

                break;
            case IDictionary classicDictionary:
                // Actually read the entry; this branch used to test 'value' without ever
                // assigning it, so it threw even when the key was present.
                value = classicDictionary.Contains(reference.Name) ? classicDictionary[reference.Name] : null;

                if (value == null)
                {
                    throw new InvalidOperationException($"The required '{reference.Name}' reference of sequence is not included in the data.");
                }

                break;
            default:
                if (Zongsoft.Common.TypeExtension.IsScalarType(data.GetType()))
                {
                    if (data.GetType().IsArray)
                    {
                        var items = (Array)data;

                        // Fewer values than references is reported like any other missing reference.
                        if (index >= items.Length)
                        {
                            throw new InvalidOperationException($"The required '{reference.Name}' reference of sequence is not included in the data.");
                        }

                        value = items.GetValue(index) ?? throw new InvalidOperationException($"The required '{reference.Name}' reference of sequence is not included in the data.");
                    }
                    else
                    {
                        value = data.ToString();
                    }
                }
                else
                {
                    // Keep the member value that was read; it used to be discarded, leaving
                    // 'value' null (or stale) when appended to the key below.
                    value = Reflection.Reflector.GetValue(data, reference.Name);

                    if (value == null)
                    {
                        throw new InvalidOperationException($"The required '{reference.Name}' reference of sequence is not included in the data.");
                    }
                }

                break;
        }

        if (index++ == 0)
        {
            key += ":";
        }
        else
        {
            key += "-";
        }

        key += value.ToString().Trim();
    }

    return key;
}
// Entry point: aligns a small hand-written pair of sequences, then every ordered pair of
// the listed protein accession ids, writing alignments, scores and selected empirical
// p-values to both the console and the output file, followed by the score matrix.
static void Main(string[] args)
{
    string outputLocation = @"c:\temp\output_jeeshn.txt";
    var proteinAccessionIds = new string[]
    {
        "P15172", "P17542", "P10085", "P16075", "P13904",
        "Q90477", "Q8IU24", "P22816", "Q10574", "O95363"
    };

    // Input data for a simple sequence
    var item1 = new SequenceMetadata { AccessionId = "X1", Sequence = "deadly" };
    var item2 = new SequenceMetadata { AccessionId = "X2", Sequence = "ddgearlyk" };

    using (var fileStream = File.Open(outputLocation, FileMode.Create))
    using (var streamWriter = new StreamWriter(fileStream))
    {
        // Run the local alignment .
        var localAlignmentImpl = new SmithWatermanImplementation(
            sequenceTomatch: item1,
            targetSequence: item2,
            scoreProvider: new Blosum62ScoreProvider(),
            gapOpenPenality: 4);

        AlignmentImplementationResults result = localAlignmentImpl.FindOptimalAlignment();

        // "Capture" output
        WriteToConsoleAndFile(
            String.Format("{0} vs {1}", item1.AccessionId, item2.AccessionId),
            streamWriter);
        WriteToConsoleAndFile("Alignment Score", streamWriter);
        WriteToConsoleAndFile(result.AlignmentScore.ToString(), streamWriter);
        WriteToConsoleAndFile("Alignment", streamWriter);
        WriteToConsoleAndFile(result.PrettyPrint(), streamWriter);
        WriteToConsoleAndFile("Score Matrix", streamWriter);
        WriteToConsoleAndFile(result.PrettyPrintScoreMatrix(), streamWriter);

        // P-value calculation
        var pvalueCalculator = new PValueCalculator<SmithWatermanImplementation>(
            alignmentImpl: localAlignmentImpl,
            permutationLimit: 999);

        //Print p values
        string pValue = pvalueCalculator.CalculatePValueAsync().Result;
        WriteToConsoleAndFile("Empirical p-value", streamWriter);
        WriteToConsoleAndFile(pValue, streamWriter);
        WriteToConsoleAndFile(String.Empty, streamWriter);

        // Sized from the id list (was a hard-coded 10x10) so that changing the list cannot
        // push the indices below out of range.
        int[,] scoreMatrix = new int[proteinAccessionIds.Length, proteinAccessionIds.Length];

        for (int i = 0; i < proteinAccessionIds.Length; i++)
        {
            for (int j = 0; j < proteinAccessionIds.Length; j++)
            {
                // no need to compare same sequences
                if (i != j)
                {
                    item1 = FastALookupCient.LookupByAccessionIdAsync(proteinAccessionIds[i]).Result;
                    item2 = FastALookupCient.LookupByAccessionIdAsync(proteinAccessionIds[j]).Result;

                    localAlignmentImpl = new SmithWatermanImplementation(
                        sequenceTomatch: item1,
                        targetSequence: item2,
                        scoreProvider: new Blosum62ScoreProvider(),
                        gapOpenPenality: 4);

                    result = localAlignmentImpl.FindOptimalAlignment();

                    // "Capture" output
                    WriteToConsoleAndFile(
                        String.Format("{0} vs {1}", item1.AccessionId, item2.AccessionId),
                        streamWriter);
                    WriteToConsoleAndFile("Alignment Score", streamWriter);
                    WriteToConsoleAndFile(result.AlignmentScore.ToString(), streamWriter);
                    WriteToConsoleAndFile("Alignment", streamWriter);
                    WriteToConsoleAndFile(result.PrettyPrint(), streamWriter);

                    scoreMatrix[i, j] = result.AlignmentScore;

                    // p-values are only computed for P15172 against Q10574 and O95363
                    if (item1.AccessionId == "P15172" &&
                        (item2.AccessionId == "Q10574" || item2.AccessionId == "O95363"))
                    {
                        pvalueCalculator = new PValueCalculator<SmithWatermanImplementation>(
                            alignmentImpl: localAlignmentImpl,
                            permutationLimit: 999);

                        //Print p values
                        pValue = pvalueCalculator.CalculatePValueAsync().Result;
                        WriteToConsoleAndFile("Empirical p-value", streamWriter);
                        WriteToConsoleAndFile(pValue, streamWriter);
                        WriteToConsoleAndFile(String.Empty, streamWriter);
                    }
                }
            }
        }

        // Print the matrix: cells on or above the diagonal show the stored score, cells
        // below it are printed as 0.
        var stringBuilder = new StringBuilder();
        for (int i = 0; i <= scoreMatrix.GetUpperBound(0); i++)
        {
            for (int j = 0; j <= scoreMatrix.GetUpperBound(1); j++)
            {
                if (j >= i)
                {
                    stringBuilder.Append(scoreMatrix[i, j].ToString().PadRight(6, ' '));
                }
                else
                {
                    stringBuilder.Append("0".PadRight(6, ' '));
                }
            }

            stringBuilder.AppendLine();
        }

        WriteToConsoleAndFile("Protein scoring matrix", streamWriter);
        WriteToConsoleAndFile(stringBuilder.ToString(), streamWriter);

        streamWriter.Flush();
        fileStream.Flush();
    }

    Console.ReadLine();
}
// Writes the import files for the mapped alignment: one record per line with fields
// separated by "\t|\t". Eight files are produced (alignment, alignment columns, alignment
// sequences, alignment data, sequence main, sequence accessions, secondary structure base
// pairs and secondary structure extents).
private void CreateAlignmentImportFiles()
{
    // Every writer is held in a using so all files are flushed and closed even when a
    // lookup below throws; previously they were closed only on the success path.
    using (StreamWriter alignmentcolumn = File.CreateText(_alignmentColumnFile))
    using (StreamWriter alignment = File.CreateText(_alignmentFile))
    using (StreamWriter alignmentsequence = File.CreateText(_alignmentSequenceDataFile))
    using (StreamWriter alignmentdata = File.CreateText(_alignmentDataFile))
    using (StreamWriter sequencemain = File.CreateText(_sequenceMainFile))
    using (StreamWriter sequenceaccession = File.CreateText(_sequenceAccessionFile))
    using (StreamWriter secondarystructurebasepairs = File.CreateText(_secondaryStructureBasePairsFile))
    using (StreamWriter secondarystructureextents = File.CreateText(_secondaryStructureExtentsFile))
    {
        //rCAD.Alignment: Entry in the alignment table for the new alignment
        alignment.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}", _data.AlignmentID, _data.AlignmentSeqTypeID, _data.MappedAlignment.LogicalName, _data.MappedAlignment.Columns + 1);

        //rCAD.AlignmentColumn: We have a 1 to 1 mapping of the logical and physical column numbers at the start
        for (int i = 0; i < _data.MappedAlignment.Columns; i++)
        {
            alignmentcolumn.WriteLine("{0}\t|\t{1}\t|\t{2}", _data.AlignmentID, i + 1, i + 1);
        }

        //rCAD.AlignmentSequence, rCAD.AlignmentData, rCAD.SequenceMain, rCAD.SequenceAccession written on a per-sequence basis
        //We will do duplicate checking inside the database.
        foreach (var sequence in _data.MappedAlignment.Sequences)
        {
            SequenceMetadata metadata = (SequenceMetadata)sequence.Metadata[SequenceMetadata.SequenceMetadataLabel];
            SequenceMappingData mappingMetadata = (SequenceMappingData)sequence.Metadata[Mapper.rCADMappingData];
            int seqLengthMetadata = metadata.SequenceLength;
            int firstNtColNum = -1;
            int lastNtColNumber = -1;
            int sequenceIndex = 1;

            for (int i = 0; i < sequence.Count; i++)
            {
                if (!sequence[i].IsGap) //We are only actually writing the non-gap positions.
                {
                    alignmentdata.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}", mappingMetadata.SeqID, _data.AlignmentID, i + 1, sequence[i].Symbol, sequenceIndex);
                    sequenceIndex++;

                    if (firstNtColNum < 0)
                    {
                        firstNtColNum = i + 1;
                    }

                    //KJD, 1/21/2010 - A nasty little bug right here where lastNtColNumber = i + 0 SHOULD be lastNtColNumber = i + 1!
                    if (firstNtColNum > 0)
                    {
                        lastNtColNumber = i + 1; //We just set the last col num value to the last column with data we've seen.
                    }
                }
            }

            if ((sequenceIndex - 1) != seqLengthMetadata)
            {
                Console.WriteLine("Warning: Existing metadata for SeqLength ({0}) does not match number of observed nt ({1}) for {2}", seqLengthMetadata, sequenceIndex - 1, sequence.ID);
            }

            sequencemain.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}\t|\t\t|\t", mappingMetadata.SeqID, mappingMetadata.TaxID, mappingMetadata.LocationID, _data.AlignmentSeqTypeID, seqLengthMetadata);
            alignmentsequence.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}", mappingMetadata.SeqID, _data.AlignmentID, metadata.AlignmentRowName, firstNtColNum, lastNtColNumber);

            foreach (var gbentry in metadata.Accessions)
            {
                sequenceaccession.WriteLine("{0}\t|\t{1}\t|\t{2}", mappingMetadata.SeqID, gbentry.Accession, gbentry.Version);
            }

            if (metadata.StructureModel != null && metadata.StructureModel.Pairs.Count() > 0)
            {
                foreach (int fivePrime in metadata.StructureModel.Pairs.Keys)
                {
                    secondarystructurebasepairs.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}", mappingMetadata.SeqID, _data.AlignmentID, fivePrime, metadata.StructureModel.Pairs[fivePrime]);
                }

                // Extent ids are numbered from 1 per sequence and shared across all extent
                // kinds; two-sided extents write ordinals 1 and 2 under the same id.
                int extentID = 1;

                foreach (var helix in metadata.StructureModel.Helices)
                {
                    secondarystructureextents.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}\t|\t{5}\t|\t{6}", mappingMetadata.SeqID, _data.AlignmentID, extentID, 1, helix.FivePrimeStart, helix.FivePrimeEnd, _data.ExtentTypeIDs["Helix"]);
                    secondarystructureextents.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}\t|\t{5}\t|\t{6}", mappingMetadata.SeqID, _data.AlignmentID, extentID, 2, helix.ThreePrimeStart, helix.ThreePrimeEnd, _data.ExtentTypeIDs["Helix"]);
                    extentID++;
                }

                foreach (var hairpinloop in metadata.StructureModel.Hairpins)
                {
                    secondarystructureextents.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}\t|\t{5}\t|\t{6}", mappingMetadata.SeqID, _data.AlignmentID, extentID, 1, hairpinloop.Loop.LoopStart, hairpinloop.Loop.LoopEnd, _data.ExtentTypeIDs["Hairpin Loop"]);
                    extentID++;
                }

                foreach (var internalloop in metadata.StructureModel.Internals)
                {
                    secondarystructureextents.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}\t|\t{5}\t|\t{6}", mappingMetadata.SeqID, _data.AlignmentID, extentID, 1, internalloop.FivePrimeLoop.LoopStart, internalloop.FivePrimeLoop.LoopEnd, _data.ExtentTypeIDs["Internal Loop"]);
                    secondarystructureextents.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}\t|\t{5}\t|\t{6}", mappingMetadata.SeqID, _data.AlignmentID, extentID, 2, internalloop.ThreePrimeLoop.LoopStart, internalloop.ThreePrimeLoop.LoopEnd, _data.ExtentTypeIDs["Internal Loop"]);
                    extentID++;
                }

                foreach (var bulgeloop in metadata.StructureModel.Bulges)
                {
                    secondarystructureextents.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}\t|\t{5}\t|\t{6}", mappingMetadata.SeqID, _data.AlignmentID, extentID, 1, bulgeloop.Bulge.LoopStart, bulgeloop.Bulge.LoopEnd, _data.ExtentTypeIDs["Bulge Loop"]);
                    extentID++;
                }

                foreach (var multistemloop in metadata.StructureModel.Stems)
                {
                    int stemordinal = 1;
                    foreach (var stem in multistemloop.Segments)
                    {
                        secondarystructureextents.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}\t|\t{5}\t|\t{6}", mappingMetadata.SeqID, _data.AlignmentID, extentID, stemordinal, stem.LoopStart, stem.LoopEnd, _data.ExtentTypeIDs["Multistem Loop"]);
                        stemordinal++;
                    }

                    extentID++;
                }

                foreach (var free in metadata.StructureModel.Strands)
                {
                    secondarystructureextents.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}\t|\t{5}\t|\t{6}", mappingMetadata.SeqID, _data.AlignmentID, extentID, 1, free.LoopStart, free.LoopEnd, _data.ExtentTypeIDs["Free"]);
                    extentID++;
                }

                foreach (var tail in metadata.StructureModel.Tails)
                {
                    secondarystructureextents.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}\t|\t{5}\t|\t{6}", mappingMetadata.SeqID, _data.AlignmentID, extentID, 1, tail.LoopStart, tail.LoopEnd, _data.ExtentTypeIDs["Tail"]);
                    extentID++;
                }

                foreach (var knot in metadata.StructureModel.KnottedHelices)
                {
                    secondarystructureextents.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}\t|\t{5}\t|\t{6}", mappingMetadata.SeqID, _data.AlignmentID, extentID, 1, knot.FivePrimeStart, knot.FivePrimeEnd, _data.ExtentTypeIDs["Pseudoknot Helix"]);
                    secondarystructureextents.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}\t|\t{5}\t|\t{6}", mappingMetadata.SeqID, _data.AlignmentID, extentID, 2, knot.ThreePrimeStart, knot.ThreePrimeEnd, _data.ExtentTypeIDs["Pseudoknot Helix"]);
                    extentID++;
                }
            }
        }
    }
}