/// <summary>
/// Looks up the database identifiers needed to load the alignment and attaches a
/// <c>SequenceMappingData</c> entry (SeqID, TaxID, LocationID) to the metadata of every
/// sequence in the alignment.
/// </summary>
/// <returns>
/// <c>true</c> when every lookup succeeded and every sequence was tagged (in which case
/// <c>MappedSuccessfully</c> is also set); <c>false</c> when <c>MappedAlignment</c> is null or
/// any step threw an exception.
/// </returns>
/// <remarks>
/// Failures are reported only through the return value: the exception itself is swallowed.
/// Values assigned before the failure (NextSeqID, AlignmentID, NextAlnID, AlignmentSeqTypeID,
/// ExtentTypeIDs, and metadata entries already added to earlier sequences) are not rolled back.
/// </remarks>
public bool Map()
{
    if (MappedAlignment == null)
    {
        return false;
    }

    try
    {
        rCADDataContext dc = CreateDataContext();
        try
        {
            NextSeqID = dc.NextSeqIDs.Select(row => row.SeqID).First();
            AlignmentID = dc.NextAlnIDs.Select(row => row.AlnID).First();
            NextAlnID = AlignmentID + 1;

            // First() throws when no sequence type matches the alignment's molecule/gene
            // description; that surfaces to the caller as a 'false' return.
            AlignmentSeqTypeID = dc.SequenceTypes
                .First(row => row.MoleculeType.Equals(_alignment.MoleculeType)
                              && row.GeneName.Equals(_alignment.GeneName)
                              && row.GeneType.Equals(_alignment.GeneType))
                .SeqTypeID;

            // Inner join on scientific name: sequences whose name has no row in TaxonomyNames
            // are simply absent from this dictionary and fall back to the root taxon below.
            var seqToTaxID =
                (from seq in _alignment.Sequences
                 join taxonomyNameRow in dc.TaxonomyNames
                     on ((SequenceMetadata)seq.Metadata[SequenceMetadata.SequenceMetadataLabel]).ScientificName
                     equals taxonomyNameRow.ScientificName
                 select new { seq.ID, taxonomyNameRow.TaxID })
                .ToDictionary(match => match.ID, match => match.TaxID);

            int rootTaxID =
                (from taxname in dc.TaxonomyNames
                 where taxname.ScientificName.Equals("root")
                 select taxname.TaxID).First();

            ExtentTypeIDs =
                (from bar in dc.SecondaryStructureExtentTypes
                 select new { bar.ExtentTypeID, bar.ExtentType })
                .ToDictionary(match => match.ExtentType, match => match.ExtentTypeID);

            foreach (var sequence in _alignment.Sequences)
            {
                SequenceMetadata metadata =
                    (SequenceMetadata)sequence.Metadata[SequenceMetadata.SequenceMetadataLabel];

                SequenceMappingData data = new SequenceMappingData();
                data.SeqID = NextSeqID;

                // The sequence is mapped to the root of the Taxonomy tree if we don't have
                // mapping info.
                data.TaxID = seqToTaxID.ContainsKey(sequence.ID)
                    ? seqToTaxID[sequence.ID]
                    : rootTaxID;

                data.LocationID = dc.CellLocationInfos
                    .First(row => row.Description.Equals(metadata.LocationDescription))
                    .LocationID;

                sequence.Metadata.Add(rCADMappingData, data);
                NextSeqID++;
            }
        }
        finally
        {
            // Close the connection whether or not the lookups succeeded; previously a failed
            // lookup skipped the Close() call and left the connection open.
            dc.Connection.Close();
        }

        MappedSuccessfully = true;
        return true;
    }
    catch
    {
        // Deliberate best-effort contract: any failure (including a failing Close()) is
        // reported to the caller as 'false' rather than as an exception.
        return false;
    }
}
/// <summary>
/// Writes the delimited text import files (fields separated by tab, pipe, tab) for the mapped
/// alignment: one file each for the alignment row, its columns, per-sequence alignment extents,
/// per-nucleotide alignment data, sequence main records, sequence accessions, secondary
/// structure base pairs and secondary structure extents.
/// </summary>
/// <remarks>
/// All eight files are created up front at the paths held in the corresponding fields. The
/// writers are wrapped in <c>using</c> statements so that every file is flushed and closed even
/// if an exception is thrown part-way through the export.
/// </remarks>
private void CreateAlignmentImportFiles()
{
    // Shared layout of every row written to the secondary structure extents file:
    // SeqID, AlignmentID, extent ID, ordinal within the extent, start, end, extent type ID.
    const string extentFormat = "{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}\t|\t{5}\t|\t{6}";

    using (StreamWriter alignmentcolumn = File.CreateText(_alignmentColumnFile))
    using (StreamWriter alignment = File.CreateText(_alignmentFile))
    using (StreamWriter alignmentsequence = File.CreateText(_alignmentSequenceDataFile))
    using (StreamWriter alignmentdata = File.CreateText(_alignmentDataFile))
    using (StreamWriter sequencemain = File.CreateText(_sequenceMainFile))
    using (StreamWriter sequenceaccession = File.CreateText(_sequenceAccessionFile))
    using (StreamWriter secondarystructurebasepairs = File.CreateText(_secondaryStructureBasePairsFile))
    using (StreamWriter secondarystructureextents = File.CreateText(_secondaryStructureExtentsFile))
    {
        // rCAD.Alignment: Entry in the alignment table for the new alignment
        alignment.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}",
            _data.AlignmentID,
            _data.AlignmentSeqTypeID,
            _data.MappedAlignment.LogicalName,
            _data.MappedAlignment.Columns + 1);

        // rCAD.AlignmentColumn: We have a 1 to 1 mapping of the logical and physical column
        // numbers at the start
        for (int i = 0; i < _data.MappedAlignment.Columns; i++)
        {
            alignmentcolumn.WriteLine("{0}\t|\t{1}\t|\t{2}", _data.AlignmentID, i + 1, i + 1);
        }

        // rCAD.AlignmentSequence, rCAD.AlignmentData, rCAD.SequenceMain, rCAD.SequenceAccession
        // written on a per-sequence basis.
        // We will do duplicate checking inside the database.
        foreach (var sequence in _data.MappedAlignment.Sequences)
        {
            SequenceMetadata metadata =
                (SequenceMetadata)sequence.Metadata[SequenceMetadata.SequenceMetadataLabel];
            SequenceMappingData mappingMetadata =
                (SequenceMappingData)sequence.Metadata[Mapper.rCADMappingData];

            int seqLengthMetadata = metadata.SequenceLength;
            int firstNtColNum = -1;   // stays -1 when the row contains only gaps
            int lastNtColNumber = -1; // stays -1 when the row contains only gaps
            int sequenceIndex = 1;    // 1-based position of the next non-gap symbol

            for (int i = 0; i < sequence.Count; i++)
            {
                // We are only actually writing the non-gap positions.
                if (sequence[i].IsGap)
                {
                    continue;
                }

                alignmentdata.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}",
                    mappingMetadata.SeqID,
                    _data.AlignmentID,
                    i + 1,
                    sequence[i].Symbol,
                    sequenceIndex);
                sequenceIndex++;

                if (firstNtColNum < 0)
                {
                    firstNtColNum = i + 1;
                }

                // Column numbers are 1-based, so the last data column is i + 1 (an earlier
                // revision used i + 0 here, which was a bug - KJD, 1/21/2010).
                lastNtColNumber = i + 1;
            }

            // The metadata length is still what gets written to SequenceMain below; a mismatch
            // with the observed nucleotide count is only reported.
            if ((sequenceIndex - 1) != seqLengthMetadata)
            {
                Console.WriteLine("Warning: Existing metadata for SeqLength ({0}) does not match number of observed nt ({1}) for {2}", seqLengthMetadata, sequenceIndex - 1, sequence.ID);
            }

            // The format string ends with two empty trailing fields after the five values.
            sequencemain.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}\t|\t\t|\t",
                mappingMetadata.SeqID,
                mappingMetadata.TaxID,
                mappingMetadata.LocationID,
                _data.AlignmentSeqTypeID,
                seqLengthMetadata);

            alignmentsequence.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}",
                mappingMetadata.SeqID,
                _data.AlignmentID,
                metadata.AlignmentRowName,
                firstNtColNum,
                lastNtColNumber);

            foreach (var gbentry in metadata.Accessions)
            {
                sequenceaccession.WriteLine("{0}\t|\t{1}\t|\t{2}",
                    mappingMetadata.SeqID, gbentry.Accession, gbentry.Version);
            }

            // Secondary structure is written only when a model with at least one pair exists.
            if (metadata.StructureModel == null || !(metadata.StructureModel.Pairs.Count() > 0))
            {
                continue;
            }

            var model = metadata.StructureModel;

            foreach (int fivePrime in model.Pairs.Keys)
            {
                secondarystructurebasepairs.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}",
                    mappingMetadata.SeqID, _data.AlignmentID, fivePrime, model.Pairs[fivePrime]);
            }

            // Extent IDs are numbered from 1 within each sequence and shared across all extent
            // kinds; two-sided extents write two rows (ordinals 1 and 2) under the same ID.
            int extentID = 1;

            foreach (var helix in model.Helices)
            {
                secondarystructureextents.WriteLine(extentFormat,
                    mappingMetadata.SeqID, _data.AlignmentID, extentID, 1,
                    helix.FivePrimeStart, helix.FivePrimeEnd, _data.ExtentTypeIDs["Helix"]);
                secondarystructureextents.WriteLine(extentFormat,
                    mappingMetadata.SeqID, _data.AlignmentID, extentID, 2,
                    helix.ThreePrimeStart, helix.ThreePrimeEnd, _data.ExtentTypeIDs["Helix"]);
                extentID++;
            }

            foreach (var hairpinloop in model.Hairpins)
            {
                secondarystructureextents.WriteLine(extentFormat,
                    mappingMetadata.SeqID, _data.AlignmentID, extentID, 1,
                    hairpinloop.Loop.LoopStart, hairpinloop.Loop.LoopEnd, _data.ExtentTypeIDs["Hairpin Loop"]);
                extentID++;
            }

            foreach (var internalloop in model.Internals)
            {
                secondarystructureextents.WriteLine(extentFormat,
                    mappingMetadata.SeqID, _data.AlignmentID, extentID, 1,
                    internalloop.FivePrimeLoop.LoopStart, internalloop.FivePrimeLoop.LoopEnd, _data.ExtentTypeIDs["Internal Loop"]);
                secondarystructureextents.WriteLine(extentFormat,
                    mappingMetadata.SeqID, _data.AlignmentID, extentID, 2,
                    internalloop.ThreePrimeLoop.LoopStart, internalloop.ThreePrimeLoop.LoopEnd, _data.ExtentTypeIDs["Internal Loop"]);
                extentID++;
            }

            foreach (var bulgeloop in model.Bulges)
            {
                secondarystructureextents.WriteLine(extentFormat,
                    mappingMetadata.SeqID, _data.AlignmentID, extentID, 1,
                    bulgeloop.Bulge.LoopStart, bulgeloop.Bulge.LoopEnd, _data.ExtentTypeIDs["Bulge Loop"]);
                extentID++;
            }

            foreach (var multistemloop in model.Stems)
            {
                // One row per segment, all sharing the loop's extent ID.
                int stemordinal = 1;
                foreach (var stem in multistemloop.Segments)
                {
                    secondarystructureextents.WriteLine(extentFormat,
                        mappingMetadata.SeqID, _data.AlignmentID, extentID, stemordinal,
                        stem.LoopStart, stem.LoopEnd, _data.ExtentTypeIDs["Multistem Loop"]);
                    stemordinal++;
                }
                extentID++;
            }

            foreach (var free in model.Strands)
            {
                secondarystructureextents.WriteLine(extentFormat,
                    mappingMetadata.SeqID, _data.AlignmentID, extentID, 1,
                    free.LoopStart, free.LoopEnd, _data.ExtentTypeIDs["Free"]);
                extentID++;
            }

            foreach (var tail in model.Tails)
            {
                secondarystructureextents.WriteLine(extentFormat,
                    mappingMetadata.SeqID, _data.AlignmentID, extentID, 1,
                    tail.LoopStart, tail.LoopEnd, _data.ExtentTypeIDs["Tail"]);
                extentID++;
            }

            foreach (var knot in model.KnottedHelices)
            {
                secondarystructureextents.WriteLine(extentFormat,
                    mappingMetadata.SeqID, _data.AlignmentID, extentID, 1,
                    knot.FivePrimeStart, knot.FivePrimeEnd, _data.ExtentTypeIDs["Pseudoknot Helix"]);
                secondarystructureextents.WriteLine(extentFormat,
                    mappingMetadata.SeqID, _data.AlignmentID, extentID, 2,
                    knot.ThreePrimeStart, knot.ThreePrimeEnd, _data.ExtentTypeIDs["Pseudoknot Helix"]);
                extentID++;
            }
        }
    }
}