Beispiel #1
0
        /// <summary>
        /// Looks up the database identifiers required to load the alignment and attaches a
        /// <c>SequenceMappingData</c> entry (SeqID, TaxID, LocationID) to every sequence's
        /// metadata under the <c>rCADMappingData</c> key.
        /// </summary>
        /// <returns>
        /// <c>true</c> when all lookups succeeded and every sequence was annotated;
        /// <c>false</c> when <c>MappedAlignment</c> is null or any step threw an exception.
        /// </returns>
        /// <remarks>
        /// On failure the method may already have annotated some sequences and advanced
        /// <c>NextSeqID</c>; that partial state is not rolled back.
        /// </remarks>
        public bool Map()
        {
            if (MappedAlignment == null)
            {
                return false;
            }

            try
            {
                rCADDataContext dc = CreateDataContext();
                try
                {
                    // Starting identifiers for the rows this import will create.
                    NextSeqID   = dc.NextSeqIDs.Select(row => row.SeqID).First();
                    AlignmentID = dc.NextAlnIDs.Select(row => row.AlnID).First();
                    NextAlnID   = AlignmentID + 1;

                    // The sequence type is identified by the (molecule type, gene name, gene type) triple.
                    AlignmentSeqTypeID = dc.SequenceTypes.Where(row => row.MoleculeType.Equals(_alignment.MoleculeType) && row.GeneName.Equals(_alignment.GeneName) &&
                                                                row.GeneType.Equals(_alignment.GeneType)).First().SeqTypeID;

                    // Sequence ID -> TaxID for every sequence whose scientific name matches a taxonomy name row.
                    var seqToTaxID = (from seq in _alignment.Sequences
                                      join taxonomyNameRow in dc.TaxonomyNames
                                      on((SequenceMetadata)seq.Metadata[SequenceMetadata.SequenceMetadataLabel]).ScientificName equals taxonomyNameRow.ScientificName
                                      select new { seq.ID, taxonomyNameRow.TaxID }).ToDictionary(match => match.ID, match => match.TaxID);

                    int rootTaxID = (from taxname in dc.TaxonomyNames
                                     where taxname.ScientificName.Equals("root")
                                     select taxname.TaxID).First();

                    // Extent type name -> ExtentTypeID.
                    ExtentTypeIDs = (from bar in dc.SecondaryStructureExtentTypes
                                     select new { bar.ExtentTypeID, bar.ExtentType }).ToDictionary(match => match.ExtentType, match => match.ExtentTypeID);

                    foreach (var sequence in _alignment.Sequences)
                    {
                        SequenceMetadata    metadata = (SequenceMetadata)sequence.Metadata[SequenceMetadata.SequenceMetadataLabel];
                        SequenceMappingData data     = new SequenceMappingData();
                        data.SeqID      = NextSeqID;
                        data.TaxID      = seqToTaxID.ContainsKey(sequence.ID) ? seqToTaxID[sequence.ID] : rootTaxID; //The sequence is mapped to the root of the Taxonomy tree if we don't have mapping info.
                        data.LocationID = dc.CellLocationInfos.Where(row => row.Description.Equals(metadata.LocationDescription)).First().LocationID;
                        sequence.Metadata.Add(rCADMappingData, data);
                        NextSeqID++;
                    }
                }
                finally
                {
                    // Close the connection on failure as well as success so a failed lookup does not leave it open.
                    dc.Connection.Close();
                }

                MappedSuccessfully = true;
                return true;
            }
            catch (System.Exception ex)
            {
                // Callers rely on the boolean result, so the exception is not rethrown; it is traced
                // so the cause of a failed mapping is not lost entirely.
                System.Diagnostics.Trace.TraceError("Map failed: {0}", ex);
                return false;
            }
        }
Beispiel #2
0
        /// <summary>
        /// Writes the delimited import files for the mapped alignment: one file each for the
        /// alignment, alignment columns, alignment sequences, alignment data, sequence main
        /// records, sequence accessions, secondary-structure base pairs and secondary-structure
        /// extents. Fields are separated by <c>\t|\t</c>.
        /// </summary>
        /// <remarks>
        /// All eight writers are held in <c>using</c> statements so the files are flushed and
        /// closed even when an exception is thrown part-way through the export.
        /// </remarks>
        private void CreateAlignmentImportFiles()
        {
            // Every row written to the extents file uses the same seven-field layout:
            // SeqID, AlignmentID, extent ID, ordinal within the extent, start, end, extent type ID.
            const string extentRowFormat = "{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}\t|\t{5}\t|\t{6}";

            using (StreamWriter alignmentcolumn             = File.CreateText(_alignmentColumnFile))
            using (StreamWriter alignment                   = File.CreateText(_alignmentFile))
            using (StreamWriter alignmentsequence           = File.CreateText(_alignmentSequenceDataFile))
            using (StreamWriter alignmentdata               = File.CreateText(_alignmentDataFile))
            using (StreamWriter sequencemain                = File.CreateText(_sequenceMainFile))
            using (StreamWriter sequenceaccession           = File.CreateText(_sequenceAccessionFile))
            using (StreamWriter secondarystructurebasepairs = File.CreateText(_secondaryStructureBasePairsFile))
            using (StreamWriter secondarystructureextents   = File.CreateText(_secondaryStructureExtentsFile))
            {
                //rCAD.Alignment: Entry in the alignment table for the new alignment
                // NOTE(review): the fourth field is Columns + 1 — presumably the next free column number; confirm against the table schema.
                alignment.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}", _data.AlignmentID, _data.AlignmentSeqTypeID, _data.MappedAlignment.LogicalName, _data.MappedAlignment.Columns + 1);

                //rCAD.AlignmentColumn: We have a 1 to 1 mapping of the logical and physical column numbers at the start
                for (int i = 0; i < _data.MappedAlignment.Columns; i++)
                {
                    alignmentcolumn.WriteLine("{0}\t|\t{1}\t|\t{2}", _data.AlignmentID, i + 1, i + 1);
                }

                //rCAD.AlignmentSequence, rCAD.AlignmentData, rCAD.SequenceMain, rCAD.SequenceAccession written on a per-sequence basis
                //We will do duplicate checking inside the database.
                foreach (var sequence in _data.MappedAlignment.Sequences)
                {
                    SequenceMetadata    metadata        = (SequenceMetadata)sequence.Metadata[SequenceMetadata.SequenceMetadataLabel];
                    SequenceMappingData mappingMetadata = (SequenceMappingData)sequence.Metadata[Mapper.rCADMappingData];
                    int seqLengthMetadata = metadata.SequenceLength;
                    int firstNtColNum     = -1; // 1-based column of the first non-gap position; -1 if none seen
                    int lastNtColNumber   = -1; // 1-based column of the last non-gap position; -1 if none seen
                    int sequenceIndex     = 1;  // 1-based index of the next non-gap position within the sequence

                    for (int i = 0; i < sequence.Count; i++)
                    {
                        if (!sequence[i].IsGap) //We are only actually writing the non-gap positions.
                        {
                            alignmentdata.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}", mappingMetadata.SeqID, _data.AlignmentID, i + 1, sequence[i].Symbol, sequenceIndex);
                            sequenceIndex++;
                            if (firstNtColNum < 0)
                            {
                                firstNtColNum = i + 1;
                            }

                            // The last column is the most recent non-gap column seen, 1-based (i + 1, not i).
                            lastNtColNumber = i + 1;
                        }
                    }

                    if ((sequenceIndex - 1) != seqLengthMetadata)
                    {
                        Console.WriteLine("Warning: Existing metadata for SeqLength ({0}) does not match number of observed nt ({1}) for {2}", seqLengthMetadata, sequenceIndex - 1, sequence.ID);
                    }

                    // Note: the length written here is the metadata value, not the observed non-gap count.
                    sequencemain.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}\t|\t\t|\t",
                                           mappingMetadata.SeqID, mappingMetadata.TaxID, mappingMetadata.LocationID, _data.AlignmentSeqTypeID, seqLengthMetadata);
                    alignmentsequence.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}\t|\t{4}", mappingMetadata.SeqID, _data.AlignmentID, metadata.AlignmentRowName, firstNtColNum, lastNtColNumber);

                    foreach (var gbentry in metadata.Accessions)
                    {
                        sequenceaccession.WriteLine("{0}\t|\t{1}\t|\t{2}", mappingMetadata.SeqID, gbentry.Accession, gbentry.Version);
                    }

                    // Secondary-structure rows are written only for sequences that carry at least one base pair.
                    if (metadata.StructureModel == null || !metadata.StructureModel.Pairs.Any())
                    {
                        continue;
                    }

                    foreach (int fivePrime in metadata.StructureModel.Pairs.Keys)
                    {
                        secondarystructurebasepairs.WriteLine("{0}\t|\t{1}\t|\t{2}\t|\t{3}", mappingMetadata.SeqID, _data.AlignmentID, fivePrime,
                                                              metadata.StructureModel.Pairs[fivePrime]);
                    }

                    // Extent IDs are numbered from 1 per sequence and shared across all extent kinds.
                    // Two-sided extents (helices, internal loops, pseudoknot helices) write two rows
                    // with ordinals 1 and 2 under the same extent ID.
                    int extentID = 1;
                    foreach (var helix in metadata.StructureModel.Helices)
                    {
                        secondarystructureextents.WriteLine(extentRowFormat, mappingMetadata.SeqID, _data.AlignmentID,
                                                            extentID, 1, helix.FivePrimeStart, helix.FivePrimeEnd, _data.ExtentTypeIDs["Helix"]);
                        secondarystructureextents.WriteLine(extentRowFormat, mappingMetadata.SeqID, _data.AlignmentID,
                                                            extentID, 2, helix.ThreePrimeStart, helix.ThreePrimeEnd, _data.ExtentTypeIDs["Helix"]);
                        extentID++;
                    }

                    foreach (var hairpinloop in metadata.StructureModel.Hairpins)
                    {
                        secondarystructureextents.WriteLine(extentRowFormat, mappingMetadata.SeqID, _data.AlignmentID,
                                                            extentID, 1, hairpinloop.Loop.LoopStart, hairpinloop.Loop.LoopEnd, _data.ExtentTypeIDs["Hairpin Loop"]);
                        extentID++;
                    }

                    foreach (var internalloop in metadata.StructureModel.Internals)
                    {
                        secondarystructureextents.WriteLine(extentRowFormat, mappingMetadata.SeqID, _data.AlignmentID,
                                                            extentID, 1, internalloop.FivePrimeLoop.LoopStart, internalloop.FivePrimeLoop.LoopEnd, _data.ExtentTypeIDs["Internal Loop"]);
                        secondarystructureextents.WriteLine(extentRowFormat, mappingMetadata.SeqID, _data.AlignmentID,
                                                            extentID, 2, internalloop.ThreePrimeLoop.LoopStart, internalloop.ThreePrimeLoop.LoopEnd, _data.ExtentTypeIDs["Internal Loop"]);
                        extentID++;
                    }

                    foreach (var bulgeloop in metadata.StructureModel.Bulges)
                    {
                        secondarystructureextents.WriteLine(extentRowFormat, mappingMetadata.SeqID, _data.AlignmentID,
                                                            extentID, 1, bulgeloop.Bulge.LoopStart, bulgeloop.Bulge.LoopEnd, _data.ExtentTypeIDs["Bulge Loop"]);
                        extentID++;
                    }

                    foreach (var multistemloop in metadata.StructureModel.Stems)
                    {
                        // Each segment of a multistem loop gets its own ordinal under one extent ID.
                        int stemordinal = 1;
                        foreach (var stem in multistemloop.Segments)
                        {
                            secondarystructureextents.WriteLine(extentRowFormat, mappingMetadata.SeqID, _data.AlignmentID,
                                                                extentID, stemordinal, stem.LoopStart, stem.LoopEnd, _data.ExtentTypeIDs["Multistem Loop"]);
                            stemordinal++;
                        }
                        extentID++;
                    }

                    foreach (var free in metadata.StructureModel.Strands)
                    {
                        secondarystructureextents.WriteLine(extentRowFormat, mappingMetadata.SeqID, _data.AlignmentID,
                                                            extentID, 1, free.LoopStart, free.LoopEnd, _data.ExtentTypeIDs["Free"]);
                        extentID++;
                    }

                    foreach (var tail in metadata.StructureModel.Tails)
                    {
                        secondarystructureextents.WriteLine(extentRowFormat, mappingMetadata.SeqID, _data.AlignmentID,
                                                            extentID, 1, tail.LoopStart, tail.LoopEnd, _data.ExtentTypeIDs["Tail"]);
                        extentID++;
                    }

                    foreach (var knot in metadata.StructureModel.KnottedHelices)
                    {
                        secondarystructureextents.WriteLine(extentRowFormat, mappingMetadata.SeqID, _data.AlignmentID,
                                                            extentID, 1, knot.FivePrimeStart, knot.FivePrimeEnd, _data.ExtentTypeIDs["Pseudoknot Helix"]);
                        secondarystructureextents.WriteLine(extentRowFormat, mappingMetadata.SeqID, _data.AlignmentID,
                                                            extentID, 2, knot.ThreePrimeStart, knot.ThreePrimeEnd, _data.ExtentTypeIDs["Pseudoknot Helix"]);
                        extentID++;
                    }
                }
            }
        }