/// <summary>
/// Registers an alignment row announced by a header line in the header index.
/// </summary>
/// <param name="headerline">
/// Regex match of a header line; the "Statusin" and "Rowname" groups are read.
/// </param>
private void LoadRowMetadata(Match headerline)
 {
     // Only rows whose status group is exactly "in" are indexed; everything else is ignored.
     string status = headerline.Groups["Statusin"].Value;
     if (status != "in")
     {
         return;
     }

     SequenceMetadata rowMetadata = new SequenceMetadata
     {
         AlignmentRowName = headerline.Groups["Rowname"].Value
     };

     // The key is read back from the metadata object so the index key and the stored row name always agree.
     _alignmentHeaderIndex.Add(rowMetadata.AlignmentRowName, rowMetadata);
 }
Пример #2
0
        /// <summary>
        /// Attaches the per-sequence metadata found in the manifest to the matching
        /// sequences of the loaded alignment.
        /// </summary>
        /// <remarks>
        /// A manifest entry is matched to a sequence when the text of its alignment row
        /// name element equals the sequence ID. Entries without a row name element are
        /// skipped. Every other child element is optional; numeric fields that cannot be
        /// parsed are stored as 0, and structure pairs with a missing or unparsable index
        /// are dropped.
        /// </remarks>
        private void LoadSequenceMetadata()
        {
            // Pair manifest entries with sequences. Entries lacking the row name element are
            // filtered out first so that building the join key can never dereference null.
            var seqmetadata = from e in _manifestRoot.Descendants(SequenceMetadata.SequenceMetadataLabel)
                              where e.Element(SequenceMetadata.AlignmentRowNameLabel) != null
                              join s in _alignment.Sequences
                              on e.Element(SequenceMetadata.AlignmentRowNameLabel).Value equals s.ID
                              select new { e, s };

            foreach (var sm in seqmetadata)
            {
                // Register the (still empty) metadata object on the sequence, then fill it in.
                SequenceMetadata metadata = new SequenceMetadata();
                sm.s.Metadata.Add(SequenceMetadata.SequenceMetadataLabel, metadata);

                var scientificName = sm.e.Element(SequenceMetadata.ScientificNameLabel);
                if (scientificName != null)
                {
                    metadata.ScientificName = scientificName.Value;
                }

                var taxIdElement = sm.e.Element(SequenceMetadata.TaxIDLabel);
                if (taxIdElement != null)
                {
                    // Best effort: TryParse leaves 0 in taxID when the text is not an integer.
                    int taxID;
                    Int32.TryParse(taxIdElement.Value, out taxID);
                    metadata.TaxID = taxID;
                }

                var lineage = sm.e.Element(SequenceMetadata.LineageLabel);
                if (lineage != null)
                {
                    metadata.Lineage = lineage.Value;
                }

                // Parsed before the structure model below, which is sized from this value.
                var lengthElement = sm.e.Element(SequenceMetadata.SequenceLengthLabel);
                if (lengthElement != null)
                {
                    int seqLength;
                    Int32.TryParse(lengthElement.Value, out seqLength);
                    metadata.SequenceLength = seqLength;
                }

                var locationDescription = sm.e.Element(SequenceMetadata.LocationDescriptionLabel);
                if (locationDescription != null)
                {
                    metadata.LocationDescription = locationDescription.Value;
                }

                var rowName = sm.e.Element(SequenceMetadata.AlignmentRowNameLabel);
                if (rowName != null)
                {
                    metadata.AlignmentRowName = rowName.Value;
                }

                var accessionsElement = sm.e.Element(SequenceMetadata.AccessionsLabel);
                if (accessionsElement != null)
                {
                    foreach (var accession in accessionsElement.Descendants(SequenceMetadata.GenbankAccessionLabel))
                    {
                        // The explicit string conversion yields null for a missing child element
                        // instead of throwing, so an incomplete accession does not abort the load.
                        metadata.Accessions.Add(new GenBankVersion
                        {
                            Accession = (string)accession.Element(SequenceMetadata.GenbankAccessionIDLabel),
                            Version   = (string)accession.Element(SequenceMetadata.GenbankAccessionVersionLabel)
                        });
                    }
                }

                var structureElement = sm.e.Element(SequenceMetadata.StructureModelLabel);
                if (structureElement != null)
                {
                    StructureModel strmodel = new StructureModel(metadata.SequenceLength);
                    foreach (var pair in structureElement.Descendants(SequenceMetadata.StructureModelPairLabel))
                    {
                        // TryParse returns false for null input, so a pair with a missing index
                        // element is skipped exactly like a pair with an unparsable index.
                        int fivePrimeIndex, threePrimeIndex;
                        if (Int32.TryParse((string)pair.Element(SequenceMetadata.StructureModelPairFivePrimeIndexLabel), out fivePrimeIndex) &&
                            Int32.TryParse((string)pair.Element(SequenceMetadata.StructureModelPairThreePrimeIndexLabel), out threePrimeIndex))
                        {
                            strmodel.Pairs.Add(fivePrimeIndex, threePrimeIndex);
                        }
                    }
                    strmodel.DecomposeStructure();
                    metadata.StructureModel = strmodel;
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Builds the manifest XML element describing the metadata attached to a sequence.
        /// </summary>
        /// <param name="sequence">
        /// Sequence whose metadata entry stored under
        /// <c>SequenceMetadata.SequenceMetadataLabel</c> is serialized.
        /// </param>
        /// <returns>
        /// The populated metadata element, or <c>null</c> when the sequence has no entry
        /// under that key.
        /// </returns>
        private XElement WriteSequenceMetadata(ISequence sequence)
        {
            if (!sequence.Metadata.ContainsKey(SequenceMetadata.SequenceMetadataLabel))
            {
                return null;
            }

            SequenceMetadata seqMetadata = (SequenceMetadata)sequence.Metadata[SequenceMetadata.SequenceMetadataLabel];
            XElement output = new XElement(SequenceMetadata.SequenceMetadataLabel);

            if (seqMetadata.ScientificName != null)
            {
                output.Add(new XElement(SequenceMetadata.ScientificNameLabel, seqMetadata.ScientificName));
            }

            // LoadSequenceMetadata reads this element, so it has to be written for the tax ID
            // to survive a save/load round trip. 0 is the value the loader stores when the
            // text cannot be parsed, so it is treated as "not set" and left out.
            if (seqMetadata.TaxID != 0)
            {
                output.Add(new XElement(SequenceMetadata.TaxIDLabel, seqMetadata.TaxID));
            }

            // Length and lineage are always emitted, even when the lineage is null.
            output.Add(new XElement(SequenceMetadata.SequenceLengthLabel, seqMetadata.SequenceLength));
            output.Add(new XElement(SequenceMetadata.LineageLabel, seqMetadata.Lineage));

            if (seqMetadata.AlignmentRowName != null)
            {
                output.Add(new XElement(SequenceMetadata.AlignmentRowNameLabel, seqMetadata.AlignmentRowName));
            }

            if (seqMetadata.LocationDescription != null)
            {
                output.Add(new XElement(SequenceMetadata.LocationDescriptionLabel, seqMetadata.LocationDescription));
            }

            if (seqMetadata.Accessions.Any())
            {
                XElement accessions = new XElement(SequenceMetadata.AccessionsLabel);
                foreach (var acc in seqMetadata.Accessions)
                {
                    accessions.Add(new XElement(SequenceMetadata.GenbankAccessionLabel,
                                                new XElement(SequenceMetadata.GenbankAccessionIDLabel, acc.Accession),
                                                new XElement(SequenceMetadata.GenbankAccessionVersionLabel, acc.Version)));
                }
                output.Add(accessions);
            }

            // A single structure model is written: one pair element per base pair, holding
            // the pair's key as the five-prime index and its value as the three-prime index.
            if (seqMetadata.StructureModel != null)
            {
                var pairElements = from pair in seqMetadata.StructureModel.Pairs
                                   select new XElement(SequenceMetadata.StructureModelPairLabel,
                                                       new XElement(SequenceMetadata.StructureModelPairFivePrimeIndexLabel, pair.Key),
                                                       new XElement(SequenceMetadata.StructureModelPairThreePrimeIndexLabel, pair.Value));
                XElement strmodelxml = new XElement(SequenceMetadata.StructureModelLabel);
                strmodelxml.Add(pairElements.ToArray());
                output.Add(strmodelxml);
            }

            return output;
        }
        /// <summary>
        /// Appends the residues carried by one alignment data line to the sequence of the
        /// row the line belongs to.
        /// </summary>
        /// <param name="seqdataline">
        /// Regex match of a data line; the "Rowname", "Startindex" and "Seqdata" groups are read.
        /// </param>
        /// <remarks>
        /// Lines for rows that are not present in the header index are ignored. A line with
        /// start index 0 creates the row's sequence; any other line continues the sequence
        /// already registered for the row. Symbols the alphabet does not recognise are stored
        /// as gaps, and every non-gap symbol increments the row's recorded sequence length.
        /// </remarks>
        private void LoadSequenceData(Match seqdataline)
        {
            string rowName = seqdataline.Groups["Rowname"].Value;
            if (!_alignmentHeaderIndex.ContainsKey(rowName))
            {
                return;
            }

            SequenceMetadata metadata = _alignmentHeaderIndex[rowName];
            int startIndex = Int32.Parse(seqdataline.Groups["Startindex"].Value);

            ISequence sequence;
            if (startIndex == 0)
            {
                // First chunk of the row: create the sequence and register it everywhere.
                sequence = new Sequence(RnaAlphabet.Instance)
                {
                    ID        = metadata.AlignmentRowName,
                    DisplayID = metadata.AlignmentRowName
                };
                sequence.Metadata.Add(SequenceMetadata.SequenceMetadataLabel, metadata);
                _alignmentSequenceIndex.Add(metadata.AlignmentRowName, sequence);
                _sequences.Add(sequence);
            }
            else
            {
                // NOTE(review): this lookup assumes an earlier line with start index 0 already
                // registered the row; presumably it throws otherwise - confirm the format
                // guarantees that ordering.
                sequence = _alignmentSequenceIndex[rowName];
            }

            // Because the AE2 format has blanks, the sequence may be shorter than the position
            // this line starts at. Pad with gaps until it reaches startIndex. The condition
            // re-reads Count on every pass; counting up to (startIndex - Count) while Count is
            // itself growing would stop after roughly half of the required gaps.
            while (sequence.Count < startIndex)
            {
                sequence.Add(RnaAlphabet.Instance.Gap);
            }

            string seqdata     = seqdataline.Groups["Seqdata"].Value;
            char[] octalholder = new char[3];
            int    lineidx     = 0;
            while (lineidx < seqdata.Length)
            {
                ISequenceItem nextElement;
                if (seqdata[lineidx] == '\\')
                {
                    // A backslash introduces a symbol written as three characters that are
                    // handed to ConvertOctal. The three characters are read unconditionally,
                    // so an escape truncated by the end of the line raises an
                    // IndexOutOfRangeException.
                    octalholder[0] = seqdata[lineidx + 1];
                    octalholder[1] = seqdata[lineidx + 2];
                    octalholder[2] = seqdata[lineidx + 3];
                    nextElement    = sequence.Alphabet.LookupBySymbol(ConvertOctal(octalholder));
                    lineidx       += 4;
                }
                else
                {
                    nextElement = sequence.Alphabet.LookupBySymbol(seqdata[lineidx]);
                    lineidx    += 1;
                }

                if (nextElement == null)
                {
                    // Unknown symbol: keep the column but store a gap.
                    sequence.Add(RnaAlphabet.Instance.Gap);
                    continue;
                }

                if (!nextElement.IsGap)
                {
                    metadata.SequenceLength++;
                }
                sequence.Add(nextElement);
            }
        }