/// <summary>
/// Registers an alignment row from a header line, provided the row is flagged as "in".
/// </summary>
/// <param name="headerline">
/// Regex match for a header line; its "Statusin" and "Rowname" groups are read.
/// </param>
private void LoadRowMetadata(Match headerline)
{
    // Rows whose status group is anything other than "in" are not indexed.
    bool isIncluded = headerline.Groups["Statusin"].Value == "in";
    if (!isIncluded)
    {
        return;
    }

    SequenceMetadata rowMetadata = new SequenceMetadata
    {
        AlignmentRowName = headerline.Groups["Rowname"].Value
    };

    // Keyed by row name so later sequence-data lines can find their metadata.
    _alignmentHeaderIndex.Add(rowMetadata.AlignmentRowName, rowMetadata);
}
/// <summary>
/// Copies per-sequence metadata from the manifest XML onto the matching sequences of the alignment.
/// </summary>
/// <remarks>
/// Every <c>SequenceMetadata.SequenceMetadataLabel</c> descendant of <c>_manifestRoot</c> is joined to
/// the sequence in <c>_alignment.Sequences</c> whose <c>ID</c> equals the entry's alignment row name.
/// Entries without a row-name child cannot be matched and are skipped. For each match a new
/// <see cref="SequenceMetadata"/> is added to the sequence's metadata dictionary and filled from
/// whichever optional child elements are present. Accession entries missing their ID or version
/// child, and structure pairs whose indices are missing or not integers, are skipped.
/// </remarks>
private void LoadSequenceMetadata()
{
    //Associate available metadata with Sequence objects.
    //Entries lacking a row-name element are filtered out first so the join key never dereferences null.
    var seqmetadata = from e in _manifestRoot.Descendants(SequenceMetadata.SequenceMetadataLabel)
                      where e.Element(SequenceMetadata.AlignmentRowNameLabel) != null
                      join s in _alignment.Sequences
                          on e.Element(SequenceMetadata.AlignmentRowNameLabel).Value equals s.ID
                      select new { e, s };

    //Load the metadata
    foreach (var sm in seqmetadata)
    {
        SequenceMetadata metadata = new SequenceMetadata();
        // NOTE(review): Add is kept from the original; if Metadata is a standard dictionary this
        // throws when the sequence already has an entry under this key - confirm that is intended.
        sm.s.Metadata.Add(SequenceMetadata.SequenceMetadataLabel, metadata);

        // Casting an XElement to string yields its Value, or null when the element is absent,
        // so each optional child is looked up exactly once.
        string scientificName = (string)sm.e.Element(SequenceMetadata.ScientificNameLabel);
        if (scientificName != null)
        {
            metadata.ScientificName = scientificName;
        }

        string taxIdText = (string)sm.e.Element(SequenceMetadata.TaxIDLabel);
        if (taxIdText != null)
        {
            // TryParse leaves 0 in taxID when the text is not a valid integer.
            int taxID;
            Int32.TryParse(taxIdText, out taxID);
            metadata.TaxID = taxID;
        }

        string lineage = (string)sm.e.Element(SequenceMetadata.LineageLabel);
        if (lineage != null)
        {
            metadata.Lineage = lineage;
        }

        string seqLengthText = (string)sm.e.Element(SequenceMetadata.SequenceLengthLabel);
        if (seqLengthText != null)
        {
            // TryParse leaves 0 in seqLength when the text is not a valid integer.
            int seqLength;
            Int32.TryParse(seqLengthText, out seqLength);
            metadata.SequenceLength = seqLength;
        }

        string locationDescription = (string)sm.e.Element(SequenceMetadata.LocationDescriptionLabel);
        if (locationDescription != null)
        {
            metadata.LocationDescription = locationDescription;
        }

        // Always present here because of the filter in the query above.
        metadata.AlignmentRowName = sm.e.Element(SequenceMetadata.AlignmentRowNameLabel).Value;

        XElement accessionsElement = sm.e.Element(SequenceMetadata.AccessionsLabel);
        if (accessionsElement != null)
        {
            foreach (XElement accession in accessionsElement.Descendants(SequenceMetadata.GenbankAccessionLabel))
            {
                XElement accessionId = accession.Element(SequenceMetadata.GenbankAccessionIDLabel);
                XElement accessionVersion = accession.Element(SequenceMetadata.GenbankAccessionVersionLabel);
                if (accessionId == null || accessionVersion == null)
                {
                    // Incomplete accession entry: skip it rather than fail the whole load.
                    continue;
                }

                metadata.Accessions.Add(new GenBankVersion
                {
                    Accession = accessionId.Value,
                    Version = accessionVersion.Value
                });
            }
        }

        XElement structureElement = sm.e.Element(SequenceMetadata.StructureModelLabel);
        if (structureElement != null)
        {
            // Uses the sequence length parsed above (0 when the manifest did not supply one).
            StructureModel strmodel = new StructureModel(metadata.SequenceLength);
            foreach (XElement pair in structureElement.Descendants(SequenceMetadata.StructureModelPairLabel))
            {
                // Int32.TryParse returns false for null input, so a pair missing either index
                // element is skipped in the same way as one with a non-numeric index.
                int fivePrimeIndex, threePrimeIndex;
                if (Int32.TryParse((string)pair.Element(SequenceMetadata.StructureModelPairFivePrimeIndexLabel), out fivePrimeIndex) &&
                    Int32.TryParse((string)pair.Element(SequenceMetadata.StructureModelPairThreePrimeIndexLabel), out threePrimeIndex))
                {
                    strmodel.Pairs.Add(fivePrimeIndex, threePrimeIndex);
                }
            }
            strmodel.DecomposeStructure();
            metadata.StructureModel = strmodel;
        }
    }
}
/// <summary>
/// Serializes the <see cref="SequenceMetadata"/> attached to a sequence into a manifest XML element.
/// </summary>
/// <param name="sequence">Sequence whose metadata dictionary is consulted.</param>
/// <returns>
/// An element named <c>SequenceMetadata.SequenceMetadataLabel</c> containing the metadata fields,
/// or <c>null</c> when the sequence has no entry under that key.
/// </returns>
private XElement WriteSequenceMetadata(ISequence sequence)
{
    if (!sequence.Metadata.ContainsKey(SequenceMetadata.SequenceMetadataLabel))
    {
        return null;
    }

    SequenceMetadata seqMetadata = (SequenceMetadata)sequence.Metadata[SequenceMetadata.SequenceMetadataLabel];
    XElement output = new XElement(SequenceMetadata.SequenceMetadataLabel);

    if (seqMetadata.ScientificName != null)
    {
        output.Add(new XElement(SequenceMetadata.ScientificNameLabel, seqMetadata.ScientificName));
    }

    // LoadSequenceMetadata reads the taxonomy ID element, so it must be written here for a
    // save/load round trip to preserve it. Zero is what the loader leaves behind when no usable
    // value was found, so it is treated as "not set" and omitted.
    if (seqMetadata.TaxID != 0)
    {
        output.Add(new XElement(SequenceMetadata.TaxIDLabel, seqMetadata.TaxID));
    }

    output.Add(new XElement(SequenceMetadata.SequenceLengthLabel, seqMetadata.SequenceLength));

    // Lineage is written unconditionally; a null lineage produces an empty element.
    output.Add(new XElement(SequenceMetadata.LineageLabel, seqMetadata.Lineage));

    if (seqMetadata.AlignmentRowName != null)
    {
        output.Add(new XElement(SequenceMetadata.AlignmentRowNameLabel, seqMetadata.AlignmentRowName));
    }

    if (seqMetadata.LocationDescription != null)
    {
        output.Add(new XElement(SequenceMetadata.LocationDescriptionLabel, seqMetadata.LocationDescription));
    }

    if (seqMetadata.Accessions.Any())
    {
        XElement accession = new XElement(SequenceMetadata.AccessionsLabel);
        foreach (var acc in seqMetadata.Accessions)
        {
            accession.Add(new XElement(SequenceMetadata.GenbankAccessionLabel,
                new XElement(SequenceMetadata.GenbankAccessionIDLabel, acc.Accession),
                new XElement(SequenceMetadata.GenbankAccessionVersionLabel, acc.Version)));
        }
        output.Add(accession);
    }

    if (seqMetadata.StructureModel != null)
    {
        // One pair element per entry, carrying the five-prime (key) and three-prime (value) indices.
        XElement strmodelxml = new XElement(SequenceMetadata.StructureModelLabel);
        foreach (var pair in seqMetadata.StructureModel.Pairs)
        {
            strmodelxml.Add(new XElement(SequenceMetadata.StructureModelPairLabel,
                new XElement(SequenceMetadata.StructureModelPairFivePrimeIndexLabel, pair.Key),
                new XElement(SequenceMetadata.StructureModelPairThreePrimeIndexLabel, pair.Value)));
        }
        output.Add(strmodelxml);
    }

    return output;
}
/// <summary>
/// Appends the residues from one sequence-data line to the alignment row it belongs to.
/// </summary>
/// <param name="seqdataline">
/// Regex match for a data line; its "Rowname", "Startindex" and "Seqdata" groups are read.
/// </param>
/// <remarks>
/// Lines whose row name is not in <c>_alignmentHeaderIndex</c> are ignored. A start index of 0
/// creates and registers a new sequence for the row; any other start index continues the
/// sequence already registered under that row name. Symbols the alphabet does not recognise are
/// stored as gaps, and every non-gap symbol increments the row's <c>SequenceLength</c>.
/// A backslash must be followed by three more characters in the same line, otherwise indexing
/// past the end of the data throws.
/// </remarks>
private void LoadSequenceData(Match seqdataline)
{
    string rowName = seqdataline.Groups["Rowname"].Value;
    if (!_alignmentHeaderIndex.ContainsKey(rowName))
    {
        return;
    }

    SequenceMetadata metadata = _alignmentHeaderIndex[rowName];
    int startIndex = Int32.Parse(seqdataline.Groups["Startindex"].Value);

    ISequence sequence;
    if (startIndex == 0)
    {
        sequence = new Sequence(RnaAlphabet.Instance)
        {
            ID = metadata.AlignmentRowName,
            DisplayID = metadata.AlignmentRowName
        };
        sequence.Metadata.Add(SequenceMetadata.SequenceMetadataLabel, metadata);
        _alignmentSequenceIndex.Add(metadata.AlignmentRowName, sequence);
        _sequences.Add(sequence);
    }
    else
    {
        sequence = _alignmentSequenceIndex[rowName];
    }

    //Because AE2 format has blanks, we might have to pad.
    //The pad count is fixed before the loop: comparing the counter against
    //(startIndex - sequence.Count) on every iteration shrinks the bound as gaps are added,
    //which stops after only about half of the required gaps.
    int padCount = startIndex - sequence.Count;
    for (int i = 0; i < padCount; i++)
    {
        sequence.Add(RnaAlphabet.Instance.Gap);
    }

    int lineidx = 0;
    char[] octalholder = new char[3];
    string seqdata = seqdataline.Groups["Seqdata"].Value;
    while (lineidx < seqdata.Length)
    {
        ISequenceItem nextElement;
        if (seqdata[lineidx] == '\\')
        {
            // Escaped symbol: the three characters after the backslash are decoded by ConvertOctal.
            octalholder[0] = seqdata[lineidx + 1];
            octalholder[1] = seqdata[lineidx + 2];
            octalholder[2] = seqdata[lineidx + 3];
            nextElement = sequence.Alphabet.LookupBySymbol(ConvertOctal(octalholder));
            lineidx += 4;
        }
        else
        {
            nextElement = sequence.Alphabet.LookupBySymbol(seqdata[lineidx]);
            lineidx += 1;
        }

        if (nextElement == null)
        {
            // Unknown symbol: keep the column by storing a gap.
            sequence.Add(RnaAlphabet.Instance.Gap);
        }
        else
        {
            if (!nextElement.IsGap)
            {
                metadata.SequenceLength++;
            }
            sequence.Add(nextElement);
        }
    }
}