/// <summary>
/// Collects the features of the supplied set whose location lies inside
/// the location of this feature.
/// </summary>
/// <param name="sequenceFeatures">
/// Feature set to search. A null value produces an empty result.
/// </param>
/// <returns>
/// Features whose start and end fall within this feature's start and end and whose
/// location has no accession; this instance itself is never included.
/// </returns>
public List<FeatureItem> GetSubFeatures(SequenceFeatures sequenceFeatures)
{
    List<FeatureItem> enclosed = new List<FeatureItem>();
    if (sequenceFeatures == null)
    {
        return enclosed;
    }

    int lowerBound = this.Location.LocationStart;
    int upperBound = this.Location.LocationEnd;

    foreach (FeatureItem candidate in sequenceFeatures.All)
    {
        int candidateStart = candidate.Location.LocationStart;
        int candidateEnd = candidate.Location.LocationEnd;

        // Skip anything that sticks out of this feature's range.
        if (candidateStart < lowerBound || candidateEnd > upperBound)
        {
            continue;
        }

        // Skip locations that carry an accession.
        if (!string.IsNullOrEmpty(candidate.Location.Accession))
        {
            continue;
        }

        // A feature is not a sub feature of itself.
        if (candidate != this)
        {
            enclosed.Add(candidate);
        }
    }

    return enclosed;
}
/// <summary>
/// Copy constructor used by the clone method: creates a new feature list and
/// fills it with a clone of every feature held by <paramref name="other"/>.
/// </summary>
/// <param name="other">SequenceFeatures instance to clone.</param>
private SequenceFeatures(SequenceFeatures other)
{
    this.All = new List<FeatureItem>();

    foreach (FeatureItem source in other.All)
    {
        FeatureItem copy = source.Clone();
        this.All.Add(copy);
    }
}
/// <summary>
/// Private constructor backing the clone method.
/// </summary>
/// <param name="other">
/// Instance to copy from; each of its features is cloned into the new instance.
/// </param>
private SequenceFeatures(SequenceFeatures other)
{
    All = new List<FeatureItem>();

    // Add a clone of each entry rather than the entry itself.
    foreach (var original in other.All)
    {
        All.Add(original.Clone());
    }
}
/// <summary>
/// Parses the GenBank features from the GenBank file, starting at the supplied line.
/// When at least one feature is parsed, the features are stored in the GenBank
/// metadata of the sequence.
/// </summary>
/// <param name="line">parse line</param>
/// <param name="sequence">The sequence.</param>
/// <param name="stream">The stream reader.</param>
/// <returns>
/// The line at which feature parsing stopped (null if the loop ran until no line was left).
/// </returns>
/// <exception cref="InvalidOperationException">LocationBuilder is null.</exception>
/// <exception cref="InvalidDataException">A feature line has no header.</exception>
private string ParseFeatures(string line, ref Sequence sequence, StreamReader stream)
{
    ILocationBuilder locBuilder = LocationBuilder;
    if (locBuilder == null)
    {
        throw new InvalidOperationException(Properties.Resource.NullLocationBuild);
    }

    // Data part of the current line, re-read every time the line advances.
    string lineData;

    // The sub-items of a feature are referred to as qualifiers. These do not have unique
    // keys, so they are stored as lists in the SubItems dictionary.
    SequenceFeatures features = new SequenceFeatures();
    IList<FeatureItem> featureList = features.All;

    while (line != null)
    {
        string lineHeader = GetLineHeader(line, FeatureDataIndent);
        if (String.IsNullOrEmpty(line) || lineHeader == "FEATURES")
        {
            line = GoToNextLine(line, stream);
            continue;
        }

        if (line[0] != ' ')
        {
            // start of non-feature text
            break;
        }

        if (lineHeader == null)
        {
            string message = Properties.Resource.GenbankEmptyFeature;
            Trace.Report(message);
            throw new InvalidDataException(message);
        }

        // check for multi-line location string
        lineData = GetLineData(line, FeatureDataIndent);
        string featureKey = lineHeader;
        string location = lineData;
        line = GoToNextLine(line, stream);
        lineData = GetLineData(line, FeatureDataIndent);
        lineHeader = GetLineHeader(line, FeatureDataIndent);

        // IsNullOrEmpty (instead of a bare comparison with string.Empty) keeps a null
        // lineData from reaching StartsWith; the qualifier loop below already treats
        // null line data as possible.
        while ((line != null)
            && (lineHeader == string.Empty)
            && !String.IsNullOrEmpty(lineData)
            && !lineData.StartsWith("/", StringComparison.Ordinal))
        {
            location += lineData;
            line = GoToNextLine(line, stream);
            lineData = GetLineData(line, FeatureDataIndent);
            lineHeader = GetLineHeader(line, FeatureDataIndent);
        }

        // create features as MetadataListItems
        FeatureItem feature = new FeatureItem(featureKey, locBuilder.GetLocation(location));

        // process the list of qualifiers, which are each in the form of
        // /key="value"
        string qualifierKey = string.Empty;
        string qualifierValue = string.Empty;
        bool quotationMarkStarted = false;

        while (line != null)
        {
            lineData = GetLineData(line, FeatureDataIndent);
            lineHeader = GetLineHeader(line, FeatureDataIndent);
            if ((lineHeader == string.Empty) && (lineData != null))
            {
                // A leading '/' starts a new qualifier.
                // Note that, if there are multiple lines of qualifierValue,
                // sometimes a line break will happen such that a "/" which is
                // part of the qualifierValue will start a continuation line.
                // This is identified by verifying open and closing double quotes.
                if (lineData.StartsWith("/", StringComparison.Ordinal) && !quotationMarkStarted)
                {
                    // new qualifier; save previous if this isn't the first
                    if (!String.IsNullOrEmpty(qualifierKey))
                    {
                        AddQualifierToFeature(feature, qualifierKey, qualifierValue);
                    }

                    // set the key and value of this qualifier
                    int equalsIndex = lineData.IndexOf('=');
                    if (equalsIndex < 0)
                    {
                        // no value, just key (this is allowed, see NC_005213.gbk)
                        qualifierKey = lineData.Substring(1);
                        qualifierValue = string.Empty;
                    }
                    else if (equalsIndex > 0)
                    {
                        qualifierKey = lineData.Substring(1, equalsIndex - 1);
                        qualifierValue = lineData.Substring(equalsIndex + 1);

                        // The value is still open when it begins with a quote that is
                        // not closed on this line. StartsWith/EndsWith are safe for an
                        // empty value ("/key="), and the Length check keeps a lone
                        // opening quote from being mistaken for its own closing quote.
                        bool opensQuote = qualifierValue.StartsWith("\"", StringComparison.Ordinal);
                        bool closesQuote = qualifierValue.Length > 1
                            && qualifierValue.EndsWith("\"", StringComparison.Ordinal);
                        quotationMarkStarted = opensQuote && !closesQuote;
                    }
                    else
                    {
                        string message = String.Format(
                            CultureInfo.CurrentCulture,
                            Properties.Resource.GenbankInvalidFeature,
                            line);
                        Trace.Report(message);
                        throw new InvalidDataException(message);
                    }
                }
                else
                {
                    // Continuation of previous line; "note" gets a line break, and
                    // everything else except "translation" and "transl_except" gets a
                    // space to separate words.
                    if (qualifierKey == "note")
                    {
                        qualifierValue += Environment.NewLine;
                    }
                    else if (qualifierKey != "translation" && qualifierKey != "transl_except")
                    {
                        qualifierValue += " ";
                    }

                    qualifierValue += lineData;

                    // EndsWith instead of indexing: the value can be empty here.
                    if (qualifierValue.EndsWith("\"", StringComparison.Ordinal))
                    {
                        quotationMarkStarted = false;
                    }
                }

                line = GoToNextLine(line, stream);
            }
            else if (line.StartsWith("\t", StringComparison.Ordinal))
            {
                // this seems to be data corruption; but BioPerl test set includes
                // (old, 2003) NT_021877.gbk which has this problem, so we
                // handle it
                ApplicationLog.WriteLine("WARN: nonstandard line format at line {0}: '{1}'", lineNumber, line);
                qualifierValue += " " + line.Trim();
                if (qualifierValue.EndsWith("\"", StringComparison.Ordinal))
                {
                    quotationMarkStarted = false;
                }

                line = GoToNextLine(line, stream);
            }
            else
            {
                break;
            }
        }

        // add last qualifier
        if (!String.IsNullOrEmpty(qualifierKey))
        {
            AddQualifierToFeature(feature, qualifierKey, qualifierValue);
        }

        // still add feature, even if it has no qualifiers
        featureList.Add(StandardFeatureMap.GetStandardFeatureItem(feature));
    }

    if (featureList.Count > 0)
    {
        ((GenBankMetadata)sequence.Metadata[Helper.GenBankMetadataKey]).Features = features;
    }

    return line;
}
/// <summary>
/// Builds five feature items from the values stored under the given XML node, adds them
/// to one feature set and checks the sub feature count reported for the first three items.
/// </summary>
/// <param name="nodeName">xml node name.</param>
private void ValidateGenBankSubFeatures(string nodeName)
{
    // Feature keys, in the order the items are created below.
    string[] keys =
    {
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FirstKey),
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SecondKey),
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ThirdFeatureKey),
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FourthKey),
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FifthKey)
    };

    // Location strings matching the keys above, index by index.
    string[] locations =
    {
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FirstLocation),
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SecondLocation),
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ThirdLocation),
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FourthLocation),
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FifthLocation)
    };

    // Expected sub feature counts for the first, second and third item.
    string[] expectedCounts =
    {
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MainFeaturesCount),
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SecondCount),
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ThirdCount)
    };

    // Create all feature items first, then register them with the feature set.
    SequenceFeatures featureSet = new SequenceFeatures();
    FeatureItem[] items = new FeatureItem[keys.Length];
    for (int index = 0; index < keys.Length; index++)
    {
        items[index] = new FeatureItem(keys[index], locations[index]);
    }

    foreach (FeatureItem item in items)
    {
        featureSet.All.Add(item);
    }

    // Validate sub features of the items that have an expected count.
    for (int index = 0; index < expectedCounts.Length; index++)
    {
        List<FeatureItem> subFeatures = items[index].GetSubFeatures(featureSet);
        Assert.AreEqual(Convert.ToInt32(expectedCounts[index], null), subFeatures.Count);
    }
}
/// <summary>
/// Parses the GenBank features from the GenBank file, beginning with the given line,
/// and assigns them to the GenBank metadata of the sequence if any feature was found.
/// </summary>
/// <param name="line">parse line</param>
/// <param name="sequence">The sequence.</param>
/// <param name="stream">The stream reader.</param>
/// <returns>
/// The line on which parsing of the feature table ended; null when every remaining
/// line was consumed.
/// </returns>
/// <exception cref="InvalidOperationException">LocationBuilder is null.</exception>
/// <exception cref="InvalidDataException">A feature line has no header.</exception>
private string ParseFeatures(string line, ref Sequence sequence, StreamReader stream)
{
    ILocationBuilder locBuilder = LocationBuilder;
    if (locBuilder == null)
    {
        throw new InvalidOperationException(Properties.Resource.NullLocationBuild);
    }

    // Holds the data portion of whichever line is currently being examined.
    string lineData;

    // The sub-items of a feature are referred to as qualifiers. These do not have unique
    // keys, so they are stored as lists in the SubItems dictionary.
    SequenceFeatures features = new SequenceFeatures();
    IList<FeatureItem> featureList = features.All;

    while (line != null)
    {
        string lineHeader = GetLineHeader(line, FeatureDataIndent);
        if (String.IsNullOrEmpty(line) || lineHeader == "FEATURES")
        {
            line = GoToNextLine(line, stream);
            continue;
        }

        if (line[0] != ' ')
        {
            // start of non-feature text
            break;
        }

        if (lineHeader == null)
        {
            string message = Properties.Resource.GenbankEmptyFeature;
            Trace.Report(message);
            throw new InvalidDataException(message);
        }

        // check for multi-line location string
        lineData = GetLineData(line, FeatureDataIndent);
        string featureKey = lineHeader;
        string location = lineData;
        line = GoToNextLine(line, stream);
        lineData = GetLineData(line, FeatureDataIndent);
        lineHeader = GetLineHeader(line, FeatureDataIndent);

        // Location continuation lines have no header, carry data and do not start a
        // qualifier. The null-or-empty test prevents StartsWith from being called on
        // null line data.
        while ((line != null)
            && (lineHeader == string.Empty)
            && !String.IsNullOrEmpty(lineData)
            && !lineData.StartsWith("/", StringComparison.Ordinal))
        {
            location += lineData;
            line = GoToNextLine(line, stream);
            lineData = GetLineData(line, FeatureDataIndent);
            lineHeader = GetLineHeader(line, FeatureDataIndent);
        }

        // create features as MetadataListItems
        FeatureItem feature = new FeatureItem(featureKey, locBuilder.GetLocation(location));

        // process the list of qualifiers, which are each in the form of
        // /key="value"
        string qualifierKey = string.Empty;
        string qualifierValue = string.Empty;
        bool quotationMarkStarted = false;

        while (line != null)
        {
            lineData = GetLineData(line, FeatureDataIndent);
            lineHeader = GetLineHeader(line, FeatureDataIndent);
            if ((lineHeader == string.Empty) && (lineData != null))
            {
                // A line beginning with '/' opens a new qualifier, except while a quoted
                // value is still open: a long value can wrap so that a "/" belonging to
                // the value lands at the start of a continuation line. Open and closing
                // double quotes are tracked to tell the two cases apart.
                if (lineData.StartsWith("/", StringComparison.Ordinal) && !quotationMarkStarted)
                {
                    // new qualifier; save previous if this isn't the first
                    if (!String.IsNullOrEmpty(qualifierKey))
                    {
                        AddQualifierToFeature(feature, qualifierKey, qualifierValue);
                    }

                    // set the key and value of this qualifier
                    int equalsIndex = lineData.IndexOf('=');
                    if (equalsIndex < 0)
                    {
                        // no value, just key (this is allowed, see NC_005213.gbk)
                        qualifierKey = lineData.Substring(1);
                        qualifierValue = string.Empty;
                    }
                    else if (equalsIndex > 0)
                    {
                        qualifierKey = lineData.Substring(1, equalsIndex - 1);
                        qualifierValue = lineData.Substring(equalsIndex + 1);

                        // Length checks come first so that an empty value ("/key=")
                        // is never indexed; a value consisting of a single quote is
                        // an opening quote only and therefore stays open.
                        if (qualifierValue.Length == 0 || qualifierValue[0] != '"')
                        {
                            quotationMarkStarted = false;
                        }
                        else
                        {
                            bool closedOnSameLine = qualifierValue.Length > 1
                                && qualifierValue[qualifierValue.Length - 1] == '"';
                            quotationMarkStarted = !closedOnSameLine;
                        }
                    }
                    else
                    {
                        string message = String.Format(
                            CultureInfo.CurrentCulture,
                            Properties.Resource.GenbankInvalidFeature,
                            line);
                        Trace.Report(message);
                        throw new InvalidDataException(message);
                    }
                }
                else
                {
                    // Continuation of previous line; "note" gets a line break, and
                    // everything else except "translation" and "transl_except" gets a
                    // space to separate words.
                    if (qualifierKey == "note")
                    {
                        qualifierValue += Environment.NewLine;
                    }
                    else if (qualifierKey != "translation" && qualifierKey != "transl_except")
                    {
                        qualifierValue += " ";
                    }

                    qualifierValue += lineData;

                    // Guard the index: the accumulated value may still be empty.
                    if (qualifierValue.Length > 0 && qualifierValue[qualifierValue.Length - 1] == '"')
                    {
                        quotationMarkStarted = false;
                    }
                }

                line = GoToNextLine(line, stream);
            }
            else if (line.StartsWith("\t", StringComparison.Ordinal))
            {
                // this seems to be data corruption; but BioPerl test set includes
                // (old, 2003) NT_021877.gbk which has this problem, so we
                // handle it
                ApplicationLog.WriteLine("WARN: nonstandard line format at line {0}: '{1}'", lineNumber, line);
                qualifierValue += " " + line.Trim();
                if (qualifierValue.Length > 0 && qualifierValue[qualifierValue.Length - 1] == '"')
                {
                    quotationMarkStarted = false;
                }

                line = GoToNextLine(line, stream);
            }
            else
            {
                break;
            }
        }

        // add last qualifier
        if (!String.IsNullOrEmpty(qualifierKey))
        {
            AddQualifierToFeature(feature, qualifierKey, qualifierValue);
        }

        // still add feature, even if it has no qualifiers
        featureList.Add(StandardFeatureMap.GetStandardFeatureItem(feature));
    }

    if (featureList.Count > 0)
    {
        ((GenBankMetadata)sequence.Metadata[Helper.GenBankMetadataKey]).Features = features;
    }

    return line;
}
/// <summary>
/// Parses GenBank features from the reader, starting at its current line, and assigns
/// them to the GenBank metadata of the sequence when at least one feature was parsed.
/// </summary>
/// <param name="bioReader">Reader supplying the lines; it is advanced as lines are consumed.</param>
/// <param name="sequence">Sequence whose GenBank metadata receives the parsed features.</param>
/// <exception cref="InvalidOperationException">LocationBuilder is null.</exception>
/// <exception cref="InvalidDataException">A feature line has no header.</exception>
private void ParseFeatures(BioTextReader bioReader, ref Sequence sequence)
{
    ILocationBuilder locBuilder = LocationBuilder;
    if (locBuilder == null)
    {
        throw new InvalidOperationException(Resource.NullLocationBuild);
    }

    // set data indent for features
    bioReader.DataIndent = _featureDataIndent;

    // The sub-items of a feature are referred to as qualifiers. These do not have unique
    // keys, so they are stored as lists in the SubItems dictionary.
    SequenceFeatures features = new SequenceFeatures();
    IList<FeatureItem> featureList = features.All;

    while (bioReader.HasLines)
    {
        if (String.IsNullOrEmpty(bioReader.Line) || bioReader.LineHeader == "FEATURES")
        {
            bioReader.GoToNextLine();
            continue;
        }

        if (bioReader.Line[0] != ' ')
        {
            // start of non-feature text
            break;
        }

        if (!bioReader.LineHasHeader)
        {
            string message = Properties.Resource.GenbankEmptyFeature;
            Trace.Report(message);
            throw new InvalidDataException(message);
        }

        // check for multi-line location string
        string featureKey = bioReader.LineHeader;
        string location = bioReader.LineData;
        bioReader.GoToNextLine();
        while (bioReader.HasLines
            && !bioReader.LineHasHeader
            && bioReader.LineHasData
            && !bioReader.LineData.StartsWith("/", StringComparison.Ordinal))
        {
            location += bioReader.LineData;
            bioReader.GoToNextLine();
        }

        // create features as MetadataListItems
        FeatureItem feature = new FeatureItem(featureKey, locBuilder.GetLocation(location));

        // process the list of qualifiers, which are each in the form of
        // /key="value"
        string qualifierKey = string.Empty;
        string qualifierValue = string.Empty;

        // True while a quoted qualifier value has been opened but not yet closed.
        bool quotationMarkStarted = false;

        while (bioReader.HasLines)
        {
            if (!bioReader.LineHasHeader && bioReader.LineHasData)
            {
                string data = bioReader.LineData;

                // A leading '/' starts a new qualifier, unless a quoted value is still
                // open: a multi-line value can wrap so that a "/" which is part of the
                // value begins a continuation line. Tracking the open and closing
                // double quotes keeps such a line attached to the current qualifier.
                if (data.StartsWith("/", StringComparison.Ordinal) && !quotationMarkStarted)
                {
                    // new qualifier; save previous if this isn't the first
                    if (!String.IsNullOrEmpty(qualifierKey))
                    {
                        AddQualifierToFeature(feature, qualifierKey, qualifierValue);
                    }

                    // set the key and value of this qualifier
                    int equalsIndex = data.IndexOf('=');
                    if (equalsIndex < 0)
                    {
                        // no value, just key (this is allowed, see NC_005213.gbk)
                        qualifierKey = data.Substring(1);
                        qualifierValue = string.Empty;
                    }
                    else if (equalsIndex > 0)
                    {
                        qualifierKey = data.Substring(1, equalsIndex - 1);
                        qualifierValue = data.Substring(equalsIndex + 1);

                        // Open when the value begins with a quote that is not closed
                        // on this line. StartsWith/EndsWith are safe for an empty
                        // value, and the Length check keeps a lone opening quote from
                        // counting as its own closing quote.
                        bool opensQuote = qualifierValue.StartsWith("\"", StringComparison.Ordinal);
                        bool closesQuote = qualifierValue.Length > 1
                            && qualifierValue.EndsWith("\"", StringComparison.Ordinal);
                        quotationMarkStarted = opensQuote && !closesQuote;
                    }
                    else
                    {
                        string message = String.Format(
                            CultureInfo.CurrentCulture,
                            Properties.Resource.GenbankInvalidFeature,
                            bioReader.Line);
                        Trace.Report(message);
                        throw new InvalidDataException(message);
                    }
                }
                else
                {
                    // Continuation of previous line; "note" gets a line break, and
                    // everything else except "translation" and "transl_except" gets a
                    // space to separate words.
                    if (qualifierKey == "note")
                    {
                        qualifierValue += Environment.NewLine;
                    }
                    else if (qualifierKey != "translation" && qualifierKey != "transl_except")
                    {
                        qualifierValue += " ";
                    }

                    qualifierValue += data;

                    // A trailing quote closes the value that was opened earlier.
                    if (qualifierValue.EndsWith("\"", StringComparison.Ordinal))
                    {
                        quotationMarkStarted = false;
                    }
                }

                bioReader.GoToNextLine();
            }
            else if (bioReader.Line.StartsWith("\t", StringComparison.Ordinal))
            {
                // this seems to be data corruption; but BioPerl test set includes
                // (old, 2003) NT_021877.gbk which has this problem, so we
                // handle it
                ApplicationLog.WriteLine("WARN: nonstandard line format at line {0}: '{1}'",
                    bioReader.LineNumber, bioReader.Line);
                qualifierValue += " " + bioReader.Line.Trim();
                if (qualifierValue.EndsWith("\"", StringComparison.Ordinal))
                {
                    quotationMarkStarted = false;
                }

                bioReader.GoToNextLine();
            }
            else
            {
                break;
            }
        }

        // add last qualifier
        if (!String.IsNullOrEmpty(qualifierKey))
        {
            AddQualifierToFeature(feature, qualifierKey, qualifierValue);
        }

        // still add feature, even if it has no qualifiers
        featureList.Add(StandardFeatureMap.GetStandardFeatureItem(feature));
    }

    if (featureList.Count > 0)
    {
        ((GenBankMetadata)sequence.Metadata[Helper.GenBankMetadataKey]).Features = features;
    }
}
/// <summary>
/// Returns the features from <paramref name="sequenceFeatures"/> that are located
/// within the start and end of this feature.
/// </summary>
/// <param name="sequenceFeatures">SequenceFeatures instance; may be null.</param>
/// <returns>
/// A new list (empty when <paramref name="sequenceFeatures"/> is null) holding every
/// feature, other than this one, whose location lies in this feature's range and has
/// no accession.
/// </returns>
public List<FeatureItem> GetSubFeatures(SequenceFeatures sequenceFeatures)
{
    var result = new List<FeatureItem>();

    if (sequenceFeatures != null)
    {
        int ownStart = this.Location.LocationStart;
        int ownEnd = this.Location.LocationEnd;

        foreach (FeatureItem other in sequenceFeatures.All)
        {
            int otherStart = other.Location.LocationStart;
            int otherEnd = other.Location.LocationEnd;

            // Inside this feature's range and not tied to an accession.
            bool qualifies = otherStart >= ownStart
                && otherEnd <= ownEnd
                && string.IsNullOrEmpty(other.Location.Accession);

            // The feature itself is never reported as its own sub feature.
            if (qualifies && other != this)
            {
                result.Add(other);
            }
        }
    }

    return result;
}