/// <summary>
/// Parses the feature table of a GenBank record, starting at the given line.
/// Each feature consists of a key, a (possibly multi-line) location and zero or
/// more qualifiers of the form /key="value". Every parsed feature is converted
/// with StandardFeatureMap.GetStandardFeatureItem and collected; if at least one
/// feature was found, the collection is stored in the sequence's GenBank metadata.
/// </summary>
/// <param name="line">The line at which parsing starts.</param>
/// <param name="sequence">The sequence whose GenBank metadata receives the features.</param>
/// <param name="stream">The stream reader from which subsequent lines are read.</param>
/// <returns>
/// The line at which feature parsing stopped (the first non-empty line that does
/// not start with a space), or null if no further line was available.
/// </returns>
/// <exception cref="InvalidOperationException">LocationBuilder is null.</exception>
/// <exception cref="InvalidDataException">A feature line has no header.</exception>
private string ParseFeatures(string line, ref Sequence sequence, StreamReader stream)
{
    ILocationBuilder locBuilder = LocationBuilder;
    if (locBuilder == null)
    {
        throw new InvalidOperationException(Properties.Resource.NullLocationBuild);
    }

    // The sub-items of a feature are referred to as qualifiers. These do not have unique
    // keys, so they are stored as lists in the SubItems dictionary.
    SequenceFeatures features = new SequenceFeatures();
    IList<FeatureItem> featureList = features.All;

    while (line != null)
    {
        string lineHeader = GetLineHeader(line, FeatureDataIndent);
        if (String.IsNullOrEmpty(line) || lineHeader == "FEATURES")
        {
            line = GoToNextLine(line, stream);
            continue;
        }

        if (line[0] != ' ')
        {
            // start of non-feature text
            break;
        }

        // NOTE(review): this relies on GetLineHeader returning null for a line
        // without a header - confirm against the helper.
        if (lineHeader == null)
        {
            string message = Properties.Resource.GenbankEmptyFeature;
            Trace.Report(message);
            throw new InvalidDataException(message);
        }

        // check for multi-line location string
        string lineData = GetLineData(line, FeatureDataIndent);
        string featureKey = lineHeader;
        string location = lineData;
        line = GoToNextLine(line, stream);

        // Do not hand a null line to the helpers; when line is null neither
        // value is read again before being reassigned.
        lineData = line == null ? null : GetLineData(line, FeatureDataIndent);
        lineHeader = line == null ? null : GetLineHeader(line, FeatureDataIndent);
        while ((line != null)
            && (lineHeader == string.Empty)
            && (lineData != string.Empty)
            && !lineData.StartsWith("/", StringComparison.Ordinal))
        {
            location += lineData;
            line = GoToNextLine(line, stream);
            lineData = line == null ? null : GetLineData(line, FeatureDataIndent);
            lineHeader = line == null ? null : GetLineHeader(line, FeatureDataIndent);
        }

        // create features as MetadataListItems
        FeatureItem feature = new FeatureItem(featureKey, locBuilder.GetLocation(location));

        // process the list of qualifiers, which are each in the form of
        // /key="value"
        string qualifierKey = string.Empty;
        string qualifierValue = string.Empty;

        // True while a double-quoted qualifier value has been opened but not yet closed.
        bool quotationMarkStarted = false;

        while (line != null)
        {
            lineData = GetLineData(line, FeatureDataIndent);
            lineHeader = GetLineHeader(line, FeatureDataIndent);
            if ((lineHeader == string.Empty) && (lineData != null))
            {
                // A leading '/' normally starts a new qualifier. However, when a
                // multi-line value is wrapped, a '/' that belongs to the value can
                // end up first on a continuation line; that case is recognised by
                // the quote still being open.
                if (lineData.StartsWith("/", StringComparison.Ordinal) && !quotationMarkStarted)
                {
                    // new qualifier; save previous if this isn't the first
                    if (!String.IsNullOrEmpty(qualifierKey))
                    {
                        AddQualifierToFeature(feature, qualifierKey, qualifierValue);
                    }

                    // set the key and value of this qualifier
                    int equalsIndex = lineData.IndexOf('=');
                    if (equalsIndex < 0)
                    {
                        // no value, just key (this is allowed, see NC_005213.gbk)
                        qualifierKey = lineData.Substring(1);
                        qualifierValue = string.Empty;
                    }
                    else if (equalsIndex > 0)
                    {
                        qualifierKey = lineData.Substring(1, equalsIndex - 1);
                        qualifierValue = lineData.Substring(equalsIndex + 1);

                        // StartsWith/EndsWith are safe for an empty value ("/key="),
                        // where indexing the first or last character would throw.
                        // A value consisting of a single '"' only opens the quote,
                        // so a closing quote requires more than one character.
                        bool opensQuote = qualifierValue.StartsWith("\"", StringComparison.Ordinal);
                        bool closesQuote = qualifierValue.Length > 1
                            && qualifierValue.EndsWith("\"", StringComparison.Ordinal);
                        quotationMarkStarted = opensQuote && !closesQuote;
                    }
                    else
                    {
                        // NOTE(review): lineData starts with '/', so IndexOf('=') cannot
                        // be 0 and this branch looks unreachable; an empty key such as
                        // "/=x" is handled by the branch above - confirm intent.
                        string message = String.Format(
                            CultureInfo.CurrentCulture,
                            Properties.Resource.GenbankInvalidFeature,
                            line);
                        Trace.Report(message);
                        throw new InvalidDataException(message);
                    }
                }
                else
                {
                    // Continuation of previous line; "note" gets a line break, and
                    // everything else except "translation" and "transl_except" gets a
                    // space to separate words.
                    if (qualifierKey == "note")
                    {
                        qualifierValue += Environment.NewLine;
                    }
                    else if (qualifierKey != "translation" && qualifierKey != "transl_except")
                    {
                        qualifierValue += " ";
                    }

                    qualifierValue += lineData;

                    // EndsWith instead of indexing: the accumulated value can still be
                    // empty here (e.g. an empty continuation of a "translation").
                    if (qualifierValue.EndsWith("\"", StringComparison.Ordinal))
                    {
                        quotationMarkStarted = false;
                    }
                }

                line = GoToNextLine(line, stream);
            }
            else if (line.StartsWith("\t", StringComparison.Ordinal))
            {
                // this seems to be data corruption; but BioPerl test set includes
                // (old, 2003) NT_021877.gbk which has this problem, so we
                // handle it
                ApplicationLog.WriteLine("WARN: nonstandard line format at line {0}: '{1}'", lineNumber, line);
                qualifierValue += " " + line.Trim();
                if (qualifierValue.EndsWith("\"", StringComparison.Ordinal))
                {
                    quotationMarkStarted = false;
                }

                line = GoToNextLine(line, stream);
            }
            else
            {
                break;
            }
        }

        // add last qualifier
        if (!String.IsNullOrEmpty(qualifierKey))
        {
            AddQualifierToFeature(feature, qualifierKey, qualifierValue);
        }

        // still add feature, even if it has no qualifiers
        featureList.Add(StandardFeatureMap.GetStandardFeatureItem(feature));
    }

    if (featureList.Count > 0)
    {
        ((GenBankMetadata)sequence.Metadata[Helper.GenBankMetadataKey]).Features = features;
    }

    return line;
}
/// <summary>
/// Builds a copy of this FeatureItem. The copy is created with the FeatureItem
/// copy constructor and then passed through
/// StandardFeatureMap.GetStandardFeatureItem, whose result is returned.
/// </summary>
/// <returns>The FeatureItem returned by StandardFeatureMap for the copy.</returns>
public FeatureItem Clone()
{
    FeatureItem duplicate = new FeatureItem(this);
    return StandardFeatureMap.GetStandardFeatureItem(duplicate);
}
/// <summary>
/// Parses the feature table of a GenBank record from the given reader.
/// Each feature consists of a key, a (possibly multi-line) location and zero or
/// more qualifiers of the form /key="value". Every parsed feature is converted
/// with StandardFeatureMap.GetStandardFeatureItem and collected; if at least one
/// feature was found, the collection is stored in the sequence's GenBank metadata.
/// Parsing stops at the first non-empty line that does not start with a space,
/// or when the reader has no more lines.
/// </summary>
/// <param name="bioReader">The reader positioned at the feature table.</param>
/// <param name="sequence">The sequence whose GenBank metadata receives the features.</param>
/// <exception cref="InvalidOperationException">LocationBuilder is null.</exception>
/// <exception cref="InvalidDataException">A feature line has no header.</exception>
private void ParseFeatures(BioTextReader bioReader, ref Sequence sequence)
{
    ILocationBuilder locBuilder = LocationBuilder;
    if (locBuilder == null)
    {
        throw new InvalidOperationException(Resource.NullLocationBuild);
    }

    // set data indent for features
    bioReader.DataIndent = _featureDataIndent;

    // The sub-items of a feature are referred to as qualifiers. These do not have unique
    // keys, so they are stored as lists in the SubItems dictionary.
    SequenceFeatures features = new SequenceFeatures();
    IList<FeatureItem> featureList = features.All;

    while (bioReader.HasLines)
    {
        if (String.IsNullOrEmpty(bioReader.Line) || bioReader.LineHeader == "FEATURES")
        {
            bioReader.GoToNextLine();
            continue;
        }

        if (bioReader.Line[0] != ' ')
        {
            // start of non-feature text
            break;
        }

        if (!bioReader.LineHasHeader)
        {
            string message = Properties.Resource.GenbankEmptyFeature;
            Trace.Report(message);
            throw new InvalidDataException(message);
        }

        // check for multi-line location string
        string featureKey = bioReader.LineHeader;
        string location = bioReader.LineData;
        bioReader.GoToNextLine();
        while (bioReader.HasLines
            && !bioReader.LineHasHeader
            && bioReader.LineHasData
            && !bioReader.LineData.StartsWith("/", StringComparison.Ordinal))
        {
            location += bioReader.LineData;
            bioReader.GoToNextLine();
        }

        // create features as MetadataListItems
        FeatureItem feature = new FeatureItem(featureKey, locBuilder.GetLocation(location));

        // process the list of qualifiers, which are each in the form of
        // /key="value"
        string qualifierKey = string.Empty;
        string qualifierValue = string.Empty;

        // True while a double-quoted qualifier value has been opened but not yet closed.
        bool quotationMarkStarted = false;

        while (bioReader.HasLines)
        {
            if (!bioReader.LineHasHeader && bioReader.LineHasData)
            {
                string lineData = bioReader.LineData;

                // A leading '/' normally starts a new qualifier. However, when a
                // multi-line value is wrapped, a '/' that belongs to the value can
                // end up first on a continuation line; such a line must be treated
                // as a continuation, which is recognised by the quote still being open.
                if (lineData.StartsWith("/", StringComparison.Ordinal) && !quotationMarkStarted)
                {
                    // new qualifier; save previous if this isn't the first
                    if (!String.IsNullOrEmpty(qualifierKey))
                    {
                        AddQualifierToFeature(feature, qualifierKey, qualifierValue);
                    }

                    // set the key and value of this qualifier
                    int equalsIndex = lineData.IndexOf('=');
                    if (equalsIndex < 0)
                    {
                        // no value, just key (this is allowed, see NC_005213.gbk)
                        qualifierKey = lineData.Substring(1);
                        qualifierValue = string.Empty;
                    }
                    else if (equalsIndex > 0)
                    {
                        qualifierKey = lineData.Substring(1, equalsIndex - 1);
                        qualifierValue = lineData.Substring(equalsIndex + 1);

                        // StartsWith/EndsWith are safe for an empty value ("/key=").
                        // A value consisting of a single '"' only opens the quote,
                        // so a closing quote requires more than one character.
                        bool opensQuote = qualifierValue.StartsWith("\"", StringComparison.Ordinal);
                        bool closesQuote = qualifierValue.Length > 1
                            && qualifierValue.EndsWith("\"", StringComparison.Ordinal);
                        quotationMarkStarted = opensQuote && !closesQuote;
                    }
                    else
                    {
                        // NOTE(review): lineData starts with '/', so IndexOf('=') cannot
                        // be 0 and this branch looks unreachable; an empty key such as
                        // "/=x" is handled by the branch above - confirm intent.
                        string message = String.Format(
                            CultureInfo.CurrentCulture,
                            Properties.Resource.GenbankInvalidFeature,
                            bioReader.Line);
                        Trace.Report(message);
                        throw new InvalidDataException(message);
                    }
                }
                else
                {
                    // Continuation of previous line; "note" gets a line break, and
                    // everything else except "translation" and "transl_except" gets a
                    // space to separate words.
                    if (qualifierKey == "note")
                    {
                        qualifierValue += Environment.NewLine;
                    }
                    else if (qualifierKey != "translation" && qualifierKey != "transl_except")
                    {
                        qualifierValue += " ";
                    }

                    qualifierValue += lineData;

                    // A trailing double quote closes the quoted value.
                    if (qualifierValue.EndsWith("\"", StringComparison.Ordinal))
                    {
                        quotationMarkStarted = false;
                    }
                }

                bioReader.GoToNextLine();
            }
            else if (bioReader.Line.StartsWith("\t", StringComparison.Ordinal))
            {
                // this seems to be data corruption; but BioPerl test set includes
                // (old, 2003) NT_021877.gbk which has this problem, so we
                // handle it
                ApplicationLog.WriteLine("WARN: nonstandard line format at line {0}: '{1}'", bioReader.LineNumber, bioReader.Line);
                qualifierValue += " " + bioReader.Line.Trim();
                if (qualifierValue.EndsWith("\"", StringComparison.Ordinal))
                {
                    quotationMarkStarted = false;
                }

                bioReader.GoToNextLine();
            }
            else
            {
                break;
            }
        }

        // add last qualifier
        if (!String.IsNullOrEmpty(qualifierKey))
        {
            AddQualifierToFeature(feature, qualifierKey, qualifierValue);
        }

        // still add feature, even if it has no qualifiers
        featureList.Add(StandardFeatureMap.GetStandardFeatureItem(feature));
    }

    if (featureList.Count > 0)
    {
        ((GenBankMetadata)sequence.Metadata[Helper.GenBankMetadataKey]).Features = features;
    }
}