Пример #1
0
        /// <summary>
        /// Parses the FEATURES section of a GenBank record, starting at the supplied line.
        /// Each feature is built from its key, its (possibly multi-line) location and its
        /// qualifiers; when at least one feature was read, the collection is stored on the
        /// GenBank metadata of the sequence.
        /// </summary>
        /// <param name="line">The current line; parsing starts here.</param>
        /// <param name="sequence">The sequence whose GenBank metadata receives the parsed features.</param>
        /// <param name="stream">The stream reader used to fetch the following lines.</param>
        /// <returns>
        /// The first non-empty line that does not start with a space (the start of
        /// non-feature text), or null when the loop ran out of lines.
        /// </returns>
        /// <exception cref="InvalidOperationException">LocationBuilder is null.</exception>
        /// <exception cref="InvalidDataException">
        /// A line that should start a feature yields a null header, or a qualifier line is malformed.
        /// </exception>
        private string ParseFeatures(string line, ref Sequence sequence, StreamReader stream)
        {
            ILocationBuilder locBuilder = LocationBuilder;

            if (locBuilder == null)
            {
                throw new InvalidOperationException(Properties.Resource.NullLocationBuild);
            }

            // data portion of the line currently being examined
            string lineData;

            // The sub-items of a feature are referred to as qualifiers.  These do not have unique
            // keys, so they are stored as lists in the SubItems dictionary.
            SequenceFeatures   features    = new SequenceFeatures();
            IList<FeatureItem> featureList = features.All;

            while (line != null)
            {
                string lineHeader = GetLineHeader(line, FeatureDataIndent);
                if (String.IsNullOrEmpty(line) || lineHeader == "FEATURES")
                {
                    // skip blank lines and the section header itself
                    line = GoToNextLine(line, stream);
                    continue;
                }

                if (line[0] != ' ')
                {
                    // start of non-feature text
                    break;
                }

                if (lineHeader == null)
                {
                    string message = Properties.Resource.GenbankEmptyFeature;
                    Trace.Report(message);
                    throw new InvalidDataException(message);
                }

                // check for multi-line location string: lines with an empty header whose
                // data does not start a qualifier ("/") extend the location
                lineData = GetLineData(line, FeatureDataIndent);
                string featureKey = lineHeader;
                string location   = lineData;
                line       = GoToNextLine(line, stream);
                lineData   = GetLineData(line, FeatureDataIndent);
                lineHeader = GetLineHeader(line, FeatureDataIndent);
                while ((line != null) && (lineHeader == string.Empty) &&
                       (lineData != string.Empty) && !lineData.StartsWith("/", StringComparison.Ordinal))
                {
                    location  += lineData;
                    line       = GoToNextLine(line, stream);
                    lineData   = GetLineData(line, FeatureDataIndent);
                    lineHeader = GetLineHeader(line, FeatureDataIndent);
                }

                // create features as MetadataListItems
                FeatureItem feature = new FeatureItem(featureKey, locBuilder.GetLocation(location));

                // process the list of qualifiers, which are each in the form of
                // /key="value"
                string qualifierKey         = string.Empty;
                string qualifierValue       = string.Empty;
                bool   quotationMarkStarted = false;

                while (line != null)
                {
                    lineData   = GetLineData(line, FeatureDataIndent);
                    lineHeader = GetLineHeader(line, FeatureDataIndent);
                    if ((lineHeader == string.Empty) && (lineData != null))
                    {
                        // A leading '/' normally starts a new qualifier.  However, when a
                        // qualifier value spans several lines, a line break can fall such that
                        // a "/" belonging to the value starts a continuation line.  That case
                        // is recognized by tracking whether a double quote is still open.
                        if (lineData.StartsWith("/", StringComparison.Ordinal) && !quotationMarkStarted)
                        {
                            // new qualifier; save previous if this isn't the first
                            if (!String.IsNullOrEmpty(qualifierKey))
                            {
                                AddQualifierToFeature(feature, qualifierKey, qualifierValue);
                            }

                            // set the key and value of this qualifier
                            int equalsIndex = lineData.IndexOf('=');
                            if (equalsIndex < 0)
                            {
                                // no value, just key (this is allowed, see NC_005213.gbk)
                                qualifierKey   = lineData.Substring(1);
                                qualifierValue = string.Empty;
                            }
                            else if (equalsIndex > 0)
                            {
                                qualifierKey   = lineData.Substring(1, equalsIndex - 1);
                                qualifierValue = lineData.Substring(equalsIndex + 1);

                                // The value stays open only when it begins with a double quote
                                // that is not closed on this same line.  An empty value ("/key=")
                                // has no characters to inspect, and a lone '"' is an opening
                                // quote only, so neither may be indexed or treated as closed.
                                bool opensQuote  = qualifierValue.StartsWith("\"", StringComparison.Ordinal);
                                bool closesQuote = qualifierValue.Length > 1 &&
                                                   qualifierValue.EndsWith("\"", StringComparison.Ordinal);
                                quotationMarkStarted = opensQuote && !closesQuote;
                            }
                            else
                            {
                                string message = String.Format(
                                    CultureInfo.CurrentCulture,
                                    Properties.Resource.GenbankInvalidFeature,
                                    line);
                                Trace.Report(message);
                                throw new InvalidDataException(message);
                            }
                        }
                        else
                        {
                            // Continuation of previous line; "note" gets a line break, and
                            // everything else except "translation" and "transl_except" gets a
                            // space to separate words.
                            if (qualifierKey == "note")
                            {
                                qualifierValue += Environment.NewLine;
                            }
                            else if (qualifierKey != "translation" && qualifierKey != "transl_except")
                            {
                                qualifierValue += " ";
                            }

                            qualifierValue += lineData;

                            // EndsWith is safe even if the accumulated value is still empty
                            if (qualifierValue.EndsWith("\"", StringComparison.Ordinal))
                            {
                                quotationMarkStarted = false;
                            }
                        }

                        line = GoToNextLine(line, stream);
                    }
                    else if (line.StartsWith("\t", StringComparison.Ordinal))
                    {
                        // this seems to be data corruption; but BioPerl test set includes
                        // (old, 2003) NT_021877.gbk which has this problem, so we
                        // handle it
                        ApplicationLog.WriteLine("WARN: nonstandard line format at line {0}: '{1}'", lineNumber, line);
                        qualifierValue += " " + line.Trim();
                        if (qualifierValue.EndsWith("\"", StringComparison.Ordinal))
                        {
                            quotationMarkStarted = false;
                        }

                        line = GoToNextLine(line, stream);
                    }
                    else
                    {
                        // anything else ends this feature's qualifier list
                        break;
                    }
                }

                // add last qualifier
                if (!String.IsNullOrEmpty(qualifierKey))
                {
                    AddQualifierToFeature(feature, qualifierKey, qualifierValue);
                }

                // still add feature, even if it has no qualifiers
                featureList.Add(StandardFeatureMap.GetStandardFeatureItem(feature));
            }

            if (featureList.Count > 0)
            {
                ((GenBankMetadata)sequence.Metadata[Helper.GenBankMetadataKey]).Features = features;
            }

            return line;
        }
Пример #2
0
 /// <summary>
 /// Copies this FeatureItem: a new FeatureItem is copy-constructed from the current
 /// instance and then passed through StandardFeatureMap.GetStandardFeatureItem.
 /// </summary>
 /// <returns>The FeatureItem that StandardFeatureMap returns for the copy.</returns>
 public FeatureItem Clone()
 {
     FeatureItem copy = new FeatureItem(this);
     return StandardFeatureMap.GetStandardFeatureItem(copy);
 }
Пример #3
0
        /// <summary>
        /// Parses the FEATURES section of a GenBank record from the reader's current line.
        /// Each feature is built from its key, its (possibly multi-line) location and its
        /// qualifiers; when at least one feature was read, the collection is stored on the
        /// GenBank metadata of the sequence.  On return the reader is positioned at the
        /// first non-feature line, or has no lines left.
        /// </summary>
        /// <param name="bioReader">The reader supplying the lines; its DataIndent is set to the feature data indent.</param>
        /// <param name="sequence">The sequence whose GenBank metadata receives the parsed features.</param>
        /// <exception cref="InvalidOperationException">LocationBuilder is null.</exception>
        /// <exception cref="InvalidDataException">
        /// A line that should start a feature has no header, or a qualifier line is malformed.
        /// </exception>
        private void ParseFeatures(BioTextReader bioReader, ref Sequence sequence)
        {
            ILocationBuilder locBuilder = LocationBuilder;

            if (locBuilder == null)
            {
                throw new InvalidOperationException(Resource.NullLocationBuild);
            }

            // set data indent for features
            bioReader.DataIndent = _featureDataIndent;

            // The sub-items of a feature are referred to as qualifiers.  These do not have unique
            // keys, so they are stored as lists in the SubItems dictionary.
            SequenceFeatures   features    = new SequenceFeatures();
            IList<FeatureItem> featureList = features.All;

            while (bioReader.HasLines)
            {
                if (String.IsNullOrEmpty(bioReader.Line) || bioReader.LineHeader == "FEATURES")
                {
                    // skip blank lines and the section header itself
                    bioReader.GoToNextLine();
                    continue;
                }

                if (bioReader.Line[0] != ' ')
                {
                    // start of non-feature text
                    break;
                }

                if (!bioReader.LineHasHeader)
                {
                    string message = Properties.Resource.GenbankEmptyFeature;
                    Trace.Report(message);
                    throw new InvalidDataException(message);
                }

                // check for multi-line location string: header-less lines whose data does
                // not start a qualifier ("/") extend the location
                string featureKey = bioReader.LineHeader;
                string location   = bioReader.LineData;
                bioReader.GoToNextLine();
                while (bioReader.HasLines && !bioReader.LineHasHeader &&
                       bioReader.LineHasData && !bioReader.LineData.StartsWith("/", StringComparison.Ordinal))
                {
                    location += bioReader.LineData;
                    bioReader.GoToNextLine();
                }

                // create features as MetadataListItems
                FeatureItem feature = new FeatureItem(featureKey, locBuilder.GetLocation(location));

                // process the list of qualifiers, which are each in the form of
                // /key="value"
                string qualifierKey         = string.Empty;
                string qualifierValue       = string.Empty;
                bool   quotationMarkStarted = false;

                while (bioReader.HasLines)
                {
                    if (!bioReader.LineHasHeader && bioReader.LineHasData)
                    {
                        // A leading '/' normally starts a new qualifier.  However, when a
                        // qualifier value spans several lines, a line break can fall such that
                        // a "/" belonging to the value starts a continuation line.  Tracking
                        // whether a double quote is still open keeps such a line attached to
                        // the value instead of splitting it off as a bogus qualifier.
                        if (bioReader.LineData.StartsWith("/", StringComparison.Ordinal) && !quotationMarkStarted)
                        {
                            // new qualifier; save previous if this isn't the first
                            if (!String.IsNullOrEmpty(qualifierKey))
                            {
                                AddQualifierToFeature(feature, qualifierKey, qualifierValue);
                            }

                            // set the key and value of this qualifier
                            int equalsIndex = bioReader.LineData.IndexOf('=');
                            if (equalsIndex < 0)
                            {
                                // no value, just key (this is allowed, see NC_005213.gbk)
                                qualifierKey   = bioReader.LineData.Substring(1);
                                qualifierValue = string.Empty;
                            }
                            else if (equalsIndex > 0)
                            {
                                qualifierKey   = bioReader.LineData.Substring(1, equalsIndex - 1);
                                qualifierValue = bioReader.LineData.Substring(equalsIndex + 1);

                                // The value stays open only when it begins with a double quote
                                // that is not closed on this same line.  StartsWith/EndsWith
                                // tolerate an empty value, and a lone '"' counts as opening only.
                                bool opensQuote  = qualifierValue.StartsWith("\"", StringComparison.Ordinal);
                                bool closesQuote = qualifierValue.Length > 1 &&
                                                   qualifierValue.EndsWith("\"", StringComparison.Ordinal);
                                quotationMarkStarted = opensQuote && !closesQuote;
                            }
                            else
                            {
                                string message = String.Format(
                                    CultureInfo.CurrentCulture,
                                    Properties.Resource.GenbankInvalidFeature,
                                    bioReader.Line);
                                Trace.Report(message);
                                throw new InvalidDataException(message);
                            }
                        }
                        else
                        {
                            // Continuation of previous line; "note" gets a line break, and
                            // everything else except "translation" and "transl_except" gets a
                            // space to separate words.
                            if (qualifierKey == "note")
                            {
                                qualifierValue += Environment.NewLine;
                            }
                            else if (qualifierKey != "translation" && qualifierKey != "transl_except")
                            {
                                qualifierValue += " ";
                            }

                            qualifierValue += bioReader.LineData;

                            // a trailing double quote closes the value
                            if (qualifierValue.EndsWith("\"", StringComparison.Ordinal))
                            {
                                quotationMarkStarted = false;
                            }
                        }

                        bioReader.GoToNextLine();
                    }
                    else if (bioReader.Line.StartsWith("\t", StringComparison.Ordinal))
                    {
                        // this seems to be data corruption; but BioPerl test set includes
                        // (old, 2003) NT_021877.gbk which has this problem, so we
                        // handle it
                        ApplicationLog.WriteLine("WARN: nonstandard line format at line {0}: '{1}'",
                                                 bioReader.LineNumber, bioReader.Line);
                        qualifierValue += " " + bioReader.Line.Trim();
                        if (qualifierValue.EndsWith("\"", StringComparison.Ordinal))
                        {
                            quotationMarkStarted = false;
                        }

                        bioReader.GoToNextLine();
                    }
                    else
                    {
                        // anything else ends this feature's qualifier list
                        break;
                    }
                }

                // add last qualifier
                if (!String.IsNullOrEmpty(qualifierKey))
                {
                    AddQualifierToFeature(feature, qualifierKey, qualifierValue);
                }

                // still add feature, even if it has no qualifiers
                featureList.Add(StandardFeatureMap.GetStandardFeatureItem(feature));
            }

            if (featureList.Count > 0)
            {
                ((GenBankMetadata)sequence.Metadata[Helper.GenBankMetadataKey]).Features = features;
            }
        }