Ejemplo n.º 1
0
        /// <summary>
        /// Returns a standard feature class instance if the key of the specified feature item is found
        /// in the feature map; otherwise returns the specified feature item itself.
        /// For example:
        /// If the specified feature item has the key "Gene" then this method returns an instance of the
        /// Gene class created from the item's location, with the item's qualifiers added to it.
        /// </summary>
        /// <param name="item">Feature item for which the standard feature item instance is needed.</param>
        /// <returns>A new instance of the mapped type when the key is mapped; otherwise the specified
        /// item itself.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="item"/> is null.</exception>
        public static FeatureItem GetStandardFeatureItem(FeatureItem item)
        {
            if (item == null)
            {
                throw new ArgumentNullException("item");
            }

            // Single lookup instead of ContainsKey followed by the indexer.
            Type type;
            if (!featureMap.TryGetValue(item.Key, out type) || type == null)
            {
                return item;
            }

            // The mapped type is constructed through reflection with the item's location as
            // its only constructor argument.
            FeatureItem newItem = (FeatureItem)Activator.CreateInstance(type, item.Location);

            // The value lists are added as-is (not copied), so the new item and the
            // original item reference the same qualifier lists.
            foreach (KeyValuePair<string, List<string>> kvp in item.Qualifiers)
            {
                newItem.Qualifiers.Add(kvp.Key, kvp.Value);
            }

            return newItem;
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Protected copy constructor used by the clone method.
 /// Copies the key, clones the location and builds a new qualifier dictionary in which
 /// every non-null value list is copied into a new list.
 /// </summary>
 /// <param name="other">FeatureItem instance to clone.</param>
 protected FeatureItem(FeatureItem other)
 {
     Key = other.Key;
     this.Location = other.Location.Clone();
     Qualifiers = new Dictionary<string, List<string>>();

     foreach (KeyValuePair<string, List<string>> entry in other.Qualifiers)
     {
         // A null value list is preserved as null; any other list is copied so that
         // the clone does not share list instances with the source item.
         List<string> copiedValues = entry.Value == null
             ? null
             : new List<string>(entry.Value);

         Qualifiers.Add(entry.Key, copiedValues);
     }
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Returns a standard feature class instance if the key of the specified feature item is found
        /// in the feature map; otherwise returns the specified feature item itself.
        /// For example:
        /// If the specified feature item has the key "Gene" then this method returns an instance of the
        /// Gene class created from the item's location, with the item's qualifiers added to it.
        /// </summary>
        /// <param name="item">Feature item for which the standard feature item instance is needed.</param>
        /// <returns>A new instance of the mapped type when the key is mapped; otherwise the specified
        /// item itself.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="item"/> is null.</exception>
        public static FeatureItem GetStandardFeatureItem(FeatureItem item)
        {
            // Fail with a descriptive exception instead of a NullReferenceException on item.Key.
            if (item == null)
            {
                throw new ArgumentNullException("item");
            }

            // Single lookup instead of ContainsKey followed by the indexer.
            Type type;
            if (!featureMap.TryGetValue(item.Key, out type) || type == null)
            {
                return item;
            }

            // The mapped type is constructed through reflection with the item's location as
            // its only constructor argument.
            FeatureItem newItem = (FeatureItem)Activator.CreateInstance(type, item.Location);

            // The value lists are added as-is (not copied), so the new item and the
            // original item reference the same qualifier lists.
            foreach (KeyValuePair<string, List<string>> kvp in item.Qualifiers)
            {
                newItem.Qualifiers.Add(kvp.Key, kvp.Value);
            }

            return newItem;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Helper method to parse the qualifiers of a GenBank feature from a range of cells.
        /// Rows are consumed starting at <paramref name="rowIndex"/> until the end of the range is
        /// reached or a row with a non-blank key column is found. Rows whose sub-key column is
        /// blank are skipped; every other row adds its value to the qualifier named by the sub-key.
        /// </summary>
        /// <param name="featureItem">Feature object that receives the parsed qualifiers.</param>
        /// <param name="cellRange">Range of cells</param>
        /// <param name="rowIndex">Index of the row at which parsing starts.</param>
        /// <returns>Index of the first row that was not consumed.</returns>
        /// <exception cref="FormatException">Thrown when there is at least one row to parse and the
        /// range has fewer than three columns.</exception>
        private static int ParseQualifiers(FeatureItem featureItem, object[,] cellRange, int rowIndex)
        {
            int rowCount = cellRange.GetLength(0);

            // The column count cannot change while iterating, so it is validated once.
            // The rowIndex test keeps the original contract: nothing is thrown when there
            // are no rows left to parse.
            if (rowIndex < rowCount && cellRange.GetLength(1) < 3)
            {
                string message = String.Format(
                            CultureInfo.InvariantCulture,
                            Resources.UnrecognizedGenBankMetadataFormat,
                            REFERENCE);
                throw new FormatException(message);
            }

            while (rowIndex < rowCount)
            {
                // A non-blank key column ends the qualifier rows; the row is left
                // unconsumed for the caller.
                object keyCell = cellRange[rowIndex, KeyColumnIndex];
                if (keyCell != null && !string.IsNullOrWhiteSpace(keyCell.ToString()))
                {
                    break;
                }

                object subKeyCell = cellRange[rowIndex, SubKeyColumnIndex];
                string subKey = subKeyCell != null ? subKeyCell.ToString() : string.Empty;
                if (string.IsNullOrWhiteSpace(subKey))
                {
                    rowIndex++;
                    continue;
                }

                object valueCell = cellRange[rowIndex, ValueColumnIndex];
                string value = valueCell != null ? valueCell.ToString() : string.Empty;

                // Single lookup; a missing key or a null list both get a fresh list.
                List<string> values;
                if (!featureItem.Qualifiers.TryGetValue(subKey, out values) || values == null)
                {
                    values = new List<string>();
                    featureItem.Qualifiers[subKey] = values;
                }

                values.Add(value);

                rowIndex++;
            }

            return rowIndex;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Parses the GenBank features from the GenBank file.
        /// Each feature is read as a key, a location that may span several lines, and zero or
        /// more qualifiers in the form /key="value". When at least one feature was read, the
        /// parsed features are assigned to the GenBank metadata of the sequence.
        /// </summary>
        /// <param name="line">parse line</param>
        /// <param name="sequence">The sequence.</param>
        /// <param name="stream">The stream reader.</param>
        /// <returns>The line at which feature parsing stopped: the first non-empty line that does
        /// not start with a space, or null when no lines are left.</returns>
        /// <exception cref="InvalidOperationException">Thrown when no location builder is set.</exception>
        /// <exception cref="InvalidDataException">Thrown when a feature line has no header or a
        /// qualifier line is malformed.</exception>
        private string ParseFeatures(string line, ref Sequence sequence, StreamReader stream)
        {
            ILocationBuilder locBuilder = LocationBuilder;

            if (locBuilder == null)
            {
                throw new InvalidOperationException(Properties.Resource.NullLocationBuild);
            }

            string lineData;

            // The sub-items of a feature are referred to as qualifiers.  These do not have unique
            // keys, so they are stored as lists in the SubItems dictionary.
            SequenceFeatures features = new SequenceFeatures();
            IList<FeatureItem> featureList = features.All;

            while (line != null)
            {
                string lineHeader = GetLineHeader(line, FeatureDataIndent);
                if (String.IsNullOrEmpty(line) || lineHeader == "FEATURES")
                {
                    line = GoToNextLine(line, stream);
                    continue;
                }

                if (line[0] != ' ')
                {
                    // start of non-feature text
                    break;
                }

                if (lineHeader == null)
                {
                    string message = Properties.Resource.GenbankEmptyFeature;
                    Trace.Report(message);
                    throw new InvalidDataException(message);
                }

                // check for multi-line location string
                lineData = GetLineData(line, FeatureDataIndent);
                string featureKey = lineHeader;
                string location = lineData;
                line = GoToNextLine(line, stream);
                lineData = GetLineData(line, FeatureDataIndent);
                lineHeader = GetLineHeader(line, FeatureDataIndent);

                // IsNullOrEmpty (rather than a comparison with string.Empty) keeps a null
                // lineData from being dereferenced by StartsWith.
                while ((line != null) && (lineHeader == string.Empty) &&
                       !string.IsNullOrEmpty(lineData) && !lineData.StartsWith("/", StringComparison.Ordinal))
                {
                    location += lineData;

                    // NOTE(review): the result of this call is discarded; it looks redundant.
                    // Kept because GetLineData is not visible here - confirm it has no side
                    // effects before removing.
                    GetLineData(line, FeatureDataIndent);
                    line = GoToNextLine(line, stream);
                    lineData = GetLineData(line, FeatureDataIndent);
                    lineHeader = GetLineHeader(line, FeatureDataIndent);
                }

                // create features as MetadataListItems
                FeatureItem feature = new FeatureItem(featureKey, locBuilder.GetLocation(location));

                // process the list of qualifiers, which are each in the form of
                // /key="value"
                string qualifierKey = string.Empty;
                string qualifierValue = string.Empty;
                bool quotationMarkStarted = false;

                while (line != null)
                {
                    lineData = GetLineData(line, FeatureDataIndent);
                    lineHeader = GetLineHeader(line, FeatureDataIndent);
                    if ((lineHeader == string.Empty) && (lineData != null))
                    {
                        // A leading '/' normally starts a new qualifier.
                        // Note that, if there are multiple lines of qualifierValue,
                        // sometimes a line break will happen such that a "/" which is
                        // part of the qualifierValue will start a continuation line.
                        // This is identified by verifying open and closing double quotes.
                        if (lineData.StartsWith("/", StringComparison.Ordinal) && !quotationMarkStarted)
                        {
                            // new qualifier; save previous if this isn't the first
                            if (!String.IsNullOrEmpty(qualifierKey))
                            {
                                AddQualifierToFeature(feature, qualifierKey, qualifierValue);
                            }

                            // set the key and value of this qualifier
                            int equalsIndex = lineData.IndexOf('=');
                            if (equalsIndex < 0)
                            {
                                // no value, just key (this is allowed, see NC_005213.gbk)
                                qualifierKey = lineData.Substring(1);
                                qualifierValue = string.Empty;
                            }
                            else if (equalsIndex > 0)
                            {
                                qualifierKey = lineData.Substring(1, equalsIndex - 1);
                                qualifierValue = lineData.Substring(equalsIndex + 1);

                                // StartsWith/EndsWith are safe on an empty value ("/key="),
                                // where indexing the first or last character would throw.
                                quotationMarkStarted = qualifierValue.StartsWith("\"", StringComparison.Ordinal);

                                // A value consisting of a single quote character is an opening
                                // quote only, so at least two characters are required before the
                                // trailing quote counts as the closing one.
                                if (qualifierValue.Length > 1 &&
                                    qualifierValue.EndsWith("\"", StringComparison.Ordinal))
                                {
                                    quotationMarkStarted = false;
                                }
                            }
                            else
                            {
                                // equalsIndex cannot be 0 here because lineData starts with '/';
                                // the branch is kept as a defensive guard.
                                string message = String.Format(
                                    CultureInfo.CurrentCulture,
                                    Properties.Resource.GenbankInvalidFeature,
                                    line);
                                Trace.Report(message);
                                throw new InvalidDataException(message);
                            }
                        }
                        else
                        {
                            // Continuation of previous line; "note" gets a line break, and
                            // everything else except "translation" and "transl_except" gets a
                            // space to separate words.
                            if (qualifierKey == "note")
                            {
                                qualifierValue += Environment.NewLine;
                            }
                            else if (qualifierKey != "translation" && qualifierKey != "transl_except")
                            {
                                qualifierValue += " ";
                            }

                            qualifierValue += lineData;

                            // EndsWith is safe when the accumulated value is still empty.
                            if (qualifierValue.EndsWith("\"", StringComparison.Ordinal))
                            {
                                quotationMarkStarted = false;
                            }
                        }

                        line = GoToNextLine(line, stream);
                    }
                    else if (line.StartsWith("\t", StringComparison.Ordinal))
                    {
                        // this seems to be data corruption; but BioPerl test set includes
                        // (old, 2003) NT_021877.gbk which has this problem, so we
                        // handle it
                        ApplicationLog.WriteLine("WARN: nonstandard line format at line {0}: '{1}'", lineNumber, line);
                        qualifierValue += " " + line.Trim();
                        if (qualifierValue.EndsWith("\"", StringComparison.Ordinal))
                        {
                            quotationMarkStarted = false;
                        }

                        line = GoToNextLine(line, stream);
                    }
                    else
                    {
                        break;
                    }
                }

                // add last qualifier
                if (!String.IsNullOrEmpty(qualifierKey))
                {
                    AddQualifierToFeature(feature, qualifierKey, qualifierValue);
                }

                // still add feature, even if it has no qualifiers
                featureList.Add(StandardFeatureMap.GetStandardFeatureItem(feature));
            }

            if (featureList.Count > 0)
            {
                ((GenBankMetadata)sequence.Metadata[Helper.GenBankMetadataKey]).Features = features;
            }

            return line;
        }
Ejemplo n.º 6
0
        /// <summary>
        ///     Validate addition of GenBank features.
        ///     The sequence described by the XML node is formatted to a temporary GenBank file
        ///     and parsed back; two features are then added to its metadata and their keys and
        ///     locations are verified.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        private void ValidateAdditionGenBankFeatures(string nodeName)
        {
            // Get Values from XML node.
            string filePath = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.FilePathNode);
            string alphabetName = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.AlphabetNameNode);
            string expectedSequence = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ExpectedSequenceNode);
            string addFirstKey = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.FirstKey);
            string addSecondKey = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.SecondKey);
            string addFirstLocation = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.FirstLocation);
            string addSecondLocation = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.SecondLocation);
            string addFirstQualifier = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.FirstQualifier);
            string addSecondQualifier = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.SecondQualifier);

            ISequenceParser parser1 = new GenBankParser();
            IEnumerable<ISequence> seqList1 = parser1.Parse(filePath);
            var localBuilderObj = new LocationBuilder();

            string tempFileName = Path.GetTempFileName();
            GenBankParser parserObj = null;

            // try/finally so the parser is closed and the temporary file is removed even
            // when parsing, formatting or an assertion fails.
            try
            {
                string expectedUpdatedSequence =
                    expectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
                var orgSeq = new Sequence(Utility.GetAlphabet(alphabetName),
                                          expectedUpdatedSequence);

                // Fetch the first parsed sequence once instead of enumerating the
                // parser output separately for the ID and for the metadata.
                ISequence parsedSeq = seqList1.ElementAt(0);
                orgSeq.ID = parsedSeq.ID;
                orgSeq.Metadata.Add(Constants.GenBank,
                                    parsedSeq.Metadata[Constants.GenBank]);

                ISequenceFormatter formatterObj = new GenBankFormatter();
                formatterObj.Format(orgSeq, tempFileName);

                // parse GenBank file.
                parserObj = new GenBankParser();
                IEnumerable<ISequence> seqList = parserObj.Parse(tempFileName);

                ISequence seq = seqList.ElementAt(0);
                var metadata = (GenBankMetadata) seq.Metadata[Constants.GenBank];

                // Add a new features to Genbank features list.
                metadata.Features = new SequenceFeatures();
                var feature = new FeatureItem(addFirstKey, addFirstLocation);
                var qualifierValues = new List<string> { addFirstQualifier, addFirstQualifier };
                feature.Qualifiers.Add(addFirstQualifier, qualifierValues);
                metadata.Features.All.Add(feature);

                feature = new FeatureItem(addSecondKey, addSecondLocation);
                qualifierValues = new List<string> { addSecondQualifier, addSecondQualifier };
                feature.Qualifiers.Add(addSecondQualifier, qualifierValues);
                metadata.Features.All.Add(feature);

                // Validate added GenBank features (expected value first, actual second,
                // so that failure messages are reported the right way round).
                Assert.AreEqual(addFirstKey, metadata.Features.All[0].Key.ToString(null));
                Assert.AreEqual(
                    addFirstLocation,
                    localBuilderObj.GetLocationString(metadata.Features.All[0].Location));
                Assert.AreEqual(addSecondKey, metadata.Features.All[1].Key.ToString(null));
                Assert.AreEqual(
                    addSecondLocation,
                    localBuilderObj.GetLocationString(metadata.Features.All[1].Location));
            }
            finally
            {
                if (parserObj != null)
                {
                    parserObj.Close();
                }

                File.Delete(tempFileName);
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        ///     Validate the sub features of GenBank feature items.
        ///     Five feature items are built from the keys and locations configured in the XML
        ///     node, and the number of sub features reported for the first three of them is
        ///     compared with the expected counts from the same node.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        private void ValidateGenBankSubFeatures(string nodeName)
        {
            // Get Values from XML node.
            string firstKey = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FirstKey);
            string secondKey = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SecondKey);
            string thirdKey = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ThirdFeatureKey);
            string fourthKey = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FourthKey);
            string fifthKey = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FifthKey);
            string firstLocation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FirstLocation);
            string secondLocation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SecondLocation);
            string thirdLocation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ThirdLocation);
            string fourthLocation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FourthLocation);
            string fifthLocation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FifthLocation);
            string featuresCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MainFeaturesCount);
            string secondCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SecondCount);
            string thirdCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ThirdCount);

            // Create the feature items and register all of them, in order.
            var seqFeatures = new SequenceFeatures();
            FeatureItem[] allItems =
            {
                new FeatureItem(firstKey, firstLocation),
                new FeatureItem(secondKey, secondLocation),
                new FeatureItem(thirdKey, thirdLocation),
                new FeatureItem(fourthKey, fourthLocation),
                new FeatureItem(fifthKey, fifthLocation)
            };

            foreach (FeatureItem featureItem in allItems)
            {
                seqFeatures.All.Add(featureItem);
            }

            // Validate sub features of the first three items against their expected counts.
            string[] expectedCounts = { featuresCount, secondCount, thirdCount };
            for (int index = 0; index < expectedCounts.Length; index++)
            {
                List<FeatureItem> subFeatures = allItems[index].GetSubFeatures(seqFeatures);
                Assert.AreEqual(Convert.ToInt32(expectedCounts[index], null), subFeatures.Count);
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Parses the GenBank features from the GenBank file.
        /// Each feature is read as a key, a location that may span several lines, and zero or
        /// more qualifiers in the form /key="value". When at least one feature was read, the
        /// parsed features are assigned to the GenBank metadata of the sequence.
        /// </summary>
        /// <param name="line">parse line</param>
        /// <param name="sequence">The sequence.</param>
        /// <param name="stream">The stream reader.</param>
        /// <returns>The line at which feature parsing stopped: the first non-empty line that does
        /// not start with a space, or null when no lines are left.</returns>
        /// <exception cref="InvalidOperationException">Thrown when no location builder is set.</exception>
        /// <exception cref="InvalidDataException">Thrown when a feature line has no header or a
        /// qualifier line is malformed.</exception>
        private string ParseFeatures(string line, ref Sequence sequence, StreamReader stream)
        {
            ILocationBuilder locBuilder = LocationBuilder;
            if (locBuilder == null)
            {
                throw new InvalidOperationException(Properties.Resource.NullLocationBuild);
            }

            string lineData;

            // The sub-items of a feature are referred to as qualifiers.  These do not have unique
            // keys, so they are stored as lists in the SubItems dictionary.
            SequenceFeatures features = new SequenceFeatures();
            IList<FeatureItem> featureList = features.All;

            while (line != null)
            {
                string lineHeader = GetLineHeader(line, FeatureDataIndent);
                if (String.IsNullOrEmpty(line) || lineHeader == "FEATURES")
                {
                    line = GoToNextLine(line, stream);
                    continue;
                }

                if (line[0] != ' ')
                {
                    // start of non-feature text
                    break;
                }

                if (lineHeader == null)
                {
                    string message = Properties.Resource.GenbankEmptyFeature;
                    Trace.Report(message);
                    throw new InvalidDataException(message);
                }

                // check for multi-line location string
                lineData = GetLineData(line, FeatureDataIndent);
                string featureKey = lineHeader;
                string location = lineData;
                line = GoToNextLine(line, stream);
                lineData = GetLineData(line, FeatureDataIndent);
                lineHeader = GetLineHeader(line, FeatureDataIndent);

                // IsNullOrEmpty (rather than a comparison with string.Empty) keeps a null
                // lineData from being dereferenced by StartsWith.
                while ((line != null) && (lineHeader == string.Empty) &&
                    !string.IsNullOrEmpty(lineData) && !lineData.StartsWith("/", StringComparison.Ordinal))
                {
                    location += lineData;

                    // NOTE(review): the result of this call is discarded; it looks redundant.
                    // Kept because GetLineData is not visible here - confirm it has no side
                    // effects before removing.
                    GetLineData(line, FeatureDataIndent);
                    line = GoToNextLine(line, stream);
                    lineData = GetLineData(line, FeatureDataIndent);
                    lineHeader = GetLineHeader(line, FeatureDataIndent);
                }

                // create features as MetadataListItems
                FeatureItem feature = new FeatureItem(featureKey, locBuilder.GetLocation(location));

                // process the list of qualifiers, which are each in the form of
                // /key="value"
                string qualifierKey = string.Empty;
                string qualifierValue = string.Empty;
                bool quotationMarkStarted = false;

                while (line != null)
                {
                    lineData = GetLineData(line, FeatureDataIndent);
                    lineHeader = GetLineHeader(line, FeatureDataIndent);
                    if ((lineHeader == string.Empty) && (lineData != null))
                    {
                        // A leading '/' normally starts a new qualifier.
                        // Note that, if there are multiple lines of qualifierValue,
                        // sometimes a line break will happen such that a "/" which is
                        // part of the qualifierValue will start a continuation line.
                        // This is identified by verifying open and closing double quotes.
                        if (lineData.StartsWith("/", StringComparison.Ordinal) && !quotationMarkStarted)
                        {
                            // new qualifier; save previous if this isn't the first
                            if (!String.IsNullOrEmpty(qualifierKey))
                            {
                                AddQualifierToFeature(feature, qualifierKey, qualifierValue);
                            }

                            // set the key and value of this qualifier
                            int equalsIndex = lineData.IndexOf('=');
                            if (equalsIndex < 0)
                            {
                                // no value, just key (this is allowed, see NC_005213.gbk)
                                qualifierKey = lineData.Substring(1);
                                qualifierValue = string.Empty;
                            }
                            else if (equalsIndex > 0)
                            {
                                qualifierKey = lineData.Substring(1, equalsIndex - 1);
                                qualifierValue = lineData.Substring(equalsIndex + 1);

                                // StartsWith/EndsWith are safe on an empty value ("/key="),
                                // where indexing the first or last character would throw.
                                quotationMarkStarted = qualifierValue.StartsWith("\"", StringComparison.Ordinal);

                                // A value consisting of a single quote character is an opening
                                // quote only, so at least two characters are required before the
                                // trailing quote counts as the closing one.
                                if (qualifierValue.Length > 1 &&
                                    qualifierValue.EndsWith("\"", StringComparison.Ordinal))
                                {
                                    quotationMarkStarted = false;
                                }
                            }
                            else
                            {
                                // equalsIndex cannot be 0 here because lineData starts with '/';
                                // the branch is kept as a defensive guard.
                                string message = String.Format(
                                        CultureInfo.CurrentCulture,
                                        Properties.Resource.GenbankInvalidFeature,
                                        line);
                                Trace.Report(message);
                                throw new InvalidDataException(message);
                            }
                        }
                        else
                        {
                            // Continuation of previous line; "note" gets a line break, and
                            // everything else except "translation" and "transl_except" gets a
                            // space to separate words.
                            if (qualifierKey == "note")
                            {
                                qualifierValue += Environment.NewLine;
                            }
                            else if (qualifierKey != "translation" && qualifierKey != "transl_except")
                            {
                                qualifierValue += " ";
                            }

                            qualifierValue += lineData;

                            // EndsWith is safe when the accumulated value is still empty.
                            if (qualifierValue.EndsWith("\"", StringComparison.Ordinal))
                            {
                                quotationMarkStarted = false;
                            }
                        }

                        line = GoToNextLine(line, stream);
                    }
                    else if (line.StartsWith("\t", StringComparison.Ordinal))
                    {
                        // this seems to be data corruption; but BioPerl test set includes
                        // (old, 2003) NT_021877.gbk which has this problem, so we
                        // handle it
                        ApplicationLog.WriteLine("WARN: nonstandard line format at line {0}: '{1}'", lineNumber, line);
                        qualifierValue += " " + line.Trim();
                        if (qualifierValue.EndsWith("\"", StringComparison.Ordinal))
                        {
                            quotationMarkStarted = false;
                        }

                        line = GoToNextLine(line, stream);
                    }
                    else
                    {
                        break;
                    }
                }

                // add last qualifier
                if (!String.IsNullOrEmpty(qualifierKey))
                {
                    AddQualifierToFeature(feature, qualifierKey, qualifierValue);
                }

                // still add feature, even if it has no qualifiers
                featureList.Add(StandardFeatureMap.GetStandardFeatureItem(feature));
            }

            if (featureList.Count > 0)
            {
                ((GenBankMetadata)sequence.Metadata[Helper.GenBankMetadataKey]).Features = features;
            }

            return line;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Adds a qualifier to the feature object. The sub-items of a feature are referred to as qualifiers.  These do not have unique
        /// keys, so they are stored as lists in the SubItems dictionary.
        /// If the key is not present yet, or is present with a null list, a new list is stored
        /// under the key before the value is appended.
        /// </summary>
        /// <param name="feature">The feature to which qualifier is to be added.</param>
        /// <param name="qualifierKey">The qualifier key to be added to the feature.</param>
        /// <param name="qualifierValue">The qualifier value.</param>
        private static void AddQualifierToFeature(FeatureItem feature, string qualifierKey, string qualifierValue)
        {
            // Single lookup instead of ContainsKey followed by two indexer reads.
            List<string> values;
            if (!feature.Qualifiers.TryGetValue(qualifierKey, out values) || values == null)
            {
                values = new List<string>();
                feature.Qualifiers[qualifierKey] = values;
            }

            values.Add(qualifierValue);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Returns list of citation references in this metadata which are referred in the specified feature.
        /// Each value of the feature's citation qualifier is stripped of '[' and ']' and parsed as
        /// an integer; values that are empty or not numeric are ignored, and each matching
        /// reference is returned only once.
        /// </summary>
        /// <param name="item">Feature Item.</param>
        /// <returns>The matching citation references; an empty list when the item is null or has no
        /// citation qualifier.</returns>
        public List<CitationReference> GetCitationsReferredInFeature(FeatureItem item)
        {
            List<CitationReference> list = new List<CitationReference>();
            if (item == null)
            {
                return list;
            }

            // Single lookup; a null value list is treated like a missing qualifier.
            List<string> citationValues;
            if (!item.Qualifiers.TryGetValue(StandardQualifierNames.Citation, out citationValues)
                || citationValues == null)
            {
                return list;
            }

            foreach (string str in citationValues)
            {
                if (string.IsNullOrEmpty(str))
                {
                    continue;
                }

                // Citation values are written like "[1]"; drop the brackets before parsing.
                string strCitationNumber = str.Replace("[", string.Empty).Replace("]", string.Empty);
                int citationNumber;
                if (!int.TryParse(strCitationNumber, out citationNumber))
                {
                    continue;
                }

                CitationReference citation = References.FirstOrDefault(reference => reference.Number == citationNumber);
                if (citation != null && !list.Contains(citation))
                {
                    list.Add(citation);
                }
            }

            return list;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Reads feature entries from the reader and, when at least one was read, stores them as the
        /// Features of the GenBank metadata held by the sequence.
        /// Each entry consists of a header line (feature key and location, where the location may
        /// span several lines) followed by qualifier lines of the form /key="value" or /key.
        /// Reading stops at the first non-empty line that does not start with a space.
        /// </summary>
        /// <param name="bioReader">Reader to consume lines from; its DataIndent is set to the feature indent.</param>
        /// <param name="sequence">Sequence whose GenBank metadata receives the parsed features.</param>
        /// <exception cref="InvalidOperationException">LocationBuilder is null.</exception>
        /// <exception cref="InvalidDataException">
        /// An indented line has no header (empty feature), or a qualifier line has an empty key
        /// before its '=' sign.
        /// </exception>
        private void ParseFeatures(BioTextReader bioReader, ref Sequence sequence)
        {
            ILocationBuilder locBuilder = LocationBuilder;

            if (locBuilder == null)
            {
                throw new InvalidOperationException(Resource.NullLocationBuild);
            }

            // set data indent for features
            bioReader.DataIndent = _featureDataIndent;

            // The sub-items of a feature are referred to as qualifiers.  These do not have unique
            // keys, so they are stored as lists in the SubItems dictionary.
            SequenceFeatures features = new SequenceFeatures();
            IList<FeatureItem> featureList = features.All;

            while (bioReader.HasLines)
            {
                // skip blank lines and the section header itself
                if (String.IsNullOrEmpty(bioReader.Line) || bioReader.LineHeader == "FEATURES")
                {
                    bioReader.GoToNextLine();
                    continue;
                }

                if (bioReader.Line[0] != ' ')
                {
                    // start of non-feature text
                    break;
                }

                if (!bioReader.LineHasHeader)
                {
                    string message = Properties.Resource.GenbankEmptyFeature;
                    Trace.Report(message);
                    throw new InvalidDataException(message);
                }

                // check for multi-line location string: header-less data lines that do not
                // start a qualifier ('/') are appended to the location
                string featureKey = bioReader.LineHeader;
                string location = bioReader.LineData;
                bioReader.GoToNextLine();
                while (bioReader.HasLines && !bioReader.LineHasHeader &&
                       bioReader.LineHasData && !bioReader.LineData.StartsWith("/", StringComparison.Ordinal))
                {
                    location += bioReader.LineData;
                    bioReader.GoToNextLine();
                }

                // create features as MetadataListItems
                FeatureItem feature = new FeatureItem(featureKey, locBuilder.GetLocation(location));

                // process the list of qualifiers, which are each in the form of
                // /key="value"
                string qualifierKey = string.Empty;
                string qualifierValue = string.Empty;
                while (bioReader.HasLines)
                {
                    if (!bioReader.LineHasHeader && bioReader.LineHasData)
                    {
                        // '/' denotes the start of a new qualifier
                        if (bioReader.LineData.StartsWith("/", StringComparison.Ordinal))
                        {
                            // new qualifier; save previous if this isn't the first
                            if (!String.IsNullOrEmpty(qualifierKey))
                            {
                                AddQualifierToFeature(feature, qualifierKey, qualifierValue);
                            }

                            // set the key and value of this qualifier
                            int equalsIndex = bioReader.LineData.IndexOf('=');
                            if (equalsIndex < 0)
                            {
                                // no value, just key (this is allowed, see NC_005213.gbk)
                                qualifierKey = bioReader.LineData.Substring(1);
                                qualifierValue = string.Empty;
                            }
                            else if (equalsIndex > 1)
                            {
                                // at least one key character sits between '/' and '='
                                qualifierKey = bioReader.LineData.Substring(1, equalsIndex - 1);
                                qualifierValue = bioReader.LineData.Substring(equalsIndex + 1);
                            }
                            else
                            {
                                // The line starts with '/', so the only remaining case is
                                // equalsIndex == 1, i.e. "/=..." with an empty key. Report it
                                // instead of silently dropping the qualifier.
                                string message = String.Format(
                                    CultureInfo.CurrentCulture,
                                    Properties.Resource.GenbankInvalidFeature,
                                    bioReader.Line);
                                Trace.Report(message);
                                throw new InvalidDataException(message);
                            }
                        }
                        else
                        {
                            // Continuation of previous line; "note" gets a line break, and
                            // everything else except "translation" and "transl_except" gets a
                            // space to separate words.
                            if (qualifierKey == "note")
                            {
                                qualifierValue += Environment.NewLine;
                            }
                            else if (qualifierKey != "translation" && qualifierKey != "transl_except")
                            {
                                qualifierValue += " ";
                            }

                            qualifierValue += bioReader.LineData;
                        }

                        bioReader.GoToNextLine();
                    }
                    else if (bioReader.Line.StartsWith("\t", StringComparison.Ordinal))
                    {
                        // this seems to be data corruption; but BioPerl test set includes
                        // (old, 2003) NT_021877.gbk which has this problem, so we
                        // handle it
                        ApplicationLog.WriteLine("WARN: nonstandard line format at line {0}: '{1}'",
                                                 bioReader.LineNumber, bioReader.Line);
                        qualifierValue += " " + bioReader.Line.Trim();
                        bioReader.GoToNextLine();
                    }
                    else
                    {
                        // next feature header or end of the features section
                        break;
                    }
                }

                // add last qualifier
                if (!String.IsNullOrEmpty(qualifierKey))
                {
                    AddQualifierToFeature(feature, qualifierKey, qualifierValue);
                }

                // still add feature, even if it has no qualifiers
                featureList.Add(StandardFeatureMap.GetStandardFeatureItem(feature));
            }

            // only replace the metadata's feature set when something was actually parsed
            if (featureList.Count > 0)
            {
                ((GenBankMetadata)sequence.Metadata[Helper.GenBankMetadataKey]).Features = features;
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Parses a GenBank file, replaces its feature list with a single hand-built feature that
        /// carries two qualifiers, and validates the feature's key, location and qualifier keys.
        /// </summary>
        public void ValidateAdditionSingleGenBankQualifier()
        {
            // Get Values from XML node.
            string filePath = utilityObj.xmlUtil.GetTextValue(
                Constants.MediumSizeRNAGenBankFeaturesNode, Constants.FilePathNode);
            string addFirstKey = utilityObj.xmlUtil.GetTextValue(
                Constants.MediumSizeRNAGenBankFeaturesNode, Constants.FirstKey);
            string addFirstLocation = utilityObj.xmlUtil.GetTextValue(
                Constants.MediumSizeRNAGenBankFeaturesNode, Constants.FirstLocation);
            string addFirstQualifier = utilityObj.xmlUtil.GetTextValue(
                Constants.MediumSizeRNAGenBankFeaturesNode, Constants.FirstQualifier);
            string addSecondQualifier = utilityObj.xmlUtil.GetTextValue(
                Constants.MediumSizeRNAGenBankFeaturesNode, Constants.SecondQualifier);

            // Parse a GenBank file.
            ISequenceParser parserObj = new GenBankParser();
            IEnumerable<ISequence> seqList = parserObj.Parse(filePath);
            var locBuilder = new LocationBuilder();

            var metadata =
                (GenBankMetadata)seqList.ElementAt(0).Metadata[Constants.GenBank];

            // Replace the parsed features with a new feature carrying the first qualifier.
            metadata.Features = new SequenceFeatures();
            var feature = new FeatureItem(addFirstKey, addFirstLocation);
            feature.Qualifiers.Add(
                addFirstQualifier,
                new List<string> { addFirstQualifier, addFirstQualifier });
            metadata.Features.All.Add(feature);

            // Attach the second qualifier to the same feature.
            feature.Qualifiers.Add(
                addSecondQualifier,
                new List<string> { addSecondQualifier, addSecondQualifier });

            // NOTE(review): the same feature instance is appended a second time here, as in the
            // original test; confirm whether the duplicate list entry is intended.
            metadata.Features.All.Add(feature);

            // Validate added GenBank features (expected value first, actual second).
            FeatureItem added = metadata.Features.All[0];
            Assert.AreEqual(addFirstKey, added.Key);
            Assert.AreEqual(
                addFirstLocation,
                locBuilder.GetLocationString(added.Location));

            // Validate that both qualifiers are present on the added feature.
            Assert.IsTrue(added.Qualifiers.ContainsKey(addFirstQualifier));
            Assert.IsTrue(added.Qualifiers.ContainsKey(addSecondQualifier));
        }