Пример #1
0
        /// <summary>
        /// Parses a locus string into a <see cref="GenBankLocusInfo"/>.
        /// </summary>
        /// <remarks>
        /// The text is split on single spaces and every non-empty token is offered, in list order,
        /// to the parsers returned by <c>GetLocusTokenParsers</c>. The first parser that accepts a
        /// token is removed from the list, so each parser consumes at most one token. Tokens that
        /// no remaining parser accepts are reported through <c>Trace.Report</c> and skipped.
        /// </remarks>
        /// <param name="locusText">Locus text. Must not be null.</param>
        /// <returns>
        /// Locus containing the info in the passed in string.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="locusText"/> is null.</exception>
        /// <exception cref="Exception">No sequence type token was recognised in the text.</exception>
        public GenBankLocusInfo Parse(string locusText)
        {
            if (locusText == null)
            {
                throw new ArgumentNullException("locusText");
            }

            this.line = locusText;

            var locus        = new GenBankLocusInfo();
            var tokenParsers = GetLocusTokenParsers(locus);

            foreach (string token in this.line.Split(' '))
            {
                // Consecutive spaces produce empty entries; they carry no information.
                if (string.IsNullOrEmpty(token))
                {
                    continue;
                }

                // Copy the loop variable so the lambda does not close over the iteration variable.
                string currentToken = token;
                Func<string, bool> usedParser = tokenParsers.FirstOrDefault(parser => parser(currentToken));

                if (usedParser == null)
                {
                    // Every remaining parser rejected the token: report it and move on.
                    Trace.Report(string.Format(CultureInfo.InvariantCulture,
                                               Resource.GenBankFailedToParseLocusTokenFormat, token, this.line));
                    continue;
                }

                // A parser may claim only one token, so retire it once it has matched.
                tokenParsers.Remove(usedParser);
            }

            if (string.IsNullOrEmpty(locus.SequenceType))
            {
                string message = string.Format(CultureInfo.InvariantCulture,
                                               Resource.GenBankUnknownLocusFormat, this.line);
                Trace.Report(message);

                // The exception type is kept as-is so existing callers are unaffected.
                throw new Exception(message);
            }

            // A sequence type of "aa" marks the molecule as a protein.
            if (string.Equals(locus.SequenceType, "aa", StringComparison.OrdinalIgnoreCase))
            {
                locus.MoleculeType = MoleculeType.Protein;
            }

            return locus;
        }
Пример #2
0
        /// <summary>
        /// Writes the LOCUS line for a sequence to the given writer.
        /// </summary>
        /// <param name="sequence">
        /// Sequence whose ID, count, molecule type, alphabet and GenBank metadata supply the fields.
        /// </param>
        /// <param name="writer">Writer that receives the formatted line.</param>
        private void WriteLocus(ISequence sequence, TextWriter writer)
        {
            // NOTE(review): the indexer presumably throws when the key is absent; confirm that
            // callers guarantee the key exists, since the null check below only covers a null value.
            GenBankMetadata metadata = (GenBankMetadata)sequence.Metadata[Helper.GenBankMetadataKey];

            GenBankLocusInfo locusInfo = null;

            if (metadata != null)
            {
                locusInfo = metadata.Locus;
            }

            // Determine molecule and sequence type. When the sequence carries no valid molecule
            // type, fall back to its alphabet.
            MoleculeType moleculeType = sequence.MoleculeType;
            bool hasMoleculeType = moleculeType != MoleculeType.Invalid;

            bool isProtein = hasMoleculeType
                ? moleculeType == MoleculeType.Protein
                : sequence.Alphabet == Alphabets.Protein;

            string seqType;
            string molType;

            if (isProtein)
            {
                seqType = "aa";
                molType = string.Empty; // protein files don't use molecule type
            }
            else
            {
                seqType = "bp";

                if (hasMoleculeType)
                {
                    molType = moleculeType.ToString();
                }
                else if (sequence.Alphabet == Alphabets.DNA)
                {
                    molType = MoleculeType.DNA.ToString();
                }
                else
                {
                    molType = MoleculeType.RNA.ToString();
                }
            }

            // Retrieve metadata fields; these defaults are used when there is no locus info.
            string   strandType     = string.Empty;
            string   strandTopology = string.Empty;
            string   division       = string.Empty;
            DateTime date           = DateTime.Now;

            if (locusInfo != null)
            {
                strandType     = Helper.GetStrandType(locusInfo.Strand);
                strandTopology = Helper.GetStrandTopology(locusInfo.StrandTopology);

                if (locusInfo.DivisionCode != SequenceDivisionCode.None)
                {
                    division = locusInfo.DivisionCode.ToString();
                }

                date = locusInfo.Date;
            }

            // Format the date with the invariant culture so the month abbreviation does not
            // depend on the locale of the machine writing the file.
            string dateText = date
                .ToString("dd-MMM-yyyy", System.Globalization.CultureInfo.InvariantCulture)
                .ToUpperInvariant();

            writer.WriteLine("{0,-12}{1,-16} {2,11} {3} {4,3}{5,-6}  {6,-8} {7,3} {8}",
                             "LOCUS",
                             sequence.ID,
                             sequence.Count,
                             seqType,
                             strandType,
                             molType,
                             strandTopology,
                             division,
                             dateText);
        }
Пример #3
0
        /// <summary>
        /// The LOCUS format has defined positions for each individual value in the LOCUS, but through
        /// experimentation and some reading this format is not followed. Instead we have to parse each token
        /// and interpret which value each token belongs to. Luckily there is a standard set of values for all
        /// but the DATE and LOCUS ID, which we can infer based on the string.
        /// </summary>
        /// <remarks>
        /// Each parser writes its result into <paramref name="locus"/> as a side effect. The topology,
        /// strand/molecule, division and sequence type parsers assign their property even when the token
        /// does not match (resetting it to the "none" value), so the list is meant to be used with parsers
        /// being discarded once they have matched.
        /// </remarks>
        /// <param name="locus">Locus instance that the returned parsers write parsed values into.</param>
        /// <returns>
        /// Ordered list of token parsers; each returns <c>true</c> when it recognised the token.
        /// </returns>
        private static List<Func<string, bool>> GetLocusTokenParsers(GenBankLocusInfo locus)
        {
            //
            // The order of token parsers matters: the items which we know definitions for must be
            // parsed before those which are inferred. The final parser accepts anything.
            //
            return new List<Func<string, bool>>
            {
                //
                // 1. LOCUS Token
                //

                token => token == "LOCUS",

                //
                // 2. Strand Topology
                //

                token =>
                {
                    locus.StrandTopology = FirstOrDefault(
                        LocusConstants.MoleculeTopologies,
                        topology => string.Equals(topology.Key, token, StringComparison.OrdinalIgnoreCase),
                        new KeyValuePair<string, SequenceStrandTopology>("", SequenceStrandTopology.None)).Value;

                    return locus.StrandTopology != SequenceStrandTopology.None;
                },

                //
                // 3. Strand & Molecule Definition Token
                //

                token =>
                {
                    //
                    // Strand and molecule definition are one token defining two separate attributes,
                    // such as ds-DNA, so the parsing occurs on one token.
                    //

                    var strandMatch = FirstOrDefault(
                        LocusConstants.SequenceStrandTypes,
                        strand => token.StartsWith(strand.Key, StringComparison.OrdinalIgnoreCase),
                        new KeyValuePair<string, SequenceStrandType>("", SequenceStrandType.None));

                    locus.Strand = strandMatch.Value;

                    // Strip the strand prefix that actually matched, leaving the molecule part.
                    string moleculeText = token;
                    if (locus.Strand != SequenceStrandType.None)
                    {
                        moleculeText = token.Substring(strandMatch.Key.Length);
                    }

                    locus.MoleculeType = FirstOrDefault(
                        LocusConstants.MoleculeTypes,
                        moleculeType => string.Equals(moleculeText, moleculeType.Key, StringComparison.OrdinalIgnoreCase),
                        new KeyValuePair<string, MoleculeType>("", MoleculeType.Invalid)).Value;

                    return locus.MoleculeType != MoleculeType.Invalid ||
                           locus.Strand != SequenceStrandType.None;
                },

                //
                // 4. Division Code
                //

                token =>
                {
                    locus.DivisionCode = FirstOrDefault(
                        LocusConstants.SequenceDivisionCodes,
                        divisionCode => string.Equals(divisionCode.Key, token, StringComparison.OrdinalIgnoreCase),
                        new KeyValuePair<string, SequenceDivisionCode>("", SequenceDivisionCode.None)).Value;

                    return locus.DivisionCode != SequenceDivisionCode.None;
                },

                //
                // 5. Sequence Length
                //

                token =>
                {
                    // Parse with the invariant culture so the result does not depend on the locale.
                    int length;
                    bool parsed = int.TryParse(token, NumberStyles.Integer, CultureInfo.InvariantCulture, out length);
                    if (parsed)
                    {
                        locus.SequenceLength = length;
                    }
                    return parsed;
                },

                //
                // 6. Sequence Type
                //

                token =>
                {
                    locus.SequenceType = LocusConstants.SequenceTypes.FirstOrDefault(
                        sequenceType => string.Equals(sequenceType, token, StringComparison.OrdinalIgnoreCase));

                    return !string.IsNullOrEmpty(locus.SequenceType);
                },

                //
                // 7. Date
                //

                token =>
                {
                    // The invariant culture is used so that month abbreviations are recognised
                    // regardless of the current locale.
                    DateTime dateTime;
                    bool parsed = DateTime.TryParse(token, CultureInfo.InvariantCulture, DateTimeStyles.None, out dateTime);
                    if (parsed)
                    {
                        locus.Date = dateTime;
                    }
                    return parsed;
                },

                //
                // 8. Sequence Name / ID
                //

                token =>
                {
                    // Catch-all: any token not claimed by an earlier parser is taken as the name.
                    locus.Name = token;
                    return true;
                }
            };
        }