コード例 #1
0
ファイル: Mapper.cs プロジェクト: Plankankul/RNA-Analysis
 /// <summary>
 /// Creates a mapper for the given alignment. The mapper starts out flagged as not yet
 /// successfully mapped.
 /// </summary>
 /// <param name="alignment">Alignment stored in <c>MappedAlignment</c>.</param>
 /// <param name="connectionString">Value stored in <c>ConnectionString</c>.</param>
 /// <param name="databaseName">Value stored in <c>DatabaseName</c>.</param>
 public Mapper(SequenceAlignment alignment, string connectionString, string databaseName)
 {
     this.DatabaseName = databaseName;
     this.ConnectionString = connectionString;
     this.MappedAlignment = alignment;

     // Nothing has been mapped yet at construction time.
     this.MappedSuccessfully = false;
 }
コード例 #2
0
    /// <summary>
    /// Console demo: builds a score matrix over a small alphabet, prints it row by row,
    /// aligns two hard-coded sequences and prints both aligned strings, then waits for Enter.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        var symbols = new List<string> { "H", "A", "P", "L", "E", "-" };

        // Both inputs carry a leading empty entry, exactly as the demo data was written.
        string[] firstInput  = { "", "A", "P", "P", "L", "E" };
        string[] secondInput = { "", "H", "A", "P", "E" };

        var aligner = new SequenceAlignment(firstInput, secondInput);
        aligner.ScoreMatrix(symbols, 1, -1, -1);

        Dictionary<string, Dictionary<string, int>> scoreTable = aligner.scores;

        // One output line per alphabet symbol: the symbol, then every "key value" pair of its row.
        foreach (string rowSymbol in symbols)
        {
            Console.Write(rowSymbol);
            Console.Write(" ");

            Dictionary<string, int> row = scoreTable[rowSymbol];
            foreach (KeyValuePair<string, int> cell in row)
            {
                Console.Write(cell.Key + " " + cell.Value + " ");
            }

            Console.WriteLine();
        }

        Console.WriteLine();

        aligner.AlignScore();
        Dictionary<string, string> aligned = aligner.AlignSequence();

        Console.WriteLine(aligned["X"]);
        Console.WriteLine(aligned["Y"]);

        // Keep the console window open until the user presses Enter.
        Console.ReadLine();
    }
コード例 #3
0
        /// <summary>
        /// Aligns two protein sequences with the Needleman-Wunsch aligner, copies the first
        /// result (aligned sequences and metadata) into a fresh SequenceAlignment, and checks
        /// that its ToString() output matches the expected text from the XML test data once
        /// line breaks are stripped from both sides.
        /// </summary>
        public void ValidateSequenceAlignmentToString()
        {
            ISequenceAligner nwAligner       = SequenceAligners.NeedlemanWunsch;
            IAlphabet        proteinAlphabet = Alphabets.Protein;
            string           firstText       = "KRIPKSQNLRSIHSIFPFLEDKLSHLN";
            string           secondText      = "LNIPSLITLNKSIYVFSKRKKRLSGFLHN";

            // Create input sequences
            var inputs = new List<ISequence>
            {
                new Sequence(proteinAlphabet, firstText),
                new Sequence(proteinAlphabet, secondText)
            };

            // Get aligned sequences and copy the first result into a new alignment
            IList<ISequenceAlignment> results = nwAligner.Align(inputs);
            ISequenceAlignment        copy    = new SequenceAlignment();
            ISequenceAlignment        source  = results[0];

            foreach (var alignedSequence in source.AlignedSequences)
            {
                copy.AlignedSequences.Add(alignedSequence);
            }

            foreach (string key in source.Metadata.Keys)
            {
                copy.Metadata.Add(key, source.Metadata[key]);
            }

            string actualText   = copy.ToString();
            string expectedText = this.utilityObj.xmlUtil.GetTextValue(
                Constants.ToStringNodeName,
                Constants.SequenceAlignmentExpectedNode);

            // The expected text stores line breaks as the literal characters \r\n; strip those
            // there and the real line breaks from the actual output before comparing.
            string expectedFlat = expectedText.Replace("\\r\\n", "");
            string actualFlat   = actualText.Replace(System.Environment.NewLine, "");

            Assert.AreEqual(expectedFlat, actualFlat);
        }
コード例 #4
0
        /// <summary>
        /// Aligns two protein sequences with the Needleman-Wunsch aligner, copies the aligned
        /// sequences of the first result into a fresh SequenceAlignment and verifies the text
        /// produced by ToString(), using the platform's line terminator in the expectation.
        /// </summary>
        public void TestSequenceAlignmentToString()
        {
            const string firstText  = "KRIPKSQNLRSIHSIFPFLEDKLSHLN";
            const string secondText = "LNIPSLITLNKSIYVFSKRKKRLSGFLHN";

            ISequenceAligner nwAligner       = SequenceAligners.NeedlemanWunsch;
            IAlphabet        proteinAlphabet = Alphabets.Protein;

            // Create input sequences
            var inputs = new List<ISequence>();
            inputs.Add(new Sequence(proteinAlphabet, firstText));
            inputs.Add(new Sequence(proteinAlphabet, secondText));

            // Get aligned sequences
            IList<ISequenceAlignment> results = nwAligner.Align(inputs);
            ISequenceAlignment        copy    = new SequenceAlignment();

            for (int i = 0; i < results[0].AlignedSequences.Count; i++)
            {
                copy.AlignedSequences.Add(results[0].AlignedSequences[i]);
            }

            // Written with \r\n and converted below so the comparison works with any line terminator.
            const string expected = "XXIPXXXXLXXXXXXFXXXXXXLSXXLHN\r\n" +
                                    "KRIPKSQNLRSIHSIFPFLEDKLSHL--N\r\n" +
                                    "LNIPSLITLNKSIYVFSKRKKRLSGFLHN\r\n\r\n";

            string expectedForPlatform = expected.Replace("\r\n", Environment.NewLine);
            string actual              = copy.ToString();

            Assert.AreEqual(expectedForPlatform, actual);
        }
コード例 #5
0
        /// <summary>
        /// Aligns two protein sequences with the Needleman-Wunsch aligner, copies the first
        /// result (aligned sequences and metadata) into a fresh SequenceAlignment and verifies
        /// the text produced by ToString().
        /// </summary>
        /// <remarks>
        /// Line terminators are normalised on both sides before comparing, so the test does not
        /// depend on whether ToString() emits "\r\n" or "\n".
        /// </remarks>
        public void TestSequenceAlignmentToString()
        {
            ISequenceAligner aligner       = SequenceAligners.NeedlemanWunsch;
            IAlphabet        alphabet      = Alphabets.Protein;
            const string     origSequence1 = "KRIPKSQNLRSIHSIFPFLEDKLSHLN";
            const string     origSequence2 = "LNIPSLITLNKSIYVFSKRKKRLSGFLHN";

            // Create input sequences
            var inputSequences = new List<ISequence>
            {
                new Sequence(alphabet, origSequence1),
                new Sequence(alphabet, origSequence2)
            };

            // Get aligned sequences and copy the first result into a new alignment
            IList<ISequenceAlignment> alignments = aligner.Align(inputSequences);
            ISequenceAlignment        alignment  = new SequenceAlignment();
            ISequenceAlignment        source     = alignments[0];

            foreach (var alignedSequence in source.AlignedSequences)
            {
                alignment.AlignedSequences.Add(alignedSequence);
            }

            foreach (string key in source.Metadata.Keys)
            {
                alignment.Metadata.Add(key, source.Metadata[key]);
            }

            string       actual   = alignment.ToString();
            const string expected = "XXIPXXXXLXXXXXXFXXXXXXLSGFXXN\r\nKRIPKSQNLRSIHSIFPFLEDKLS--HLN\r\nLNIPSLITLNKSIYVFSKRKKRLSGFLHN\r\n\r\n";

            // Compare with a single, fixed line terminator on both sides.
            Assert.AreEqual(expected.Replace("\r\n", "\n"), actual.Replace("\r\n", "\n"));
        }
コード例 #6
0
        /// <summary>
        /// Converts the given reads to SAM aligned sequences, wraps them in a single
        /// SequenceAlignment with a one-entry SAM header, and writes the result to
        /// <paramref name="targetStream"/> using the SAM or BAM formatter, depending on
        /// the <c>samOutput</c> setting.
        /// </summary>
        /// <param name="reads">Reads to write; <c>null</c> is treated as an empty collection.</param>
        /// <param name="targetStream">Stream handed to the formatter.</param>
        public void Write(IEnumerable <Read> reads, Stream targetStream)
        {
            // A missing collection is handled like "no reads": the header is still written.
            if (reads == null)
            {
                reads = new Read[] {};
            }

            var formatter        = samOutput ? (IFormatter <ISequenceAlignment>) new SAMFormatter() : new BAMFormatter();
            var alignedSequences = reads.Select(SamReadsConverter.Convert).ToList();
            var alignment        = new SequenceAlignment();

            foreach (var alignedSequence in alignedSequences)
            {
                alignment.AlignedSequences.Add(alignedSequence);
            }

            // The reference name comes from the first read; "1" is the fallback when there are none.
            var referenceName = "1";

            if (alignedSequences.Count != 0)
            {
                referenceName = alignedSequences[0].RName;
            }

            alignment.Metadata.Add("SAMAlignmentHeader",
                                   new SAMAlignmentHeader
            {
                ReferenceSequences =
                {
                    new ReferenceSequenceInfo
                    {
                        Name = referenceName
                    }
                }
            });

            formatter.Format(targetStream, alignment);
        }
    }
コード例 #7
0
 /// <summary>
 /// Creates the view model for an alignment, sets up its sequence collection and the
 /// commands for mapping the alignment to rCAD and loading it into rCAD, then runs
 /// <c>Initialize()</c>.
 /// </summary>
 /// <param name="alignment">Alignment kept in the view model's backing field.</param>
 public AlignmentViewModel(SequenceAlignment alignment)
 {
     _alignment = alignment;
     _sequences = new MTObservableCollection<SequenceViewModel>();

     // Mapping can run only when no mapping is in progress or already done and a connection is given.
     MapToRCADCommand = new DelegatingCommand<rCADConnection>(
         MapAlignmentToRCAD,
         connection => !MappingToRCAD && !IsMappedToRCAD && connection != null);

     // Loading needs a finished mapping and must not be running, finished or failed already.
     LoadToRCADCommand = new DelegatingCommand<string>(
         LoadAlignmentToRCAD,
         ignored => !MappingToRCAD
                    && IsMappedToRCAD
                    && _alignmentToRCADMapping != null
                    && !LoadingToRCADFailed
                    && !LoadingToRCAD
                    && !IsLoadedToRCAD);

     Initialize();
 }
コード例 #8
0
        /// <summary>
        /// Calls GetObjectData on a default-constructed SequenceAlignment and checks that
        /// exactly four members are written to the serialization info.
        /// </summary>
        public void ValidateSequenceAlignmentGetObjectData()
        {
            const int expectedMemberCount = 4;

            var serializationInfo = new SerializationInfo(typeof(Sequence), new FormatterConverter());
            var streamingContext  = new StreamingContext(StreamingContextStates.All);
            var emptyAlignment    = new SequenceAlignment();

            emptyAlignment.GetObjectData(serializationInfo, streamingContext);

            Assert.AreEqual(expectedMemberCount, serializationInfo.MemberCount);
        }
コード例 #9
0
        /// <summary>
        /// Completion handler for the alignment-loading background worker. Publishes the loaded
        /// alignment as a new <c>AlignmentViewModel</c>, or reports a failure when the worker
        /// produced no usable alignment.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Completion data; its result is expected to be a SequenceAlignment.</param>
        private void LoadAlignmentWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
        {
            // RunWorkerCompletedEventArgs.Result throws when the worker faulted or was cancelled,
            // so it is only read after both have been ruled out. A result of an unexpected type
            // is treated like a missing result instead of being passed on as null.
            SequenceAlignment aln = null;

            if (e.Error == null && !e.Cancelled)
            {
                aln = e.Result as SequenceAlignment;
            }

            if (aln == null)
            {
                LoadingAlignmentStatusMessage = LOADING_ALIGNMENT_FAILED_MESSAGE;
                _loadingAlignment             = false;
                CommandManager.InvalidateRequerySuggested();
                return;
            }

            _loadingAlignment = false;
            CommandManager.InvalidateRequerySuggested();
            LoadingAlignmentStatusMessage = FINISHED_LOADING_ALIGNMENT_MESSAGE;
            Alignment = new AlignmentViewModel(aln);
        }
コード例 #10
0
        /// <summary>
        ///     Builds a SequenceAlignment by copying the first result of the given aligner and
        ///     verifies that the copy exposes the same aligned sequences and metadata.
        /// </summary>
        /// <param name="nodeName">XML node holding the alphabet name and the two input sequences.</param>
        /// <param name="aligner">Aligner used to produce the source alignment.</param>
        private void ValidateSequenceAlignmentCtor(string nodeName, ISequenceAligner aligner)
        {
            string    alphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
            IAlphabet alphabet     = Utility.GetAlphabet(alphabetName);
            string    firstText    = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
            string    secondText   = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

            // Create input sequences
            var inputs = new List<ISequence>
            {
                new Sequence(alphabet, firstText),
                new Sequence(alphabet, secondText)
            };

            // Get aligned sequences and copy the first result into a new alignment
            IList<ISequenceAlignment> results = aligner.Align(inputs);
            ISequenceAlignment        copy    = new SequenceAlignment();
            ISequenceAlignment        source  = results[0];

            foreach (var alignedSequence in source.AlignedSequences)
            {
                copy.AlignedSequences.Add(alignedSequence);
            }

            foreach (string key in source.Metadata.Keys)
            {
                copy.Metadata.Add(key, source.Metadata[key]);
            }

            // Validate the properties: first sequence of every aligned entry, then every metadata value
            for (int i = 0; i < source.AlignedSequences.Count; i++)
            {
                string expectedSequence = source.AlignedSequences[i].Sequences[0].ToString();
                string actualSequence   = copy.AlignedSequences[i].Sequences[0].ToString();

                Assert.AreEqual(expectedSequence, actualSequence);
            }

            foreach (string key in source.Metadata.Keys)
            {
                Assert.AreEqual(source.Metadata[key], copy.Metadata[key]);
            }

            ApplicationLog.WriteLine(@"Alignment BVT : Validation of sequence alignment  ctor completed successfully");
        }
コード例 #11
0
        /// <summary>
        /// Background-worker body that loads an alignment file. The loaded alignment is stored
        /// in <c>e.Result</c>; the result is left unset when the argument is missing, is not a
        /// SequenceAlignmentLoaderArgs, names no file, or asks for a type other than CRW.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Carries the loader arguments in and the loaded alignment out.</param>
        private void LoadAlignmentWorker(object sender, DoWorkEventArgs e)
        {
            // A null argument and an argument of the wrong type both end up as null here.
            var loaderArgs = e.Argument as SequenceAlignmentLoaderArgs;

            if (loaderArgs == null || loaderArgs.AlignmentFile == null)
            {
                return;
            }

            // CRW is the only alignment type handled by this worker.
            if (loaderArgs.AlignmentType != AlignmentType.CRW)
            {
                return;
            }

            ISequenceAlignmentLoader crwLoader = new CRWSequenceAlignmentLoader();
            SequenceAlignment        loaded    = crwLoader.Load(loaderArgs.AlignmentFile);

            e.Result = loaded;
        }
コード例 #12
0
        /// <summary>
        /// Parses a single Nexus text from a reader into a sequence alignment.
        /// The header is consumed first; then each "begin ..." block is dispatched on its
        /// name: TAXA supplies the sequence IDs, CHARACTERS supplies the sequence data,
        /// END stops parsing, and any other block is skipped up to its "end;" line.
        /// </summary>
        /// <param name="bioReader">A reader for a biological sequence text.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequences should be in readonly mode or not.
        /// The value is assigned to the IsReadOnly property of every parsed sequence.</param>
        /// <returns>
        /// A new sequence alignment holding one aligned sequence, to which every parsed
        /// sequence is added.
        /// </returns>
        /// <exception cref="ArgumentNullException">bioReader is null.</exception>
        /// <exception cref="InvalidDataException">
        /// No alphabet could be identified for a sequence, or the sequences resolve to different alphabets.
        /// </exception>
        protected ISequenceAlignment ParseOneWithSpecificFormat(BioTextReader bioReader, bool isReadOnly)
        {
            if (bioReader == null)
            {
                throw new ArgumentNullException("bioReader");
            }

            ParseHeader(bioReader);

            string             message           = string.Empty;
            ISequenceAlignment sequenceAlignment = new SequenceAlignment();

            // All parsed sequences go into this single aligned sequence.
            sequenceAlignment.AlignedSequences.Add(new AlignedSequence());
            IList <string> ids       = null;
            bool           isInBlock = true;

            // Blocks are only processed when the line following the header starts with "begin";
            // otherwise the (empty) alignment is returned as is.
            if (bioReader.Line.StartsWith("begin", StringComparison.OrdinalIgnoreCase))
            {
                while (bioReader.HasLines && isInBlock)
                {
                    // Skip blank lines between blocks.
                    if (string.IsNullOrEmpty(bioReader.Line.Trim()))
                    {
                        bioReader.GoToNextLine();
                        continue;
                    }

                    // The block name is the second token of the current line.
                    // NOTE(review): this indexes [1] without checking the token count — presumably
                    // every line reaching this point has at least two tokens; confirm against GetTokens.
                    string blockName = GetTokens(bioReader.Line)[1];

                    switch (blockName.ToUpper(CultureInfo.InvariantCulture))
                    {
                    case "TAXA":
                    case "TAXA;":
                        // This block contains the count of sequence & title of each sequence
                        ids = (IList <string>)ParseTaxaBlock(bioReader);

                        break;

                    case "CHARACTERS":
                    case "CHARACTERS;":
                        // Block contains sequences
                        // NOTE(review): ids is still null here if no TAXA block was seen earlier;
                        // the foreach below would then fail — confirm the format guarantees the order.
                        Dictionary <string, string> dataSet = ParseCharacterBlock(bioReader, ids);

                        IAlphabet alignmentAlphabet = null;
                        string    data = string.Empty;

                        foreach (string ID in ids)
                        {
                            IAlphabet alphabet = Alphabet;
                            Sequence  sequence = null;
                            data = dataSet[ID];

                            // No alphabet configured on the parser: identify one from the data and
                            // require that every sequence resolves to the same alphabet.
                            if (null == alphabet)
                            {
                                alphabet = _basicParser.IdentifyAlphabet(alphabet, data);

                                if (null == alphabet)
                                {
                                    message = string.Format(
                                        CultureInfo.InvariantCulture,
                                        Resource.InvalidSymbolInString,
                                        data);
                                    throw new InvalidDataException(message);
                                }
                                else
                                {
                                    if (null == alignmentAlphabet)
                                    {
                                        // First sequence fixes the alphabet for the whole alignment.
                                        alignmentAlphabet = alphabet;
                                    }
                                    else
                                    {
                                        if (alignmentAlphabet != alphabet)
                                        {
                                            message = string.Format(
                                                CultureInfo.InvariantCulture,
                                                Properties.Resource.SequenceAlphabetMismatch);
                                            throw new InvalidDataException(message);
                                        }
                                    }
                                }
                            }

                            // Use the parser's encoding only when one has been set.
                            if (Encoding == null)
                            {
                                sequence = new Sequence(alphabet, data);
                            }
                            else
                            {
                                sequence = new Sequence(alphabet, Encoding, data);
                            }

                            sequence.IsReadOnly = isReadOnly;
                            sequence.ID         = ID;
                            sequenceAlignment.AlignedSequences[0].Sequences.Add(sequence);
                        }

                        break;

                    case "END":
                    case "END;":
                        // Have reached the end of block
                        isInBlock = false;

                        break;

                    default:
                        // skip this block: advance until a line equal to "end;" (ignoring case)
                        // or until the reader runs out of lines
                        while (bioReader.HasLines)
                        {
                            bioReader.GoToNextLine();
                            if (0 == string.Compare(bioReader.Line, "end;", StringComparison.OrdinalIgnoreCase))
                            {
                                break;
                            }
                        }

                        break;
                    }

                    // Move past the line the block handler stopped on.
                    bioReader.GoToNextLine();
                }
            }

            return(sequenceAlignment);
        }
コード例 #13
0
        /// <summary>
        /// Parses a single biological sequence alignment text (Clustal format) from a stream.
        /// </summary>
        /// <remarks>
        /// The first line must start with "CLUSTAL". Data lines consist of a sequence ID followed
        /// by a sequence segment (anything after that is ignored). Blocks are separated by blank
        /// or consensus lines; segments of later blocks are appended to the sequence with the
        /// same ID from the first block.
        /// </remarks>
        /// <param name="reader">Reader positioned at the start of the alignment text.</param>
        /// <returns>
        /// A sequence alignment with one aligned sequence that holds every parsed sequence.
        /// </returns>
        /// <exception cref="InvalidDataException">
        /// The input is empty, does not start with "CLUSTAL", contains a data line without a
        /// sequence segment, contains symbols for which no alphabet can be detected, mixes
        /// alphabets, or references an ID in a later block that the first block did not declare.
        /// </exception>
        private ISequenceAlignment ParseOne(StreamReader reader)
        {
            // no empty files allowed
            if (line == null)
            {
                ReadNextLine(reader);
            }

            if (line == null)
            {
                throw new InvalidDataException(Properties.Resource.IONoTextToParse);
            }

            if (!line.StartsWith("CLUSTAL", StringComparison.OrdinalIgnoreCase))
            {
                throw new InvalidDataException(
                          string.Format(CultureInfo.CurrentCulture, Properties.Resource.INVALID_INPUT_FILE, this.Name));
            }

            ReadNextLine(reader);  // Skip blank lines until we get to the first block.

            // Now that we're at the first block, one or more blank lines are the block separators, which we'll need.
            skipBlankLines = false;

            // Per ID: a placeholder sequence (carries alphabet and ID) plus the bytes collected so far.
            var       mapIdToSequence   = new Dictionary <string, Tuple <ISequence, List <byte> > >();
            IAlphabet alignmentAlphabet = null;
            bool      isFirstBlock      = true;
            bool      inBlock           = false;
            var       endOfBlockSymbols = new HashSet <char> {
                '*', ' ', '.', '+', ':'
            };

            // NOTE(review): the loop tests reader.Peek() before handling the current `line`, so a
            // final data line with nothing after it looks like it is never processed — confirm
            // against ReadNextLine before relying on inputs that lack a trailing consensus/blank line.
            while (reader.Peek() != -1)
            {
                // Blank line or consensus line signals end of block.
                if (String.IsNullOrEmpty(line) || line.All(endOfBlockSymbols.Contains))
                {
                    if (inBlock)
                    {
                        // Blank line signifies end of block
                        inBlock      = false;
                        isFirstBlock = false;
                    }
                }
                else // It's not a blank or consensus line.
                {
                    // It's a data line in a block.
                    // Lines begin with sequence id, then the sequence segment, and optionally a number, which we will ignore
                    string[] tokens = line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries); // (char[])null uses whitespace delimiters

                    // A data line needs both an ID and a segment; report anything else as a
                    // malformed file instead of failing on the index below.
                    if (tokens.Length < 2)
                    {
                        throw new InvalidDataException(
                                  string.Format(CultureInfo.CurrentCulture, Properties.Resource.INVALID_INPUT_FILE, this.Name));
                    }

                    string   id       = tokens[0];
                    string   data     = tokens[1].ToUpperInvariant();
                    byte[]   byteData = Encoding.UTF8.GetBytes(data);
                    Tuple <ISequence, List <byte> > sequenceTuple;
                    IAlphabet alphabet = Alphabet;

                    inBlock = true;
                    if (isFirstBlock)
                    {
                        // No alphabet configured on the parser: detect one from the first segment
                        // and require that every sequence resolves to the same alphabet.
                        if (null == alphabet)
                        {
                            alphabet = Alphabets.AutoDetectAlphabet(byteData, 0, byteData.Length, alphabet);

                            if (null == alphabet)
                            {
                                throw new InvalidDataException(string.Format(
                                                                   CultureInfo.InvariantCulture,
                                                                   Properties.Resource.InvalidSymbolInString,
                                                                   data));
                            }

                            if (null == alignmentAlphabet)
                            {
                                alignmentAlphabet = alphabet;
                            }
                            else
                            {
                                if (alignmentAlphabet != alphabet)
                                {
                                    throw new InvalidDataException(string.Format(
                                                                       CultureInfo.CurrentCulture,
                                                                       Properties.Resource.SequenceAlphabetMismatch));
                                }
                            }
                        }

                        sequenceTuple = new Tuple <ISequence, List <byte> >(
                            new Sequence(alphabet, "")
                        {
                            ID = id
                        },
                            new List <byte>());
                        sequenceTuple.Item2.AddRange(byteData);

                        mapIdToSequence.Add(id, sequenceTuple);
                    }
                    else
                    {
                        // Later blocks may only continue sequences declared in the first block.
                        if (!mapIdToSequence.TryGetValue(id, out sequenceTuple))
                        {
                            throw new InvalidDataException(string.Format(CultureInfo.CurrentCulture, Properties.Resource.ClustalUnknownSequence, id));
                        }

                        sequenceTuple.Item2.AddRange(byteData);
                    }
                }

                ReadNextLine(reader);
            }

            var sequenceAlignment = new SequenceAlignment();
            var alignedSequence   = new AlignedSequence();

            sequenceAlignment.AlignedSequences.Add(alignedSequence);

            // Build the final sequences from the collected bytes, reusing alphabet and ID of the placeholders.
            foreach (var alignmentSequenceTuple in mapIdToSequence.Values)
            {
                alignedSequence.Sequences.Add(
                    new Sequence(alignmentSequenceTuple.Item1.Alphabet, alignmentSequenceTuple.Item2.ToArray())
                {
                    ID = alignmentSequenceTuple.Item1.ID
                });
            }

            return(sequenceAlignment);
        }
コード例 #14
0
        /// <summary>
        /// Parses a single biological sequence alignment text from a reader.
        /// </summary>
        /// <remarks>
        /// The first line must hold exactly two integers: the number of sequences and the length
        /// of each sequence. In the first block, the first 10 characters of every line are the
        /// sequence ID and the remainder is sequence data; lines of later blocks hold data only
        /// and are appended to the sequences in first-block order. Spaces inside sequence data
        /// are removed.
        /// </remarks>
        /// <param name="reader">A reader for a biological sequence alignment text.</param>
        /// <returns>The parsed ISequenceAlignment object.</returns>
        /// <exception cref="System.ArgumentNullException">reader is null.</exception>
        /// <exception cref="System.IO.InvalidDataException">
        /// The input is empty or malformed, no alphabet can be detected, the sequences resolve to
        /// different alphabets, or the sequence count or length does not match the first line.
        /// </exception>
        public ISequenceAlignment ParseOne(TextReader reader)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            if (line == null)
            {
                ReadNextLine(reader);
            }

            // no empty files allowed
            if (line == null)
            {
                throw new InvalidDataException(Properties.Resource.IONoTextToParse);
            }

            // Parse first line
            IList <string> tokens = line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);

            if (2 != tokens.Count)
            {
                throw new InvalidDataException(
                          string.Format(CultureInfo.CurrentCulture, Properties.Resource.INVALID_INPUT_FILE, this.Name));
            }

            int sequenceCount  = Int32.Parse(tokens[0], CultureInfo.InvariantCulture);
            int sequenceLength = Int32.Parse(tokens[1], CultureInfo.InvariantCulture);

            bool      isFirstBlock      = true;
            IAlphabet alignmentAlphabet = null;

            // Per sequence: a placeholder (carries alphabet and ID) plus the bytes collected so far.
            IList <Tuple <Sequence, List <byte> > > data = new List <Tuple <Sequence, List <byte> > >();

            ReadNextLine(reader);  // Skip blank lines until we get to the first block.

            // Now that we're at the first block, one or more blank lines are the block separators, which we'll need.
            skipBlankLines = false;

            while (reader.Peek() != -1)
            {
                if (string.IsNullOrWhiteSpace(line))
                {
                    ReadNextLine(reader);
                    continue;
                }

                // Data is present although the first line declares no sequences. Without this
                // check the loop below would never advance the reader and this loop would not end.
                if (sequenceCount <= 0)
                {
                    throw new InvalidDataException(Properties.Resource.SequenceCountMismatch);
                }

                for (int index = 0; index < sequenceCount; index++)
                {
                    // The input ended in the middle of a block.
                    if (line == null)
                    {
                        throw new InvalidDataException(
                                  string.Format(CultureInfo.CurrentCulture, Properties.Resource.INVALID_INPUT_FILE, this.Name));
                    }

                    if (isFirstBlock)
                    {
                        // First 10 characters are sequence ID, remaining is the first block of sequence
                        // Note that both may contain whitespace, and there may be no whitespace between them.
                        if (line.Length <= 10)
                        {
                            throw new InvalidDataException(
                                      string.Format(CultureInfo.CurrentCulture, Properties.Resource.INVALID_INPUT_FILE, this.Name));
                        }

                        string id             = line.Substring(0, 10).Trim();
                        string sequenceString = line.Substring(10).Replace(" ", "");
                        byte[] sequenceBytes  = System.Text.Encoding.ASCII.GetBytes(sequenceString);

                        // No alphabet configured on the parser: detect one from the first segment
                        // and require that every sequence resolves to the same alphabet.
                        IAlphabet alphabet = Alphabet;
                        if (null == alphabet)
                        {
                            alphabet = Alphabets.AutoDetectAlphabet(sequenceBytes, 0, sequenceBytes.Length, alphabet);

                            if (null == alphabet)
                            {
                                throw new InvalidDataException(string.Format(
                                                                   CultureInfo.InvariantCulture,
                                                                   Properties.Resource.InvalidSymbolInString,
                                                                   sequenceString));
                            }

                            if (null == alignmentAlphabet)
                            {
                                alignmentAlphabet = alphabet;
                            }
                            else if (alignmentAlphabet != alphabet)
                            {
                                throw new InvalidDataException(Properties.Resource.SequenceAlphabetMismatch);
                            }
                        }

                        var sequenceStore = new Tuple <Sequence, List <byte> >(
                            new Sequence(alphabet, string.Empty)
                        {
                            ID = id
                        },
                            new List <byte>());

                        sequenceStore.Item2.AddRange(sequenceBytes);
                        data.Add(sequenceStore);
                    }
                    else
                    {
                        // Later blocks carry data only; the position within the block selects the sequence.
                        byte[] sequenceBytes = System.Text.Encoding.ASCII.GetBytes(line.Replace(" ", ""));
                        data[index].Item2.AddRange(sequenceBytes);
                    }

                    ReadNextLine(reader);
                }

                // Reset the first block flag
                isFirstBlock = false;
            }

            // Validate for the count of sequence
            if (sequenceCount != data.Count)
            {
                throw new InvalidDataException(Properties.Resource.SequenceCountMismatch);
            }

            SequenceAlignment sequenceAlignment = new SequenceAlignment();

            sequenceAlignment.AlignedSequences.Add(new AlignedSequence());

            foreach (var dataSequence in data)
            {
                // Validate the length of each sequence against the first line
                if (sequenceLength != dataSequence.Item2.Count)
                {
                    throw new InvalidDataException(Properties.Resource.SequenceLengthMismatch);
                }

                sequenceAlignment.AlignedSequences[0].Sequences.Add(
                    new Sequence(dataSequence.Item1.Alphabet, dataSequence.Item2.ToArray())
                {
                    ID = dataSequence.Item1.ID
                });
            }

            return(sequenceAlignment);
        }
コード例 #15
0
        /// <summary>
        /// Parses a single Phylip text from a reader into a sequence alignment.
        /// 1. The first line has the count of taxa and the length of each sequence.
        /// 2. Sequence blocks follow, separated by one or more blank lines.
        ///     a. In the first block every line carries the sequence ID followed by sequence data.
        ///        When the line yields a single token, its first ten characters are the ID.
        ///     b. In later blocks every line is appended to the sequence at the same position
        ///        within the block.
        /// </summary>
        /// <param name="bioReader">A reader for a biological sequence text.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequence alignment should be in readonly mode or not.
        /// If this flag is set to true then the resulting sequence's isReadOnly property
        /// will be set to true, otherwise it will be set to false.</param>
        /// <returns>A new Sequence Alignment instance containing parsed data.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bioReader"/> is null.</exception>
        /// <exception cref="InvalidDataException">
        /// The header line does not have exactly two tokens, a sequence line is malformed, the alphabet
        /// cannot be identified or differs between sequences, or the number or length of the parsed
        /// sequences does not match the header.
        /// </exception>
        protected ISequenceAlignment ParseOneWithSpecificFormat(BioTextReader bioReader, bool isReadOnly)
        {
            if (bioReader == null)
            {
                throw new ArgumentNullException("bioReader");
            }

            string message = string.Empty;

            // Parse first line: "<sequence count> <sequence length>"
            IList<string> tokens = GetTokens(bioReader.Line);

            if (2 != tokens.Count)
            {
                message = string.Format(CultureInfo.CurrentCulture, Resource.INVAILD_INPUT_FILE, this.Name);
                throw new InvalidDataException(message);
            }

            int sequenceCount = Int32.Parse(tokens[0], CultureInfo.InvariantCulture);
            int sequenceLength = Int32.Parse(tokens[1], CultureInfo.InvariantCulture);

            bool isFirstBlock = true;
            IList<Sequence> data = new List<Sequence>();
            IAlphabet alignmentAlphabet = null;

            bioReader.GoToNextLine();  // Skip blank lines until we get to the first block.

            // Now that we're at the first block, one or more blank lines are the block separators, which we'll need.
            bioReader.SkipBlankLines = false;

            while (bioReader.HasLines)
            {
                if (string.IsNullOrEmpty(bioReader.Line.Trim()))
                {
                    bioReader.GoToNextLine();
                    continue;
                }

                if (sequenceCount <= 0)
                {
                    // Data is present although the header declares no sequences. The row loop below
                    // would never advance the reader, so this loop would otherwise never terminate.
                    throw new InvalidDataException(Properties.Resource.SequenceCountMismatch);
                }

                for (int index = 0; index < sequenceCount; index++)
                {
                    if (!bioReader.HasLines)
                    {
                        // The text ended in the middle of a block: fewer rows than declared sequences.
                        throw new InvalidDataException(Properties.Resource.SequenceCountMismatch);
                    }

                    if (isFirstBlock)
                    {
                        string id;
                        string sequenceString;

                        tokens = GetTokens(bioReader.Line);

                        // An empty row, or a single token too short to hold the ten character ID,
                        // cannot be split into ID and data.
                        if (0 == tokens.Count || (1 == tokens.Count && tokens[0].Length < 10))
                        {
                            message = string.Format(CultureInfo.CurrentCulture, Resource.INVAILD_INPUT_FILE, this.Name);
                            throw new InvalidDataException(message);
                        }

                        if (1 == tokens.Count)
                        {
                            // ID and data run together: the first ten characters are the ID.
                            id = tokens[0].Substring(0, 10);
                            sequenceString = tokens[0].Substring(10);
                        }
                        else
                        {
                            id = tokens[0];
                            sequenceString = tokens[1];
                        }

                        IAlphabet alphabet = Alphabet;
                        if (null == alphabet)
                        {
                            // No alphabet configured on the parser: detect it from the data and
                            // require every sequence of the alignment to agree on it.
                            alphabet = _basicParser.IdentifyAlphabet(alphabet, sequenceString);

                            if (null == alphabet)
                            {
                                message = string.Format(
                                    CultureInfo.InvariantCulture,
                                    Resource.InvalidSymbolInString,
                                    sequenceString);
                                throw new InvalidDataException(message);
                            }

                            if (null == alignmentAlphabet)
                            {
                                alignmentAlphabet = alphabet;
                            }
                            else if (alignmentAlphabet != alphabet)
                            {
                                message = Properties.Resource.SequenceAlphabetMismatch;
                                throw new InvalidDataException(message);
                            }
                        }

                        Sequence sequence;
                        if (Encoding == null)
                        {
                            sequence = new Sequence(alphabet, sequenceString);
                        }
                        else
                        {
                            sequence = new Sequence(alphabet, Encoding, sequenceString);
                        }

                        sequence.ID = id;

                        // Keep the sequence writable while later blocks are appended to it.
                        sequence.IsReadOnly = false;
                        data.Add(sequence);
                    }
                    else
                    {
                        // Continuation block: append the row to the sequence at the same position.
                        Sequence sequence = data[index];
                        sequence.InsertRange(sequence.Count, bioReader.Line.Trim());
                    }

                    bioReader.GoToNextLine();
                }

                // Reset the first block flag
                isFirstBlock = false;
            }

            // Validate the count of sequences
            if (sequenceCount != data.Count)
            {
                throw new InvalidDataException(Properties.Resource.SequenceCountMismatch);
            }

            SequenceAlignment sequenceAlignment = new SequenceAlignment();

            sequenceAlignment.AlignedSequences.Add(new AlignedSequence());

            foreach (Sequence dataSequence in data)
            {
                dataSequence.IsReadOnly = isReadOnly;

                // Validate the length of each sequence
                if (sequenceLength != dataSequence.Count)
                {
                    throw new InvalidDataException(Properties.Resource.SequenceLengthMismatch);
                }

                sequenceAlignment.AlignedSequences[0].Sequences.Add(dataSequence);
            }

            return sequenceAlignment;
        }
コード例 #16
0
        /// <summary>
        /// Parses a single ClustalW text from a reader into a sequence alignment.
        /// The text must start with a line beginning with "CLUSTAL". Data lines hold a sequence ID
        /// followed by a sequence segment; blank lines and consensus lines end a block. Segments of
        /// later blocks are appended to the sequence with the same ID from the first block.
        /// </summary>
        /// <param name="bioReader">A reader for a biological sequence text.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequence alignment should be in readonly mode or not.
        /// If this flag is set to true then the resulting sequence's isReadOnly property
        /// will be set to true, otherwise it will be set to false.</param>
        /// <returns>A new Sequence Alignment instance containing parsed data.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bioReader"/> is null.</exception>
        /// <exception cref="InvalidDataException">
        /// The text does not start with "CLUSTAL", a data line has no sequence segment, a sequence ID is
        /// repeated within the first block or unknown in a later block, or the alphabet cannot be
        /// identified or differs between sequences.
        /// </exception>
        protected ISequenceAlignment ParseOneWithSpecificFormat(BioTextReader bioReader, bool isReadOnly)
        {
            if (bioReader == null)
            {
                throw new ArgumentNullException("bioReader");
            }

            string message = string.Empty;

            if (!bioReader.Line.StartsWith("CLUSTAL", StringComparison.OrdinalIgnoreCase))
            {
                message = string.Format(CultureInfo.CurrentCulture, Resource.INVAILD_INPUT_FILE, this.Name);
                throw new InvalidDataException(message);
            }

            bioReader.GoToNextLine();  // Skip blank lines until we get to the first block.

            // Now that we're at the first block, one or more blank lines are the block separators, which we'll need.
            bioReader.SkipBlankLines = false;

            Dictionary<string, ISequence> mapIdToSequence = new Dictionary<string, ISequence>();
            IAlphabet alignmentAlphabet = null;
            bool isFirstBlock = true;
            bool inBlock = false;

            while (bioReader.HasLines)
            {
                // Blank line or consensus line signals end of block.
                if (String.IsNullOrEmpty(bioReader.Line) ||
                    Helper.ContainsOnly(bioReader.Line, '*', ' ', '.', '+', ':'))
                {
                    if (inBlock)
                    {
                        // Blank line signifies end of block
                        inBlock = false;
                        isFirstBlock = false;
                    }
                }
                else // It's not a blank or consensus line.
                {
                    // It's a data line in a block.
                    // Lines begin with sequence id, then the sequence segment, and optionally a number, which we will ignore
                    string[] tokens = bioReader.Line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries); // (char[])null uses whitespace delimiters

                    if (tokens.Length < 2)
                    {
                        // A data line needs both an ID and a sequence segment.
                        message = string.Format(CultureInfo.CurrentCulture, Resource.INVAILD_INPUT_FILE, this.Name);
                        throw new InvalidDataException(message);
                    }

                    string id = tokens[0];
                    string data = tokens[1].ToUpper(CultureInfo.InvariantCulture);
                    IAlphabet alphabet = Alphabet;

                    inBlock = true;
                    if (isFirstBlock)
                    {
                        if (null == alphabet)
                        {
                            // No alphabet configured on the parser: detect it from the data and
                            // require every sequence of the alignment to agree on it.
                            alphabet = _basicParser.IdentifyAlphabet(alphabet, data);

                            if (null == alphabet)
                            {
                                message = string.Format(
                                    CultureInfo.InvariantCulture,
                                    Resource.InvalidSymbolInString,
                                    data);
                                throw new InvalidDataException(message);
                            }

                            if (null == alignmentAlphabet)
                            {
                                alignmentAlphabet = alphabet;
                            }
                            else if (alignmentAlphabet != alphabet)
                            {
                                message = string.Format(
                                    CultureInfo.CurrentCulture,
                                    Properties.Resource.SequenceAlphabetMismatch);
                                throw new InvalidDataException(message);
                            }
                        }

                        if (mapIdToSequence.ContainsKey(id))
                        {
                            // The same ID twice in the first block: report it as bad input rather than
                            // letting Dictionary.Add fail with an ArgumentException.
                            message = string.Format(CultureInfo.CurrentCulture, Resource.INVAILD_INPUT_FILE, this.Name);
                            throw new InvalidDataException(message);
                        }

                        Sequence sequence;
                        if (Encoding == null)
                        {
                            sequence = new Sequence(alphabet, data);
                        }
                        else
                        {
                            sequence = new Sequence(alphabet, Encoding, data);
                        }

                        sequence.ID = id;

                        // Keep the sequence writable while later blocks are appended to it.
                        sequence.IsReadOnly = false;

                        mapIdToSequence.Add(id, sequence);
                    }
                    else
                    {
                        ISequence known;
                        if (!mapIdToSequence.TryGetValue(id, out known))
                        {
                            message = string.Format(CultureInfo.CurrentCulture, Properties.Resource.ClustalUnknownSequence, id);
                            throw new InvalidDataException(message);
                        }

                        // Continuation block: append the segment to the sequence created in the first block.
                        Sequence sequence = (Sequence)known;
                        sequence.InsertRange(sequence.Count, data);
                    }
                }

                bioReader.GoToNextLine();
            }

            SequenceAlignment sequenceAlignment = new SequenceAlignment();

            sequenceAlignment.AlignedSequences.Add(new AlignedSequence());
            foreach (Sequence alignmentSequence in mapIdToSequence.Values)
            {
                alignmentSequence.IsReadOnly = isReadOnly;
                sequenceAlignment.AlignedSequences[0].Sequences.Add(alignmentSequence);
            }

            return sequenceAlignment;
        }
コード例 #17
0
        /// <summary>
        /// Parses a single biological sequence alignment text from a reader.
        /// After the header, "begin ..." blocks are processed in order: a TAXA block supplies the
        /// sequence IDs, a CHARACTERS block supplies the sequence data for those IDs, and any other
        /// block is skipped up to its "end;" line.
        /// </summary>
        /// <param name="reader">A reader for a biological sequence alignment text.</param>
        /// <returns>The parsed ISequenceAlignment object.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        /// <exception cref="InvalidDataException">
        /// The reader has no content, a CHARACTERS block appears before any TAXA block, or the alphabet
        /// of the sequence data cannot be detected or differs between sequences.
        /// </exception>
        ISequenceAlignment ParseOne(TextReader reader)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            ReadNextLine(reader);
            if (line == null)
            {
                // InvalidDataException derives from Exception, so existing catch blocks keep working.
                throw new InvalidDataException(Properties.Resource.INVALID_INPUT_FILE);
            }

            this.ParseHeader(reader);

            var alignedSequence = new AlignedSequence();
            IList<string> ids = null;
            bool isInBlock = true;

            // The current line is checked for null before use: nothing visible here guarantees that
            // a line is still available once the header has been parsed.
            if (this.line != null && this.line.StartsWith("begin", StringComparison.OrdinalIgnoreCase))
            {
                while (this.line != null && isInBlock)
                {
                    if (string.IsNullOrEmpty(this.line.Trim()))
                    {
                        this.ReadNextLine(reader);
                        continue;
                    }

                    string blockName = GetTokens(this.line)[1];

                    switch (blockName.ToUpperInvariant())
                    {
                    case "TAXA":
                    case "TAXA;":
                        // This block contains the count of sequence & title of each sequence
                        ids = this.ParseTaxaBlock(reader);
                        break;

                    case "CHARACTERS":
                    case "CHARACTERS;":
                    {
                        if (ids == null)
                        {
                            // Sequence data cannot be assigned to IDs when no TAXA block came first.
                            throw new InvalidDataException(Properties.Resource.INVALID_INPUT_FILE);
                        }

                        // Block contains sequences
                        Dictionary<string, string> dataSet = this.ParseCharacterBlock(reader, ids);
                        IAlphabet alignmentAlphabet = null;

                        foreach (string id in ids)
                        {
                            IAlphabet alphabet = this.Alphabet;
                            string data = dataSet[id];

                            if (null == alphabet)
                            {
                                // No alphabet configured on the parser: detect it from the data and
                                // require every sequence of the alignment to agree on it.
                                byte[] dataArray = data.ToByteArray();
                                alphabet = Alphabets.AutoDetectAlphabet(dataArray, 0, dataArray.Length, null);

                                if (null == alphabet)
                                {
                                    throw new InvalidDataException(string.Format(
                                                                       CultureInfo.InvariantCulture,
                                                                       Properties.Resource.InvalidSymbolInString,
                                                                       data));
                                }

                                if (null == alignmentAlphabet)
                                {
                                    alignmentAlphabet = alphabet;
                                }
                                else if (alignmentAlphabet != alphabet)
                                {
                                    throw new InvalidDataException(string.Format(
                                                                       CultureInfo.InvariantCulture,
                                                                       Properties.Resource.SequenceAlphabetMismatch));
                                }
                            }

                            alignedSequence.Sequences.Add(new Sequence(alphabet, data)
                            {
                                ID = id
                            });
                        }

                        break;
                    }

                    case "END":
                    case "END;":
                        // Have reached the end of block
                        isInBlock = false;
                        break;

                    default:
                        // skip this block: read on until its "end;" line or the end of the input
                        while (this.line != null)
                        {
                            this.ReadNextLine(reader);
                            if (0 == string.Compare(this.line, "end;", StringComparison.OrdinalIgnoreCase))
                            {
                                break;
                            }
                        }
                        break;
                    }

                    this.ReadNextLine(reader);
                }
            }

            ISequenceAlignment sequenceAlignment = new SequenceAlignment();

            sequenceAlignment.AlignedSequences.Add(alignedSequence);
            return sequenceAlignment;
        }
コード例 #18
0
        /// <summary>
        /// Parses a single Phylip text from a reader into a sequence alignment.
        /// 1. The first line has the count of taxa and the length of each sequence.
        /// 2. Sequence blocks follow, separated by one or more blank lines.
        ///     a. In the first block the first ten characters of every line are the sequence ID and
        ///        the remainder, with whitespace removed, is sequence data.
        ///     b. In later blocks every line, with whitespace removed, is appended to the sequence
        ///        at the same position within the block.
        /// </summary>
        /// <param name="mbfReader">A reader for a biological sequence text.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequence alignment should be in readonly mode or not.
        /// If this flag is set to true then the resulting sequence's isReadOnly property
        /// will be set to true, otherwise it will be set to false.</param>
        /// <returns>A new Sequence Alignment instance containing parsed data.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="mbfReader"/> is null.</exception>
        /// <exception cref="InvalidDataException">
        /// The header line does not have exactly two tokens, a first-block line is too short to hold an
        /// ID and data, the alphabet cannot be identified or differs between sequences, or the number
        /// or length of the parsed sequences does not match the header.
        /// </exception>
        protected ISequenceAlignment ParseOneWithSpecificFormat(MBFTextReader mbfReader, bool isReadOnly)
        {
            if (mbfReader == null)
            {
                throw new ArgumentNullException("mbfReader");
            }

            string message = string.Empty;

            // Parse first line: "<sequence count> <sequence length>"
            IList<string> tokens = mbfReader.Line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);

            if (2 != tokens.Count)
            {
                message = string.Format(CultureInfo.CurrentCulture, Resource.INVALID_INPUT_FILE, this.Name);
                throw new InvalidDataException(message);
            }

            int sequenceCount = Int32.Parse(tokens[0], CultureInfo.InvariantCulture);
            int sequenceLength = Int32.Parse(tokens[1], CultureInfo.InvariantCulture);

            bool isFirstBlock = true;
            IList<Sequence> data = new List<Sequence>();
            IAlphabet alignmentAlphabet = null;

            mbfReader.GoToNextLine();  // Skip blank lines until we get to the first block.

            // Now that we're at the first block, one or more blank lines are the block separators, which we'll need.
            mbfReader.SkipBlankLines = false;

            while (mbfReader.HasLines)
            {
                if (string.IsNullOrEmpty(mbfReader.Line.Trim()))
                {
                    mbfReader.GoToNextLine();
                    continue;
                }

                if (sequenceCount <= 0)
                {
                    // Data is present although the header declares no sequences. The row loop below
                    // would never advance the reader, so this loop would otherwise never terminate.
                    throw new InvalidDataException(Properties.Resource.SequenceCountMismatch);
                }

                for (int index = 0; index < sequenceCount; index++)
                {
                    if (!mbfReader.HasLines)
                    {
                        // The text ended in the middle of a block: fewer rows than declared sequences.
                        throw new InvalidDataException(Properties.Resource.SequenceCountMismatch);
                    }

                    if (isFirstBlock)
                    {
                        // First 10 characters are sequence ID, remaining is the first block of sequence
                        // Note that both may contain whitespace, and there may be no whitespace between them.
                        if (mbfReader.Line.Length <= 10)
                        {
                            // Reported as InvalidDataException like every other format error in this
                            // method; it still derives from Exception for existing catch blocks.
                            message = string.Format(CultureInfo.CurrentCulture, Resource.INVALID_INPUT_FILE, this.Name);
                            throw new InvalidDataException(message);
                        }

                        string id = mbfReader.Line.Substring(0, 10).Trim();
                        string sequenceString = Util.Helper.StringRemoveWhitespace(mbfReader.Line.Substring(10));

                        IAlphabet alphabet = Alphabet;
                        if (null == alphabet)
                        {
                            // No alphabet configured on the parser: detect it from the data and
                            // require every sequence of the alignment to agree on it.
                            alphabet = _basicParser.IdentifyAlphabet(alphabet, sequenceString);

                            if (null == alphabet)
                            {
                                message = string.Format(
                                    CultureInfo.InvariantCulture,
                                    Resource.InvalidSymbolInString,
                                    sequenceString);
                                throw new InvalidDataException(message);
                            }

                            if (null == alignmentAlphabet)
                            {
                                alignmentAlphabet = alphabet;
                            }
                            else if (alignmentAlphabet != alphabet)
                            {
                                message = Properties.Resource.SequenceAlphabetMismatch;
                                throw new InvalidDataException(message);
                            }
                        }

                        Sequence sequence;
                        if (Encoding == null)
                        {
                            sequence = new Sequence(alphabet, sequenceString);
                        }
                        else
                        {
                            sequence = new Sequence(alphabet, Encoding, sequenceString);
                        }

                        sequence.ID = id;

                        // Keep the sequence writable while later blocks are appended to it.
                        sequence.IsReadOnly = false;
                        data.Add(sequence);
                    }
                    else
                    {
                        // Continuation block: append the row to the sequence at the same position.
                        Sequence sequence = data[index];
                        sequence.InsertRange(sequence.Count, Util.Helper.StringRemoveWhitespace(mbfReader.Line));
                    }

                    mbfReader.GoToNextLine();
                }

                // Reset the first block flag
                isFirstBlock = false;
            }

            // Validate the count of sequences
            if (sequenceCount != data.Count)
            {
                throw new InvalidDataException(Properties.Resource.SequenceCountMismatch);
            }

            SequenceAlignment sequenceAlignment = new SequenceAlignment();

            sequenceAlignment.AlignedSequences.Add(new AlignedSequence());

            foreach (Sequence dataSequence in data)
            {
                dataSequence.IsReadOnly = isReadOnly;

                // Validate the length of each sequence
                if (sequenceLength != dataSequence.Count)
                {
                    throw new InvalidDataException(Properties.Resource.SequenceLengthMismatch);
                }

                sequenceAlignment.AlignedSequences[0].Sequences.Add(dataSequence);
            }

            return sequenceAlignment;
        }
コード例 #19
0
        /// <summary>
        /// Parses a single biological sequence alignment text from a reader.
        /// After the header, "begin ..." blocks are processed in order: a TAXA block supplies the
        /// sequence IDs, a CHARACTERS block supplies the sequence data for those IDs, and any other
        /// block is skipped up to its "end;" line.
        /// </summary>
        /// <param name="reader">A reader for a biological sequence alignment text.</param>
        /// <returns>The parsed ISequenceAlignment object.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        /// <exception cref="FileFormatException">The reader has no content.</exception>
        /// <exception cref="InvalidDataException">
        /// A CHARACTERS block appears before any TAXA block, or the alphabet of the sequence data
        /// cannot be detected or differs between sequences.
        /// </exception>
        public ISequenceAlignment ParseOne(TextReader reader)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            ReadNextLine(reader);
            if (line == null)
            {
                string invalidFileMessage = Properties.Resource.INVALID_INPUT_FILE;
                Trace.Report(invalidFileMessage);
                throw new FileFormatException(invalidFileMessage);
            }

            ParseHeader(reader);

            ISequenceAlignment sequenceAlignment = new SequenceAlignment();
            sequenceAlignment.AlignedSequences.Add(new AlignedSequence());
            IList<string> ids = null;
            bool isInBlock = true;

            // The current line is checked for null before use: nothing visible here guarantees that
            // a line is still available once the header has been parsed.
            if (line != null && line.StartsWith("begin", StringComparison.OrdinalIgnoreCase))
            {
                while (line != null && isInBlock)
                {
                    if (string.IsNullOrEmpty(line.Trim()))
                    {
                        ReadNextLine(reader);
                        continue;
                    }

                    string blockName = GetTokens(line)[1];

                    switch (blockName.ToUpper(CultureInfo.InvariantCulture))
                    {
                    case "TAXA":
                    case "TAXA;":
                        // This block contains the count of sequence & title of each sequence
                        ids = (IList<string>)ParseTaxaBlock(reader);
                        break;

                    case "CHARACTERS":
                    case "CHARACTERS;":
                    {
                        if (ids == null)
                        {
                            // Sequence data cannot be assigned to IDs when no TAXA block came first.
                            throw new InvalidDataException(Properties.Resource.INVALID_INPUT_FILE);
                        }

                        // Block contains sequences
                        Dictionary<string, string> dataSet = ParseCharacterBlock(reader, ids);
                        IAlphabet alignmentAlphabet = null;

                        foreach (string sequenceId in ids)
                        {
                            IAlphabet alphabet = Alphabet;
                            string data = dataSet[sequenceId];

                            if (null == alphabet)
                            {
                                // No alphabet configured on the parser: detect it from the data and
                                // require every sequence of the alignment to agree on it.
                                byte[] dataArray = data.Select(a => (byte)a).ToArray();
                                alphabet = Alphabets.AutoDetectAlphabet(dataArray, 0, dataArray.Length, null);

                                if (null == alphabet)
                                {
                                    throw new InvalidDataException(string.Format(
                                        CultureInfo.InvariantCulture,
                                        Properties.Resource.InvalidSymbolInString,
                                        data));
                                }

                                if (null == alignmentAlphabet)
                                {
                                    alignmentAlphabet = alphabet;
                                }
                                else if (alignmentAlphabet != alphabet)
                                {
                                    throw new InvalidDataException(string.Format(
                                        CultureInfo.InvariantCulture,
                                        Properties.Resource.SequenceAlphabetMismatch));
                                }
                            }

                            Sequence sequence = new Sequence(alphabet, data);
                            sequence.ID = sequenceId;
                            sequenceAlignment.AlignedSequences[0].Sequences.Add(sequence);
                        }

                        break;
                    }

                    case "END":
                    case "END;":
                        // Have reached the end of block
                        isInBlock = false;
                        break;

                    default:
                        // skip this block: read on until its "end;" line or the end of the input
                        while (line != null)
                        {
                            ReadNextLine(reader);
                            if (0 == string.Compare(line, "end;", StringComparison.OrdinalIgnoreCase))
                            {
                                break;
                            }
                        }

                        break;
                    }

                    ReadNextLine(reader);
                }
            }

            return sequenceAlignment;
        }