コード例 #1
0
        private IMatrixData ProcessDbFiles(ProcessInfo processInfo, int nThreads, IList<Database> databases)
        {
            string tempFile = Path.Combine(FileUtils.GetTempFolder(), "databaseref.txt");
            IMatrixData matrix;
            StreamWriter writer = null;

            try{
                processInfo.Progress(0);
                processInfo.Status(string.Format("Read database files [{0}|{1}]", 0, "?"));

                Enum[] enums = new Enum[] { databaseRef.file, databaseRef.source, databaseRef.specie, databaseRef.taxid, databaseRef.version, databaseRef.identifier };
                IList<string> header = enums.Select(Constants.GetPattern).ToList();

                if (databases == null || databases.Count == 0){
                    return null;
                }

                writer = new StreamWriter(tempFile);

                int nTasks = databases.Count;
                writer.WriteLine(StringUtils.Concat("\t", header));
                writer.WriteLine("#!{Type}" + StringUtils.Concat("\t", header.Select(x => "T")));

                ThreadDistributor distr = new ThreadDistributor(nThreads, nTasks,
                                                                x =>
                                                                ParseDatabase(writer, databases[x],
                                                                              string.Format(
                                                                                  "Read database files [{0}|{1}]",
                                                                                  x + 1, nTasks), (x + 1)*100/nTasks,
                                                                              processInfo));
                distr.Start();

                processInfo.Status("Close all files");

                writer.Close();
                writer.Dispose();
                writer = null;

                processInfo.Progress(0);
                processInfo.Status("Create DatabaseRef Matrix");

                matrix = new MatrixData();
                LoadData(matrix, tempFile, processInfo);
            }
            catch (Exception ex){
                throw ex;
            }
            finally{
                if (writer != null){
                    writer.Close();
                }

                if (File.Exists(tempFile)){
                    File.Delete(tempFile);
                }
            }

            return matrix;
        }
コード例 #2
0
ファイル: Fasta.cs プロジェクト: JurgenCox/perseus-plugins
        public void ParseFile(string path, ProcessInfo processInfo)
        {
            processInfo.Status("Parsing " + path);
            string accession = "";
            int sequenceCounter = 0;
            StringBuilder sequence = new StringBuilder();
            ProteinSequence protein = new ProteinSequence();
            try{
                StreamReader file = new StreamReader(path);
                string line;
                while ((line = file.ReadLine()) != null){ // valid line
                    if (sequenceCounter%500 == 0){
                        processInfo.Status("Parsing " + path + ", " + (int) ((float) file.BaseStream.Position/file.BaseStream.Length*100) +
                                            "%");
                    }
                    bool lineIsHeader = line.StartsWith(">");

                    // skip all lines until the first header is found
                    if (sequenceCounter == 0 && !lineIsHeader){
                        continue;
                    }

                    // line is a piece of a sequence
                    if (sequenceCounter > 0 && !lineIsHeader){
                        sequence.Append(line.Trim());
                        continue;
                    }

                    // line is a fasta header
                    if (lineIsHeader){
                        if (sequenceCounter > 0)
                            // this is not the first header, i.e. the previous sequence is now completely read in
                        {
                            // add the previous protein
                            protein.SetSequence(sequence.ToString());
                            entries.Add(accession, protein);
                        }
                        // initialize a new protein
                        protein = new ProteinSequence();
                        sequenceCounter++;
                        // then parse the new header
                        string header = line;
                        Match m = regexUniprotAccession.Match(header);
                        if (m.Success){ // uniprot header
                            accession = m.Groups[1].Value;
                            protein.Accession = accession;
                            protein.Header = header;
                        } else{ // fallback position: take entire header after the > as accession
                            accession = header.Substring(1).Trim();
                            protein.Accession = accession;
                            protein.Header = header;
                        }
                        sequence = new StringBuilder();
                    }
                } //end while
                file.Close();

                //add the last protein
                if (sequenceCounter > 0){ // make sure there is at least one sequence in the file
                    protein.SetSequence(sequence.ToString());
                    entries.Add(accession, protein);
                }
            } catch (Exception){
                processInfo.ErrString =
                    "Something went wrong while parsing the fasta file.\nMake sure the path is correct and the " +
                    "file is not opened in another application.\nMake sure the fasta file is valid.";
            }
        }
コード例 #3
0
        private IMatrixData ProcessAplFiles(ProcessInfo processInfo, int nThreads, IList<MsRunImpl> aplfiles)
        {
            string tempFile = Path.Combine(FileUtils.GetTempFolder(), "spectraref.txt");
            if (File.Exists(tempFile)){
                File.Delete(tempFile);
            }
            IMatrixData matrix;
            StreamWriter writer = null;

            try{
                Enum[] enums = new Enum[]{spectraRef.raw_file, spectraRef.charge, spectraRef.scan_number, spectraRef.location, spectraRef.format, spectraRef.id_format, spectraRef.fragmentation, spectraRef.mz, spectraRef.index};
                IList<string> header = enums.Select(Constants.GetPattern).ToList();

                if (aplfiles == null || aplfiles.Count == 0){
                    return null;
                }

                int nTasks = aplfiles.Count;

                processInfo.Progress(0);
                processInfo.Status(string.Format("Read Andromeda peaklist files [{0}|{1}]", 0, nTasks));

                writer = new StreamWriter(tempFile);
                writer.WriteLine(StringUtils.Concat("\t", header));
                writer.WriteLine("#!{Type}" + StringUtils.Concat("\t", header.Select(x => "T")));

                ThreadDistributor distr = new ThreadDistributor(nThreads, nTasks,
                                                                x =>
                                                                ParseAplFile(aplfiles[x], writer,
                                                                             string.Format(
                                                                                 "Read Andromeda peaklist files [{0}|{1}]",
                                                                                 x + 1, nTasks), (x + 1)*100/nTasks,
                                                                             processInfo));
                distr.Start();

                processInfo.Status("Close all files");

                writer.Close();
                writer.Dispose();
                writer = null;

                processInfo.Progress(0);
                processInfo.Status("Create SpectraRef matrix");

                matrix = new MatrixData();
                LoadData(matrix, tempFile, processInfo);
            }
            catch (Exception ex){
                throw ex;
            }
            finally{
                if (writer != null){
                    writer.Close();
                }

                if (File.Exists(tempFile)){
                    File.Delete(tempFile);
                }
            }

            return matrix;
        }
コード例 #4
0
        private static void ParseDatabase(StreamWriter writer, Database db, string status, int progress,
                                          ProcessInfo processInfo)
        {
            if (db.File == null || !File.Exists(db.File)){
                return;
            }

            StreamReader reader = new StreamReader(db.File);

            string line;
            Regex regex = new Regex(db.SearchExpression);

            while ((line = reader.ReadLine()) != null){
                if (line.StartsWith(">")){
                    string identifier = regex.Match(line).Groups[1].Value;
                    object[] items = new object[]{
                        db.File, db.Source, db.Species, db.Taxid, db.Version,
                        db.Prefix == null ? identifier : db.Prefix + identifier
                    };
                    lock (writer){
                        writer.WriteLine(StringUtils.Concat("\t", items));
                    }
                }
            }

            reader.Close();

            lock (processInfo){
                processInfo.Progress(progress);
                processInfo.Status(status);
            }
        }
コード例 #5
0
        private static void ParseAplFile(MsRunImpl aplfile, StreamWriter writer, string status, int progress,
                                         ProcessInfo processInfo)
        {
            lock (processInfo){
                processInfo.Progress(progress);
                processInfo.Status(status);
            }

            string file = aplfile.Location.Value;
            string form = aplfile.Format == null ? "" : aplfile.Format.Name;
            string idform = aplfile.IdFormat == null ? "" : aplfile.IdFormat.Name;
            int m = 0;
            Regex regex = new Regex("Raw[f|F]ile: (.*) Index: ([0-9]+)");
            AplParser parser = new AplParser(delegate(AplEntry entry)
                {
                    if (regex.IsMatch(entry.Title)){
                        string rawfile = regex.Match(entry.Title).Groups[1].Value;
                        string scannumber = regex.Match(entry.Title).Groups[2].Value;
                        m++;
                        object[] items = new object[]{
                            rawfile, entry.PrecursorCharge, scannumber, file, form, idform, entry.Fragmentation, entry.Mz,
                            m.ToString(CultureInfo.InvariantCulture)
                        };
                        lock (writer){
                            writer.WriteLine(StringUtils.Concat("\t", items));
                        }
                    }
                });

            parser.Parse(file);

            lock (writer){
                writer.Flush();
            }
        }
コード例 #6
0
        public override IMatrixData ProcessData(IMatrixData[] inputData, Parameters param, ref IMatrixData[] supplTables,
                                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            TextWriter defaultOut = Console.Out;
            TextWriter defaultErr = Console.Error;

            try{
                if (documents == null){
                    documents = new IDocumentData[NumDocuments];
                    for (int i = 0; i < NumDocuments; i++){
                        documents[i] = new DocumentData();
                    }
                }

                TextWriter logger = null;
                if (documents.Length > 0){
                    logger = new StreamWriter(new DocumentStream(documents[0]));

                    Console.SetOut(logger);
                    Console.SetError(logger);
                }

                int nThreads = GetMaxThreads(param);

                IList<MsRunImpl> runs = new List<MsRunImpl>();
                SingleChoiceWithSubParams singleSub =
                    param.GetParam(MetadataElement.MS_RUN.Name) as SingleChoiceWithSubParams;
                if (singleSub != null){
                    MsRunParam sub =
                        singleSub.SubParams[singleSub.Value].GetAllParameters().FirstOrDefault() as MsRunParam;
                    if (sub != null){
                        if (sub.Value != null){
                            foreach (MsRunImpl run in sub.Value){
                                runs.Add(run);
                            }
                        }
                    }
                }

                IList<StudyVariable> studyVariables = new List<StudyVariable>();
                singleSub = param.GetParam(MetadataElement.STUDY_VARIABLE.Name) as SingleChoiceWithSubParams;
                if (singleSub != null){
                    StudyVariableParam sub =
                        singleSub.SubParams[singleSub.Value].GetAllParameters().FirstOrDefault() as StudyVariableParam;
                    if (sub != null){
                        if (sub.Value != null){
                            foreach (StudyVariable variable in sub.Value){
                                studyVariables.Add(variable);
                            }
                        }
                    }
                }

                IList<Sample> samples = new List<Sample>();
                singleSub = param.GetParam(MetadataElement.SAMPLE.Name) as SingleChoiceWithSubParams;
                if (singleSub != null){
                    SampleParam sub =
                        singleSub.SubParams[singleSub.Value].GetAllParameters().FirstOrDefault() as SampleParam;
                    if (sub != null){
                        if (sub.Value != null){
                            foreach (Sample sample in sub.Value){
                                samples.Add(sample);
                            }
                        }
                    }
                }

                IList<Assay> assays = new List<Assay>();
                singleSub = param.GetParam(MetadataElement.ASSAY.Name) as SingleChoiceWithSubParams;
                if (singleSub != null){
                    AssayParam sub =
                        singleSub.SubParams[singleSub.Value].GetAllParameters().FirstOrDefault() as AssayParam;
                    if (sub != null){
                        if (sub.Value != null){
                            foreach (Assay assay in sub.Value){
                                assays.Add(assay);
                            }
                        }
                    }
                }

                IList<Database> databases = new List<Database>();
                singleSub = param.GetParam("database") as SingleChoiceWithSubParams;
                if (singleSub != null){
                    DatabaseParam sub =
                        singleSub.SubParams[singleSub.Value].GetAllParameters().FirstOrDefault() as DatabaseParam;
                    if (sub != null && sub.Value != null){
                        foreach (Database db in sub.Value){
                            databases.Add(db);
                        }
                    }
                }

                IMatrixData output = (IMatrixData) inputData[0].CreateNewInstance(DataType.Matrix);
                List<string> columnnames = new List<string>{
                    MetadataElement.STUDY_VARIABLE.Name,
                    MetadataElement.ASSAY.Name,
                    MetadataElement.MS_RUN.Name,
                    MetadataElement.SAMPLE.Name,
                    MetadataElement.INSTRUMENT.Name
                };

                List<string[]> matrix = new List<string[]>();
                for (int i = 0; i < columnnames.Count; i++){
                    matrix.Add(new string[assays.Count]);
                }

                for (int i = 0; i < assays.Count; i++){
                    Assay assay = assays[i];
                    MsRunImpl runImpl = runs.FirstOrDefault(x => x.Id.Equals(assay.MsRun.Id));
                    Instrument instrument = instruments.FirstOrDefault(x => x.Id.Equals(assay.MsRun.Id));

                    if (runImpl == null){
                        continue;
                    }

                    var studyVariable = i < studyVariables.Count ? studyVariables[i] : null;
                    var sample = i < samples.Count ? samples[i] : null;
                    foreach (var s in studyVariables){
                        if (s.AssayMap.ContainsKey(assay.Id)){
                            studyVariable = s;
                            try{
                                int sampleId = studyVariable.SampleMap.FirstOrDefault().Key;
                                sample = samples.FirstOrDefault(x => x.Id.Equals(sampleId));
                            }
                            catch (Exception){
                                Console.Error.WriteLine("Can not find sample");
                            }
                            break;
                        }
                    }

                    AddRow(matrix, columnnames, i, runImpl, assay, sample, studyVariable, instrument);
                }

                output.SetData(Matrix.Experiment, new List<string>(), new float[assays.Count,columnnames.Count],
                               columnnames,
                               matrix,
                               new List<string>(), new List<string[][]>(), new List<string>(), new List<double[]>(),
                               new List<string>(), new List<double[][]>(), new List<string>(), new List<string[][]>(),
                               new List<string>(), new List<double[]>());
                IList<IMatrixData> supplement = new List<IMatrixData>();
                try{
                    IList<MsRunImpl> aplfiles =
                        runs.Where(x => x.Location != null && x.Location.Value.EndsWith(".apl")).ToList();

                    IMatrixData temp = ProcessAplFiles(processInfo, nThreads, aplfiles);
                    if (temp != null){
                        supplement.Add(temp);
                    }
                }
                catch (Exception e){
                    throw new Exception("Could not parse spectra file(s)! " + e.Message + "\n" + e.StackTrace);
                }

                try{
                    IMatrixData temp = ProcessDbFiles(processInfo, databases.Count < nThreads ? 1 : nThreads, databases);
                    if (temp != null){
                        supplement.Add(temp);
                    }
                }
                catch (Exception e){
                    throw new Exception("Could not parse database file(s)! " + e.Message + "\n" + e.StackTrace);
                }

                if (logger != null){
                    logger.Dispose();
                }

                supplTables = supplement.ToArray();

                processInfo.Status("Define Experiment: DONE!");
                processInfo.Progress(100);

                return output;
            }
            catch (Exception e){
                string msg = "Process aborted! " + e.Message;
                MessageBox.Show(msg);
                Logger.Error(Name, msg);
                processInfo.Status(msg);
            }
            finally{
                Console.SetOut(defaultOut);
                Console.SetError(defaultErr);
            }

            return null;
        }
コード例 #7
0
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int proteinIdColumnInd = param.GetParam<int>("Protein IDs").Value;
            string[][] proteinIds = new string[mdata.RowCount][];
            string[][] leadingIds = new string[mdata.RowCount][];
            List<string> allIds = new List<string>();
            for (int row = 0; row < mdata.RowCount; row++){
                proteinIds[row] = mdata.StringColumns[proteinIdColumnInd][row].Split(';');
                leadingIds[row] = new[]{proteinIds[row][0]};
                allIds.AddRange(proteinIds[row]);
            }
            string fastaFilePath = param.GetParam<string>("Fasta file").Value;
            Fasta fasta = new Fasta();
            fasta.ParseFile(fastaFilePath, processInfo);
            // Text annotations
            processInfo.Status("Adding fasta header annotations.");
            int[] selection =
                param.GetParamWithSubParams<int>("Fasta header annotations").GetSubParameters().GetParam<int[]>("Annotations").Value;
            string[][] idsToBeAnnotated = (param.GetParamWithSubParams<int>("Fasta header annotations").Value == 0)
                ? proteinIds
                : leadingIds;
            ProteinSequence[][] fastaEntries = new ProteinSequence[mdata.RowCount][];
            for (int row = 0; row < mdata.RowCount; row++){
                List<ProteinSequence> rowEntries = new List<ProteinSequence>();
                foreach (string id in idsToBeAnnotated[row]){
                    ProteinSequence entry = fasta.GetEntry(id);
                    if (entry == null){
                        continue;
                    }
                    rowEntries.Add(entry);
                }
                fastaEntries[row] = rowEntries.ToArray();
            }
            if (ArrayUtils.Contains(selection, 0)){ // Entry name
                string[] annotationColumn = new string[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    List<string> rowAnnotations = new List<string>();
                    foreach (ProteinSequence entry in fastaEntries[row]){
                        string entryName = entry.EntryName;
                        if (entryName != null && !ArrayUtils.Contains(rowAnnotations, entryName)){
                            rowAnnotations.Add(entryName);
                        }
                    }
                    annotationColumn[row] = string.Join(";", rowAnnotations.ToArray());
                }
                mdata.AddStringColumn("Entry name", "", annotationColumn);
            }
            if (ArrayUtils.Contains(selection, 1)){ // Gene name
                string[] annotationColumn = new string[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    List<string> rowAnnotations = new List<string>();
                    foreach (ProteinSequence entry in fastaEntries[row]){
                        string geneName = entry.GeneName;
                        if (geneName != null && !ArrayUtils.Contains(rowAnnotations, geneName)){
                            rowAnnotations.Add(geneName);
                        }
                    }
                    annotationColumn[row] = string.Join(";", rowAnnotations.ToArray());
                }
                mdata.AddStringColumn("Gene name", "", annotationColumn);
            }
            if (ArrayUtils.Contains(selection, 2)){
                // Verbose protein name, i.e. all protein names annotated in all fasta headers, including the
                //'Isoform x of...' prefixes and '(Fragment)' suffixes
                string[] annotationColumn = new string[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    List<string> rowAnnotations = new List<string>();
                    foreach (ProteinSequence entry in fastaEntries[row]){
                        string proteinName = entry.ProteinName;
                        if (proteinName != null && !ArrayUtils.Contains(rowAnnotations, proteinName)){
                            rowAnnotations.Add(proteinName);
                        }
                    }
                    annotationColumn[row] = string.Join(";", rowAnnotations.ToArray());
                }
                mdata.AddStringColumn("Protein name (verbose)", "", annotationColumn);
            }
            if (ArrayUtils.Contains(selection, 3)){ // Consensus protein name
                string[] annotationColumn = new string[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    List<string> rowAnnotations = new List<string>();
                    foreach (ProteinSequence entry in fastaEntries[row]){
                        string proteinName = entry.ConsensusProteinName;
                        if (proteinName != null && !ArrayUtils.Contains(rowAnnotations, proteinName)){
                            rowAnnotations.Add(proteinName);
                        }
                    }
                    annotationColumn[row] = String.Join(";", rowAnnotations.ToArray());
                }
                mdata.AddStringColumn("Protein name", "", annotationColumn);
            }
            if (ArrayUtils.Contains(selection, 4)){ // Species
                string[] annotationColumn = new string[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    List<string> rowAnnotations = new List<string>();
                    foreach (ProteinSequence entry in fastaEntries[row]){
                        string speciesName = entry.Species;
                        if (speciesName != null && !ArrayUtils.Contains(rowAnnotations, speciesName)){
                            rowAnnotations.Add(speciesName);
                        }
                    }
                    annotationColumn[row] = String.Join(";", rowAnnotations.ToArray());
                }
                mdata.AddStringColumn("Species", "", annotationColumn);
            }
            // Numeric annotations
            processInfo.Status("Adding numeric annotations.");
            selection =
                param.GetParamWithSubParams<int>("Numeric annotations").GetSubParameters().GetParam<int[]>("Annotations").Value;
            bool annotateLeadingId = (param.GetParamWithSubParams<int>("Numeric annotations").Value == 1);
            if (ArrayUtils.Contains(selection, 0)){ // Sequence length
                double[] annotationColumn = new double[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    List<double> rowAnnotations = new List<double>();
                    foreach (ProteinSequence entry in fastaEntries[row]){
                        double sequenceLength = entry.GetSequence().Length;
                        rowAnnotations.Add(sequenceLength);
                        if (annotateLeadingId && rowAnnotations.Count > 0){
                            break;
                        }
                    }
                    annotationColumn[row] = ArrayUtils.Median(rowAnnotations.ToArray());
                }
                mdata.AddNumericColumn("Sequence length", "", annotationColumn);
            }
            if (ArrayUtils.Contains(selection, 1)){ // Monoisotopic molecular mass
                double[] annotationColumn = new double[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    List<double> rowAnnotations = new List<double>();
                    foreach (ProteinSequence entry in fastaEntries[row]){
                        double monoisotopicMass = entry.GetMonoisotopicMolecularMass();
                        rowAnnotations.Add(monoisotopicMass);
                        if (annotateLeadingId && rowAnnotations.Count > 0){
                            break;
                        }
                    }
                    annotationColumn[row] = ArrayUtils.Median(rowAnnotations.ToArray());
                }
                mdata.AddNumericColumn("Monoisotopic molecular mass", "", annotationColumn);
            }
            if (ArrayUtils.Contains(selection, 2)){ // Average molecular mass
                double[] annotationColumn = new double[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    List<double> rowAnnotations = new List<double>();
                    foreach (ProteinSequence entry in fastaEntries[row]){
                        double averageMass = entry.GetAverageMolecularMass();
                        rowAnnotations.Add(averageMass);
                        if (annotateLeadingId && rowAnnotations.Count > 0){
                            break;
                        }
                    }
                    annotationColumn[row] = ArrayUtils.Median(rowAnnotations.ToArray());
                }
                mdata.AddNumericColumn("Average molecular mass", "", annotationColumn);
            }
            // Theoretical peptides
            processInfo.Status("Calculating theoretical peptides.");
            annotateLeadingId = (param.GetParamWithSubParams<int>("Calculate theoretical peptides").Value == 1);
            Protease[] proteases = ArrayUtils.SubArray(Constants.defaultProteases,
                param.GetParamWithSubParams<int>("Calculate theoretical peptides").GetSubParameters().GetParam<int[]>("Proteases")
                    .Value);
            double minLength =
                param.GetParamWithSubParams<int>("Calculate theoretical peptides").GetSubParameters().GetParam<double>(
                    "Min. peptide length").Value;
            double maxLength =
                param.GetParamWithSubParams<int>("Calculate theoretical peptides").GetSubParameters().GetParam<double>(
                    "Max. peptide length").Value;
            bool displayPeptideSequences = annotateLeadingId &&
                                            param.GetParamWithSubParams<int>("Calculate theoretical peptides").GetSubParameters().GetParam<bool>(
                                                "Show sequences").Value;
            foreach (Protease protease in proteases){
                double[] annotationColumn = new double[mdata.RowCount];
                string[] peptideColumn = new string[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    List<double> rowAnnotations = new List<double>();
                    List<string> rowPeptides = new List<string>();
                    foreach (ProteinSequence entry in fastaEntries[row]){
                        double nTheoreticalPeptides = entry.GetNumberOfTheoreticalPeptides(protease, (int) minLength, (int) maxLength);
                        rowAnnotations.Add(nTheoreticalPeptides);
                        if (displayPeptideSequences){
                            rowPeptides.AddRange(entry.GetTheoreticalPeptideSequences(protease, (int) minLength, (int) maxLength));
                        }
                        if (annotateLeadingId && rowAnnotations.Count > 0){
                            break;
                        }
                    }
                    annotationColumn[row] = ArrayUtils.Median(rowAnnotations.ToArray());
                    peptideColumn[row] = String.Join(";", rowPeptides);
                }
                mdata.AddNumericColumn(
                    "Number of theoretical peptides (" + protease.name + ", " + minLength + "-" + maxLength + ")", "", annotationColumn);
                if (displayPeptideSequences){
                    mdata.AddStringColumn(
                        "Theoretical peptide sequences (" + protease.name + ", " + minLength + "-" + maxLength + ")", "", peptideColumn);
                }
            }
            // Sequence features
            processInfo.Status("Counting sequence features.");
            annotateLeadingId = (param.GetParamWithSubParams<int>("Count sequence features").Value == 1);
            bool normalizeBySequenceLength =
                param.GetParamWithSubParams<int>("Count sequence features").GetSubParameters().GetParam<bool>(
                    "Normalize by sequence length").Value;
            if (param.GetParamWithSubParams<int>("Count sequence features").GetSubParameters().GetParam<string>("Regex").Value !=
                ""){
                Regex regex;
                try{
                    regex =
                        new Regex(
                            param.GetParamWithSubParams<int>("Count sequence features").GetSubParameters().GetParam<string>("Regex").Value);
                } catch (ArgumentException){
                    processInfo.ErrString = "The regular expression you provided has invalid syntax.";
                    return;
                }
                double[] sequenceFeatureColumn = new double[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    List<double> featureCount = new List<double>();
                    foreach (ProteinSequence entry in fastaEntries[row]){
                        double nFeatures = regex.Matches(entry.GetSequence()).Count;
                        featureCount.Add(normalizeBySequenceLength ? nFeatures/entry.GetLength() : nFeatures);
                        if (annotateLeadingId){
                            break;
                        }
                    }
                    sequenceFeatureColumn[row] = ArrayUtils.Median(featureCount.ToArray());
                }
                mdata.AddNumericColumn(
                    (normalizeBySequenceLength ? "Normalized feature count (" : "Feature count (") + regex + ")", "",
                    sequenceFeatureColumn);
            }
            processInfo.Status("Done.");
        }