/// <summary>
/// Locates a parser for the configured file, optionally overrides the parser's
/// alphabet, and returns the sequences produced by the parser.
/// </summary>
/// <param name="context">The execution context under which the activity executes.</param>
/// <returns>The result of the parser's <c>Parse()</c> call.</returns>
/// <exception cref="ArgumentException">
/// No parser could be determined for the file, or <c>DesiredAlphabet</c> does not
/// match the name of any entry in <c>Alphabets.All</c>.
/// </exception>
protected override IEnumerable<ISequence> Execute(CodeActivityContext context)
{
    string filename = Filename.Get(context);
    ISequenceParser parser = SequenceParsers.FindParserByFileName(filename);
    if (parser == null)
    {
        throw new ArgumentException("Could not determine parser for " + filename);
    }

    string alphaName = DesiredAlphabet;
    if (!string.IsNullOrEmpty(alphaName))
    {
        // Compare case-insensitively without allocating a lowered copy of every alphabet name.
        IAlphabet alphabet = Alphabets.All.FirstOrDefault(
            a => string.Equals(a.Name, alphaName, StringComparison.OrdinalIgnoreCase));
        if (alphabet == null)
        {
            // Include the requested name so the caller can see what was rejected.
            throw new ArgumentException("Unknown alphabet name: " + alphaName);
        }

        parser.Alphabet = alphabet;
    }

    if (LogOutput)
    {
        // Fall back to the console when no TextWriter extension is registered.
        var tw = context.GetExtension<TextWriter>() ?? Console.Out;
        tw.WriteLine("Reading sequences from " + filename);
    }

    return parser.Parse();
}
/// <summary>
/// Parses the file named by <c>InputFile</c> and stores the parsed sequences in
/// <c>ListSequenceResult</c>.
/// </summary>
/// <param name="executionContext">The activity execution context (not used by this method).</param>
/// <returns><c>ActivityExecutionStatus.Closed</c> once parsing has finished.</returns>
/// <exception cref="InvalidOperationException">No parser could be found for the input file.</exception>
protected override ActivityExecutionStatus Execute(ActivityExecutionContext executionContext)
{
    String inputFileName = InputFile;
    ISequenceParser parser = SequenceParsers.FindParserByFile(inputFileName);
    if (parser == null)
    {
        // Fail with a descriptive error instead of a NullReferenceException on the next line.
        throw new InvalidOperationException("Could not determine parser for " + inputFileName);
    }

    ListSequenceResult = parser.Parse(inputFileName);
    return ActivityExecutionStatus.Closed;
}
// Runs the parser over the given file and appends every parsed sequence
// to the items of the element named "SequencesListBox".
private void ParseSequence(ISequenceParser parser, string filename)
{
    IList<ISequence> results = parser.Parse(filename);
    ListBox target = (ListBox)FindName("SequencesListBox");

    for (int index = 0; index < results.Count; index++)
    {
        target.Items.Add(results[index]);
    }
}
/// <summary>
/// Helper method to parse the given file into a set of ISequence elements,
/// using whichever parser <c>SequenceParsers.FindParserByFileName</c> selects.
/// </summary>
/// <param name="fileName">Filename to load data from</param>
/// <returns>Enumerable set of ISequence elements</returns>
/// <exception cref="ArgumentException">No parser could be located for <paramref name="fileName"/>.</exception>
protected static IEnumerable<ISequence> ParseFile(string fileName)
{
    ISequenceParser parser = SequenceParsers.FindParserByFileName(fileName);
    if (parser == null)
    {
        // A specific exception type; it still derives from Exception, so existing
        // catch (Exception) handlers keep working.
        throw new ArgumentException("Could not locate an appropriate sequence parser for " + fileName);
    }

    return parser.Parse();
}
// Opens the file with the given parser, adds every parsed sequence to the
// items of the element named "SequencesListBox", and closes the parser again.
private void ParseSequence(ISequenceParser parser, string filename)
{
    parser.Open(filename);
    try
    {
        IEnumerable<ISequence> parsed = parser.Parse();
        ListBox sequenceList = (ListBox)FindName("SequencesListBox");
        foreach (ISequence seq in parsed)
        {
            sequenceList.Items.Add(seq);
        }
    }
    finally
    {
        // Close the parser even when parsing or list population throws.
        parser.Close();
    }
}
private const double minimumScoreWeight = 0.6; // weight used to calculate the minimum allowed score for best matches

#endregion

#region Constructor

/// <summary>
/// Creates a finder that compares input sequences with a list of known contaminants.
/// </summary>
/// <param name="targetParser">Parser supplying the target (input) sequences.</param>
public SequenceContaminationFinder(ISequenceParser targetParser)
{
    if (targetParser == null)
    {
        throw new ArgumentNullException("targetParser");
    }

    // Obtain the target (input) sequences from the supplied parser.
    TargetSequences = targetParser.Parse();

    // Start with an empty lookup; this constructor does not populate it.
    BlastHspCounter = new Dictionary<string, List<string>>();
}
/// <summary>
/// Prompts the user for a sequence file, locates a parser for it, and adds a
/// <c>SequenceViewModel</c> to <c>LoadedSequences</c> for every parsed sequence.
/// </summary>
private void OnLoadFile()
{
    // Dialog filter: one combined "all formats" entry followed by one entry per parser.
    string filterString = "All Supported Formats|"
        + string.Join(";", SequenceParsers.All.Select(p => p.SupportedFileTypes.Replace(',', ';').Replace(".", "*.")))
        + "|"
        + string.Join("|", SequenceParsers.All.Select(p => string.Format("{0}|{1}", p.Name, p.SupportedFileTypes.Replace(',', ';').Replace(".", "*."))));

    OpenFileDialog openFileDialog = new OpenFileDialog { CheckFileExists = true, Filter = filterString };

    // Prompt the user for the filename
    if (openFileDialog.ShowDialog() == true)
    {
        // See if we can auto-locate the parser
        ISequenceParser parser = SequenceParsers.FindParserByFileName(openFileDialog.FileName);
        if (parser == null)
        {
            // Use the extension
            string fileExtension = Path.GetExtension(openFileDialog.FileName);

            // An empty extension is "contained" in every string, which would
            // silently select the first registered parser; skip the lookup then.
            if (!string.IsNullOrEmpty(fileExtension))
            {
                parser = SequenceParsers.All.FirstOrDefault(sp => sp.SupportedFileTypes.Contains(fileExtension));
            }
        }

        // Cannot parse this file.
        if (parser == null)
        {
            MessageBox.Show(
                string.Format("Cannot locate a sequence parser for {0}", openFileDialog.FileName),
                "Cannot Parse File");
            return;
        }

        // Parse the file and always close the parser afterwards.
        try
        {
            foreach (var sequence in parser.Parse())
            {
                LoadedSequences.Add(new SequenceViewModel(this, sequence));
            }
        }
        catch (Exception ex)
        {
            ShowError("Cannot Parse File", "Failed to open " + openFileDialog.FileName, ex);
        }
        finally
        {
            parser.Close();
        }
    }
}
/// <summary>
/// Converts a sequence file from one format to another. Expects exactly two
/// arguments: the source filename and the destination filename.
/// </summary>
/// <param name="args">Command-line arguments: source filename, destination filename.</param>
static void Main(string[] args)
{
    if (args.Length != 2)
    {
        Console.WriteLine("Need source and destination filenames.");
        return;
    }

    string sourceFilename = args[0];
    string destFilename = args[1];

    ISequenceParser parser = SequenceParsers.FindParserByFileName(sourceFilename);
    if (parser == null)
    {
        // Fall back to matching on the file extension.
        parser = SequenceParsers.All.FirstOrDefault(
            sp => sp.SupportedFileTypes.Contains(Path.GetExtension(sourceFilename)));
        if (parser == null)
        {
            Console.WriteLine("Failed to locate parser for {0}", sourceFilename);
            return;
        }

        parser.Open(sourceFilename);
    }

    try
    {
        ISequenceFormatter formatter = SequenceFormatters.FindFormatterByFileName(destFilename);
        if (formatter == null)
        {
            // Fall back to matching on the file extension.
            formatter = SequenceFormatters.All.FirstOrDefault(
                sp => sp.SupportedFileTypes.Contains(Path.GetExtension(destFilename)));
            if (formatter == null)
            {
                // The outer finally still closes the parser on this early exit.
                Console.WriteLine("Failed to locate formatter for {0}", destFilename);
                return;
            }

            formatter.Open(destFilename);
        }

        try
        {
            foreach (var sequence in parser.Parse())
            {
                formatter.Write(sequence);
            }
        }
        finally
        {
            // Close the output even when a sequence fails to parse or write.
            formatter.Close();
        }
    }
    finally
    {
        parser.Close();
    }
}
/// <summary>
/// Runs the tasks: for every sequence assigned to this task, writes a header line
/// with the sequence ID followed by the percentage of G/C symbols in it.
/// </summary>
/// <param name="tasksToRun">The tasks to run, as a range collection.</param>
/// <param name="taskCount">Total task count.</param>
public override void RunTasks(RangeCollection tasksToRun, long taskCount)
{
    ISequenceParser parser = SequenceParsers.FindParserByFileName(InputFile.FullName);
    try
    {
        var allTheWorkQuery = parser.Parse();
        var myWorkAndAnIndex = SpecialFunctions.DivideWork(allTheWorkQuery, tasksToRun, taskCount, 1, new RangeCollection());
        var myUniqueResultFileName = GetFileTaskFileName(tasksToRun.ToString());

        // Count with an integer: a float counter stops incrementing at 2^24,
        // which silently corrupts the result for long sequences.
        long gcCount = 0;
        long seqLength = 0;

        using (TextWriter writer = File.CreateText(myUniqueResultFileName))
        {
            // loop all sequences in current task
            foreach (var numberAndIndex in myWorkAndAnIndex)
            {
                writer.WriteLine(">" + numberAndIndex.Key.ID);
                foreach (byte val in numberAndIndex.Key)
                {
                    seqLength++;
                    switch (val)
                    {
                        case (byte)'G':
                        case (byte)'g':
                        case (byte)'C':
                        case (byte)'c':
                            gcCount++;
                            break;
                    }
                }

                if (gcCount > 0)
                {
                    // Same float arithmetic as before, so the output text is unchanged for
                    // counts the old float counter could represent exactly.
                    writer.Write((((float)gcCount / (float)seqLength) * 100) + "%");
                }
                else
                {
                    writer.Write(gcCount + "%");
                }

                // Reset the per-sequence counters.
                seqLength = 0;
                gcCount = 0;
                writer.WriteLine();
            }
        }
    }
    finally
    {
        // Release the parser once the (lazily consumed) sequences have been processed.
        if (parser != null)
        {
            parser.Close();
        }
    }
}
/// <summary>
/// Extracts the alignment part from the package into the temp directory, parses it
/// into <c>_alignment</c>, copies the descriptive fields onto it and marks every
/// <c>Sequence</c> in the alignment as writeable.
/// </summary>
/// <returns><c>true</c> when the alignment was loaded; <c>false</c> when the package has no alignment part.</returns>
private bool LoadAlignment()
{
    Uri alignmentURI = new Uri("/" + _alignmentFileName, UriKind.Relative);

    // FirstOrDefault (not First) so a missing entry reaches the "return false"
    // path below instead of throwing InvalidOperationException.
    PackagePart alnEntry = _crwFile.GetParts().FirstOrDefault(entry => entry.Uri.Equals(alignmentURI));
    if (alnEntry == null)
    {
        // Error condition, missing alignment file.
        return false;
    }

    //Extract the alignment file from the zip archive.
    using (var reader = new BufferedStream(alnEntry.GetStream(FileMode.Open)))
    {
        using (var writer = new BufferedStream(File.Create(_tempDirectory + _alignmentFileName)))
        {
            byte[] buf = new byte[1024];
            int readCount = 0;
            while ((readCount = reader.Read(buf, 0, buf.Length)) > 0)
            {
                writer.Write(buf, 0, readCount);
            }
        }
    }

    ISequenceParser parser = SequenceParsers.FindParserByFile(_tempDirectory + _alignmentFileName);
    _alignment = new SequenceAlignment(parser.Parse(_tempDirectory + _alignmentFileName));
    _alignment.MoleculeType = _moleculeType;
    _alignment.GeneType = _geneType;
    _alignment.GeneName = _geneName;
    _alignment.LogicalName = _logicalAlignmentName;

    foreach (var seq in _alignment.Sequences)
    {
        //Workaround for MBF Framework, have to cast Sequence objects as writeable, opened feature request.
        Sequence s = seq as Sequence;
        if (s != null)
        {
            s.IsReadOnly = false;
        }
    }

    return true;
}
/// <summary>
/// Loads a single sequence file using the parser selected for its file name.
/// </summary>
/// <param name="sequenceFilename">Path of the sequence file to load.</param>
/// <param name="molNames">
/// Optional filter: when non-empty, only sequences whose ID maps (via
/// <c>SequenceIdSplit.SequenceIdToPdbIdAndChainId</c>) to one of these names are kept.
/// </param>
/// <param name="distinct">When true, duplicate sequences are removed with <c>Distinct()</c>.</param>
/// <returns>The loaded sequences, or <c>null</c> when no parser could be found for the file.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="sequenceFilename"/> is null, empty or whitespace.</exception>
/// <exception cref="FileNotFoundException">The file does not exist.</exception>
public static List<ISequence> LoadSequenceFile(string sequenceFilename, string[] molNames, bool distinct = true)
{
    if (string.IsNullOrWhiteSpace(sequenceFilename))
    {
        throw new ArgumentOutOfRangeException(nameof(sequenceFilename));
    }

    if (!File.Exists(sequenceFilename))
    {
        throw new FileNotFoundException(sequenceFilename);
    }

    // Any DirectoryNotFoundException now propagates unchanged; the former
    // catch-and-rethrow created a new exception and discarded the stack trace.
    ISequenceParser sequenceParser = SequenceParsers.FindParserByFileName(sequenceFilename);
    if (sequenceParser == null)
    {
        // Preserved contract: no parser means a null result.
        return null;
    }

    List<ISequence> sequences;
    try
    {
        sequences = sequenceParser.Parse().ToList();
    }
    finally
    {
        // Close the parser even when parsing fails.
        sequenceParser.Close();
    }

    if (distinct)
    {
        sequences = sequences.Distinct().ToList();
    }

    if (sequences.Count > 0 && molNames != null && molNames.Length > 0)
    {
        sequences = sequences
            .Where(a => molNames.Contains(SequenceIdSplit.SequenceIdToPdbIdAndChainId(a.ID).Mol))
            .ToList();
    }

    return sequences;
}
/// <summary>
/// Creates an analyzer over the sequences produced by the given parser.
/// </summary>
/// <param name="parser">Parser whose <c>Parse()</c> result becomes <c>Sequences</c>.</param>
/// <param name="file">File name stored in <c>FileName</c>.</param>
public QcAnalyzer(ISequenceParser parser, string file)
{
    if (parser == null)
    {
        throw new ArgumentNullException("parser");
    }

    if (file == null)
    {
        throw new ArgumentNullException("file");
    }

    Sequences = parser.Parse();
    FileName = file;

    // Neither analysis has been run yet.
    HasRunContentByPosition = false;
    HasRunContentBySequence = false;
}
/// <summary>
/// Base constructor for discarding sequences. Stores the parsed input and both
/// output formatters, and resets the counters.
/// </summary>
/// <param name="parser">SequenceParser for input data; must not be null.</param>
/// <param name="filtered">SequenceFormatter for filtered data; must not be null.</param>
/// <param name="discarded">SequenceFormatter for discarded data; not validated here.</param>
public Discarder(ISequenceParser parser, ISequenceFormatter filtered, ISequenceFormatter discarded)
{
    if (parser == null)
    {
        throw new ArgumentNullException("parser");
    }

    if (filtered == null)
    {
        throw new ArgumentNullException("filtered");
    }

    Sequences = parser.Parse();
    FilteredWriter = filtered;
    DiscardedWriter = discarded;

    // Counters start at zero.
    Counted = 0;
    DiscardCount = 0;
}
/// <summary>
/// Converts <c>InputFile</c> to <c>OutputFile</c>, using the parser and formatter
/// selected from the respective file names.
/// </summary>
/// <exception cref="Exception">
/// The input file does not exist, or no parser/formatter matches the file names.
/// </exception>
/// <exception cref="OperationCanceledException">
/// A sequence could not be converted; the underlying failure is available as InnerException.
/// </exception>
public void ConvertFile()
{
    //make sure input file is valid
    if (!File.Exists(this.InputFile))
    {
        throw new Exception("Input file does not exist.");
    }

    //Finds a parser and opens the file
    ISequenceParser inputParser = SequenceParsers.FindParserByFileName(this.InputFile);
    if (inputParser == null)
    {
        throw new Exception("Input file not a valid file format to parse.");
    }

    ISequenceFormatter outputFormatter = null;
    try
    {
        //Finds a formatter and opens the file
        outputFormatter = SequenceFormatters.FindFormatterByFileName(this.OutputFile);
        if (outputFormatter == null)
        {
            // The finally block below closes the already-opened input parser.
            throw new Exception("Output file not a valid file format for conversion.");
        }

        try
        {
            foreach (ISequence sequence in inputParser.Parse())
            {
                outputFormatter.Format(sequence);
            }
        }
        catch (Exception ex)
        {
            // Keep the root cause as the inner exception instead of discarding it.
            throw new OperationCanceledException(
                string.Format(
                    "Unable to convert sequences from {0} to {1} - verify that the input sequences have the appropriate type of data to convert to the output formatter.",
                    inputParser.Name,
                    outputFormatter.Name),
                ex);
        }
    }
    finally
    {
        if (outputFormatter != null)
        {
            outputFormatter.Close();
        }

        inputParser.Close();
    }
}
/// <summary>
/// Reads all sequences from the given parser (disposing it afterwards) and
/// returns the single sequence found.
/// </summary>
/// <param name="parser">Open parser; disposed by this method.</param>
/// <param name="fragmentName">Fragment name, used in error messages.</param>
/// <returns>The one sequence read from the parser.</returns>
private ISequence ParseFragment(ISequenceParser parser, String fragmentName)
{
    List<ISequence> parsed;
    using (parser)
    {
        parsed = parser.Parse().ToList();
    }

    // Exactly one sequence is expected.
    int found = parsed.Count;
    if (found != 1)
    {
        throw new SequenceCountException(fragmentName + " contains " + found + " well-formatted sequences. It should contain exactly one.");
    }

    // Reject sequences with fewer than 150 symbols.
    ISequence fragment = parsed[0];
    if (fragment.Count < 150)
    {
        throw new SequenceLengthException("Sequence in " + fragmentName + " is shorter than 150nt. It should not be used as a fragment.", fragment);
    }

    return fragment;
}
/// <summary>
/// Parses the first sequence from the given file into <c>SequenceToSplit</c>. The
/// parser is selected from the file name.
/// </summary>
/// <param name="fileName">The path of the file to be parsed for a sequence</param>
/// <exception cref="ArgumentException">
/// No parser matches the file name, or the file yielded no sequence.
/// </exception>
public void ParseSequence(string fileName)
{
    ISequenceParser parser = SequenceParsers.FindParserByFileName(fileName);
    if (parser == null)
    {
        throw new ArgumentException("Could not find an appropriate parser for " + fileName);
    }

    try
    {
        // Get the first sequence from the file
        SequenceToSplit = parser.Parse().FirstOrDefault();
    }
    finally
    {
        // Close the parser even when parsing throws.
        parser.Close();
    }

    if (SequenceToSplit == null)
    {
        throw new ArgumentException("Unable to parse a sequence from file " + fileName);
    }
}
/// <summary>
/// Base constructor for trimming sequences. Stores the parsed input, both output
/// formatters and the trim direction, and resets the counters.
/// </summary>
/// <param name="parser">SequenceParser for input data; must not be null.</param>
/// <param name="filtered">SequenceFormatter for output data; must not be null.</param>
/// <param name="discarded">SequenceFormatter stored as <c>DiscardedWriter</c>; not validated here.</param>
/// <param name="fromLeft">Trim from the start of the read</param>
public Trimmer(ISequenceParser parser, ISequenceFormatter filtered, ISequenceFormatter discarded, bool fromLeft)
{
    if (parser == null)
    {
        throw new ArgumentNullException("parser");
    }

    if (filtered == null)
    {
        throw new ArgumentNullException("filtered");
    }

    Sequences = parser.Parse();
    FilteredWriter = filtered;
    DiscardedWriter = discarded;

    // Counters start at zero.
    Counted = 0;
    TrimCount = 0;
    DiscardCount = 0;

    TrimFromStart = fromLeft;
}
/// <summary>
/// Opens a GenBank project file, reads its first sequence, and rebuilds
/// <c>FragmentDict</c> with one DNA fragment per misc feature of that sequence.
/// </summary>
/// <param name="file">Path of the GenBank file to open.</param>
public void openProject(String file)
{
    Sequence sequence = null;
    parser = SequenceParsers.GenBank;
    parser.Open(file);
    try
    {
        sequence = (Sequence)parser.Parse().ToList()[0];
    }
    finally
    {
        // Close the file even when parsing fails or the file holds no sequence.
        parser.Close();
    }

    Fragment project = new Fragment(file, "project", sequence);

    // NOTE(review): meta is null if the "GenBank" entry is not a GenBankMetadata; not checked here.
    GenBankMetadata meta = sequence.Metadata["GenBank"] as GenBankMetadata;
    FragmentDict = new Dictionary<string, Fragment>();
    foreach (var feat in meta.Features.MiscFeatures)
    {
        // Substring start is LocationStart - 1; length spans LocationStart..LocationEnd inclusive.
        String subseq = project.GetString().Substring(
            feat.Location.LocationStart - 1,
            feat.Location.LocationEnd - feat.Location.LocationStart + 1);
        FragmentDict.Add(feat.StandardName, new Fragment(file, feat.StandardName, new Sequence(Alphabets.DNA, subseq)));
    }
}
/// <summary>
/// Parses the first sequence from the given file into <c>SequenceToSplit</c>. The
/// parser is selected from the file name.
/// </summary>
/// <param name="fileName">The path of the file to be parsed for a sequence</param>
/// <exception cref="ArgumentException">
/// No parser matches the file name, or the file yielded no sequence.
/// </exception>
internal void ParseSequence(string fileName)
{
    ISequenceParser parser = SequenceParsers.FindParserByFileName(fileName);
    if (parser == null)
    {
        throw new ArgumentException("Could not find an appropriate parser for " + fileName);
    }

    try
    {
        IEnumerable<ISequence> sequences = parser.Parse();

        // FirstOrDefault turns an empty file into the descriptive error below
        // rather than an ArgumentOutOfRangeException from ElementAt(0).
        ISequence first = sequences == null ? null : sequences.FirstOrDefault();
        if (first == null)
        {
            throw new ArgumentException("Unable to parse a sequence from file " + fileName);
        }

        SequenceToSplit = first;
    }
    finally
    {
        // Close the parser on success and on every failure path.
        parser.Close();
    }
}
/// <summary>
/// Parses the sequences in a file and splits each one into consecutive,
/// upper-cased three-character codon strings.
/// </summary>
/// <param name="file">Path of the sequence file to parse.</param>
/// <returns>
/// A tuple of the list of codons and the number of sequences parsed. If a
/// <c>FileFormatException</c> occurs, a warning dialog is shown and whatever was
/// collected so far is returned.
/// </returns>
public static Tuple<List<string>, int> sequenceParser(string file)
{
    parser = SequenceParsers.FindParserByFileName(file);
    List<ISequence> sequences = new List<ISequence>();
    List<string> list = new List<string>();

    // parsing sequence
    try
    {
        // The using block disposes the parser, so no separate Close() call is needed.
        using (parser)
        {
            sequences = parser.Parse().ToList();
            foreach (ISequence seq in sequences)
            {
                string seqTemp = getString(seq);

                // Take complete triplets only; a trailing 1-2 character remainder is dropped.
                for (int i = 0; i < seqTemp.Length - 2; i += 3)
                {
                    // Invariant casing: culture-sensitive ToUpper() maps 'i' to a dotted
                    // capital under Turkish cultures, which would corrupt codons.
                    list.Add(seqTemp.Substring(i, 3).ToUpperInvariant());
                }
            }
        }
    }
    catch (System.IO.FileFormatException)
    {
        string message = "Something went wrong. Probably you tried to use an improper file. Try again. \nFor more information about using Codon Context Ranking check the \"How to use\" page.";
        ModernDialog.ShowMessage(message, "Warning", MessageBoxButton.OK);
    }

    return new Tuple<List<string>, int>(list, sequences.Count);
}
/// <summary>
/// This method breaks the sequences across multiple worksheets: a new worksheet is
/// added whenever there is no current worksheet or the per-sheet sequence counter
/// has reached <c>sequencesPerWorksheet</c>.
/// </summary>
/// <param name="parser">Parser whose <c>Parse()</c> result supplies the sequences; also passed on to <c>WriteOneSequenceToWorksheet</c>.</param>
/// <param name="fileName">File name; its extension-less form is the fallback for sheet names and sequence IDs.</param>
/// <param name="currentRow">Row position passed by reference to <c>WriteOneSequenceToWorksheet</c>; reset to 1 for every new worksheet.</param>
private void ImportSequencesAcrossSheets(ISequenceParser parser, string fileName, ref int currentRow)
{
    Workbook workBook = Globals.ThisAddIn.Application.ActiveWorkbook;
    int sequenceCount = 0;
    Worksheet worksheet = null;

    // Application events stay disabled for the whole import; restored in the finally block.
    Globals.ThisAddIn.Application.EnableEvents = false;
    try
    {
        foreach (ISequence sequence in parser.Parse())
        {
            // Short-circuit matters: the counter is incremented only when a worksheet
            // already exists, and it is compared before being incremented.
            if (worksheet == null || sequenceCount++ >= this.sequencesPerWorksheet)
            {
                if (worksheet != null)
                {
                    worksheet.Cells[1, 1].EntireColumn.AutoFit(); // Autofit first column of the sheet being left
                }

                // Restart row and counter for the new worksheet.
                currentRow = 1;
                sequenceCount = 1;

                // The second argument positions the new sheet relative to the current last sheet.
                worksheet = workBook.Worksheets.Add(
                    Type.Missing,
                    workBook.Worksheets.Item[workBook.Worksheets.Count],
                    Type.Missing,
                    Type.Missing) as Worksheet;
                if (worksheet == null)
                {
                    // The added object was not a Worksheet; the finally block still runs.
                    return;
                }

                // Get a name for the worksheet: the sequence ID, or the file name when the ID is empty.
                string validName =
                    this.GetValidFileNames(
                        string.IsNullOrEmpty(sequence.ID)
                            ? Path.GetFileNameWithoutExtension(fileName)
                            : sequence.ID);
                worksheet.Name = validName;
                ((_Worksheet)worksheet).Activate();
                Globals.ThisAddIn.Application.ActiveWindow.Zoom = ZoomLevel;
            }

            // Sequences without an ID get "<file name>_<position within the current sheet>".
            if (string.IsNullOrEmpty(sequence.ID))
            {
                sequence.ID = Path.GetFileNameWithoutExtension(fileName) + "_" + sequenceCount;
            }

            this.WriteOneSequenceToWorksheet(parser, ref currentRow, sequence, worksheet);
        }

        if (worksheet != null)
        {
            worksheet.Cells[1, 1].EntireColumn.AutoFit(); // Autofit first column of the last sheet
        }
    }
    finally
    {
        // Runs on success, early return and exceptions alike.
        this.EnableAllControls();
        Globals.ThisAddIn.Application.EnableEvents = true;
    }
}
/// <summary>
/// Parses the given file into a list of ISequence elements. The parser is chosen
/// from the file name; when none is found a <c>FastAParser</c> is used.
/// </summary>
/// <param name="fileName">Filename to load data from</param>
/// <returns>The parsed sequences, fully materialized into a list.</returns>
internal static IList<ISequence> ParseFile(string fileName)
{
    ISequenceParser parser = SequenceParsers.FindParserByFileName(fileName);
    if (parser == null)
    {
        parser = new FastAParser();
    }

    // Materialize now so the file is not read multiple times.
    List<ISequence> loaded = parser.Parse(fileName).ToList();
    return loaded;
}
/// <summary>
/// Opens a GenBank project file, reads its first sequence, and rebuilds
/// <c>FragmentDict</c> with one ambiguous-DNA fragment per misc feature of that sequence.
/// </summary>
/// <param name="file">Path of the GenBank file to open.</param>
public void openProject(String file)
{
    Sequence sequence = null;
    parser = SequenceParsers.GenBank;
    parser.Open(file);
    try
    {
        sequence = (Sequence)parser.Parse().ToList()[0];
    }
    finally
    {
        // Close the file even when parsing fails or the file holds no sequence.
        parser.Close();
    }

    Fragment project = new Fragment(file, "project", sequence);

    // NOTE(review): meta is null if the "GenBank" entry is not a GenBankMetadata; not checked here.
    GenBankMetadata meta = sequence.Metadata["GenBank"] as GenBankMetadata;
    FragmentDict = new Dictionary<string, Fragment>();
    foreach (var feat in meta.Features.MiscFeatures)
    {
        // Substring start is LocationStart - 1; length spans LocationStart..LocationEnd inclusive.
        String subseq = project.GetString().Substring(
            feat.Location.LocationStart - 1,
            feat.Location.LocationEnd - feat.Location.LocationStart + 1);
        FragmentDict.Add(feat.StandardName, new Fragment(file, feat.StandardName, new Sequence(Alphabets.AmbiguousDNA, subseq)));
    }
}
/// <summary>
/// Drains the supplied parser (disposing it when done) and hands back the only
/// sequence it produced.
/// </summary>
/// <param name="parser">Open parser; disposed by this method.</param>
/// <param name="fragmentName">Fragment name, used in error messages.</param>
/// <returns>The single sequence read from the parser.</returns>
private ISequence ParseFragment(ISequenceParser parser, String fragmentName)
{
    List<ISequence> loaded;
    using (parser)
    {
        loaded = parser.Parse().ToList();
    }

    // Anything other than exactly one sequence is an error.
    int total = loaded.Count;
    if (total != 1)
    {
        throw new SequenceCountException(fragmentName + " contains " + total + " well-formatted sequences. It should contain exactly one.");
    }

    // Sequences with fewer than 150 symbols are rejected.
    ISequence candidate = loaded[0];
    if (candidate.Count < 150)
    {
        throw new SequenceLengthException("Sequence in " + fragmentName + " is shorter than 150nt. It should not be used as a fragment.", candidate);
    }

    return candidate;
}
/// <summary>
/// Imports a set of sequences, adding one new worksheet per sequence.
/// </summary>
/// <param name="parser">SequenceParser instance supplying the sequences.</param>
/// <param name="fileName">Name of the file; its extension-less form names sheets for sequences without an ID.</param>
private void ImportSequencesOnePerSheet(ISequenceParser parser, string fileName)
{
    Workbook book = Globals.ThisAddIn.Application.ActiveWorkbook;

    foreach (ISequence current in parser.Parse())
    {
        var sheet = book.Worksheets.Add(
            Type.Missing,
            book.Worksheets.Item[book.Worksheets.Count],
            Type.Missing,
            Type.Missing) as Worksheet;
        if (sheet == null)
        {
            return;
        }

        // Sheet name comes from the sequence ID, or from the file name when the ID is empty.
        string rawName;
        if (string.IsNullOrEmpty(current.ID))
        {
            rawName = Path.GetFileNameWithoutExtension(fileName);
        }
        else
        {
            rawName = current.ID;
        }

        string sheetName = this.GetValidFileNames(rawName);

        // A sequence without an ID takes over the name used for its sheet.
        if (string.IsNullOrEmpty(current.ID))
        {
            current.ID = sheetName;
        }

        sheet.Name = sheetName;
        ((_Worksheet)sheet).Activate();
        Globals.ThisAddIn.Application.ActiveWindow.Zoom = ZoomLevel;
        Globals.ThisAddIn.Application.EnableEvents = false;

        try
        {
            // Each sheet is written starting at row 1.
            int row = 1;
            this.WriteOneSequenceToWorksheet(parser, ref row, current, sheet);
            this.currentFileNumber++;
        }
        finally
        {
            sheet.Cells[1, 1].EntireColumn.AutoFit(); // Autofit first column
            this.EnableAllControls();
            Globals.ThisAddIn.Application.EnableEvents = true;
        }
    }
}
/// <summary>
/// Imports a set of sequences into a single new worksheet, one sequence per
/// <c>WriteOneSequenceToWorksheet</c> call.
/// </summary>
/// <param name="parser">SequenceParser instance supplying the sequences.</param>
/// <param name="fileName">Name of the file; its extension-less form names the sheet and sequences without an ID.</param>
/// <param name="currentRow">Next row of insertion</param>
private void ImportSequencesAllInOneSheet(ISequenceParser parser, string fileName, ref int currentRow)
{
    Workbook workBook = Globals.ThisAddIn.Application.ActiveWorkbook;
    var worksheet = workBook.Worksheets.Add(
        Type.Missing,
        workBook.Worksheets.Item[workBook.Worksheets.Count],
        Type.Missing,
        Type.Missing) as Worksheet;
    if (worksheet == null)
    {
        // Same handling as the sibling import methods: nothing to import into,
        // so return instead of failing with a NullReferenceException below.
        return;
    }

    worksheet.Name = this.GetValidFileNames(Path.GetFileNameWithoutExtension(fileName));
    ((_Worksheet)worksheet).Activate();
    Globals.ThisAddIn.Application.ActiveWindow.Zoom = ZoomLevel;
    Globals.ThisAddIn.Application.EnableEvents = false;

    try
    {
        int sequenceCount = 0;
        foreach (ISequence sequence in parser.Parse())
        {
            sequenceCount++;

            // Sequences without an ID get "<file name>_<1-based position>".
            if (string.IsNullOrEmpty(sequence.ID))
            {
                sequence.ID = Path.GetFileNameWithoutExtension(fileName) + "_" + sequenceCount;
            }

            this.WriteOneSequenceToWorksheet(parser, ref currentRow, sequence, worksheet);
            this.currentFileNumber++;
        }
    }
    finally
    {
        worksheet.Cells[1, 1].EntireColumn.AutoFit(); // Autofit first column
        this.EnableAllControls();
        Globals.ThisAddIn.Application.EnableEvents = true;
    }
}