/// <summary>
/// Copies the header lines and the matching reads from the alignment file
/// (<c>_options.BamFile</c>) into <c>_options.OutputFile</c>. A read matches when its
/// "sequence:reference name:position" key equals the key of an aligned location
/// loaded from the mapped XML file (<c>_options.CountFile</c>).
/// </summary>
/// <returns>A one-element collection holding the output file name.</returns>
/// <exception cref="UserTerminatedException">
/// Thrown when the progress callback reports a pending cancellation (polled every 1000 reads).
/// </exception>
public override IEnumerable<string> Process()
{
    const string locusFormat = "{0}:{1}:{2}";

    var xmlFormat = new MappedItemGroupXmlFileFormat();

    Progress.SetMessage("reading mapped reads from " + _options.CountFile + " ...");
    var mappedGroups = xmlFormat.ReadFromFile(_options.CountFile);

    // Collect one key per aligned location found in the mapped XML file.
    var sequenceLocusSet = new HashSet<string>();
    foreach (var itemGroup in mappedGroups)
    {
        foreach (var mappedItem in itemGroup)
        {
            foreach (var mappedRegion in mappedItem.MappedRegions)
            {
                foreach (var location in mappedRegion.AlignedLocations)
                {
                    sequenceLocusSet.Add(string.Format(locusFormat, location.Parent.Sequence, location.Seqname, location.Start));
                }
            }
        }
    }

    Progress.SetMessage("There are {0} unique sequence:locus", sequenceLocusSet.Count);

    using (var writer = new StreamWriter(_options.OutputFile))
    using (var reader = SAMFactory.GetReader(_options.BamFile, false))
    {
        // Header lines are passed through unchanged.
        foreach (var header in reader.ReadHeaders())
        {
            writer.WriteLine(header);
        }

        int processed = 0;
        int kept = 0;
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // Poll for cancellation only once per 1000 reads.
            if (processed % 1000 == 0 && Progress.IsCancellationPending())
            {
                throw new UserTerminatedException();
            }

            if (processed > 0 && processed % 100000 == 0)
            {
                Progress.SetMessage("{0} candidates from {1} reads", kept, processed);
            }

            processed++;

            var fields = line.Split('\t');
            var key = string.Format(
                locusFormat,
                fields[SAMFormatConst.SEQ_INDEX],
                fields[SAMFormatConst.RNAME_INDEX],
                fields[SAMFormatConst.POS_INDEX]);

            if (sequenceLocusSet.Contains(key))
            {
                writer.WriteLine(line);
                kept++;
            }
        }
    }

    return new[] { _options.OutputFile };
}
/// <summary>
/// Loads the mapped item groups from <c>_options.InputFile1</c> and
/// <c>_options.InputFile2</c>, then handles every query name that occurs in both.
/// The two alignment scores are compared with the engine format's
/// <c>CompareScore</c>: on a result of 0 the read is removed from both sets, on a
/// negative result it is removed from the second set only, otherwise from the first
/// set only. Both sets are then written out through <c>SaveItems</c>.
/// </summary>
/// <returns>The list of files added by the two <c>SaveItems</c> calls.</returns>
public override IEnumerable<string> Process()
{
    var samformat = _options.GetEngineFormat();
    var xmlFormat = new MappedItemGroupXmlFileFormat();

    Progress.SetMessage("reading mapped reads from " + _options.InputFile1 + " ...");
    var items1 = xmlFormat.ReadFromFile(_options.InputFile1);

    Progress.SetMessage("reading mapped reads from " + _options.InputFile2 + " ...");
    var items2 = xmlFormat.ReadFromFile(_options.InputFile2);

    var reads1 = items1.GetQueries().ToDictionary(m => m.Qname);
    var reads2 = items2.GetQueries().ToDictionary(m => m.Qname);

    // Only names present in both inputs need a decision; snapshot them before removing anything.
    var sharedNames = reads1.Keys.Where(name => reads2.ContainsKey(name)).ToList();

    foreach (var qname in sharedNames)
    {
        var res = samformat.CompareScore(reads1[qname].AlignmentScore, reads2[qname].AlignmentScore);

        var keepFirstOnly = res < 0;
        var tie = res == 0;

        // Anything other than a negative result drops the read from the first set.
        if (!keepFirstOnly)
        {
            items1.RemoveRead(qname);
        }

        // A tie or a negative result drops the read from the second set.
        if (tie || keepFirstOnly)
        {
            items2.RemoveRead(qname);
        }
    }

    var outputs = new List<string>();
    var sequenceWriter = new MappedItemGroupSequenceWriter();
    SaveItems(items1, _options.OutputFile1, sequenceWriter, xmlFormat, outputs);
    SaveItems(items2, _options.OutputFile2, sequenceWriter, xmlFormat, outputs);

    return outputs;
}
/// <summary>
/// Removes groups whose <c>QueryCount</c> is 0, applies <c>SortTRna</c> when any
/// remaining group name contains ".tRNA", and writes the groups twice: with
/// <paramref name="writer"/> to <paramref name="outputFile"/> and with
/// <paramref name="format"/> to <paramref name="outputFile"/> + ".xml".
/// </summary>
/// <param name="items1">Groups to save; the list is modified in place.</param>
/// <param name="outputFile">Target file for the sequence writer.</param>
/// <param name="writer">Writer producing <paramref name="outputFile"/>.</param>
/// <param name="format">XML format producing the ".xml" companion file.</param>
/// <param name="result">Receives both written file names, in that order.</param>
private static void SaveItems(List<MappedItemGroup> items1, string outputFile, MappedItemGroupSequenceWriter writer, MappedItemGroupXmlFileFormat format, List<string> result)
{
    items1.RemoveAll(itemGroup => itemGroup.QueryCount == 0);

    var xmlFile = outputFile + ".xml";

    var hasTRna = items1.Exists(itemGroup => itemGroup.Name.Contains(".tRNA"));
    if (hasTRna)
    {
        items1.SortTRna();
    }

    writer.WriteToFile(outputFile, items1);
    format.WriteToFile(xmlFile, items1);

    result.AddRange(new[] { outputFile, xmlFile });
}
/// <summary>
/// Writes a tab-separated position table to <c>options.OutputFile</c>. For every count
/// file and every item group (ordered by descending estimated count), the first item
/// of the group is taken and the estimated counts of its aligned locations are
/// accumulated per 1-based offset inside the mapped region (counted from the region
/// start on the '+' strand, from the region end otherwise). One row is written per
/// offset, with the accumulated count divided by the item's estimated count.
/// If the "smallrna_position.r" template exists, a copy with the workspace and position
/// file placeholders filled in is written next to the output file and run through R.
/// </summary>
/// <returns>A one-element array holding the full path of the output file.</returns>
public override IEnumerable<string> Process()
{
    var format = new MappedItemGroupXmlFileFormat();

    // The table is read back by the R script, so numbers must not depend on the
    // user's locale (e.g. ',' as decimal separator).
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    using (var sw = new StreamWriter(options.OutputFile))
    {
        sw.WriteLine("File\tFeature\tStrand\tCount\tPosition\tPercentage");

        foreach (var file in options.GetCountFiles())
        {
            // File-name suffix test: ordinal, not culture-sensitive.
            var xmlfile = file.File.EndsWith(".xml", System.StringComparison.Ordinal)
                ? file.File
                : file.File + ".mapped.xml";

            var groups = format.ReadFromFile(xmlfile)
                .OrderByDescending(m => m.GetEstimatedCount())
                .ToList();

            foreach (var group in groups)
            {
                // Only the first item of each group is reported.
                var item = group[0];

                var positionCount = new Dictionary<long, double>();
                foreach (var region in item.MappedRegions)
                {
                    foreach (var loc in region.AlignedLocations)
                    {
                        for (long p = loc.Start; p <= loc.End; p++)
                        {
                            var offset = region.Region.Strand == '+'
                                ? p - region.Region.Start + 1
                                : region.Region.End - p + 1;

                            double v;
                            if (!positionCount.TryGetValue(offset, out v))
                            {
                                v = 0;
                            }

                            positionCount[offset] = v + loc.Parent.GetEstimatedCount();
                        }
                    }
                }

                var allcount = item.GetEstimatedCount();

                if (positionCount.Count == 0)
                {
                    // Nothing aligned: no rows to write for this item.
                    continue;
                }

                // Loop-invariant for all rows of this item.
                var strand = item.MappedRegions.First().Region.Strand;

                var keys = positionCount.Keys.ToList();
                keys.Sort();

                foreach (var key in keys)
                {
                    sw.WriteLine(string.Format(
                        invariant,
                        "{0}\t{1}\t{2}\t{3:0.##}\t{4}\t{5:0.00}",
                        file.Name,
                        item.Name,
                        strand,
                        allcount,
                        key,
                        positionCount[key] / allcount));
                }
            }
        }
    }

    var rfile = new FileInfo(FileUtils.GetTemplateDir() + "/smallrna_position.r").FullName;
    if (File.Exists(rfile))
    {
        var targetr = Path.ChangeExtension(options.OutputFile, ".r").Replace("\\", "/");
        var workspace = Path.GetDirectoryName(Path.GetFullPath(options.OutputFile)).Replace("\\", "/");
        var positionFile = Path.GetFileName(options.OutputFile).Replace("\\", "/");

        var content = File.ReadAllText(rfile)
            .Replace("$$workspace", workspace)
            .Replace("$$positionfile", positionFile);
        File.WriteAllText(targetr, content);

        if (File.Exists(targetr))
        {
            // NOTE(review): the script path is passed unquoted; a path containing spaces
            // would presumably be split into several arguments. Confirm how
            // SystemUtils.Execute builds the command line before quoting it here.
            SystemUtils.Execute("R", "--vanilla -f " + targetr);
        }
    }

    return new string[] { Path.GetFullPath(options.OutputFile) };
}