Пример #1
0
        /// <summary>
        /// Copies the header lines and a subset of the alignment lines of <c>_options.BamFile</c>
        /// into <c>_options.OutputFile</c>. A line is kept when the key built from its SEQ, RNAME
        /// and POS columns ("sequence:reference:position") matches a key built from one of the
        /// aligned locations stored in <c>_options.CountFile</c>.
        /// </summary>
        /// <returns>A one-element array holding <c>_options.OutputFile</c>.</returns>
        /// <exception cref="UserTerminatedException">
        /// Cancellation was pending at one of the checks performed every 1000 lines.
        /// </exception>
        public override IEnumerable <string> Process()
        {
            // Shared by the lookup keys built from the count file and from each SAM line.
            const string keyFormat = "{0}:{1}:{2}";

            var xmlFormat = new MappedItemGroupXmlFileFormat();

            Progress.SetMessage("reading mapped reads from " + _options.CountFile + " ...");
            var groups = xmlFormat.ReadFromFile(_options.CountFile);

            // Flatten group -> item -> region -> aligned location and key each location.
            var wantedKeys = new HashSet <string>(
                groups.SelectMany(g => g)
                      .SelectMany(mi => mi.MappedRegions)
                      .SelectMany(mr => mr.AlignedLocations)
                      .Select(al => string.Format(keyFormat, al.Parent.Sequence, al.Seqname, al.Start)));

            Progress.SetMessage("There are {0} unique sequence:locus", wantedKeys.Count);

            using (var sw = new StreamWriter(_options.OutputFile))
            using (var sr = SAMFactory.GetReader(_options.BamFile, false))
            {
                sr.ReadHeaders().ForEach(header => sw.WriteLine(header));

                int seen = 0;
                int kept = 0;
                for (string line = sr.ReadLine(); line != null; line = sr.ReadLine())
                {
                    // Poll for cancellation once every 1000 lines.
                    if (seen % 1000 == 0 && Progress.IsCancellationPending())
                    {
                        throw new UserTerminatedException();
                    }

                    // Report progress every 100000 lines, but not before the first line.
                    if (seen > 0 && seen % 100000 == 0)
                    {
                        Progress.SetMessage("{0} candidates from {1} reads", kept, seen);
                    }

                    seen++;

                    var fields = line.Split('\t');
                    var key    = string.Format(keyFormat,
                                               fields[SAMFormatConst.SEQ_INDEX],
                                               fields[SAMFormatConst.RNAME_INDEX],
                                               fields[SAMFormatConst.POS_INDEX]);

                    if (wantedKeys.Contains(key))
                    {
                        sw.WriteLine(line);
                        kept++;
                    }
                }
            }

            return new[] { _options.OutputFile };
        }
        /// <summary>
        /// Loads the mapped item groups of <c>_options.InputFile1</c> and <c>_options.InputFile2</c>,
        /// resolves every read (keyed by Qname) that occurs in both inputs by comparing the two
        /// alignment scores with the engine format's <c>CompareScore</c>, and saves both item lists.
        /// A read is removed from both lists when the comparison returns 0, from the second list
        /// when it returns a negative value, and from the first list otherwise.
        /// </summary>
        /// <returns>
        /// The files written by <c>SaveItems</c>: each output file followed by its ".xml" companion.
        /// </returns>
        public override IEnumerable <string> Process()
        {
            var result    = new List <string>();
            var samformat = _options.GetEngineFormat();

            var format = new MappedItemGroupXmlFileFormat();

            Progress.SetMessage("reading mapped reads from " + _options.InputFile1 + " ...");
            var items1 = format.ReadFromFile(_options.InputFile1);

            Progress.SetMessage("reading mapped reads from " + _options.InputFile2 + " ...");
            var items2 = format.ReadFromFile(_options.InputFile2);

            var reads1 = items1.GetQueries().ToDictionary(m => m.Qname);
            var reads2 = items2.GetQueries().ToDictionary(m => m.Qname);

            // Only reads present in BOTH inputs need resolving. Walking reads1 and probing reads2
            // visits exactly those reads, in the same order as the former union-then-filter loop,
            // without building a list of every name from both files. The dictionaries are
            // snapshots and are not modified inside the loop.
            foreach (var entry in reads1)
            {
                var qname = entry.Key;
                if (!reads2.ContainsKey(qname))
                {
                    continue;
                }

                var r1  = entry.Value;
                var r2  = reads2[qname];
                var res = samformat.CompareScore(r1.AlignmentScore, r2.AlignmentScore);
                if (res == 0)
                {
                    // Equal comparison: drop the read from both sides.
                    items1.RemoveRead(qname);
                    items2.RemoveRead(qname);
                }
                else if (res < 0)
                {
                    // NOTE(review): presumably a negative result means the first score wins - confirm against CompareScore.
                    items2.RemoveRead(qname);
                }
                else
                {
                    items1.RemoveRead(qname);
                }
            }

            var writer = new MappedItemGroupSequenceWriter();

            SaveItems(items1, _options.OutputFile1, writer, format, result);
            SaveItems(items2, _options.OutputFile2, writer, format, result);

            return(result);
        }
        /// <summary>
        /// Drops groups whose <c>QueryCount</c> is 0, applies <c>SortTRna</c> when any remaining
        /// group name contains ".tRNA", then writes the list to <paramref name="outputFile"/> with
        /// <paramref name="writer"/> and to <paramref name="outputFile"/> + ".xml" with
        /// <paramref name="format"/>.
        /// </summary>
        /// <param name="items1">Groups to save; the list is filtered (and possibly sorted) in place.</param>
        /// <param name="outputFile">Path of the file produced by <paramref name="writer"/>.</param>
        /// <param name="writer">Writer used for <paramref name="outputFile"/>.</param>
        /// <param name="format">Format used for the ".xml" companion file.</param>
        /// <param name="result">Receives the two written paths, output file first.</param>
        private static void SaveItems(List <MappedItemGroup> items1, string outputFile, MappedItemGroupSequenceWriter writer,
                                      MappedItemGroupXmlFileFormat format, List <string> result)
        {
            var xmlFile = outputFile + ".xml";

            // Groups left without any query are not written.
            items1.RemoveAll(g => g.QueryCount == 0);

            bool hasTRna = items1.Exists(g => g.Name.Contains(".tRNA"));
            if (hasTRna)
            {
                items1.SortTRna();
            }

            writer.WriteToFile(outputFile, items1);
            format.WriteToFile(xmlFile, items1);

            result.AddRange(new[] { outputFile, xmlFile });
        }
        /// <summary>
        /// Writes a tab-delimited position table to <c>options.OutputFile</c>. For every count file,
        /// the mapped item groups are read from its XML file and ordered by descending estimated
        /// count. For the first item of each group, the estimated counts of the aligned locations are
        /// accumulated per 1-based offset inside the mapped region (measured from the region start
        /// on the '+' strand, from the region end otherwise). One row is written per offset with the
        /// accumulated count divided by the item's estimated count.
        /// Afterwards, when the "smallrna_position.r" template exists in the template directory, its
        /// "$$workspace" and "$$positionfile" placeholders are filled in, the script is saved next
        /// to the output file with an ".r" extension, and it is run through R.
        /// </summary>
        /// <returns>A one-element array holding the full path of <c>options.OutputFile</c>.</returns>
        public override IEnumerable <string> Process()
        {
            var format = new MappedItemGroupXmlFileFormat();

            using (StreamWriter sw = new StreamWriter(options.OutputFile))
            {
                sw.WriteLine("File\tFeature\tStrand\tCount\tPosition\tPercentage");
                foreach (var file in options.GetCountFiles())
                {
                    // File-name suffix test: compare ordinally rather than with culture rules.
                    var xmlfile = file.File.EndsWith(".xml", System.StringComparison.Ordinal) ? file.File : file.File + ".mapped.xml";

                    var count = format.ReadFromFile(xmlfile).OrderByDescending(m => m.GetEstimatedCount()).ToList();

                    foreach (var group in count)
                    {
                        // Only the first item of each group is profiled.
                        var item = group[0];
                        Dictionary <long, double> positionCount = new Dictionary <long, double>();
                        foreach (var region in item.MappedRegions)
                        {
                            // Strand is fixed per region, so decide the offset direction once.
                            var forward = region.Region.Strand == '+';
                            foreach (var loc in region.AlignedLocations)
                            {
                                // The estimated count does not depend on the position; compute it once per location.
                                var locCount = loc.Parent.GetEstimatedCount();
                                for (long p = loc.Start; p <= loc.End; p++)
                                {
                                    var    offset = forward ? p - region.Region.Start + 1 : region.Region.End - p + 1;
                                    double v;
                                    if (!positionCount.TryGetValue(offset, out v))
                                    {
                                        v = 0;
                                    }
                                    positionCount[offset] = v + locCount;
                                }
                            }
                        }

                        var allcount = item.GetEstimatedCount();
                        var keys     = positionCount.Keys.ToList();
                        keys.Sort();
                        foreach (var key in keys)
                        {
                            // First() stays inside the loop: with no regions there are no keys,
                            // so it is never evaluated on an empty sequence.
                            sw.WriteLine("{0}\t{1}\t{2}\t{3:0.##}\t{4}\t{5:0.00}",
                                         file.Name,
                                         item.Name,
                                         item.MappedRegions.First().Region.Strand,
                                         allcount,
                                         key,
                                         positionCount[key] / allcount);
                        }
                    }
                }
            }

            var rfile = new FileInfo(FileUtils.GetTemplateDir() + "/smallrna_position.r").FullName;

            if (File.Exists(rfile))
            {
                var targetr = Path.ChangeExtension(options.OutputFile, ".r").Replace("\\", "/");
                var content = File.ReadAllText(rfile).Replace("$$workspace", Path.GetDirectoryName(Path.GetFullPath(options.OutputFile)).Replace("\\", "/"))
                              .Replace("$$positionfile", Path.GetFileName(options.OutputFile).Replace("\\", "/"));
                File.WriteAllText(targetr, content);

                if (File.Exists(targetr))
                {
                    // Quote the script path so a path containing spaces stays a single argument.
                    // NOTE(review): assumes SystemUtils.Execute hands the argument string to the
                    // process command line unchanged - confirm.
                    SystemUtils.Execute("R", "--vanilla -f \"" + targetr + "\"");
                }
            }

            return(new string[] { Path.GetFullPath(options.OutputFile) });
        }