/// <summary>
/// Builds the two-sample fixture (sample name -> group key -> feature group) used as test input.
/// </summary>
/// <returns>
/// A dictionary with the keys "Sample1" and "Sample2"; every feature in it carries exactly one
/// location holding exactly one SAM location.
/// </returns>
private static Dictionary<string, Dictionary<string, FeatureItemGroup>> PrepareDictionary()
{
    // Sample 1: three groups, each one mapped by its own query.
    var sample1 = new Dictionary<string, FeatureItemGroup>();

    var gly35InSample1 = new FeatureItemGroup(new FeatureItem() { Name = "tRNA:chr1.tRNA35-GlyGCC" });
    sample1["tRNA1"] = gly35InSample1;
    AttachQueryLocation(gly35InSample1, 0, "Query1");

    var gly36InSample1 = new FeatureItemGroup(new FeatureItem() { Name = "tRNA:chr2.tRNA36-GlyGCC" });
    sample1["tRNA2"] = gly36InSample1;
    AttachQueryLocation(gly36InSample1, 0, "Query2");

    var hisInSample1 = new FeatureItemGroup(new FeatureItem() { Name = "tRNA:chr1.tRNA111-HisGTG" });
    sample1["tRNA3"] = hisInSample1;
    AttachQueryLocation(hisInSample1, 0, "Query3");

    // Sample 2: two groups mapped by two different queries. The first group holds
    // two tRNAs with the same code (GlyGCC), both hit by the same query.
    var sample2 = new Dictionary<string, FeatureItemGroup>();

    var glyInSample2 = new FeatureItemGroup(new FeatureItem() { Name = "tRNA:chr1.tRNA35-GlyGCC" });
    sample2["tRNA1"] = glyInSample2;
    AttachQueryLocation(glyInSample2, 0, "Query4");
    glyInSample2.Add(new FeatureItem() { Name = "tRNA:chr2.tRNA36-GlyGCC" });
    AttachQueryLocation(glyInSample2, 1, "Query4");

    var hisInSample2 = new FeatureItemGroup(new FeatureItem() { Name = "tRNA:chr1.tRNA111-HisGTG" });
    sample2["tRNA3"] = hisInSample2;
    AttachQueryLocation(hisInSample2, 0, "Query5");

    var fileDic = new Dictionary<string, Dictionary<string, FeatureItemGroup>>();
    fileDic["Sample1"] = sample1;
    fileDic["Sample2"] = sample2;
    return fileDic;
}

/// <summary>
/// Adds a new location to the feature at <paramref name="itemIndex"/> of <paramref name="group"/>
/// and attaches one SAM location whose aligned item has the given query name.
/// </summary>
private static void AttachQueryLocation(FeatureItemGroup group, int itemIndex, string queryName)
{
    var feature = group[itemIndex];
    feature.Locations.Add(new FeatureLocation());
    feature.Locations[0].SamLocations.Add(new FeatureSamLocation(feature.Locations[0])
    {
        SamLocation = new Sam.SAMAlignedLocation(new Sam.SAMAlignedItem() { Qname = queryName })
    });
}
/// <summary>
/// Regroups the features of every sample using groups computed over all samples together.
/// </summary>
/// <remarks>
/// For each distinct feature name one pooled feature is created whose single location collects
/// the SAM locations of that feature from every sample. The pooled features are grouped with the
/// <c>GroupByIdenticalQuery</c> extension, and each sample then receives, per pooled group, a new
/// group made of its own original features that belong to it (keyed by the pooled group's
/// <c>DisplayName</c>). Pooled groups with no member in a sample are omitted for that sample.
/// </remarks>
/// <param name="dic">Sample name -> group key -> feature group.</param>
/// <returns>A new dictionary with the same sample keys and the regrouped features.</returns>
public static Dictionary<string, Dictionary<string, FeatureItemGroup>> GroupByIdenticalQuery(Dictionary<string, Dictionary<string, FeatureItemGroup>> dic)
{
    // One pooled feature, with a single empty location, per distinct feature name.
    var distinctNames = dic.Values
        .SelectMany(groupsOfSample => groupsOfSample.Values)
        .SelectMany(grp => grp)
        .Select(feature => feature.Name)
        .Distinct()
        .ToList();

    var pooledByName = new Dictionary<string, FeatureItem>();
    foreach (var name in distinctNames)
    {
        var pooled = new FeatureItem() { Name = name };
        pooled.Locations.Add(new FeatureLocation());
        pooledByName[name] = pooled;
    }

    // Gather the SAM locations of every sample into the pooled feature of the same name.
    foreach (var groupsOfSample in dic.Values)
    {
        foreach (var grp in groupsOfSample.Values)
        {
            foreach (var feature in grp)
            {
                var samLocations = feature.Locations.SelectMany(location => location.SamLocations);
                pooledByName[feature.Name].Locations[0].SamLocations.AddRange(samLocations);
            }
        }
    }

    var pooledGroups = pooledByName.Values.GroupByIdenticalQuery();

    var result = new Dictionary<string, Dictionary<string, FeatureItemGroup>>();
    foreach (var sampleEntry in dic)
    {
        var featuresOfSample = sampleEntry.Value.Values
            .SelectMany(grp => grp)
            .ToDictionary(feature => feature.Name);

        var regrouped = new Dictionary<string, FeatureItemGroup>();
        result[sampleEntry.Key] = regrouped;

        foreach (var pooledGroup in pooledGroups)
        {
            // Keep the sample's own feature objects, in the order of the pooled group.
            var members = new List<FeatureItem>();
            foreach (var pooledFeature in pooledGroup)
            {
                FeatureItem original;
                if (featuresOfSample.TryGetValue(pooledFeature.Name, out original))
                {
                    members.Add(original);
                }
            }

            if (members.Count == 0)
            {
                continue;
            }

            var sampleGroup = new FeatureItemGroup();
            sampleGroup.AddRange(members);
            regrouped[pooledGroup.DisplayName] = sampleGroup;
        }
    }

    return result;
}
/// <summary>
/// Writes the per-sample estimated counts of one feature group to <paramref name="sw"/> as
/// tab-delimited rows.
/// </summary>
/// <remarks>
/// Nothing is written when no SAM location of the group passes the offset filter. Without NTA a
/// single row is produced; with NTA one row is produced per distinct clipped NTA (sorted). In both
/// cases a row is only emitted when at least one sample has a count of 0.05 or more.
/// </remarks>
/// <param name="sw">Destination writer.</param>
/// <param name="feature">Feature group to report.</param>
/// <param name="samples">Sample names; one count column is written per entry, in this order.</param>
/// <param name="offset">Required SAM location offset, or <c>MirnaConsts.NO_OFFSET</c> to accept every offset.</param>
/// <param name="hasNTA">When true, counts are split by clipped NTA and the row name gets a <c>_NTA_</c> suffix.</param>
/// <param name="indexSuffix">Text appended directly after the merged feature name.</param>
/// <param name="removeNamePrefix">Passed to <c>StringAfter</c> on every feature name (presumably strips that prefix; confirm against the helper).</param>
protected void OutputCount(StreamWriter sw, FeatureItemGroup feature, List<string> samples, int offset, bool hasNTA, string indexSuffix, string removeNamePrefix)
{
    Func<FeatureSamLocation, bool> acceptOffset = m => MirnaConsts.NO_OFFSET == offset || m.Offset == offset;

    // Skip the group entirely when nothing in it matches the requested offset.
    if (!feature.Any(l => l.Locations.Any(k => k.SamLocations.Any(g => acceptOffset(g)))))
    {
        return;
    }

    var featureName = (from f in feature
                       select f.Name.StringAfter(removeNamePrefix)).Merge(";");

    // Collapse the sequence column to a single value when all members share it.
    var featureSequences = (from l in feature
                            select l.Sequence).ToArray();
    var sequence = featureSequences.Distinct().Count() == 1
        ? featureSequences.First()
        : featureSequences.Merge(";");

    if (!hasNTA)
    {
        var counts = (from sample in samples
                      select feature.GetEstimatedCount(m => m.SamLocation.Parent.Sample.Equals(sample) && acceptOffset(m))).ToArray();

        if (counts.Any(l => l >= 0.05))
        {
            sw.WriteLine("{0}{1}\t{2}\t{3}\t{4}",
                featureName,
                indexSuffix,
                feature.DisplayLocations,
                sequence,
                (from count in counts
                 select string.Format("{0:0.#}", count)).Merge("\t"));
        }

        return;
    }

    var ntas = (from m in feature
                from mr in m.Locations
                from l in mr.SamLocations
                where acceptOffset(l)
                select l.SamLocation.Parent.ClippedNTA).Distinct().OrderBy(m => m).ToList();

    foreach (var nta in ntas)
    {
        var counts = (from sample in samples
                      select feature.GetEstimatedCount(m => m.SamLocation.Parent.Sample.Equals(sample) && acceptOffset(m) && m.SamLocation.Parent.ClippedNTA.Equals(nta))).ToArray();

        if (counts.Any(l => l >= 0.05))
        {
            // Each NTA is its own table row, so it must be newline-terminated like the
            // non-NTA row above; Write() would glue consecutive rows together.
            sw.WriteLine("{0}{1}_NTA_{2}\t{3}\t{4}\t{5}",
                featureName,
                indexSuffix,
                nta,
                feature.DisplayLocations,
                sequence,
                (from count in counts
                 select string.Format("{0:0.#}", count)).Merge("\t"));
        }
    }
}
/// <summary>
/// Writes the per-sample estimated counts of one feature group to <paramref name="sw"/> as
/// tab-delimited rows.
/// </summary>
/// <remarks>
/// Nothing is written when no SAM location of the group passes the offset filter. Without NTA a
/// single row is produced; with NTA one row is produced per distinct clipped NTA (sorted). In both
/// cases a row is only emitted when at least one sample has a count of 0.05 or more.
/// </remarks>
/// <param name="sw">Destination writer.</param>
/// <param name="feature">Feature group to report.</param>
/// <param name="samples">Sample names; one count column is written per entry, in this order.</param>
/// <param name="offset">Required SAM location offset, or <c>MirnaConsts.NO_OFFSET</c> to accept every offset.</param>
/// <param name="hasNTA">When true, counts are split by clipped NTA and the row name gets a <c>_NTA_</c> suffix.</param>
/// <param name="indexSuffix">Text appended directly after the merged feature name.</param>
/// <param name="removeNamePrefix">Passed to <c>StringAfter</c> on every feature name (presumably strips that prefix; confirm against the helper).</param>
protected void OutputCount(StreamWriter sw, FeatureItemGroup feature, List<string> samples, int offset, bool hasNTA, string indexSuffix, string removeNamePrefix)
{
    Func<FeatureSamLocation, bool> acceptOffset = m => MirnaConsts.NO_OFFSET == offset || m.Offset == offset;

    // Skip the group entirely when nothing in it matches the requested offset.
    if (!feature.Any(l => l.Locations.Any(k => k.SamLocations.Any(g => acceptOffset(g)))))
    {
        return;
    }

    var featureName = (from f in feature
                       select f.Name.StringAfter(removeNamePrefix)).Merge(";");

    // Collapse the sequence column to a single value when all members share it.
    var featureSequences = (from l in feature
                            select l.Sequence).ToArray();
    var sequence = featureSequences.Distinct().Count() == 1
        ? featureSequences.First()
        : featureSequences.Merge(";");

    if (!hasNTA)
    {
        var counts = (from sample in samples
                      select feature.GetEstimatedCount(m => m.SamLocation.Parent.Sample.Equals(sample) && acceptOffset(m))).ToArray();

        if (counts.Any(l => l >= 0.05))
        {
            sw.WriteLine("{0}{1}\t{2}\t{3}\t{4}",
                featureName,
                indexSuffix,
                feature.DisplayLocations,
                sequence,
                (from count in counts
                 select string.Format("{0:0.#}", count)).Merge("\t"));
        }

        return;
    }

    var ntas = (from m in feature
                from mr in m.Locations
                from l in mr.SamLocations
                where acceptOffset(l)
                select l.SamLocation.Parent.ClippedNTA).Distinct().OrderBy(m => m).ToList();

    foreach (var nta in ntas)
    {
        var counts = (from sample in samples
                      select feature.GetEstimatedCount(m => m.SamLocation.Parent.Sample.Equals(sample) && acceptOffset(m) && m.SamLocation.Parent.ClippedNTA.Equals(nta))).ToArray();

        if (counts.Any(l => l >= 0.05))
        {
            // Each NTA is its own table row, so it must be newline-terminated like the
            // non-NTA row above; Write() would glue consecutive rows together.
            sw.WriteLine("{0}{1}_NTA_{2}\t{3}\t{4}\t{5}",
                featureName,
                indexSuffix,
                nta,
                feature.DisplayLocations,
                sequence,
                (from count in counts
                 select string.Format("{0:0.#}", count)).Merge("\t"));
        }
    }
}
/// <summary>
/// Wraps every feature item in a group of its own.
/// </summary>
/// <param name="items">Feature items to wrap.</param>
/// <returns>One single-member group per input item, in input order.</returns>
public static List<FeatureItemGroup> ConvertToGroup(this IEnumerable<FeatureItem> items)
{
    var singletons = new List<FeatureItemGroup>();

    foreach (var feature in items)
    {
        // The collection initializer calls Add(feature) on the freshly created group.
        singletons.Add(new FeatureItemGroup { feature });
    }

    return singletons;
}
/// <summary>
/// Groups feature items by the key that <paramref name="func"/> produces for each of them.
/// </summary>
/// <param name="items">Feature items to group.</param>
/// <param name="func">Computes the grouping key of an item.</param>
/// <param name="updateName">
/// When true, each group's <c>DisplayName</c> is set to <paramref name="func"/> applied to the
/// first item of its bucket (first in input order, not in the name-sorted order).
/// </param>
/// <returns>One group per distinct key; the members of each group are sorted by <c>Name</c>.</returns>
public static List<FeatureItemGroup> GroupByFunction(this IEnumerable<FeatureItem> items, Func<FeatureItem, string> func, bool updateName = false)
{
    var buckets = items.GroupBy(feature => func(feature)).ToList();
    var groups = new List<FeatureItemGroup>();

    foreach (var bucket in buckets)
    {
        var grouped = new FeatureItemGroup();
        grouped.AddRange(bucket.OrderBy(feature => feature.Name));

        if (updateName)
        {
            grouped.DisplayName = func(bucket.First());
        }

        groups.Add(grouped);
    }

    return groups;
}
/// <summary>
/// Regroups the features of every sample using groups computed over all samples together.
/// </summary>
/// <remarks>
/// Features with the same name are pooled across samples into one synthetic feature whose single
/// location collects all of their SAM locations. The pooled features are grouped with the
/// <c>GroupByIdenticalQuery</c> extension; every sample then gets, for each pooled group it has
/// members of, a new group containing its own original features, stored under the pooled group's
/// <c>DisplayName</c>.
/// </remarks>
/// <param name="dic">Sample name -> group key -> feature group.</param>
/// <returns>A new dictionary with the same sample keys and the regrouped features.</returns>
public static Dictionary<string, Dictionary<string, FeatureItemGroup>> GroupByIdenticalQuery(Dictionary<string, Dictionary<string, FeatureItemGroup>> dic)
{
    // Pool features by name; a pooled feature is created the first time its name is seen,
    // which yields the same first-occurrence order as collecting the distinct names up front.
    var pooledByName = new Dictionary<string, FeatureItem>();
    foreach (var groupsOfSample in dic.Values)
    {
        foreach (var sampleGroup in groupsOfSample.Values)
        {
            foreach (var feature in sampleGroup)
            {
                FeatureItem pooled;
                if (!pooledByName.TryGetValue(feature.Name, out pooled))
                {
                    pooled = new FeatureItem() { Name = feature.Name };
                    pooled.Locations.Add(new FeatureLocation());
                    pooledByName[feature.Name] = pooled;
                }

                pooled.Locations[0].SamLocations.AddRange(
                    feature.Locations.SelectMany(location => location.SamLocations));
            }
        }
    }

    var pooledGroups = pooledByName.Values.GroupByIdenticalQuery();

    var result = new Dictionary<string, Dictionary<string, FeatureItemGroup>>();
    foreach (var sample in dic.Keys)
    {
        var featuresOfSample = dic[sample].Values
            .SelectMany(sampleGroup => sampleGroup)
            .ToDictionary(feature => feature.Name);

        var regrouped = new Dictionary<string, FeatureItemGroup>();
        result[sample] = regrouped;

        foreach (var pooledGroup in pooledGroups)
        {
            // Map the pooled members back to this sample's own feature objects.
            var members = pooledGroup
                .Where(pooledFeature => featuresOfSample.ContainsKey(pooledFeature.Name))
                .Select(pooledFeature => featuresOfSample[pooledFeature.Name])
                .ToList();

            if (members.Count == 0)
            {
                continue;
            }

            var sampleGroupOut = new FeatureItemGroup();
            sampleGroupOut.AddRange(members);
            regrouped[pooledGroup.DisplayName] = sampleGroupOut;
        }
    }

    return result;
}