/// <summary>
/// Groups feature locations by their name, producing one <see cref="FeatureItem"/> per distinct name.
/// </summary>
/// <param name="locations">Locations to group.</param>
/// <returns>
/// A list with one item per distinct location name; each item carries that name and
/// all locations sharing it.
/// </returns>
public static List<FeatureItem> GroupByName(this IEnumerable<FeatureLocation> locations)
{
    return locations
        .GroupBy(location => location.Name)
        .Select(sameName =>
        {
            var grouped = new FeatureItem();
            grouped.Name = sameName.Key;
            grouped.Locations.AddRange(sameName);
            return grouped;
        })
        .ToList();
}
/// <summary>
/// Prunes the regions of a feature item: when one region contains another and the reads
/// mapped to one of the two are all mapped to the other as well, one of them is dropped.
/// </summary>
/// <remarks>
/// When both regions are mapped by exactly the same reads, the contained (smaller) region is kept.
/// For every dropped region, each of its SAM locations is also removed from that location's
/// parent through <c>Parent.RemoveLocation</c>.
/// </remarks>
/// <param name="item">Feature item whose <c>Locations</c> list is pruned in place.</param>
public static void CombineLocationByMappedReads(this FeatureItem item)
{
    // Only an item with several regions can have one region containing another.
    if (item.Locations.Count <= 1)
    {
        return;
    }

    var discarded = new List<FeatureLocation>();
    for (int outer = 0; outer < item.Locations.Count; outer++)
    {
        var first = item.Locations[outer];
        for (int inner = outer + 1; inner < item.Locations.Count; inner++)
        {
            var second = item.Locations[inner];
            if (discarded.Contains(second))
            {
                continue;
            }

            var containment = first.Contains(second);
            bool dropFirst;
            bool dropSecond;
            if (containment == 1)
            {
                // first contains second: prefer dropping the larger region (first) when all of
                // its reads also map to second; otherwise drop second if first covers its reads.
                dropFirst = AllReadsAlsoMappedTo(first, second);
                dropSecond = !dropFirst && AllReadsAlsoMappedTo(second, first);
            }
            else if (containment == -1)
            {
                // second contains first: prefer dropping the larger region (second) when all of
                // its reads also map to first; otherwise drop first if second covers its reads.
                dropSecond = AllReadsAlsoMappedTo(second, first);
                dropFirst = !dropSecond && AllReadsAlsoMappedTo(first, second);
            }
            else
            {
                continue;
            }

            if (dropFirst)
            {
                // The outer region is gone; comparing it with further regions is skipped.
                discarded.Add(first);
                break;
            }

            if (dropSecond)
            {
                discarded.Add(second);
            }
        }
    }

    // Detach the reads of every dropped region from their parents, then drop the regions.
    foreach (var region in discarded)
    {
        foreach (var mapped in region.SamLocations)
        {
            mapped.SamLocation.Parent.RemoveLocation(mapped.SamLocation);
        }
    }

    item.Locations.RemoveAll(region => discarded.Contains(region));
}

/// <summary>
/// Returns true when every SAM location of <paramref name="source"/> also occurs among the
/// SAM locations of <paramref name="target"/>.
/// </summary>
private static bool AllReadsAlsoMappedTo(FeatureLocation source, FeatureLocation target)
{
    return source.SamLocations.All(own => target.SamLocations.Any(other => other.SamLocation == own.SamLocation));
}
/// <summary>
/// Reads feature item groups from an XML file using a forward-only <see cref="XmlReader"/>.
/// </summary>
/// <remarks>
/// The aligned queries are read first through <c>SAMAlignedItemUtils.ReadFrom</c> and indexed by
/// location key. The subjects are then read from the element hierarchy
/// <c>subjectResult / subjectGroup / subject / region / query</c>, and every <c>query</c>
/// element is resolved against that index by its <c>qname</c> and <c>loc</c> attributes.
/// Optional attributes (<c>query_count_before_filter</c>, <c>pvalue</c>, <c>overlap</c>,
/// <c>nmi</c>, <c>nnpm</c>) are applied only when present; the misspelled legacy attribute
/// name <c>nnmp</c> is accepted as a fallback for <c>nnpm</c>.
/// </remarks>
/// <param name="fileName">Path of the XML file to read.</param>
/// <returns>
/// One <see cref="FeatureItemGroup"/> per <c>subjectGroup</c> element; an empty list when the
/// file has no such element.
/// </returns>
public virtual List<FeatureItemGroup> ReadFromFile(string fileName)
{
    var result = new List<FeatureItemGroup>();
    using (XmlReader source = XmlReader.Create(fileName))
    {
        Progress.SetMessage("reading queries ...");
        List<SAMAlignedItem> queries = SAMAlignedItemUtils.ReadFrom(source);
        Progress.SetMessage("{0} queries read.", queries.Count);

        // Index the aligned locations by key; the item list itself is not needed afterwards.
        var qmmap = queries.ToSAMAlignedLocationMap();
        queries.Clear();

        Progress.SetMessage("reading subjects ...");
        source.ReadToFollowing("subjectResult");
        if (source.ReadToDescendant("subjectGroup"))
        {
            do
            {
                var featureGroup = new FeatureItemGroup();
                result.Add(featureGroup);

                if (source.ReadToDescendant("subject"))
                {
                    do
                    {
                        var item = new FeatureItem();
                        featureGroup.Add(item);
                        item.Name = source.GetAttribute("name");

                        if (source.ReadToDescendant("region"))
                        {
                            do
                            {
                                var fl = new FeatureLocation();
                                item.Locations.Add(fl);
                                fl.Name = item.Name;
                                fl.Seqname = source.GetAttribute("seqname");
                                fl.Start = long.Parse(source.GetAttribute("start"));
                                fl.End = long.Parse(source.GetAttribute("end"));
                                fl.Strand = source.GetAttribute("strand")[0];
                                fl.Sequence = source.GetAttribute("sequence");

                                string countBeforeFilter = source.GetAttribute("query_count_before_filter");
                                if (countBeforeFilter != null)
                                {
                                    fl.QueryCountBeforeFilter = int.Parse(countBeforeFilter);
                                }

                                string pvalue = source.GetAttribute("pvalue");
                                if (pvalue != null)
                                {
                                    fl.PValue = double.Parse(pvalue);
                                }

                                if (source.ReadToDescendant("query"))
                                {
                                    do
                                    {
                                        string qname = source.GetAttribute("qname");
                                        string loc = source.GetAttribute("loc");
                                        string key = SAMAlignedLocation.GetKey(qname, loc);
                                        SAMAlignedLocation query = qmmap[key];

                                        FeatureSamLocation fsl = new FeatureSamLocation(fl);
                                        fsl.SamLocation = query;
                                        fsl.Offset = int.Parse(source.GetAttribute("offset"));

                                        // Without a stored overlap, recompute it from the query and the region.
                                        var overlap = source.GetAttribute("overlap");
                                        if (overlap == null)
                                        {
                                            fsl.OverlapPercentage = query.OverlapPercentage(fl);
                                        }
                                        else
                                        {
                                            fsl.OverlapPercentage = double.Parse(overlap);
                                        }

                                        var nmi = source.GetAttribute("nmi");
                                        if (nmi != null)
                                        {
                                            fsl.NumberOfMismatch = int.Parse(nmi);
                                        }

                                        var nnpm = source.GetAttribute("nnpm");
                                        if (nnpm == null)
                                        {
                                            // Accept the misspelled legacy attribute name too, so the value is
                                            // not silently lost for files that carry "nnmp" instead of "nnpm".
                                            nnpm = source.GetAttribute("nnmp");
                                        }
                                        if (nnpm != null)
                                        {
                                            fsl.NumberOfNoPenaltyMutation = int.Parse(nnpm);
                                        }
                                    } while (source.ReadToNextSibling("query"));
                                }
                            } while (source.ReadToNextSibling("region"));
                        }
                    } while (source.ReadToNextSibling("subject"));
                }
            } while (source.ReadToNextSibling("subjectGroup"));
        }

        qmmap.Clear();
    }

    // The reported number is the count of groups in the result list.
    Progress.SetMessage("{0} subjects read.", result.Count);
    return result;
}
/// <summary>
/// Reads feature item groups from an XML file by loading the whole document into memory.
/// </summary>
/// <remarks>
/// The aligned locations are indexed first (via <c>ToSAMAlignedItems</c> and
/// <c>ToSAMAlignedLocationMap</c>). The subjects are then read from
/// <c>subjectResult / subjectGroup / subject / region / query</c>, and every <c>query</c>
/// element is resolved against that index by its <c>qname</c> and <c>loc</c> attributes.
/// A document without a <c>subjectResult</c> element yields an empty list instead of failing
/// with a <see cref="NullReferenceException"/>.
/// </remarks>
/// <param name="fileName">Path of the XML file to read.</param>
/// <returns>One <see cref="FeatureItemGroup"/> per <c>subjectGroup</c> element.</returns>
public List<FeatureItemGroup> ReadFromFile(string fileName)
{
    Console.WriteLine("read file {0} ...", fileName);

    var result = new List<FeatureItemGroup>();
    XElement root = XElement.Load(fileName);

    Dictionary<string, SAMAlignedLocation> qmmap = root.ToSAMAlignedItems().ToSAMAlignedLocationMap();

    // Element() returns null when the section is missing; treat that as "no subjects".
    XElement subjectResult = root.Element("subjectResult");
    if (subjectResult != null)
    {
        foreach (XElement groupEle in subjectResult.Elements("subjectGroup"))
        {
            var group = new FeatureItemGroup();
            result.Add(group);

            foreach (XElement featureEle in groupEle.Elements("subject"))
            {
                var item = new FeatureItem();
                group.Add(item);
                item.Name = featureEle.Attribute("name").Value;

                foreach (XElement locEle in featureEle.Elements("region"))
                {
                    var fl = new FeatureLocation();
                    item.Locations.Add(fl);
                    fl.Name = item.Name;
                    fl.ParseLocation(locEle);

                    // Optional region attributes: look each one up once and apply it when present.
                    var sequence = locEle.Attribute("sequence");
                    if (sequence != null)
                    {
                        fl.Sequence = sequence.Value;
                    }

                    var countBeforeFilter = locEle.Attribute("query_count_before_filter");
                    if (countBeforeFilter != null)
                    {
                        fl.QueryCountBeforeFilter = int.Parse(countBeforeFilter.Value);
                    }

                    var pvalue = locEle.Attribute("pvalue");
                    if (pvalue != null)
                    {
                        fl.PValue = double.Parse(pvalue.Value);
                    }

                    foreach (XElement queryEle in locEle.Elements("query"))
                    {
                        string qname = queryEle.Attribute("qname").Value;
                        string loc = queryEle.Attribute("loc").Value;
                        string key = SAMAlignedLocation.GetKey(qname, loc);
                        SAMAlignedLocation query = qmmap[key];

                        FeatureSamLocation fsl = new FeatureSamLocation(fl);
                        fsl.SamLocation = query;

                        // Without a stored overlap, recompute it from the query and the region.
                        var overlap = queryEle.FindAttribute("overlap");
                        if (overlap == null)
                        {
                            fsl.OverlapPercentage = query.OverlapPercentage(fl);
                        }
                        else
                        {
                            fsl.OverlapPercentage = double.Parse(overlap.Value);
                        }

                        // "nnmp" is accepted as a fallback spelling of "nnpm".
                        var nnpm = queryEle.FindAttribute("nnpm");
                        if (nnpm == null)
                        {
                            nnpm = queryEle.FindAttribute("nnmp");
                        }
                        if (nnpm != null)
                        {
                            fsl.NumberOfNoPenaltyMutation = int.Parse(nnpm.Value);
                        }

                        var nmi = queryEle.FindAttribute("nmi");
                        if (nmi != null)
                        {
                            fsl.NumberOfMismatch = int.Parse(nmi.Value);
                        }
                    }
                }
            }
        }
    }

    qmmap.Clear();
    return result;
}
/// <summary>
/// Regroups the features of every sample so that features sharing identical queries across
/// all samples end up in the same group.
/// </summary>
/// <remarks>
/// For each distinct feature name a synthetic item with a single location is built that pools
/// the SAM locations of that feature from every sample. These pooled items are grouped with the
/// <c>GroupByIdenticalQuery</c> extension, and each resulting group is projected back onto every
/// sample, keeping only the features that sample actually contains.
/// </remarks>
/// <param name="dic">Feature groups keyed by sample name, then by group name.</param>
/// <returns>
/// For every sample key of <paramref name="dic"/>, a dictionary from group display name to the
/// group of that sample's original features belonging to it; groups with no feature in the
/// sample are omitted.
/// </returns>
public static Dictionary<string, Dictionary<string, FeatureItemGroup>> GroupByIdenticalQuery(Dictionary<string, Dictionary<string, FeatureItemGroup>> dic)
{
    // Pool, per feature name, the SAM locations from all samples into one synthetic location.
    var pooledByName = new Dictionary<string, FeatureItem>();
    foreach (var sampleGroups in dic.Values)
    {
        foreach (var featureGroup in sampleGroups.Values)
        {
            foreach (var feature in featureGroup)
            {
                FeatureItem pooled;
                if (!pooledByName.TryGetValue(feature.Name, out pooled))
                {
                    pooled = new FeatureItem() { Name = feature.Name };
                    pooled.Locations.Add(new FeatureLocation());
                    pooledByName[feature.Name] = pooled;
                }

                pooled.Locations[0].SamLocations.AddRange(feature.Locations.SelectMany(location => location.SamLocations));
            }
        }
    }

    var groups = pooledByName.Values.GroupByIdenticalQuery();

    // Project every group back onto each sample using that sample's own feature objects.
    var result = new Dictionary<string, Dictionary<string, FeatureItemGroup>>();
    foreach (var sampleEntry in dic)
    {
        var featuresByName = sampleEntry.Value.Values
            .SelectMany(featureGroup => featureGroup)
            .ToDictionary(feature => feature.Name);

        var regrouped = new Dictionary<string, FeatureItemGroup>();
        result[sampleEntry.Key] = regrouped;

        foreach (var identical in groups)
        {
            var present = identical
                .Where(member => featuresByName.ContainsKey(member.Name))
                .Select(member => featuresByName[member.Name])
                .ToList();
            if (present.Count == 0)
            {
                continue;
            }

            var sampleGroup = new FeatureItemGroup();
            sampleGroup.AddRange(present);
            regrouped[identical.DisplayName] = sampleGroup;
        }
    }

    return result;
}
/// <summary>
/// Convenience overload: passes the item's <c>Name</c> to <c>GetTRNAAminoacid</c> and returns its result.
/// </summary>
/// <param name="item">Feature item whose name is used.</param>
/// <returns>Whatever <c>GetTRNAAminoacid</c> returns for the item's name.</returns>
public static string GetTRNAAminoacid(FeatureItem item)
{
    var featureName = item.Name;
    return GetTRNAAminoacid(featureName);
}
/// <summary>
/// Convenience overload: passes the item's <c>Name</c> to <c>GetTRNACode</c> and returns its result.
/// </summary>
/// <param name="item">Feature item whose name is used.</param>
/// <returns>Whatever <c>GetTRNACode</c> returns for the item's name.</returns>
public static string GetTRNACode(FeatureItem item)
{
    var featureName = item.Name;
    return GetTRNACode(featureName);
}