예제 #1
0
    /// <summary>
    /// Buckets the given locations by their <c>Name</c> and produces one
    /// <c>FeatureItem</c> per distinct name, in the order the names are first seen.
    /// </summary>
    /// <param name="locations">Locations to be grouped.</param>
    /// <returns>
    /// A new list of items; each item's <c>Name</c> is the group key and its
    /// <c>Locations</c> receive every location that shares that name.
    /// </returns>
    public static List<FeatureItem> GroupByName(this IEnumerable<FeatureLocation> locations)
    {
      return locations
        .GroupBy(loc => loc.Name)
        .Select(bucket =>
        {
          var feature = new FeatureItem { Name = bucket.Key };
          feature.Locations.AddRange(bucket);
          return feature;
        })
        .ToList();
    }
예제 #2
0
        /// <summary>
        /// Prunes redundant locations from <paramref name="item"/>: for every pair of
        /// locations where one contains the other, a location is dropped when all of
        /// its mapped reads are also mapped to the other location of the pair.
        /// Dropped locations are detached from their reads (via
        /// <c>SamLocation.Parent.RemoveLocation</c>) and removed from
        /// <c>item.Locations</c>. Items with zero or one location are left untouched.
        /// </summary>
        /// <param name="item">The feature item whose <c>Locations</c> list is modified in place.</param>
        public static void CombineLocationByMappedReads(this FeatureItem item)
        {
            // Only items with several regions can have one region containing another.
            if (item.Locations.Count > 1)
            {
                // Locations scheduled for removal; the list is applied after the pairwise scan.
                var removed = new List <FeatureLocation>();
                for (int i = 0; i < item.Locations.Count; i++)
                {
                    // NOTE(review): regi is never checked against "removed", so a location that was
                    // scheduled for removal as regj in an earlier pass is still compared here and
                    // could be added to "removed" a second time - confirm this is intended.
                    var regi = item.Locations[i];
                    for (int j = i + 1; j < item.Locations.Count; j++)
                    {
                        var regj = item.Locations[j];
                        // Skip partners that are already scheduled for removal.
                        if (removed.Contains(regj))
                        {
                            continue;
                        }

                        // Going by the branch comments below: 1 means regi contains regj,
                        // -1 means regj contains regi; any other value leaves the pair untouched.
                        // (The exact contract of FeatureLocation.Contains is not visible here - verify.)
                        var con = regi.Contains(regj);

                        // Keep the smaller one if the two ranges are mapped by the same reads.
                        if (con == 1)
                        {
                            // regi contains regj and every read of regi is also mapped to regj: remove regi.
                            if (regi.SamLocations.All(m => regj.SamLocations.Any(l => l.SamLocation == m.SamLocation)))
                            {
                                removed.Add(regi);
                                // regi is gone; stop comparing it with the remaining locations.
                                break;
                            }

                            // regi contains regj and every read of regj is also mapped to regi: remove regj.
                            if (regj.SamLocations.All(m => regi.SamLocations.Any(l => l.SamLocation == m.SamLocation)))
                            {
                                removed.Add(regj);
                                continue;
                            }
                        }
                        else if (con == -1)
                        {
                            // regj contains regi and every read of regj is also mapped to regi: remove regj.
                            if (regj.SamLocations.All(m => regi.SamLocations.Any(l => l.SamLocation == m.SamLocation)))
                            {
                                removed.Add(regj);
                                continue;
                            }

                            // regj contains regi and every read of regi is also mapped to regj: remove regi.
                            if (regi.SamLocations.All(m => regj.SamLocations.Any(l => l.SamLocation == m.SamLocation)))
                            {
                                removed.Add(regi);
                                // regi is gone; stop comparing it with the remaining locations.
                                break;
                            }
                        }
                    }
                }

                // Detach every read location that was attached to a removed feature location
                // from its parent before dropping the feature location itself.
                foreach (var floc in removed)
                {
                    foreach (var sloc in floc.SamLocations)
                    {
                        sloc.SamLocation.Parent.RemoveLocation(sloc.SamLocation);
                    }
                }

                item.Locations.RemoveAll(m => removed.Contains(m));
            }
        }
        /// <summary>
        /// Reads feature item groups from an XML file using a forward-only
        /// <c>XmlReader</c>. The queries are read first (via
        /// <c>SAMAlignedItemUtils.ReadFrom</c>) and indexed by location key; the
        /// reader then walks the "subjectResult" element, building one
        /// <c>FeatureItemGroup</c> per "subjectGroup", one <c>FeatureItem</c> per
        /// "subject", one <c>FeatureLocation</c> per "region" and one
        /// <c>FeatureSamLocation</c> per "query".
        /// </summary>
        /// <param name="fileName">Path (or URI) of the XML file handed to <c>XmlReader.Create</c>.</param>
        /// <returns>The list of groups in document order; empty when no "subjectGroup" element is found.</returns>
        /// <remarks>
        /// The region attributes "start", "end" and "strand" and the query attribute
        /// "offset" are parsed or indexed unconditionally, so a missing one raises an
        /// exception. "query_count_before_filter", "pvalue", "overlap", "nmi" and "nnpm"
        /// are optional. Numeric parsing uses the overloads without an explicit
        /// format provider.
        /// </remarks>
        public virtual List <FeatureItemGroup> ReadFromFile(string fileName)
        {
            var result = new List <FeatureItemGroup>();

            using (XmlReader source = XmlReader.Create(fileName))
            {
                Progress.SetMessage("reading queries ...");

                List <SAMAlignedItem> queries = SAMAlignedItemUtils.ReadFrom(source);

                Progress.SetMessage("{0} queries read.", queries.Count);

                // Index the query locations by key so "query" elements can be resolved below;
                // the original list is emptied right after the map has been built.
                var qmmap = queries.ToSAMAlignedLocationMap();
                queries.Clear();

                Progress.SetMessage("reading subjects ...");
                // Scratch variable reused for optional attribute values.
                string value;
                source.ReadToFollowing("subjectResult");
                // Each nesting level uses the same pattern: ReadToDescendant moves to the first
                // matching child, and the do/while advances with ReadToNextSibling.
                if (source.ReadToDescendant("subjectGroup"))
                {
                    do
                    {
                        var featureGroup = new FeatureItemGroup();
                        result.Add(featureGroup);

                        if (source.ReadToDescendant("subject"))
                        {
                            do
                            {
                                var item = new FeatureItem();
                                featureGroup.Add(item);
                                item.Name = source.GetAttribute("name");

                                if (source.ReadToDescendant("region"))
                                {
                                    do
                                    {
                                        var fl = new FeatureLocation();
                                        item.Locations.Add(fl);

                                        // Attributes must be read while the reader is still positioned on
                                        // the "region" element, i.e. before descending into its queries.
                                        fl.Name     = item.Name;
                                        fl.Seqname  = source.GetAttribute("seqname");
                                        fl.Start    = long.Parse(source.GetAttribute("start"));
                                        fl.End      = long.Parse(source.GetAttribute("end"));
                                        // Only the first character of the "strand" attribute is kept.
                                        fl.Strand   = source.GetAttribute("strand")[0];
                                        fl.Sequence = source.GetAttribute("sequence");

                                        // Optional: left at its default when the attribute is absent.
                                        value = source.GetAttribute("query_count_before_filter");
                                        if (value != null)
                                        {
                                            fl.QueryCountBeforeFilter = int.Parse(value);
                                        }

                                        // Optional: left at its default when the attribute is absent.
                                        value = source.GetAttribute("pvalue");
                                        if (value != null)
                                        {
                                            fl.PValue = double.Parse(value);
                                        }

                                        if (source.ReadToDescendant("query"))
                                        {
                                            do
                                            {
                                                // Resolve the query location that was read in the first phase.
                                                // NOTE(review): the type of qmmap is not visible here; if it is a
                                                // Dictionary, an unknown key throws KeyNotFoundException - confirm.
                                                string             qname = source.GetAttribute("qname");
                                                string             loc   = source.GetAttribute("loc");
                                                string             key   = SAMAlignedLocation.GetKey(qname, loc);
                                                SAMAlignedLocation query = qmmap[key];

                                                // fsl is never added to a collection explicitly; presumably the
                                                // constructor and/or the SamLocation setter register it - TODO confirm.
                                                FeatureSamLocation fsl = new FeatureSamLocation(fl);
                                                fsl.SamLocation = query;

                                                fsl.Offset = int.Parse(source.GetAttribute("offset"));

                                                // Use the stored overlap when present, otherwise compute it
                                                // from the query location and the feature location.
                                                var attr = source.GetAttribute("overlap");
                                                if (attr == null)
                                                {
                                                    fsl.OverlapPercentage = query.OverlapPercentage(fl);
                                                }
                                                else
                                                {
                                                    fsl.OverlapPercentage = double.Parse(attr);
                                                }

                                                // Optional attribute for NumberOfMismatch.
                                                var nmi = source.GetAttribute("nmi");
                                                if (nmi != null)
                                                {
                                                    fsl.NumberOfMismatch = int.Parse(nmi);
                                                }

                                                // Optional attribute for NumberOfNoPenaltyMutation.
                                                var nnpm = source.GetAttribute("nnpm");
                                                if (nnpm != null)
                                                {
                                                    fsl.NumberOfNoPenaltyMutation = int.Parse(nnpm);
                                                }
                                            } while (source.ReadToNextSibling("query"));
                                        }
                                    } while (source.ReadToNextSibling("region"));
                                }
                            } while (source.ReadToNextSibling("subject"));
                        }
                    } while (source.ReadToNextSibling("subjectGroup"));
                }
                qmmap.Clear();
            }

            // NOTE(review): result.Count is the number of groups ("subjectGroup" elements),
            // not the number of individual subjects, although the message says "subjects".
            Progress.SetMessage("{0} subjects read.", result.Count);
            return(result);
        }
예제 #4
0
        /// <summary>
        /// Loads the whole XML document into memory and rebuilds the feature
        /// hierarchy from it: one <c>FeatureItemGroup</c> per "subjectGroup", one
        /// <c>FeatureItem</c> per "subject", one <c>FeatureLocation</c> per "region"
        /// and one <c>FeatureSamLocation</c> per "query".
        /// </summary>
        /// <param name="fileName">Path (or URI) of the XML file passed to <c>XElement.Load</c>.</param>
        /// <returns>The groups in document order.</returns>
        /// <remarks>
        /// "sequence", "query_count_before_filter", "pvalue", "overlap", "nmi" and
        /// "nnpm" (with "nnmp" accepted as a fallback name) are optional; the
        /// "subjectResult" element and the "name", "qname" and "loc" attributes are
        /// dereferenced unconditionally.
        /// </remarks>
        public List <FeatureItemGroup> ReadFromFile(string fileName)
        {
            Console.WriteLine("read file {0} ...", fileName);
            var groups = new List<FeatureItemGroup>();

            XElement document = XElement.Load(fileName);

            // Index of every query location, keyed by SAMAlignedLocation.GetKey(qname, loc).
            Dictionary<string, SAMAlignedLocation> locationByKey = document.ToSAMAlignedItems().ToSAMAlignedLocationMap();

            foreach (XElement groupNode in document.Element("subjectResult").Elements("subjectGroup"))
            {
                var featureGroup = new FeatureItemGroup();
                groups.Add(featureGroup);

                foreach (XElement subjectNode in groupNode.Elements("subject"))
                {
                    var feature = new FeatureItem();
                    featureGroup.Add(feature);
                    feature.Name = subjectNode.Attribute("name").Value;

                    foreach (XElement regionNode in subjectNode.Elements("region"))
                    {
                        var location = new FeatureLocation();
                        feature.Locations.Add(location);

                        location.Name = feature.Name;
                        location.ParseLocation(regionNode);

                        // Optional region attributes: only assigned when present.
                        var sequenceAttr = regionNode.Attribute("sequence");
                        if (sequenceAttr != null)
                        {
                            location.Sequence = sequenceAttr.Value;
                        }

                        var countAttr = regionNode.Attribute("query_count_before_filter");
                        if (countAttr != null)
                        {
                            location.QueryCountBeforeFilter = int.Parse(countAttr.Value);
                        }

                        var pvalueAttr = regionNode.Attribute("pvalue");
                        if (pvalueAttr != null)
                        {
                            location.PValue = double.Parse(pvalueAttr.Value);
                        }

                        foreach (XElement queryNode in regionNode.Elements("query"))
                        {
                            // Look up the already-parsed query location for this element.
                            string key = SAMAlignedLocation.GetKey(
                                queryNode.Attribute("qname").Value,
                                queryNode.Attribute("loc").Value);
                            SAMAlignedLocation aligned = locationByKey[key];

                            var link = new FeatureSamLocation(location) { SamLocation = aligned };

                            // Prefer the stored overlap; compute it when the attribute is missing.
                            var overlapAttr = queryNode.FindAttribute("overlap");
                            link.OverlapPercentage = overlapAttr != null
                                ? double.Parse(overlapAttr.Value)
                                : aligned.OverlapPercentage(location);

                            // "nnmp" is accepted as an alternative spelling of "nnpm".
                            var noPenaltyAttr = queryNode.FindAttribute("nnpm") ?? queryNode.FindAttribute("nnmp");
                            if (noPenaltyAttr != null)
                            {
                                link.NumberOfNoPenaltyMutation = int.Parse(noPenaltyAttr.Value);
                            }

                            var mismatchAttr = queryNode.FindAttribute("nmi");
                            if (mismatchAttr != null)
                            {
                                link.NumberOfMismatch = int.Parse(mismatchAttr.Value);
                            }
                        }
                    }
                }
            }

            locationByKey.Clear();

            return groups;
        }
예제 #5
0
    /// <summary>
    /// Regroups the features of every sample so that the grouping is shared
    /// across samples. The reads of all samples are first pooled per feature
    /// name into one synthetic item, those pooled items are grouped with the
    /// item-level <c>GroupByIdenticalQuery</c>, and each sample then gets one
    /// new group per pooled group that contains at least one of its features.
    /// </summary>
    /// <param name="dic">Sample name -> (group name -> feature group).</param>
    /// <returns>
    /// A new dictionary with the same sample keys; each inner dictionary is
    /// keyed by the pooled group's <c>DisplayName</c> and holds the sample's own
    /// feature items that belong to that group.
    /// </returns>
    public static Dictionary<string, Dictionary<string, FeatureItemGroup>> GroupByIdenticalQuery(Dictionary<string, Dictionary<string, FeatureItemGroup>> dic)
    {
      // One pooled item per distinct feature name, each with a single empty location.
      var distinctNames = dic.Values
        .SelectMany(sampleGroups => sampleGroups.Values)
        .SelectMany(grp => grp)
        .Select(feature => feature.Name)
        .Distinct()
        .ToList();

      var pooledByName = new Dictionary<string, FeatureItem>();
      foreach (var name in distinctNames)
      {
        var pooled = new FeatureItem() { Name = name };
        pooled.Locations.Add(new FeatureLocation());
        pooledByName[name] = pooled;
      }

      // Collect the reads of every sample into the pooled item's single location.
      var everyFeature = dic.Values
        .SelectMany(sampleGroups => sampleGroups.Values)
        .SelectMany(grp => grp);
      foreach (var feature in everyFeature)
      {
        var reads = feature.Locations.SelectMany(loc => loc.SamLocations);
        pooledByName[feature.Name].Locations[0].SamLocations.AddRange(reads);
      }

      var pooledGroups = pooledByName.Values.GroupByIdenticalQuery();

      var regrouped = new Dictionary<string, Dictionary<string, FeatureItemGroup>>();
      foreach (var sampleEntry in dic)
      {
        // Index this sample's own features by name.
        var ownByName = sampleEntry.Value.Values
          .SelectMany(grp => grp)
          .ToDictionary(feature => feature.Name);

        var sampleResult = new Dictionary<string, FeatureItemGroup>();
        regrouped[sampleEntry.Key] = sampleResult;

        foreach (var pooledGroup in pooledGroups)
        {
          var present = pooledGroup
            .Where(member => ownByName.ContainsKey(member.Name))
            .Select(member => ownByName[member.Name])
            .ToList();

          // Groups without any feature from this sample are skipped.
          if (present.Count <= 0)
          {
            continue;
          }

          var rebuilt = new FeatureItemGroup();
          rebuilt.AddRange(present);
          sampleResult[pooledGroup.DisplayName] = rebuilt;
        }
      }

      return regrouped;
    }
예제 #6
0
 /// <summary>
 /// Convenience overload: forwards the item's <c>Name</c> to the
 /// name-based <c>GetTRNAAminoacid</c> overload and returns its result.
 /// </summary>
 /// <param name="item">Feature item whose name is passed on.</param>
 /// <returns>Whatever the name-based overload returns for <c>item.Name</c>.</returns>
 public static string GetTRNAAminoacid(FeatureItem item)
 {
   var featureName = item.Name;
   return GetTRNAAminoacid(featureName);
 }
예제 #7
0
 /// <summary>
 /// Convenience overload: forwards the item's <c>Name</c> to the
 /// name-based <c>GetTRNACode</c> overload and returns its result.
 /// </summary>
 /// <param name="item">Feature item whose name is passed on.</param>
 /// <returns>Whatever the name-based overload returns for <c>item.Name</c>.</returns>
 public static string GetTRNACode(FeatureItem item)
 {
   var featureName = item.Name;
   return GetTRNACode(featureName);
 }