Example #1
0
 /// <summary>
 /// Load known CN data from a .bed file.  File lines have tab-separated fields:
 /// chromosome, start, end, chromcountA, chromcountB
 /// So, copy number is the sum of the last 2 fields, major chromosome count is the max of the last 2 fields.
 /// Only the copy number is stored by this method.  Empty lines, lines starting with '#',
 /// and lines with fewer than 3 fields are skipped.
 /// </summary>
 /// <param name="oracleBedPath">Path of the .bed file to read.</param>
 /// <param name="getCN">If true, each interval's CN is set to chromcountA + chromcountB (fields 4 and 5).</param>
 /// <returns>The loaded intervals keyed by chromosome name, in file order within each chromosome.</returns>
 /// <exception cref="FormatException">
 /// A numeric field cannot be parsed, or <paramref name="getCN"/> is true and a line has fewer than 5 fields.
 /// </exception>
 protected Dictionary<string, List<CNInterval>> LoadIntervalsFromBed(string oracleBedPath, bool getCN)
 {
     // Never set to true in this method, so chromosome names are kept verbatim.
     bool stripChr = false;
     int count = 0;
     long totalBases = 0;
     // The file is machine-readable data: parse numbers independently of the current thread culture.
     System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
     Dictionary<string, List<CNInterval>> bedIntervals = new Dictionary<string, List<CNInterval>>();
     using (StreamReader reader = new StreamReader(oracleBedPath))
     {
         string fileLine;
         while ((fileLine = reader.ReadLine()) != null)
         {
             if (fileLine.Length == 0 || fileLine[0] == '#') continue;
             string[] bits = fileLine.TrimEnd('\t').Split('\t');
             if (bits.Length < 3) continue;
             if (getCN && bits.Length < 5)
             {
                 // Without this check the chromosome-count lookups below fail with an
                 // IndexOutOfRangeException that does not identify the offending line.
                 throw new FormatException(string.Format(
                     "Expected at least 5 tab-separated fields to compute copy number, found {0}: {1}",
                     bits.Length, fileLine));
             }
             string chromosome = bits[0];
             if (stripChr) chromosome = chromosome.Replace("chr", "");
             // Single lookup instead of ContainsKey followed by the indexer.
             List<CNInterval> chromosomeIntervals;
             if (!bedIntervals.TryGetValue(chromosome, out chromosomeIntervals))
             {
                 chromosomeIntervals = new List<CNInterval>();
                 bedIntervals[chromosome] = chromosomeIntervals;
             }
             CNInterval interval = new CNInterval();
             interval.Start = int.Parse(bits[1], invariant);
             interval.End = int.Parse(bits[2], invariant);
             if (getCN)
             {
                 interval.CN = int.Parse(bits[3], invariant) + int.Parse(bits[4], invariant);
             }
             totalBases += interval.Length;
             chromosomeIntervals.Add(interval);
             count++;
         }
     }
     Console.WriteLine(">>>Loaded {0} CN intervals ({1} bases)", count, totalBases);
     return bedIntervals;
 }
Example #2
0
        /// <summary>
        /// Load known CN data from a .bed file.  File lines have tab-separated fields:
        /// chromosome, start, end, chromcountA, chromcountB
        /// So, copy number is the sum of the last 2 fields, major chromosome count is the max of the last 2 fields.
        /// Only the copy number is stored by this method.  Empty lines, lines starting with '#',
        /// and lines with fewer than 3 fields are skipped.
        /// </summary>
        /// <param name="oracleBedPath">Path of the .bed file to read.</param>
        /// <param name="getCn">If true, each interval's Cn is set to chromcountA + chromcountB (fields 4 and 5).</param>
        /// <param name="heterogeneityFraction">
        /// Only consulted when <paramref name="getCn"/> is true and this value is below 1: a line that has a
        /// sixth field and whose two chromosome counts are both 1 is skipped when this value exceeds that sixth field.
        /// </param>
        /// <returns>The loaded intervals keyed by chromosome name, in file order within each chromosome.</returns>
        /// <exception cref="FormatException">
        /// A numeric field cannot be parsed, or <paramref name="getCn"/> is true and a line has fewer than 5 fields.
        /// </exception>
        protected static Dictionary <string, List <CNInterval> > LoadIntervalsFromBed(string oracleBedPath, bool getCn, double heterogeneityFraction)
        {
            // Never set to true in this method, so chromosome names are kept verbatim.
            bool stripChr   = false;
            int  count      = 0;
            long totalBases = 0;
            // The file is machine-readable data: parse numbers independently of the current thread culture
            // (otherwise a value such as "0.5" is misread under comma-decimal cultures).
            System.Globalization.CultureInfo        invariant    = System.Globalization.CultureInfo.InvariantCulture;
            Dictionary <string, List <CNInterval> > bedIntervals = new Dictionary <string, List <CNInterval> >();

            using (FileStream stream = new FileStream(oracleBedPath, FileMode.Open, FileAccess.Read))
                using (StreamReader reader = new StreamReader(stream))
                {
                    string fileLine;
                    while ((fileLine = reader.ReadLine()) != null)
                    {
                        if (fileLine.Length == 0 || fileLine[0] == '#')
                        {
                            continue;
                        }
                        string[] bits = fileLine.TrimEnd('\t').Split('\t');
                        if (bits.Length < 3)
                        {
                            continue;
                        }
                        if (getCn && bits.Length < 5)
                        {
                            // Without this check the chromosome-count lookups below fail with an
                            // IndexOutOfRangeException that does not identify the offending line.
                            throw new FormatException(string.Format(
                                "Expected at least 5 tab-separated fields to compute copy number, found {0}: {1}",
                                bits.Length, fileLine));
                        }
                        string chromosome = bits[0];
                        if (stripChr)
                        {
                            chromosome = chromosome.Replace("chr", "");
                        }
                        // Single lookup instead of ContainsKey followed by the indexer.  The list is registered
                        // before the heterogeneity filter, so a chromosome whose lines are all filtered out
                        // still appears in the result with an empty list.
                        List <CNInterval> chromosomeIntervals;
                        if (!bedIntervals.TryGetValue(chromosome, out chromosomeIntervals))
                        {
                            chromosomeIntervals      = new List <CNInterval>();
                            bedIntervals[chromosome] = chromosomeIntervals;
                        }
                        CNInterval interval = new CNInterval(chromosome);
                        interval.Start = int.Parse(bits[1], invariant);
                        interval.End   = int.Parse(bits[2], invariant);
                        if (getCn)
                        {
                            // Parse each chromosome count once and reuse it for the filter and the sum.
                            int countA = int.Parse(bits[3], invariant);
                            int countB = int.Parse(bits[4], invariant);
                            if (heterogeneityFraction < 1 && bits.Length > 5 && countA == 1 && countB == 1 &&
                                heterogeneityFraction > double.Parse(bits[5], invariant))
                            {
                                continue;
                            }
                            interval.Cn = countA + countB;
                        }
                        totalBases += interval.Length;
                        chromosomeIntervals.Add(interval);
                        count++;
                    }
                }
            Console.WriteLine(">>>Loaded {0} CN intervals ({1} bases)", count, totalBases);
            return(bedIntervals);
        }
Example #3
0
        /// <summary>
        /// Builds a <see cref="CNInterval"/> from one tab-separated record (presumably a VCF data line; the
        /// caller is not visible here).  Column 1 is the chromosome, column 2 the start, and column 8 a
        /// ';'-separated list in which "CN=" supplies the copy number and "END=" the interval end.
        /// When columns 9 and 10 are both present, a "CN" key in column 9 overrides the copy number with the
        /// value at the same ':'-separated position in column 10.
        /// </summary>
        /// <param name="fileLine">The record text to parse.</param>
        /// <returns>The parsed interval.</returns>
        /// <exception cref="ArgumentException">
        /// The record has fewer than 8 columns, or no END / non-negative CN could be extracted from it.
        /// </exception>
        private static CNInterval ParseCnInterval(string fileLine)
        {
            // Records are machine-readable: parse numbers independently of the current thread culture
            // (otherwise "CN=2.5" is misread under comma-decimal cultures).
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            string[] bits = fileLine.Split('\t');
            if (bits.Length < 8)
            {
                // Report a truncated record the same way as any other invalid record instead of
                // failing with an IndexOutOfRangeException on the column lookups below.
                throw new ArgumentException("Invalid record. Expected at least 8 tab-separated columns");
            }

            string     chromosome = bits[0];
            CNInterval interval   = new CNInterval(chromosome)
            {
                Start = int.Parse(bits[1], invariant),
                Cn    = -1 // sentinel: stays negative until a copy number is found
            };

            string[] infoBits = bits[7].Split(';');
            foreach (string subBit in infoBits)
            {
                if (subBit.StartsWith("CN=", StringComparison.Ordinal))
                {
                    float tempCn = float.Parse(subBit.Substring(3), invariant);
                    if (subBit.EndsWith(".5", StringComparison.Ordinal))
                    {
                        interval.Cn = (int)Math.Round(tempCn + 0.1); // round X.5 up to X+1
                    }
                    else
                    {
                        interval.Cn = (int)Math.Round(tempCn); // Round off
                    }
                }
                if (subBit.StartsWith("END=", StringComparison.Ordinal))
                {
                    interval.End = int.Parse(subBit.Substring(4), invariant);
                }
            }
            // Parse CN from Canvas output.  Both the key column (index 8) and the value column (index 9)
            // must exist; the previous "Length > 8" test let a 9-column record index past the array.
            if (bits.Length > 9)
            {
                string[] subBits  = bits[8].Split(':');
                string[] subBits2 = bits[9].Split(':');
                // The value column may carry fewer entries than the key column, so bound by both.
                for (int subBitIndex = 0; subBitIndex < subBits.Length && subBitIndex < subBits2.Length; subBitIndex++)
                {
                    if (subBits[subBitIndex] == "CN")
                    {
                        interval.Cn = int.Parse(subBits2[subBitIndex], invariant);
                    }
                }
            }
            if (interval.End == 0 || interval.Cn < 0)
            {
                throw new ArgumentException("Invalid record. End cannot be 0 and CN must be >= 0");
            }

            return(interval);
        }
Example #4
0
        /// <summary>
        /// Replaces <c>KnownCN</c> with the intervals read from the given file through <c>GzipReader</c>.
        /// Each non-empty line that does not start with '#' is split on tabs: column 1 is the chromosome,
        /// column 2 the start, and column 8 a ';'-separated list in which "CN=" supplies the copy number and
        /// "END=" the interval end.  When columns 9 and 10 are both present, a "CN" key in column 9 overrides
        /// the copy number with the value at the same ':'-separated position in column 10.
        /// Records with fewer than 8 columns, without an END, or without a non-negative CN are written to the
        /// console and skipped.
        /// </summary>
        /// <param name="oracleVCFPath">Path of the file to read.</param>
        protected void LoadKnownCNVCF(string oracleVCFPath)
        {
            // Never set to true in this method, so chromosome names are kept verbatim.
            bool stripChr = false;
            // Records are machine-readable: parse numbers independently of the current thread culture
            // (otherwise "CN=2.5" is misread under comma-decimal cultures).
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Load our "oracle" of known copy numbers:
            this.KnownCN = new Dictionary<string, List<CNInterval>>();
            int count = 0;
            using (GzipReader reader = new GzipReader(oracleVCFPath))
            {
                string fileLine;
                while ((fileLine = reader.ReadLine()) != null)
                {
                    if (fileLine.Length == 0 || fileLine[0] == '#') continue;
                    string[] bits = fileLine.Split('\t');
                    if (bits.Length < 8)
                    {
                        // A truncated line is reported like any other bogus record instead of
                        // aborting the whole load with an IndexOutOfRangeException.
                        Console.WriteLine("Error - bogus record!");
                        Console.WriteLine(fileLine);
                        continue;
                    }
                    string chromosome = bits[0];
                    if (stripChr) chromosome = chromosome.Replace("chr", "");
                    // Single lookup instead of ContainsKey followed by the indexer.
                    List<CNInterval> chromosomeIntervals;
                    if (!KnownCN.TryGetValue(chromosome, out chromosomeIntervals))
                    {
                        chromosomeIntervals = new List<CNInterval>();
                        KnownCN[chromosome] = chromosomeIntervals;
                    }
                    CNInterval interval = new CNInterval();
                    interval.Start = int.Parse(bits[1], invariant);
                    interval.CN = -1; // sentinel: stays negative until a copy number is found
                    string[] infoBits = bits[7].Split(';');
                    foreach (string subBit in infoBits)
                    {
                        if (subBit.StartsWith("CN=", StringComparison.Ordinal))
                        {
                            float tempCN = float.Parse(subBit.Substring(3), invariant);
                            if (subBit.EndsWith(".5", StringComparison.Ordinal))
                            {
                                interval.CN = (int)Math.Round(tempCN + 0.1); // round X.5 up to X+1
                            }
                            else
                            {
                                interval.CN = (int)Math.Round(tempCN); // Round off
                            }
                        }
                        if (subBit.StartsWith("END=", StringComparison.Ordinal))
                        {
                            interval.End = int.Parse(subBit.Substring(4), invariant);
                        }
                    }
                    // Parse CN from Canvas output.  Both the key column (index 8) and the value column
                    // (index 9) must exist; the previous "Length > 8" test let a 9-column record index
                    // past the array.
                    if (bits.Length > 9)
                    {
                        string[] subBits = bits[8].Split(':');
                        string[] subBits2 = bits[9].Split(':');
                        // The value column may carry fewer entries than the key column, so bound by both.
                        for (int subBitIndex = 0; subBitIndex < subBits.Length && subBitIndex < subBits2.Length; subBitIndex++)
                        {
                            if (subBits[subBitIndex] == "CN")
                            {
                                interval.CN = int.Parse(subBits2[subBitIndex], invariant);
                            }
                        }
                    }
                    if (interval.End == 0 || interval.CN < 0)
                    {
                        Console.WriteLine("Error - bogus record!");
                        Console.WriteLine(fileLine);
                    }
                    else
                    {
                        chromosomeIntervals.Add(interval);
                        count++;
                    }
                }
            }
            Console.WriteLine(">>>Loaded {0} known-CN intervals", count);
        }
Example #5
0
        /// <summary>
        /// Replaces <c>KnownCN</c> with the intervals read from the given file through <c>GzipReader</c>.
        /// Each non-empty line that does not start with '#' is split on tabs: column 1 is the chromosome,
        /// column 2 the start, and column 8 a ';'-separated list in which "CN=" supplies the copy number and
        /// "END=" the interval end.  When columns 9 and 10 are both present, a "CN" key in column 9 overrides
        /// the copy number with the value at the same ':'-separated position in column 10.
        /// Records with fewer than 8 columns, without an END, or without a non-negative CN are written to the
        /// console and skipped.
        /// </summary>
        /// <param name="oracleVCFPath">Path of the file to read.</param>
        protected void LoadKnownCNVCF(string oracleVCFPath)
        {
            // Never set to true in this method, so chromosome names are kept verbatim.
            bool stripChr = false;
            // Records are machine-readable: parse numbers independently of the current thread culture
            // (otherwise "CN=2.5" is misread under comma-decimal cultures).
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Load our "oracle" of known copy numbers:
            this.KnownCN = new Dictionary <string, List <CNInterval> >();
            int count = 0;

            using (GzipReader reader = new GzipReader(oracleVCFPath))
            {
                string fileLine;
                while ((fileLine = reader.ReadLine()) != null)
                {
                    if (fileLine.Length == 0 || fileLine[0] == '#')
                    {
                        continue;
                    }
                    string[] bits = fileLine.Split('\t');
                    if (bits.Length < 8)
                    {
                        // A truncated line is reported like any other bogus record instead of
                        // aborting the whole load with an IndexOutOfRangeException.
                        Console.WriteLine("Error - bogus record!");
                        Console.WriteLine(fileLine);
                        continue;
                    }
                    string chromosome = bits[0];
                    if (stripChr)
                    {
                        chromosome = chromosome.Replace("chr", "");
                    }
                    // Single lookup instead of ContainsKey followed by the indexer.
                    List <CNInterval> chromosomeIntervals;
                    if (!KnownCN.TryGetValue(chromosome, out chromosomeIntervals))
                    {
                        chromosomeIntervals = new List <CNInterval>();
                        KnownCN[chromosome] = chromosomeIntervals;
                    }
                    CNInterval interval = new CNInterval();
                    interval.Start = int.Parse(bits[1], invariant);
                    interval.CN    = -1; // sentinel: stays negative until a copy number is found
                    string[] infoBits = bits[7].Split(';');
                    foreach (string subBit in infoBits)
                    {
                        if (subBit.StartsWith("CN=", StringComparison.Ordinal))
                        {
                            float tempCN = float.Parse(subBit.Substring(3), invariant);
                            if (subBit.EndsWith(".5", StringComparison.Ordinal))
                            {
                                interval.CN = (int)Math.Round(tempCN + 0.1); // round X.5 up to X+1
                            }
                            else
                            {
                                interval.CN = (int)Math.Round(tempCN); // Round off
                            }
                        }
                        if (subBit.StartsWith("END=", StringComparison.Ordinal))
                        {
                            interval.End = int.Parse(subBit.Substring(4), invariant);
                        }
                    }
                    // Parse CN from Canvas output.  Both the key column (index 8) and the value column
                    // (index 9) must exist; the previous "Length > 8" test let a 9-column record index
                    // past the array.
                    if (bits.Length > 9)
                    {
                        string[] subBits  = bits[8].Split(':');
                        string[] subBits2 = bits[9].Split(':');
                        // The value column may carry fewer entries than the key column, so bound by both.
                        for (int subBitIndex = 0; subBitIndex < subBits.Length && subBitIndex < subBits2.Length; subBitIndex++)
                        {
                            if (subBits[subBitIndex] == "CN")
                            {
                                interval.CN = int.Parse(subBits2[subBitIndex], invariant);
                            }
                        }
                    }
                    if (interval.End == 0 || interval.CN < 0)
                    {
                        Console.WriteLine("Error - bogus record!");
                        Console.WriteLine(fileLine);
                    }
                    else
                    {
                        chromosomeIntervals.Add(interval);
                        count++;
                    }
                }
            }
            Console.WriteLine(">>>Loaded {0} known-CN intervals", count);
        }