// Reads the gene count table fixture and verifies its dimensions, the first
// gene's leading annotation columns, the first sample name, and the first
// row of counts.
public void TestReadFromFile()
{
    const string tablePath = @"../../../data/genetable.tsv";

    var reader = new GeneCountTableFormat();
    var table = reader.ReadFromFile(tablePath);

    // Table dimensions: gene rows, then sample columns.
    Assert.AreEqual(2, table.GeneValues.Count);
    Assert.AreEqual(4, table.Samples.Length);

    // Leading annotation columns of the first gene row.
    var firstGene = table.GeneValues[0];
    Assert.AreEqual("ENSG00000000003", firstGene[0]);
    Assert.AreEqual("TSPAN6", firstGene[1]);

    Assert.AreEqual("IG-001", table.Samples[0]);

    // Counts of the first gene across the samples, in column order.
    int[] expectedFirstRow = { 128, 60, 288, 9 };
    for (int column = 0; column < expectedFirstRow.Length; column++)
    {
        Assert.AreEqual(expectedFirstRow[column], table.Count[0, column]);
    }
}
/// <summary>
/// Reads the count table from <c>options.InputFile</c> and replaces every count, in place,
/// with <c>count * 1e9 / (geneLength * sampleReads)</c>.
/// </summary>
/// <remarks>
/// Gene lengths come from the column named "length" (case-insensitive) of
/// <c>options.GeneLengthFile</c>, keyed by its first column. Per-sample read totals come from
/// the first two columns of <c>options.SampleReadsFile</c> when that file exists; otherwise
/// each sample's total is the sum of its counts in the table. When <c>options.KeyRegex</c> is
/// set, the first capture group of that pattern is used as the gene key on both the gene
/// length map and every row of the count table.
/// </remarks>
/// <param name="sampleCounts">On return, the read total used for each sample, in the order of the table's samples.</param>
/// <param name="geneLengths">On return, the length used for each gene, in the order of the table's gene rows.</param>
/// <returns>The count table that was read, with its counts replaced by the normalised values.</returns>
/// <exception cref="Exception">
/// The gene length file has no "length" column, a sample has no entry in the sample reads,
/// or a gene has no entry in the gene length file.
/// </exception>
public GeneCountTable CalculateFPKM(out double[] sampleCounts, out double[] geneLengths)
{
    // Scale factor of the normalisation: 1e3 (per kilobase) * 1e6 (per million reads).
    const double fpkmScale = 1000000000;

    // The input files are machine-readable tables, so numbers are parsed culture-independently.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    Progress.SetMessage("Reading gene length from {0} ...", options.GeneLengthFile);
    var columnNames = FileUtils.ReadColumnNames(options.GeneLengthFile);
    var lengthIndex = columnNames.ToList().FindIndex(m => string.Equals(m, "length", StringComparison.OrdinalIgnoreCase));
    if (lengthIndex < 0)
    {
        throw new Exception("Cannot find length column in file " + options.GeneLengthFile);
    }

    var geneLengthMap = new MapItemReader(0, lengthIndex)
        .ReadFromFile(options.GeneLengthFile)
        .ToDictionary(m => m.Key, m => double.Parse(m.Value.Value, invariant));

    Progress.SetMessage("Reading count table from {0} ...", options.InputFile);
    var counts = new GeneCountTableFormat().ReadFromFile(options.InputFile);

    if (!string.IsNullOrEmpty(options.KeyRegex))
    {
        var reg = new Regex(options.KeyRegex);
        geneLengthMap = geneLengthMap.ToDictionary(l => reg.Match(l.Key).Groups[1].Value, l => l.Value);

        // Normalise the key of EVERY gene row, not just the first one; otherwise the
        // remaining rows can never be matched against the normalised gene length map.
        for (int iRow = 0; iRow < counts.GeneValues.Count; iRow++)
        {
            counts.GeneValues[iRow][0] = reg.Match(counts.GeneValues[iRow][0]).Groups[1].Value;
        }
    }

    Dictionary<string, double> sampleReads;
    if (File.Exists(options.SampleReadsFile))
    {
        Progress.SetMessage("Reading sample reads from {0} ...", options.SampleReadsFile);
        sampleReads = new MapItemReader(0, 1)
            .ReadFromFile(options.SampleReadsFile)
            .ToDictionary(m => m.Key, m => double.Parse(m.Value.Value, invariant));
    }
    else
    {
        // No sample reads file: use the total mapped reads in the table as the total reads.
        sampleReads = new Dictionary<string, double>();
        for (int iSample = 0; iSample < counts.Samples.Length; iSample++)
        {
            double itotal = 0.0;
            for (int iGene = 0; iGene < counts.GeneValues.Count; iGene++)
            {
                itotal += counts.Count[iGene, iSample];
            }

            sampleReads[counts.Samples[iSample]] = itotal;
        }
    }

    // Fail early, with a message naming the missing key, before any count is modified.
    foreach (var sample in counts.Samples)
    {
        if (!sampleReads.ContainsKey(sample))
        {
            throw new Exception(string.Format("No sample {0} found at sample reads file {1}", sample, options.SampleReadsFile));
        }
    }

    foreach (var geneValues in counts.GeneValues)
    {
        if (!geneLengthMap.ContainsKey(geneValues[0]))
        {
            throw new Exception(string.Format("No gene {0} found at gene length file {1}", geneValues[0], options.GeneLengthFile));
        }
    }

    sampleCounts = (from sample in counts.Samples
                    select sampleReads[sample]).ToArray();
    geneLengths = (from geneValues in counts.GeneValues
                   select geneLengthMap[geneValues[0]]).ToArray();

    for (int iGene = 0; iGene < geneLengths.Length; iGene++)
    {
        for (int iSample = 0; iSample < sampleCounts.Length; iSample++)
        {
            counts.Count[iGene, iSample] = counts.Count[iGene, iSample] * fpkmScale / (geneLengths[iGene] * sampleCounts[iSample]);
        }
    }

    return counts;
}