/// <summary>
/// Builds the initial negative binomial emission distributions, one per hidden
/// (copy number) state, using per-dimension means and variances estimated from <paramref name="data"/>.
/// </summary>
/// <param name="data">
/// Observations; each inner list is indexed by dimension (0 .. _nSamples - 1).
/// Passed to RemoveOutliers before the overall maximum value is taken.
/// </param>
/// <param name="nHiddenStates">Number of hidden states; states are CN = 0 .. nHiddenStates - 1.</param>
/// <param name="haploidMean">
/// Output list: for every dimension, half of the data mean is appended to it.
/// </param>
/// <returns>One distribution per hidden state, ordered by CN.</returns>
public List<MultivariateNegativeBinomial> InitializeNegativeBinomialEmission(List<List<double>> data, int nHiddenStates, List<double> haploidMean)
{
    int nDimensions = _nSamples;
    var variance = new List<double>(nDimensions);
    var tmpDistributions = new List<MultivariateNegativeBinomial>();

    for (int dimension = 0; dimension < nDimensions; dimension++)
    {
        // Haploid mean is half of the observed mean of this dimension.
        double meanHolder = data.Sum(datapoint => datapoint[dimension]);
        haploidMean.Add(meanHolder / data.Count / 2.0);
        variance.Add(CanvasCommon.Utilities.Variance(data.Select(x => x[dimension]).ToList()));
    }

    // remove outliers
    // NOTE(review): the return value is not used, so RemoveOutliers presumably filters data in place - confirm.
    double maxThreshold = haploidMean.Max() * nHiddenStates;
    RemoveOutliers(data, maxThreshold);
    int maxValues = data.Max(x => Convert.ToInt32(x.Max()));

    int lastState = nHiddenStates - 1;
    for (int CN = 0; CN < nHiddenStates; CN++)
    {
        List<double> tmpMean;
        // if few hidden states, increase the last CN state by diploid rather than haploid increment
        // (the previous check, CN - 1 == nHiddenStates, could never be true inside this loop)
        if (nHiddenStates < 5 && CN == lastState)
        {
            tmpMean = haploidMean.Select(x => Math.Max(CN, 0.5) * x + x).ToList();
        }
        else
        {
            // CN = 0 uses a small non-zero multiplier (0.1) instead of a zero mean.
            tmpMean = haploidMean.Select(x => Math.Max(CN, 0.1) * x).ToList();
        }

        tmpDistributions.Add(new MultivariateNegativeBinomial(tmpMean, variance, maxValues + 10));
    }

    return tmpDistributions;
}
/// <summary>
/// Builds the initial negative binomial emission distributions, one per hidden
/// (copy number) state. Per-dimension means and variances are either estimated from
/// <paramref name="data"/> (median and variance) or taken from the supplied
/// <paramref name="medians"/> and <paramref name="pseudoVariances"/>.
/// </summary>
/// <param name="data">
/// Observations; each inner list is indexed by dimension (0 .. _nSamples - 1).
/// Passed to RemoveOutliers before the overall maximum value is taken.
/// </param>
/// <param name="nHiddenStates">Number of hidden states; states are CN = 0 .. nHiddenStates - 1.</param>
/// <param name="haploidMean">
/// Output list: for every dimension, half of the median (data-derived or supplied) is appended to it.
/// </param>
/// <param name="medians">
/// Optional per-dimension medians. When null, the median of the data (floored at 1) is used.
/// </param>
/// <param name="pseudoVariances">
/// Per-dimension variances used when <paramref name="medians"/> is supplied; required in that case.
/// </param>
/// <returns>One distribution per hidden state, ordered by CN.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="medians"/> is supplied but <paramref name="pseudoVariances"/> is null.
/// </exception>
public List<MultivariateNegativeBinomial> InitializeNegativeBinomialEmission(List<List<double>> data, int nHiddenStates, List<double> haploidMean, List<double> medians = null, List<double> pseudoVariances = null)
{
    // Fail before haploidMean is touched instead of with a NullReferenceException mid-loop.
    if (medians != null && pseudoVariances == null)
    {
        throw new ArgumentNullException("pseudoVariances", "pseudoVariances must be supplied together with medians.");
    }

    int nDimensions = _nSamples;
    var variance = new List<double>(nDimensions);
    var tmpDistributions = new List<MultivariateNegativeBinomial>();

    for (int dimension = 0; dimension < nDimensions; dimension++)
    {
        if (medians == null)
        {
            // Estimate from the data; the median is floored at 1.
            double median = Math.Max(1d, CanvasCommon.Utilities.Median(data.Select(datapoint => datapoint[dimension])));
            haploidMean.Add(median / 2.0);
            variance.Add(CanvasCommon.Utilities.Variance(data.Select(x => x[dimension]).ToList()));
        }
        else
        {
            haploidMean.Add(medians[dimension] / 2.0);
            variance.Add(pseudoVariances[dimension]);
        }
    }

    // remove outliers
    // NOTE(review): the return value is not used, so RemoveOutliers presumably filters data in place - confirm.
    double maxThreshold = haploidMean.Max() * nHiddenStates;
    RemoveOutliers(data, maxThreshold);
    int maxValues = data.Max(x => Convert.ToInt32(x.Max()));

    int lastState = nHiddenStates - 1;
    for (int CN = 0; CN < nHiddenStates; CN++)
    {
        List<double> tmpMean;
        // if few hidden states, increase the last CN state by diploid rather than haploid increment
        // (the previous check, CN - 1 == nHiddenStates, could never be true inside this loop)
        if (nHiddenStates < 5 && CN == lastState)
        {
            tmpMean = haploidMean.Select(x => Math.Max(CN, 0.5) * x + x).ToList();
        }
        else
        {
            // CN = 0 uses a small non-zero multiplier (0.1) instead of a zero mean.
            tmpMean = haploidMean.Select(x => Math.Max(CN, 0.1) * x).ToList();
        }

        // Every state receives its own copy of the variance list.
        tmpDistributions.Add(new MultivariateNegativeBinomial(tmpMean, variance.ToList(), maxValues + 10));
    }

    return tmpDistributions;
}