Exemplo n.º 1
0
        /// <summary>
        /// Builds one initial <see cref="MultivariateNegativeBinomial"/> emission distribution per hidden state.
        /// </summary>
        /// <remarks>
        /// For each of the <c>_nSamples</c> dimensions, half of the column mean of <paramref name="data"/> is
        /// appended to <paramref name="haploidMean"/>, and the column variance is computed from the data as
        /// passed in (i.e. before <c>RemoveOutliers</c> is called). State <c>k</c> gets the mean
        /// <c>max(k, 0.1) * haploidMean</c>. When there are fewer than five hidden states, the last state
        /// instead gets <c>max(k, 0.5) * haploidMean + haploidMean</c>, i.e. one extra haploid step.
        /// </remarks>
        /// <param name="data">Observations; each inner list is indexed by dimension. Passed to
        /// <c>RemoveOutliers</c> (presumably filtered in place, since it is reused afterwards; confirm against that helper).</param>
        /// <param name="nHiddenStates">Number of hidden states; one distribution is produced per state.</param>
        /// <param name="haploidMean">Output list: one value per dimension is appended. Any entries already
        /// present take part in the outlier threshold and in the per-state means.</param>
        /// <returns>The list of per-state distributions, ordered by state index starting at 0.</returns>
        public List<MultivariateNegativeBinomial> InitializeNegativeBinomialEmission(List<List<double>> data, int nHiddenStates, List<double> haploidMean)
        {
            int nDimensions   = _nSamples;
            var variance      = new List<double>(nDimensions);
            var distributions = new List<MultivariateNegativeBinomial>();

            for (int dimension = 0; dimension < nDimensions; dimension++)
            {
                // Extract the column once and reuse it for both the mean and the variance.
                List<double> column = data.Select(datapoint => datapoint[dimension]).ToList();
                haploidMean.Add(column.Sum() / data.Count / 2.0);
                variance.Add(CanvasCommon.Utilities.Variance(column));
            }

            // remove outliers
            double maxThreshold = haploidMean.Max() * nHiddenStates;
            RemoveOutliers(data, maxThreshold);

            // Largest remaining observation, rounded to int; throws if no data is left.
            int maxValue = data.Max(datapoint => Convert.ToInt32(datapoint.Max()));

            for (int copyNumber = 0; copyNumber < nHiddenStates; copyNumber++)
            {
                // If few hidden states, increase the last CN state by a diploid rather than a haploid increment.
                // The previous check (CN - 1 == nHiddenStates) could never be true inside this loop,
                // so the adjustment was silently skipped.
                bool isLastOfFewStates = nHiddenStates < 5 && copyNumber == nHiddenStates - 1;

                List<double> stateMean = isLastOfFewStates
                    ? haploidMean.Select(x => Math.Max(copyNumber, 0.5) * x + x).ToList()
                    : haploidMean.Select(x => Math.Max(copyNumber, 0.1) * x).ToList();

                // The same variance list instance is handed to every state's distribution.
                distributions.Add(new MultivariateNegativeBinomial(stateMean, variance, maxValue + 10));
            }

            return distributions;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds one initial <see cref="MultivariateNegativeBinomial"/> emission distribution per hidden state,
        /// using either statistics derived from <paramref name="data"/> or caller-supplied ones.
        /// </summary>
        /// <remarks>
        /// For each of the <c>_nSamples</c> dimensions one value is appended to <paramref name="haploidMean"/>:
        /// half of the column median (floored at 1) when <paramref name="medians"/> is null, otherwise half of
        /// <c>medians[dimension]</c>. The variance is the column variance of the data as passed in, or
        /// <c>pseudoVariances[dimension]</c> when <paramref name="medians"/> is supplied. Note that
        /// <paramref name="pseudoVariances"/> is ignored when <paramref name="medians"/> is null.
        /// State <c>k</c> gets the mean <c>max(k, 0.1) * haploidMean</c>. When there are fewer than five hidden
        /// states, the last state instead gets <c>max(k, 0.5) * haploidMean + haploidMean</c>.
        /// </remarks>
        /// <param name="data">Observations; each inner list is indexed by dimension. Passed to
        /// <c>RemoveOutliers</c> (presumably filtered in place, since it is reused afterwards; confirm against that helper).</param>
        /// <param name="nHiddenStates">Number of hidden states; one distribution is produced per state.</param>
        /// <param name="haploidMean">Output list: one value per dimension is appended. Any entries already
        /// present take part in the outlier threshold and in the per-state means.</param>
        /// <param name="medians">Optional per-dimension medians that replace the values derived from <paramref name="data"/>.</param>
        /// <param name="pseudoVariances">Per-dimension variances; required whenever <paramref name="medians"/> is supplied.</param>
        /// <returns>The list of per-state distributions, ordered by state index starting at 0.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="medians"/> is supplied but
        /// <paramref name="pseudoVariances"/> is null.</exception>
        public List<MultivariateNegativeBinomial> InitializeNegativeBinomialEmission(List<List<double>> data, int nHiddenStates, List<double> haploidMean, List<double> medians = null, List<double> pseudoVariances = null)
        {
            bool useSuppliedStatistics = medians != null;
            if (useSuppliedStatistics && pseudoVariances == null)
            {
                // Previously this surfaced as a NullReferenceException part-way through the loop,
                // after haploidMean had already been modified.
                throw new ArgumentNullException("pseudoVariances", "pseudoVariances must be supplied together with medians.");
            }

            int nDimensions   = _nSamples;
            var variance      = new List<double>(nDimensions);
            var distributions = new List<MultivariateNegativeBinomial>();

            for (int dimension = 0; dimension < nDimensions; dimension++)
            {
                if (useSuppliedStatistics)
                {
                    haploidMean.Add(medians[dimension] / 2.0);
                    variance.Add(pseudoVariances[dimension]);
                }
                else
                {
                    // Only compute the data median when it is actually used; it is floored at 1.
                    double median = Math.Max(1d, CanvasCommon.Utilities.Median(data.Select(datapoint => datapoint[dimension])));
                    haploidMean.Add(median / 2.0);
                    variance.Add(CanvasCommon.Utilities.Variance(data.Select(x => x[dimension]).ToList()));
                }
            }

            // remove outliers
            double maxThreshold = haploidMean.Max() * nHiddenStates;
            RemoveOutliers(data, maxThreshold);

            // Largest remaining observation, rounded to int; throws if no data is left.
            int maxValue = data.Max(datapoint => Convert.ToInt32(datapoint.Max()));

            for (int copyNumber = 0; copyNumber < nHiddenStates; copyNumber++)
            {
                // If few hidden states, increase the last CN state by a diploid rather than a haploid increment.
                // The previous check (CN - 1 == nHiddenStates) could never be true inside this loop,
                // so the adjustment was silently skipped.
                bool isLastOfFewStates = nHiddenStates < 5 && copyNumber == nHiddenStates - 1;

                List<double> stateMean = isLastOfFewStates
                    ? haploidMean.Select(x => Math.Max(copyNumber, 0.5) * x + x).ToList()
                    : haploidMean.Select(x => Math.Max(copyNumber, 0.1) * x).ToList();

                // The variance is not scaled per state; each distribution receives its own copy
                // of the same per-dimension values.
                distributions.Add(new MultivariateNegativeBinomial(stateMean, variance.ToList(), maxValue + 10));
            }

            return distributions;
        }