Пример #1
0
        /// <summary>
        /// Create a new LocusAggregate representing data from a single locus across all samples.
        /// </summary>
        /// <param name="locus_idx">
        /// Global locus index. The relative offset held by this instance is subtracted from it
        /// before indexing into the per-sample buffers.
        /// </param>
        /// <returns>
        /// Data for a single locus aggregated across all samples; each list receives one entry per
        /// element of <c>this.buffer</c>, in buffer order.
        /// </returns>
        public LocusAggregate __call__(int locus_idx)
        {
            LocusAggregate locus_aggregate = new LocusAggregate();

            // A global index equal to relative_offset maps to position 0 of every sample buffer.
            int relative_locus_idx = locus_idx - this.relative_offset;

            foreach (LocusAggregate sample_buffer in this.buffer)
            {
                locus_aggregate.genotypes.Add(sample_buffer.genotypes[relative_locus_idx]);

                // Genotype scores are a LocusAggregate field like the others and were previously
                // dropped here, leaving the per-locus aggregate without them.
                // NOTE(review): assumes the LocusAggregate constructor initializes `scores` the same
                // way it initializes the other lists - confirm.
                locus_aggregate.scores.Add(sample_buffer.scores[relative_locus_idx]);

                locus_aggregate.b_allele_freqs.Add(sample_buffer.b_allele_freqs[relative_locus_idx]);
                locus_aggregate.log_r_ratios.Add(sample_buffer.log_r_ratios[relative_locus_idx]);
                locus_aggregate.x_intensities.Add(sample_buffer.x_intensities[relative_locus_idx]);
                locus_aggregate.y_intensities.Add(sample_buffer.y_intensities[relative_locus_idx]);
                locus_aggregate.transforms.Add(sample_buffer.transforms[relative_locus_idx]);
            }

            return locus_aggregate;
        }
Пример #2
0
        /// <summary>
        /// Build a LocusAggregate holding one sample's data for the window of loci described by
        /// this instance's <c>locus_offset</c> and <c>loci_buffer_size</c>.
        /// </summary>
        /// <param name="sample_data">Genotype calls of the sample to read the window from.</param>
        /// <returns>
        /// A LocusAggregate whose genotypes, scores, B allele frequencies, log R ratios, raw X/Y
        /// intensities and normalization transforms are filled from <paramref name="sample_data"/>.
        /// B allele frequencies and log R ratios are lists of nulls when the sample's version is
        /// below 4.
        /// </returns>
        public LocusAggregate __call__(GenotypeCalls sample_data)
        {
            int first_locus = this.locus_offset;
            int locus_count = this.loci_buffer_size;
            LocusAggregate locus_aggregate = new LocusAggregate();

            // Enumerable.Cast<int>() unboxes each element, and a boxed byte cannot be unboxed as an
            // int (InvalidCastException at enumeration time). Convert numerically instead.
            locus_aggregate.genotypes = new List <byte>(sample_data.get_genotypes(first_locus, locus_count))
                                        .Select(genotype => (int)genotype)
                                        .ToList();

            locus_aggregate.scores = sample_data.get_genotype_scores(first_locus, locus_count)
                                     .Cast <float?>()
                                     .ToList();

            if (sample_data.version >= 4)
            {
                locus_aggregate.b_allele_freqs = sample_data.get_ballele_freqs(first_locus, locus_count).Cast <float?>().ToList();
                locus_aggregate.log_r_ratios   = sample_data.get_logr_ratios(first_locus, locus_count).Cast <float?>().ToList();
            }
            else
            {
                // Samples below version 4 are not asked for these values; keep the lists aligned
                // with the other per-locus lists by filling them with nulls.
                locus_aggregate.b_allele_freqs = Enumerable.Repeat <float?>(null, locus_count).ToList();
                locus_aggregate.log_r_ratios   = Enumerable.Repeat <float?>(null, locus_count).ToList();
            }

            // NOTE(review): Cast<float?>() only succeeds when the getters yield float elements;
            // the element types are not visible from here - confirm for the raw intensities.
            locus_aggregate.x_intensities = sample_data.get_raw_x_intensities(first_locus, locus_count).Cast <float?>().ToList();
            locus_aggregate.y_intensities = sample_data.get_raw_y_intensities(first_locus, locus_count).Cast <float?>().ToList();

            // Each lookup entry in the window is used as an index into this sample's transform list.
            List <NormalizationTransform> transforms = sample_data.get_normalization_transforms();

            locus_aggregate.transforms = this.normalization_lookups
                                         .Skip(first_locus)
                                         .Take(locus_count)
                                         .Select(lookup => transforms[lookup])
                                         .ToList();
            return locus_aggregate;
        }
Пример #3
0
        /// <summary>
        /// Generate LocusAggregate information from a collection of samples. Calls the callback
        /// for a LocusAggregate object for each specified locus index and yields the result.
        /// </summary>
        /// <param name="samples">The samples to aggregate for each locus.</param>
        /// <param name="loci">The loci indices of interest (must be sorted in ascending order).</param>
        /// <param name="callback">
        /// A function that takes a LocusAggregate and returns a new result. Each result is cast to
        /// a list of objects before being yielded, so a result of any other type raises
        /// InvalidCastException during enumeration.
        /// </param>
        /// <param name="normalization_lookups">Passed through unchanged to LocusAggregate.load_buffer.</param>
        /// <param name="bin_size">
        /// Used to determine how much data will be loaded into memory at one time. A larger bin size
        /// uses more memory and (generally) runs faster. It is divided by the number of samples to
        /// obtain the number of loci loaded per batch.
        /// </param>
        /// <returns>The callback results, one per locus, in the order the loci were supplied.</returns>
        public static IEnumerable <List <object> > aggregate_samples(List <GenotypeCalls> samples, IEnumerable <int> loci, Func <LocusAggregate, object> callback,
                                                                     List <int> normalization_lookups, int bin_size = 100000000)
        {
            // Figure out how many loci to load at once. Integer division avoids the single-precision
            // rounding of the previous float division, and an empty sample list is treated as one
            // sample so the division can never produce infinity.
            int sample_count    = samples.Count > 0 ? samples.Count : 1;
            int loci_batch_size = bin_size / sample_count + 1;

            foreach (List <int> loci_group in LocusAggregate.group_loci(loci.ToList(), loci_batch_size))
            {
                if (loci_group.Count == 0)
                {
                    // Nothing to load for an empty group; indexing [0] below would throw.
                    continue;
                }

                // C# lists do not support negative indices: the last element is at Count - 1.
                int first_locus = loci_group[0];
                int last_locus  = loci_group[loci_group.Count - 1];

                // Read in the buffer for this group of loci. Because the loci are sorted, the
                // buffer spans every index from the first to the last locus of the group.
                List <LocusAggregate> buffer = LocusAggregate.load_buffer(
                    samples, first_locus, last_locus - first_locus + 1, normalization_lookups);

                // One generator per group is enough; it only depends on the buffer and its offset.
                GenerateLocusAggregate generate_aggregate = new GenerateLocusAggregate(buffer, first_locus);

                // Generate corresponding locus aggregates.
                List <LocusAggregate> aggregates = loci_group.Select(locus => generate_aggregate.__call__(locus)).ToList();

                // Run every callback of the group before yielding its results.
                foreach (object result in aggregates.Select(aggregate => callback(aggregate)).ToList())
                {
                    yield return (List <object>)result;
                }
            }
        }