/// <summary>
/// Create a new LocusAggregate representing data from a single locus across all samples.
/// </summary>
/// <param name="locus_idx">
/// Global locus index. It is converted to a buffer-relative index by subtracting
/// <c>this.relative_offset</c> before the per-sample buffers are indexed.
/// </param>
/// <returns>
/// A LocusAggregate in which every per-sample list receives one entry for each
/// sample buffer in <c>this.buffer</c>, in buffer order.
/// </returns>
public LocusAggregate __call__(int locus_idx)
{
    LocusAggregate locus_aggregate = new LocusAggregate();
    int relative_locus_idx = locus_idx - this.relative_offset;

    foreach (LocusAggregate sample_buffer in this.buffer)
    {
        locus_aggregate.genotypes.Add(sample_buffer.genotypes[relative_locus_idx]);

        // Scores are loaded into the per-sample buffers together with the other
        // fields, so they are carried into the per-locus view as well.
        // NOTE(review): the null checks are defensive because the LocusAggregate
        // definition is not visible from this method - confirm that `scores` is
        // initialised by the constructor like the other lists.
        if (locus_aggregate.scores != null && sample_buffer.scores != null)
        {
            locus_aggregate.scores.Add(sample_buffer.scores[relative_locus_idx]);
        }

        locus_aggregate.b_allele_freqs.Add(sample_buffer.b_allele_freqs[relative_locus_idx]);
        locus_aggregate.log_r_ratios.Add(sample_buffer.log_r_ratios[relative_locus_idx]);
        locus_aggregate.x_intensities.Add(sample_buffer.x_intensities[relative_locus_idx]);
        locus_aggregate.y_intensities.Add(sample_buffer.y_intensities[relative_locus_idx]);
        locus_aggregate.transforms.Add(sample_buffer.transforms[relative_locus_idx]);
    }

    return locus_aggregate;
}
/// <summary>
/// Load the buffered window of per-locus data for a single sample.
/// </summary>
/// <param name="sample_data">Sample whose data is read.</param>
/// <returns>
/// A LocusAggregate whose lists hold this sample's values for the loci window that
/// starts at <c>this.locus_offset</c> and requests <c>this.loci_buffer_size</c> loci.
/// B allele frequencies and log R ratios are filled with nulls when
/// <c>sample_data.version</c> is below 4.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="sample_data"/> is null.</exception>
public LocusAggregate __call__(GenotypeCalls sample_data)
{
    if (sample_data == null)
    {
        throw new ArgumentNullException(nameof(sample_data));
    }

    int locus_offset = this.locus_offset;
    int loci_buffer_size = this.loci_buffer_size;

    LocusAggregate locus_aggregate = new LocusAggregate();

    // Enumerable.Cast<int>() unboxes each element, and a boxed byte cannot be
    // unboxed to int (InvalidCastException); convert numerically instead.
    locus_aggregate.genotypes = sample_data.get_genotypes(locus_offset, loci_buffer_size)
        .Select(genotype => (int)genotype)
        .ToList();

    // NOTE(review): the Cast<float?>() calls below only succeed when the element
    // type is exactly float (or float?) - confirm the return types of these
    // getters, in particular the raw intensity ones.
    locus_aggregate.scores = sample_data.get_genotype_scores(locus_offset, loci_buffer_size)
        .Cast<float?>()
        .ToList();

    if (sample_data.version >= 4)
    {
        locus_aggregate.b_allele_freqs = sample_data.get_ballele_freqs(locus_offset, loci_buffer_size)
            .Cast<float?>()
            .ToList();
        locus_aggregate.log_r_ratios = sample_data.get_logr_ratios(locus_offset, loci_buffer_size)
            .Cast<float?>()
            .ToList();
    }
    else
    {
        // These two fields are not read for versions below 4; keep the lists
        // aligned with the other fields by filling them with nulls.
        locus_aggregate.b_allele_freqs = Enumerable.Repeat<float?>(null, loci_buffer_size).ToList();
        locus_aggregate.log_r_ratios = Enumerable.Repeat<float?>(null, loci_buffer_size).ToList();
    }

    locus_aggregate.x_intensities = sample_data.get_raw_x_intensities(locus_offset, loci_buffer_size)
        .Cast<float?>()
        .ToList();
    locus_aggregate.y_intensities = sample_data.get_raw_y_intensities(locus_offset, loci_buffer_size)
        .Cast<float?>()
        .ToList();

    // Each entry of normalization_lookups in the window is used as an index into
    // this sample's list of normalization transforms.
    List<NormalizationTransform> transforms = sample_data.get_normalization_transforms();
    locus_aggregate.transforms = this.normalization_lookups
        .Skip(locus_offset)
        .Take(loci_buffer_size)
        .Select(lookup => transforms[lookup])
        .ToList();

    return locus_aggregate;
}
/// <summary>
/// Generate LocusAggregate information from a collection of samples. Calls the callback
/// with a LocusAggregate for each specified locus index and yields the result.
/// </summary>
/// <param name="samples">The samples to aggregate for each locus.</param>
/// <param name="loci">The loci indices of interest (must be sorted in ascending order).</param>
/// <param name="callback">
/// Function that takes a LocusAggregate and returns a result. Each result is cast to
/// <c>List&lt;object&gt;</c> when it is yielded, so the callback must return that type.
/// </param>
/// <param name="normalization_lookups">Forwarded unchanged to <c>LocusAggregate.load_buffer</c>.</param>
/// <param name="bin_size">
/// Determines how much data is loaded into memory at one time. A larger bin size uses
/// more memory and (generally) runs faster. It is divided by the number of samples to
/// obtain the number of loci loaded per batch.
/// </param>
/// <returns>A lazily evaluated sequence of callback results, one per locus.</returns>
public static IEnumerable<List<object>> aggregate_samples(List<GenotypeCalls> samples, IEnumerable<int> loci, Func<LocusAggregate, object> callback, List<int> normalization_lookups, int bin_size = 100000000)
{
    // Figure out how many loci to load at once. Integer arithmetic avoids float
    // rounding; the divisor is clamped so an empty sample list cannot divide by
    // zero, and the result is clamped so the +1 cannot overflow.
    int sample_count = Math.Max(samples.Count, 1);
    int loci_batch_size = (int)Math.Min((long)bin_size / sample_count + 1, int.MaxValue);

    foreach (List<int> loci_group in LocusAggregate.group_loci(loci.ToList(), loci_batch_size))
    {
        if (loci_group.Count == 0)
        {
            // Nothing to load for an empty group; indexing it below would throw.
            continue;
        }

        int first_locus = loci_group[0];
        // C# lists do not support negative indices; take the last element explicitly.
        int last_locus = loci_group[loci_group.Count - 1];

        // Read in the buffer covering the whole span of this group of loci.
        List<LocusAggregate> buffer = LocusAggregate.load_buffer(
            samples, first_locus, last_locus - first_locus + 1, normalization_lookups);

        // Generate the corresponding locus aggregates; one generator serves the whole group.
        GenerateLocusAggregate generator = new GenerateLocusAggregate(buffer, first_locus);
        List<LocusAggregate> aggregates = loci_group
            .Select(locus_idx => generator.__call__(locus_idx))
            .ToList();

        // Run the callback over the whole group before yielding anything from it.
        List<object> results = aggregates.Select(aggregate => callback(aggregate)).ToList();
        foreach (object result in results)
        {
            yield return (List<object>)result;
        }
    }
}