/// <summary>
/// Builds one k-mer multiset out of every series in a collection.
/// </summary>
/// <param name="series">Series to consume; each is added to the same multiset in enumeration order.</param>
/// <param name="k">Value handed to the <c>MultisetKmer</c> constructor and to every add call.</param>
/// <returns>A freshly created multiset that has consumed all supplied series.</returns>
public static MultisetKmer<Ty> ToMultisetVarKmer<Ty>(this IEnumerable<DiscreteEventSeries<Ty>> series, int k)
{
    MultisetKmer<Ty> combined = new MultisetKmer<Ty>(k);

    foreach (DiscreteEventSeries<Ty> current in series)
    {
        combined.AddDiscreteEventSeriesVarKmer(current, k);
    }

    return combined;
}
/// <summary>
/// Builds a k-mer multiset from a single series.
/// </summary>
/// <param name="series">The series to consume.</param>
/// <param name="k">Value handed to the <c>MultisetKmer</c> constructor and to the add call.</param>
/// <returns>A freshly created multiset that has consumed <paramref name="series"/>.</returns>
public static MultisetKmer<Ty> ToMultisetVarKmer<Ty>(this DiscreteEventSeries<Ty> series, int k)
{
    var result = new MultisetKmer<Ty>(k);
    result.AddDiscreteEventSeriesVarKmer(series, k);
    return result;
}
/// <summary>
/// Merges a collection of k-mer multisets into one new multiset by adding every
/// (k-mer, count) entry of every input set.
/// </summary>
/// <param name="sets">Multisets to merge. Enumerated exactly once.</param>
/// <returns>A new multiset created with the first input's <c>maxK</c> and holding all entries.</returns>
/// <exception cref="System.InvalidOperationException">Thrown when <paramref name="sets"/> is empty.</exception>
public static MultisetKmer<A> MultisetKmerUnion<A>(this IEnumerable<MultisetKmer<A>> sets)
{
    // Materialize once: the sequence is needed both to read the first element's maxK
    // and to merge, and callers may pass a lazy query that is costly to re-run.
    List<MultisetKmer<A>> materialized = sets.ToList();
    if (materialized.Count == 0)
    {
        throw new System.InvalidOperationException("Cannot build the union of an empty collection of multisets.");
    }

    //TODO check they all have the same k?
    MultisetKmer<A> union = new MultisetKmer<A>(materialized[0].maxK);

    //TODO add number
    foreach (MultisetKmer<A> aset in materialized)
    {
        foreach (KeyValuePair<Kmer<A>, int> kvp in aset)
        {
            union.AddKmer(kvp.Key, kvp.Value);
        }
    }

    return union;
}
/*
 * private IEnumerable<TupleStruct<Kmer<Ty>, double>> ExtractUncharacteristicKmersForClass (int classIndex, MultisetKmer<Ty> thisClass, MultisetKmer<Ty> baseline)
 * {
 *     //This will only work with an enormous amount of data for low k.
 * }
 */

//Calculation:

/// <summary>
/// Synthesizes a feature vector for one item: each k-mer of the item that has an
/// entry in <c>kmersOntoIndex</c> writes its count, divided by
/// <c>Size(kmer.Count)</c> of the item's multiset, into the mapped slot.
/// </summary>
/// <param name="item">The series to turn into features.</param>
/// <returns>An array of length <c>kmerCount</c>; slots for k-mers not seen in the item stay 0.</returns>
//TODO: Enforce contract
public double[] SynthesizeFeatures(DiscreteEventSeries<Ty> item)
{
    double[] features = new double[kmerCount];
    MultisetKmer<Ty> itemKmers = item.ToMultisetVarKmer<Ty>(k);

    foreach (KeyValuePair<Kmer<Ty>, int> entry in itemKmers)
    {
        int slot;
        if (!kmersOntoIndex.TryGetValue(entry.Key, out slot))
        {
            // K-mer has no feature slot; it contributes nothing.
            continue;
        }

        // Presumably Size(n) is the total number of k-mers of length n in the item -- TODO confirm.
        double denominator = (double)itemKmers.Size((int)entry.Key.Count);
        features[slot] = entry.Value / denominator;
    }

    return features;
}
/// <summary>
/// Creates one <c>ItemVarKmerFrequencyRegressor</c> per distinct value of the
/// <c>ClassificationCriterion</c> label found in the database.
/// </summary>
/// <param name="data">Database whose <c>data</c> sequence supplies the training series.</param>
/// <returns>One regressor per class, each trained against a baseline built from all series.</returns>
protected override IEventSeriesScalarRegressor<Ty>[] CreateRegressors(DiscreteSeriesDatabase<Ty> data)
{
    //Split the series by whether they carry the classification label at all.
    Tuple<IEnumerable<DiscreteEventSeries<Ty>>, IEnumerable<DiscreteEventSeries<Ty>>> split =
        data.data.Partition(item => item.labels.ContainsKey(ClassificationCriterion));
    IEnumerable<DiscreteEventSeries<Ty>> unlabeled = split.Item2; //No value for the label these regressors are built for.
    IEnumerable<DiscreteEventSeries<Ty>> labeled = split.Item1;

    //One multiset per class, built in parallel. Materialized because it is consumed twice below.
    //NOTE(review): AsParallel is used without AsOrdered, so the array order may not follow group order -- confirm callers do not rely on it.
    Tuple<string, MultisetKmer<Ty>>[] perClass = labeled
        .GroupBy(item => item.labels[ClassificationCriterion])
        .AsParallel()
        .Select(grp => Tuple.Create(grp.Key, grp.ToMultisetVarKmer(k)))
        .ToArray();

    //Baseline = unlabeled series plus every class multiset, merged together.
    IEnumerable<MultisetKmer<Ty>> classMultisets = perClass.Select(entry => entry.Item2);
    MultisetKmer<Ty> baseline = unlabeled.ToMultisetVarKmer(k).Cons(classMultisets).MultisetKmerUnion();

    //Train the regressors (in parallel), each against the shared baseline.
    return perClass
        .AsParallel()
        .Select(entry => new ItemVarKmerFrequencyRegressor<Ty>(entry.Item1, minSignificantCount, smoothingAmount, featuresToUse, k, baseline, entry.Item2))
        .ToArray();
}
/*
 * private IEnumerable<TupleStruct<Kmer<Ty>, double>> ExtractUncharacteristicKmersForClass (int classIndex, MultisetKmer<Ty> thisClass, MultisetKmer<Ty> baseline)
 * {
 *     //This will only work with an enormous amount of data for low k.
 * }
 */

//Calculation:

/// <summary>
/// Synthesizes a per-class score vector for one item. For every k-mer of the item
/// found in <c>learnedCharacteristicKmers</c>, each associated (class index, weight)
/// pair adds <c>occurrences * weight</c> to that class's slot.
/// </summary>
/// <param name="item">The series to score.</param>
/// <returns>An array of length <c>classCount</c> holding the accumulated scores.</returns>
//TODO: Enforce contract
public double[] SynthesizeFeatures(DiscreteEventSeries<Ty> item)
{
    double[] scores = new double[classCount];
    MultisetKmer<Ty> itemKmers = item.ToMultisetVarKmer<Ty>(k);

    foreach (KeyValuePair<Kmer<Ty>, int> occurrence in itemKmers)
    {
        Dictionary<int, double> weightsByClass;
        if (!learnedCharacteristicKmers.TryGetValue(occurrence.Key, out weightsByClass))
        {
            // Not a learned characteristic k-mer; skip it.
            continue;
        }

        int timesSeen = occurrence.Value;
        foreach (KeyValuePair<int, double> classWeight in weightsByClass)
        {
            scores[classWeight.Key] += timesSeen * classWeight.Value;
        }
    }

    return scores;
}
/// <summary>
/// Builds a raw ratio model for one class and hands it to <c>regressor.finalizeModel</c>.
/// A k-mer is included when its count in <paramref name="thisClass"/> exceeds
/// <c>regressor.minSignificantCount</c> and its fraction in the class is greater than its
/// Laplace-smoothed fraction in the baseline; the stored value is the ratio of the two.
/// </summary>
/// <param name="baselineClass">Multiset the class is compared against.</param>
/// <param name="thisClass">Multiset of the class being modelled.</param>
public void TrainModelRatios(MultisetKmer<A> baselineClass, MultisetKmer<A> thisClass)
{
    List<KeyValuePair<Kmer<A>, double>> rawModel = new List<KeyValuePair<Kmer<A>, double>>();
    int totalCount = 0;

    foreach (Kmer<A> key in thisClass.Keys)
    {
        int thisCount = thisClass.getCount(key);

        // The total covers every k-mer, including those below the significance threshold.
        totalCount += thisCount;

        if (thisCount > regressor.minSignificantCount)
        {
            double thisFrac = thisClass.GetKeyFrac(key);
            double baseFrac = baselineClass.GetKeyFracLaplace(key, regressor.smoothingAmount);

            if (thisFrac > baseFrac)
            {
                // List<T>.Add takes a single element, so the pair is constructed explicitly.
                rawModel.Add(new KeyValuePair<Kmer<A>, double>(key, thisFrac / baseFrac));
            }
        }
    }

    regressor.finalizeModel(rawModel, totalCount);
}
//
//Kmer variadic k multiset
//

/// <summary>
/// Feeds the raw event array of a series into a multiset via
/// <c>ConsumeEventSeriesKmer</c>.
/// </summary>
/// <param name="multiset">Multiset that receives the events.</param>
/// <param name="series">Series whose <c>data</c> array is consumed.</param>
/// <param name="k">Not read by this method; only the multiset and the series data are used.</param>
public static void AddDiscreteEventSeriesVarKmer<Ty>(this MultisetKmer<Ty> multiset, DiscreteEventSeries<Ty> series, int k)
{
    Ty[] events = series.data;
    multiset.ConsumeEventSeriesKmer(events);
}
/// <summary>
/// Extracts the characteristic k-mers of a class: k-mers whose count in the class is
/// strictly greater than <c>minKmerCount</c> and whose class frequency is greater than
/// their Laplace-smoothed baseline frequency. Each is weighted by
/// <c>classFrequency / baselineFrequency - 1</c>.
/// </summary>
/// <param name="classIndex">Index of the class; not read by the method body.</param>
/// <param name="thisClass">Multiset of the class being examined.</param>
/// <param name="baseline">Multiset the class is compared against.</param>
/// <returns>At most <c>kmersToTake</c> (k-mer, weight) tuples, ordered by descending absolute weight.</returns>
private IEnumerable<TupleStruct<Kmer<Ty>, double>> ExtractCharacteristicKmersForClass(int classIndex, MultisetKmer<Ty> thisClass, MultisetKmer<Ty> baseline)
{
    var candidates = new List<TupleStruct<Kmer<Ty>, double>>();

    foreach (KeyValuePair<Kmer<Ty>, int> entry in thisClass)
    {
        Kmer<Ty> kmer = entry.Key;
        int occurrences = entry.Value;

        if (occurrences > minKmerCount)
        {
            double classFreq = occurrences / (double)thisClass.Size(kmer.Count);
            double baselineFreq = baseline.GetKeyFracLaplace(kmer, smoothingAmt);

            //TODO: Advanced logic.
            if (classFreq > baselineFreq)
            {
                double weight = classFreq / baselineFreq - 1;
                candidates.Add(new TupleStruct<Kmer<Ty>, double>(kmer, weight));
            }
        }
    }

    var ranked = candidates.OrderByDescending(candidate => Math.Abs(candidate.Item2));
    return ranked.Take((int)kmersToTake);
}
/// <summary>
/// Builds a characteristic set for a class from the subtractive difference of key
/// fractions: every k-mer whose count in <paramref name="thisClass"/> exceeds
/// <paramref name="countCutoff"/> and whose fraction in the class is greater than its
/// fraction in the baseline is stored with the value <c>classFraction - baselineFraction</c>.
/// </summary>
/// <param name="name">Name given to the new set.</param>
/// <param name="baselineClass">Multiset the class is compared against.</param>
/// <param name="thisClass">Multiset of the class being characterized.</param>
/// <param name="countCutoff">A k-mer's count must be strictly greater than this to be considered.</param>
/// <returns>The new set, created with the smaller of the two multisets' <c>maxK</c>.</returns>
public static ClassCharacteristicSetKmer<A> BuildSubtractiveDifference(string name, MultisetKmer<A> baselineClass, MultisetKmer<A> thisClass, uint countCutoff)
{
    var sharedMaxK = Math.Min(baselineClass.maxK, thisClass.maxK);
    ClassCharacteristicSetKmer<A> result = new ClassCharacteristicSetKmer<A>(name, sharedMaxK);

    //TODO statistically significant?
    //TODO difference amount?
    foreach (Kmer<A> kmer in thisClass.Keys)
    {
        if (thisClass.getCount(kmer) <= countCutoff)
        {
            continue;
        }

        double classFrac = thisClass.GetKeyFrac(kmer);
        double baselineFrac = baselineClass.GetKeyFrac(kmer);

        if (classFrac > baselineFrac)
        {
            result.Add(kmer, classFrac - baselineFrac);
        }
    }

    //TODO select top x?
    //double[] function?
    return result;
}
/// <summary>
/// Creates a regressor and immediately trains it: the configuration arguments are
/// forwarded to the five-argument constructor, then <c>TrainModelRatios</c> is run
/// on the two multisets.
/// </summary>
/// <param name="name">Forwarded to the five-argument constructor.</param>
/// <param name="minSignificantCount">Forwarded to the five-argument constructor.</param>
/// <param name="smoothingAmount">Forwarded to the five-argument constructor.</param>
/// <param name="featuresToUse">Forwarded to the five-argument constructor.</param>
/// <param name="k">Forwarded to the five-argument constructor.</param>
/// <param name="baselineClass">Baseline multiset passed to <c>TrainModelRatios</c>.</param>
/// <param name="thisClass">Class multiset passed to <c>TrainModelRatios</c>.</param>
public ItemVarKmerFrequencyRegressor(
    string name,
    int minSignificantCount,
    int smoothingAmount,
    int featuresToUse,
    int k,
    MultisetKmer<A> baselineClass,
    MultisetKmer<A> thisClass)
    : this(name, minSignificantCount, smoothingAmount, featuresToUse, k)
{
    this.TrainModelRatios(baselineClass, thisClass);
}