// Builds a ratio model for thisClass relative to baselineClass and hands it to the regressor.
//
// For every key in thisClass whose count is strictly greater than
// regressor.minSignificantCount, the key's fraction in thisClass is compared with its
// fraction in baselineClass (obtained via GetKeyFracLaplace with regressor.smoothingAmount;
// presumably a Laplace-smoothed estimate - confirm against MultisetKmer). Keys that are
// relatively more frequent in thisClass are recorded together with the ratio
// thisFrac / baseFrac.
//
// baselineClass: multiset supplying the reference fractions.
// thisClass:     multiset whose keys are scanned and scored.
//
// The resulting list and the total count over ALL keys of thisClass (including the ones
// filtered out) are passed to regressor.finalizeModel.
public void TrainModelRatios(MultisetKmer<A> baselineClass, MultisetKmer<A> thisClass)
{
    List<KeyValuePair<Kmer<A>, double>> rawModel = new List<KeyValuePair<Kmer<A>, double>>();
    int totalCount = 0;

    foreach (Kmer<A> key in thisClass.Keys)
    {
        int thisCount = thisClass.getCount(key);

        // The total is accumulated before any filtering so it covers every key.
        totalCount += thisCount;

        if (thisCount > regressor.minSignificantCount)
        {
            double thisFrac = thisClass.GetKeyFrac(key);
            double baseFrac = baselineClass.GetKeyFracLaplace(key, regressor.smoothingAmount);

            if (thisFrac > baseFrac)
            {
                // List<T>.Add accepts a single element, so the pair is constructed explicitly
                // instead of being passed as two separate arguments.
                rawModel.Add(new KeyValuePair<Kmer<A>, double>(key, thisFrac / baseFrac));
            }
        }
    }

    regressor.finalizeModel(rawModel, totalCount);
}
// Extracts the characteristic kmers of a class: kmers that occur strictly more than
// minKmerCount times in thisClass and are relatively more frequent there than in baseline.
//
// classIndex: index of the class being processed; not read by the current implementation.
// thisClass:  kmer multiset of the class, enumerated as (kmer, count) pairs.
// baseline:   kmer multiset supplying the reference frequency via GetKeyFracLaplace
//             (presumably Laplace-smoothed with smoothingAmt - confirm against MultisetKmer).
//
// Returns (kmer, weight) tuples where weight = thisFreq / baseFreq - 1, ordered by
// descending absolute weight and limited to the first (int)kmersToTake entries.
// The ordering and truncation are LINQ operators, so they are evaluated lazily by the caller.
private IEnumerable<TupleStruct<Kmer<Ty>, double>> ExtractCharacteristicKmersForClass(int classIndex, MultisetKmer<Ty> thisClass, MultisetKmer<Ty> baseline)
{
    List<TupleStruct<Kmer<Ty>, double>> candidates = new List<TupleStruct<Kmer<Ty>, double>>();

    foreach (KeyValuePair<Kmer<Ty>, int> entry in thisClass)
    {
        if (entry.Value > minKmerCount)
        {
            // Frequency of this kmer among the kmers of thisClass, using the size that
            // thisClass reports for the kmer's Count.
            double classFrequency = entry.Value / (double)thisClass.Size(entry.Key.Count);
            double baselineFrequency = baseline.GetKeyFracLaplace(entry.Key, smoothingAmt);

            //TODO: Advanced logic.
            if (classFrequency > baselineFrequency)
            {
                // Relative enrichment over the baseline; 0 would mean "equally frequent".
                double weight = classFrequency / baselineFrequency - 1;
                TupleStruct<Kmer<Ty>, double> scored = new TupleStruct<Kmer<Ty>, double>(entry.Key, weight);
                candidates.Add(scored);
            }
        }
    }

    // Strongest weights first, then keep only the requested number of kmers.
    return candidates
        .OrderByDescending(scored => Math.Abs(scored.Item2))
        .Take((int)kmersToTake);
}