예제 #1
0
        /// <summary>
        /// Builds a single k-mer multiset out of every series in a collection.
        /// </summary>
        /// <param name="series">The series to add; each one is passed to <c>AddDiscreteEventSeriesVarKmer</c>.</param>
        /// <param name="k">Value given to the <c>MultisetKmer</c> constructor and forwarded with every series.</param>
        /// <returns>The newly created multiset after all series have been added.</returns>
        public static MultisetKmer<Ty> ToMultisetVarKmer<Ty>(this IEnumerable<DiscreteEventSeries<Ty>> series, int k)
        {
            var accumulated = new MultisetKmer<Ty>(k);

            // One shared multiset receives the contents of every series.
            series.ForEach(current =>
            {
                accumulated.AddDiscreteEventSeriesVarKmer(current, k);
            });

            return accumulated;
        }
예제 #2
0
        /// <summary>
        /// Builds a k-mer multiset from a single series.
        /// </summary>
        /// <param name="series">The series passed to <c>AddDiscreteEventSeriesVarKmer</c>.</param>
        /// <param name="k">Value given to the <c>MultisetKmer</c> constructor and forwarded with the series.</param>
        /// <returns>The newly created multiset after the series has been added.</returns>
        public static MultisetKmer<Ty> ToMultisetVarKmer<Ty>(this DiscreteEventSeries<Ty> series, int k)
        {
            var result = new MultisetKmer<Ty>(k);
            result.AddDiscreteEventSeriesVarKmer(series, k);
            return result;
        }
예제 #3
0
        /// <summary>
        /// Merges several k-mer multisets into one new multiset by re-adding every
        /// (k-mer, count) pair of every input through <c>AddKmer</c>.
        /// </summary>
        /// <param name="sets">The multisets to merge. The sequence is enumerated exactly once.</param>
        /// <returns>A new multiset constructed with the <c>maxK</c> of the first input.</returns>
        /// <exception cref="InvalidOperationException">Thrown by <c>First()</c> when <paramref name="sets"/> is empty.</exception>
        public static MultisetKmer<A> MultisetKmerUnion<A>(this IEnumerable<MultisetKmer<A>> sets)
        {
            // Materialize once: the inputs are needed both to read the first maxK and to
            // merge, and a lazy sequence would otherwise be evaluated twice.
            IEnumerable<MultisetKmer<A>> materialized = sets.ToArray();

            //TODO check they all have the same k?
            MultisetKmer<A> union = new MultisetKmer<A>(materialized.First().maxK);

            //TODO add number
            materialized.ForEach(aset => aset.ForEach(kvp => union.AddKmer(kvp.Key, kvp.Value)));
            return union;
        }
예제 #4
0
        /*
         * private IEnumerable<TupleStruct<Kmer<Ty>, double>> ExtractUncharacteristicKmersForClass (int classIndex, MultisetKmer<Ty> thisClass, MultisetKmer<Ty> baseline)
         * {
         *      //This will only work with an enormous amount of data for low k.
         * }
         */


        //Calculation:

        //Synthesize features for an item.
        //TODO: Enforce contract
        /// <summary>
        /// Produces a feature vector of length <c>kmerCount</c> for one series.
        /// For each k-mer of the series that has an entry in <c>kmersOntoIndex</c>, the slot at
        /// that index receives the k-mer's count divided by <c>Size(...)</c> of the series'
        /// multiset for the k-mer's length; all other slots stay 0.
        /// </summary>
        /// <param name="item">The series to turn into features.</param>
        /// <returns>The feature vector.</returns>
        public double[] SynthesizeFeatures(DiscreteEventSeries<Ty> item)
        {
            double[] features = new double[kmerCount];
            MultisetKmer<Ty> itemKmers = item.ToMultisetVarKmer<Ty>(k);

            foreach (KeyValuePair<Kmer<Ty>, int> entry in itemKmers)
            {
                int slot;
                if (!kmersOntoIndex.TryGetValue(entry.Key, out slot))
                {
                    // This k-mer has no feature slot; skip it.
                    continue;
                }

                double sizeForLength = (double)itemKmers.Size((int)entry.Key.Count);
                features[slot] = entry.Value / sizeForLength;
            }

            return features;
        }
예제 #5
0
        /// <summary>
        /// Creates one regressor per distinct value of the <c>ClassificationCriterion</c> label.
        /// Series carrying that label are grouped by its value and turned into per-class
        /// multisets; the baseline is the union of those multisets and the multiset of all
        /// series that lack the label.
        /// </summary>
        /// <param name="data">Database whose <c>data</c> collection supplies the series.</param>
        /// <returns>One <c>ItemVarKmerFrequencyRegressor</c> per class; built through PLINQ without an explicit ordering.</returns>
        protected override IEventSeriesScalarRegressor<Ty>[] CreateRegressors(DiscreteSeriesDatabase<Ty> data)
        {
            // Item1: series that carry the criterion label, Item2: series that do not.
            Tuple<IEnumerable<DiscreteEventSeries<Ty>>, IEnumerable<DiscreteEventSeries<Ty>>> split =
                data.data.Partition(item => item.labels.ContainsKey(ClassificationCriterion));
            IEnumerable<DiscreteEventSeries<Ty>> labeled = split.Item1;
            IEnumerable<DiscreteEventSeries<Ty>> unlabeled = split.Item2;

            // Per-class multisets, built in parallel. Kept as an array because it is consumed twice below.
            Tuple<string, MultisetKmer<Ty>>[] perClass = labeled
                .GroupBy(item => item.labels[ClassificationCriterion])
                .AsParallel()
                .Select(grp => new Tuple<string, MultisetKmer<Ty>>(grp.Key, grp.ToMultisetVarKmer(k)))
                .ToArray();

            // Baseline over all data: the unlabeled multiset together with every class multiset.
            MultisetKmer<Ty> unlabeledSet = unlabeled.ToMultisetVarKmer(k);
            var everySet = unlabeledSet.Cons(perClass.Select(pair => pair.Item2));
            MultisetKmer<Ty> baseline = everySet.MultisetKmerUnion();

            // One regressor per class, constructed in parallel.
            return perClass
                .AsParallel()
                .Select(pair => new ItemVarKmerFrequencyRegressor<Ty>(pair.Item1, minSignificantCount, smoothingAmount, featuresToUse, k, baseline, pair.Item2))
                .ToArray();
        }
예제 #6
0
        /*
         * private IEnumerable<TupleStruct<Kmer<Ty>, double>> ExtractUncharacteristicKmersForClass (int classIndex, MultisetKmer<Ty> thisClass, MultisetKmer<Ty> baseline)
         * {
         *      //This will only work with an enormous amount of data for low k.
         * }
         */


        //Calculation:

        //Synthesize features for an item.
        //TODO: Enforce contract
        /// <summary>
        /// Produces a score vector of length <c>classCount</c> for one series.
        /// Every k-mer of the series that appears in <c>learnedCharacteristicKmers</c> adds
        /// (occurrence count in the series) * (stored value) to the slot of each class
        /// listed for that k-mer.
        /// </summary>
        /// <param name="item">The series to turn into features.</param>
        /// <returns>The accumulated per-class scores.</returns>
        public double[] SynthesizeFeatures(DiscreteEventSeries<Ty> item)
        {
            double[] scores = new double[classCount];
            MultisetKmer<Ty> itemKmers = item.ToMultisetVarKmer<Ty>(k);

            foreach (KeyValuePair<Kmer<Ty>, int> occurrence in itemKmers)
            {
                Dictionary<int, double> weightsByClass;
                if (!learnedCharacteristicKmers.TryGetValue(occurrence.Key, out weightsByClass))
                {
                    // Not a learned k-mer: contributes nothing.
                    continue;
                }

                int times = occurrence.Value;
                foreach (KeyValuePair<int, double> weight in weightsByClass)
                {
                    scores[weight.Key] += times * weight.Value;
                }
            }

            return scores;
        }
예제 #7
0
        /// <summary>
        /// Collects, for every k-mer of <paramref name="thisClass"/> whose count exceeds
        /// <c>regressor.minSignificantCount</c> and whose fraction in the class is larger than its
        /// <c>GetKeyFracLaplace</c> fraction in the baseline, the ratio of the two fractions, and
        /// hands the collected pairs to <c>regressor.finalizeModel</c>.
        /// </summary>
        /// <param name="baselineClass">Multiset providing the baseline fraction.</param>
        /// <param name="thisClass">Multiset of the class being modelled.</param>
        public void TrainModelRatios(MultisetKmer<A> baselineClass, MultisetKmer<A> thisClass)
        {
            int observedTotal = 0;
            var ratios = new List<KeyValuePair<Kmer<A>, double>>();

            foreach (Kmer<A> kmer in thisClass.Keys)
            {
                int occurrences = thisClass.getCount(kmer);

                // The total covers every key, including those filtered out below.
                observedTotal += occurrences;

                if (!(occurrences > regressor.minSignificantCount))
                {
                    continue;
                }

                double classShare = thisClass.GetKeyFrac(kmer);
                double baseShare = baselineClass.GetKeyFracLaplace(kmer, regressor.smoothingAmount);
                if (classShare > baseShare)
                {
                    ratios.Add(kmer, classShare / baseShare);
                }
            }

            regressor.finalizeModel(ratios, observedTotal);
        }
예제 #8
0
 //
 //Kmer variadic k multiset
 //
 /// <summary>
 /// Feeds the raw event array of a series into the multiset via <c>ConsumeEventSeriesKmer</c>.
 /// </summary>
 /// <param name="multiset">The multiset that consumes the events.</param>
 /// <param name="series">The series whose <c>data</c> array is consumed.</param>
 /// <param name="k">Not referenced in this method body.</param>
 public static void AddDiscreteEventSeriesVarKmer<Ty>(this MultisetKmer<Ty> multiset, DiscreteEventSeries<Ty> series, int k)
 {
     Ty[] events = series.data;

     multiset.ConsumeEventSeriesKmer(events);
 }
예제 #9
0
        //Extract characteristic kmers (top n more common than baseline that occur at least q times).
        /// <summary>
        /// Selects the k-mers of a class that are more frequent there than in the baseline.
        /// A k-mer qualifies when its count exceeds <c>minKmerCount</c> and its class frequency
        /// is above the baseline's <c>GetKeyFracLaplace</c> value; its weight is
        /// (class frequency / baseline frequency) - 1.
        /// </summary>
        /// <param name="classIndex">Not referenced in this method body.</param>
        /// <param name="thisClass">Multiset of the class under inspection.</param>
        /// <param name="baseline">Multiset providing the baseline frequency.</param>
        /// <returns>At most <c>kmersToTake</c> (k-mer, weight) pairs, ordered by descending absolute weight.</returns>
        private IEnumerable<TupleStruct<Kmer<Ty>, double>> ExtractCharacteristicKmersForClass(int classIndex, MultisetKmer<Ty> thisClass, MultisetKmer<Ty> baseline)
        {
            var candidates = new List<TupleStruct<Kmer<Ty>, double>>();

            foreach (KeyValuePair<Kmer<Ty>, int> entry in thisClass)
            {
                if (!(entry.Value > minKmerCount))
                {
                    continue;
                }

                double classFreq = entry.Value / (double)thisClass.Size(entry.Key.Count);
                double baseFreq = baseline.GetKeyFracLaplace(entry.Key, smoothingAmt);

                //TODO: Advanced logic.
                if (classFreq > baseFreq)
                {
                    double weight = classFreq / baseFreq - 1;
                    candidates.Add(new TupleStruct<Kmer<Ty>, double>(entry.Key, weight));
                }
            }

            // Ranking and truncation stay deferred; only the candidate list is built eagerly.
            return candidates
                .OrderByDescending(candidate => Math.Abs(candidate.Item2))
                .Take((int)kmersToTake);
        }
예제 #10
0
		/// <summary>
		/// Builds a characteristic set holding every k-mer of <paramref name="thisClass"/> whose
		/// count exceeds <paramref name="countCutoff"/> and whose fraction in the class is larger
		/// than its fraction in the baseline. The stored value is the difference of the two fractions.
		/// </summary>
		/// <param name="name">Name passed to the new set's constructor.</param>
		/// <param name="baselineClass">Multiset providing the baseline fraction.</param>
		/// <param name="thisClass">Multiset of the class being characterized.</param>
		/// <param name="countCutoff">A k-mer's count must be strictly greater than this to be considered.</param>
		/// <returns>The new set, constructed with the smaller <c>maxK</c> of the two multisets.</returns>
		public static ClassCharacteristicSetKmer<A> BuildSubtractiveDifference(string name, MultisetKmer<A> baselineClass, MultisetKmer<A> thisClass, uint countCutoff)
		{
			var sharedMaxK = Math.Min(baselineClass.maxK, thisClass.maxK);
			var result = new ClassCharacteristicSetKmer<A>(name, sharedMaxK);

			//TODO statistically significant?
			//TODO difference amount?

			foreach (Kmer<A> kmer in thisClass.Keys)
			{
				if (!(thisClass.getCount(kmer) > countCutoff))
				{
					continue;
				}

				double classShare = thisClass.GetKeyFrac(kmer);
				double baseShare = baselineClass.GetKeyFrac(kmer);
				if (classShare > baseShare)
				{
					result.Add(kmer, classShare - baseShare);
				}
			}

			//TODO select top x?
			//double[] function?

			return result;
		}
예제 #11
0
 /// <summary>
 /// Creates a regressor through the five-argument constructor and then trains it
 /// immediately by calling <c>TrainModelRatios</c> with the two multisets.
 /// </summary>
 /// <param name="name">Forwarded to the five-argument constructor.</param>
 /// <param name="minSignificantCount">Forwarded to the five-argument constructor.</param>
 /// <param name="smoothingAmount">Forwarded to the five-argument constructor.</param>
 /// <param name="featuresToUse">Forwarded to the five-argument constructor.</param>
 /// <param name="k">Forwarded to the five-argument constructor.</param>
 /// <param name="baselineClass">Baseline multiset passed to <c>TrainModelRatios</c>.</param>
 /// <param name="thisClass">Class multiset passed to <c>TrainModelRatios</c>.</param>
 public ItemVarKmerFrequencyRegressor(
     string name,
     int minSignificantCount,
     int smoothingAmount,
     int featuresToUse,
     int k,
     MultisetKmer<A> baselineClass,
     MultisetKmer<A> thisClass)
     : this(name, minSignificantCount, smoothingAmount, featuresToUse, k)
 {
     TrainModelRatios(baselineClass, thisClass);
 }