/// <summary>
/// Scores <paramref name="example"/> against every label with the naive Bayes model
/// held in mExampleCount, mDatasetCount, mFeatureProb and mFeaturePriors.
/// </summary>
/// <param name="example">Indices of the features that are present in the example.</param>
/// <returns>
/// One score per label, sorted with <c>DescSort</c>. The scores are divided by their
/// total when mNormalize is set and that total is positive.
/// </returns>
public Prediction<LblT> Predict(BinaryVector example)
{
    Prediction<LblT> result = new Prediction<LblT>();
    double total = 0;
    for (int classIdx = 0; classIdx < mIdxToLbl.Length; classIdx++)
    {
        // Smoothed class prior: (count + 2 / #classes) / (dataset size + 2).
        double score = ((double)mExampleCount[classIdx] + 2.0 / (double)mIdxToLbl.Length) / ((double)mDatasetCount + 2.0);
        foreach (int featIdx in example)
        {
            double p;
            if (mFeatureProb[classIdx].TryGetValue(featIdx, out p))
            {
                // The feature has a stored probability for this class.
                score *= p;
                continue;
            }
            if (mFeaturePriors.TryGetValue(featIdx, out p))
            {
                // No per-class entry: fall back to the feature prior scaled by 2 / (count + 2).
                score *= 2.0 * p / ((double)mExampleCount[classIdx] + 2.0);
            }
            // A feature found in neither table leaves the score untouched.
        }
        result.Inner.Add(new KeyDat<double, LblT>(score, mIdxToLbl[classIdx]));
        total += score;
    }
    bool rescale = mNormalize && total > 0;
    if (rescale)
    {
        for (int pos = 0; pos < result.Count; pos++)
        {
            KeyDat<double, LblT> raw = result[pos];
            result.Inner[pos] = new KeyDat<double, LblT>(raw.Key / total, raw.Dat);
        }
    }
    result.Inner.Sort(DescSort<KeyDat<double, LblT>>.Instance);
    return result;
}
/// <summary>
/// Descending comparison of two (score, index) pairs. Keys that differ by at most
/// 0.000001 are treated as tied, and the tie is broken by <c>Dat</c>, also descending.
/// </summary>
/// <returns>The result of comparing <paramref name="y"/> to <paramref name="x"/> (note the swapped order).</returns>
public int Compare(KeyDat<double, int> x, KeyDat<double, int> y)
{
    double keyGap = Math.Abs(y.Key - x.Key);
    return keyGap > 0.000001
        ? y.Key.CompareTo(x.Key)   // keys clearly differ: larger key first
        : y.Dat.CompareTo(x.Dat);  // keys tied within tolerance: larger Dat first
}
/// <summary>
/// Descending comparison of two (score, patch) pairs. Keys that differ by at most
/// 0.000001 are treated as tied, and the tie is broken by the patch's <c>Idx</c>,
/// also descending.
/// </summary>
/// <returns>The result of comparing <paramref name="y"/> to <paramref name="x"/> (note the swapped order).</returns>
public int Compare(KeyDat<double, Patch> x, KeyDat<double, Patch> y)
{
    double keyGap = Math.Abs(y.Key - x.Key);
    if (keyGap <= 0.000001)
    {
        // Keys tied within tolerance: order by patch index instead.
        return y.Dat.Idx.CompareTo(x.Dat.Idx);
    }
    return y.Key.CompareTo(x.Key);
}
/// <summary>
/// Scores <paramref name="example"/> against every label with the naive Bayes model
/// held in mExampleCount, mDatasetCount, mFeatureProb and mFeaturePriors.
/// </summary>
/// <remarks>
/// When mLogSumExpTrick is set, the per-class score is accumulated as a sum of
/// logarithms. If mNormalize is also set, the largest log-score is subtracted from
/// all of them before exponentiation. The common factor cancels in the
/// normalisation, so the normalised result is mathematically the same but no longer
/// collapses to all zeros when every exp(log p) underflows. Without mNormalize the
/// raw exp(log p) values are returned.
/// </remarks>
/// <param name="example">Indices of the features that are present in the example.</param>
/// <returns>
/// One score per label, sorted with <c>DescSort</c>. The scores are divided by their
/// total when mNormalize is set and that total is positive.
/// </returns>
/// <exception cref="ArgumentNullException">
/// Passed to <c>Utils.ThrowException</c> when <paramref name="example"/> is null.
/// </exception>
public Prediction<LblT> Predict(BinaryVector example)
{
    Utils.ThrowException(example == null ? new ArgumentNullException("example") : null);
    int classCount = mIdxToLbl.Length;
    // Per-class scores: plain probabilities, or log-probabilities while mLogSumExpTrick is on.
    double[] scores = new double[classCount];
    for (int i = 0; i < classCount; i++)
    {
        double prior = ((double)mExampleCount[i] + 2.0 / (double)classCount) / ((double)mDatasetCount + 2.0); // class prior probability estimate
        double pc = mLogSumExpTrick ? Math.Log(prior) : prior;
        foreach (int featIdx in example)
        {
            double pFeat;
            if (!mFeatureProb[i].TryGetValue(featIdx, out pFeat))
            {
                if (!mFeaturePriors.TryGetValue(featIdx, out pFeat))
                {
                    continue; // feature found in neither table: contributes nothing
                }
                pFeat = 2.0 * pFeat / ((double)mExampleCount[i] + 2.0); // m-estimate (m = 2, feature count = 0)
            }
            if (mLogSumExpTrick)
            {
                pc += Math.Log(pFeat);
            }
            else
            {
                pc *= pFeat;
            }
        }
        scores[i] = pc;
    }
    if (mLogSumExpTrick)
    {
        // Move from log space back to probabilities. The shift is applied only when
        // normalising, because only then does the common factor exp(-shift) cancel.
        double shift = 0;
        if (mNormalize)
        {
            double maxLog = double.NegativeInfinity;
            for (int i = 0; i < classCount; i++)
            {
                if (scores[i] > maxLog) { maxLog = scores[i]; }
            }
            // An infinite maximum (no classes, all-zero probabilities, or overflow)
            // cannot be used as a shift: x - inf would produce NaN.
            if (!double.IsInfinity(maxLog)) { shift = maxLog; }
        }
        for (int i = 0; i < classCount; i++)
        {
            scores[i] = Math.Exp(scores[i] - shift);
        }
    }
    double sum = 0;
    for (int i = 0; i < classCount; i++)
    {
        sum += scores[i];
    }
    bool rescale = mNormalize && sum > 0;
    Prediction<LblT> pred = new Prediction<LblT>();
    for (int i = 0; i < classCount; i++)
    {
        double finalScore = rescale ? scores[i] / sum : scores[i];
        pred.Inner.Add(new KeyDat<double, LblT>(finalScore, mIdxToLbl[i]));
    }
    pred.Inner.Sort(DescSort<KeyDat<double, LblT>>.Instance);
    return pred;
}
/// <summary>
/// k-nearest-neighbour prediction over the stored dataset matrix: ranks all stored
/// examples by dot-product similarity to <paramref name="example"/> and lets the
/// top mK of them vote for their labels.
/// </summary>
/// <param name="example">Sparse feature vector to classify.</param>
/// <returns>
/// One entry per label that received a vote, sorted with <c>DescSort</c>. A vote is
/// worth the neighbour's similarity when mSoftVoting is set, otherwise 1.
/// </returns>
/// <exception cref="InvalidOperationException">Passed to <c>Utils.ThrowException</c> when mDatasetMtx is null.</exception>
/// <exception cref="ArgumentNullException">Passed to <c>Utils.ThrowException</c> when <paramref name="example"/> is null.</exception>
public Prediction<LblT> Predict(SparseVector<double> example)
{
    Utils.ThrowException(mDatasetMtx == null ? new InvalidOperationException() : null);
    Utils.ThrowException(example == null ? new ArgumentNullException("example") : null);

    // Pair every stored label with its similarity to the query and rank them.
    ArrayList<KeyDat<double, LblT>> ranked = new ArrayList<KeyDat<double, LblT>>(mLabels.Count);
    double[] similarities = ModelUtils.GetDotProductSimilarity(mDatasetMtx, mLabels.Count, example);
    for (int row = 0; row < mLabels.Count; row++)
    {
        ranked.Add(new KeyDat<double, LblT>(similarities[row], mLabels[row]));
    }
    ranked.Sort(DescSort<KeyDat<double, LblT>>.Instance);

    // Tally the votes of the first mK ranked neighbours (fewer if the dataset is smaller).
    Dictionary<LblT, double> tally = new Dictionary<LblT, double>(mLblCmp);
    int neighbourCount = Math.Min(mK, ranked.Count);
    for (int rank = 0; rank < neighbourCount; rank++)
    {
        KeyDat<double, LblT> neighbour = ranked[rank];
        double weight = mSoftVoting ? neighbour.Key : 1.0; // "soft" vs. normal voting
        double soFar;
        tally[neighbour.Dat] = tally.TryGetValue(neighbour.Dat, out soFar) ? soFar + weight : weight;
    }

    Prediction<LblT> classifierResult = new Prediction<LblT>();
    foreach (KeyValuePair<LblT, double> vote in tally)
    {
        classifierResult.Inner.Add(new KeyDat<double, LblT>(vote.Value, vote.Key));
    }
    classifierResult.Inner.Sort(DescSort<KeyDat<double, LblT>>.Instance);
    return classifierResult;
}
/// <summary>
/// k-nearest-neighbour prediction over the stored labeled examples: ranks them by
/// mSimilarity to <paramref name="example"/> and lets the top mK of them vote for
/// their labels.
/// </summary>
/// <param name="example">Example to classify.</param>
/// <returns>
/// One entry per label that received a vote, sorted with <c>DescSort</c>. A vote is
/// worth the neighbour's similarity when mSoftVoting is set, otherwise 1.
/// </returns>
/// <exception cref="InvalidOperationException">Passed to <c>Utils.ThrowException</c> when mExamples is null.</exception>
/// <exception cref="ArgumentNullException">Passed to <c>Utils.ThrowException</c> when <paramref name="example"/> is null.</exception>
public Prediction<LblT> Predict(ExT example)
{
    Utils.ThrowException(mExamples == null ? new InvalidOperationException() : null);
    Utils.ThrowException(example == null ? new ArgumentNullException("example") : null);

    // Score every stored example against the query, then rank them.
    ArrayList<KeyDat<double, LabeledExample<LblT, ExT>>> ranked
        = new ArrayList<KeyDat<double, LabeledExample<LblT, ExT>>>(mExamples.Count);
    foreach (LabeledExample<LblT, ExT> stored in mExamples)
    {
        double similarity = mSimilarity.GetSimilarity(example, stored.Example);
        ranked.Add(new KeyDat<double, LabeledExample<LblT, ExT>>(similarity, stored));
    }
    ranked.Sort(DescSort<KeyDat<double, LabeledExample<LblT, ExT>>>.Instance);

    // Tally the votes of the first mK ranked neighbours (fewer if there are not enough examples).
    Dictionary<LblT, double> tally = new Dictionary<LblT, double>(mLblCmp);
    int neighbourCount = Math.Min(mK, ranked.Count);
    for (int rank = 0; rank < neighbourCount; rank++)
    {
        KeyDat<double, LabeledExample<LblT, ExT>> neighbour = ranked[rank];
        LblT label = neighbour.Dat.Label;
        double weight;
        if (mSoftVoting)
        {
            weight = neighbour.Key; // "soft" voting: weight by similarity
        }
        else
        {
            weight = 1.0; // normal voting: one neighbour, one vote
        }
        double soFar;
        if (tally.TryGetValue(label, out soFar))
        {
            tally[label] = soFar + weight;
        }
        else
        {
            tally.Add(label, weight);
        }
    }

    Prediction<LblT> classifierResult = new Prediction<LblT>();
    foreach (KeyValuePair<LblT, double> vote in tally)
    {
        classifierResult.Inner.Add(new KeyDat<double, LblT>(vote.Value, vote.Key));
    }
    classifierResult.Inner.Sort(DescSort<KeyDat<double, LblT>>.Instance);
    return classifierResult;
}
/// <summary>
/// Collects the text blocks of annotations found in mAnnotationIndex from the first
/// entry whose key is at least <paramref name="spanStart"/>, keeping those whose
/// <c>SpanEnd</c> is at most <paramref name="spanEnd"/> and whose <c>Type</c> starts
/// with <paramref name="selector"/>.
/// </summary>
/// <param name="selector">Prefix that the annotation type must start with.</param>
/// <param name="spanStart">Lower bound used to locate the first index entry; must be non-negative.</param>
/// <param name="spanEnd">Upper bound for the annotation span; must not be smaller than <paramref name="spanStart"/>.</param>
/// <returns>The matching blocks in index order; an empty array when nothing matches.</returns>
/// <exception cref="InvalidOperationException">Passed to <c>Utils.ThrowException</c> when mAnnotationIndex is null.</exception>
/// <exception cref="ArgumentNullException">Passed to <c>Utils.ThrowException</c> when <paramref name="selector"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Passed to <c>Utils.ThrowException</c> when <paramref name="spanStart"/> is negative or
/// <paramref name="spanEnd"/> is smaller than <paramref name="spanStart"/>.
/// </exception>
public TextBlock[] GetAnnotatedBlocks(string selector, int spanStart, int spanEnd)
{
    // TODO: set mAnnotationIndex to null if annotation array changes
    Utils.ThrowException(mAnnotationIndex == null ? new InvalidOperationException() : null);
    Utils.ThrowException(selector == null ? new ArgumentNullException("selector") : null);
    Utils.ThrowException(spanStart < 0 ? new ArgumentOutOfRangeException("spanStart") : null);
    // The reported name must match the actual parameter ("spanEnd", not "SpanEnd").
    Utils.ThrowException(spanEnd < spanStart ? new ArgumentOutOfRangeException("spanEnd") : null);

    KeyDat<int, Annotation> key = new KeyDat<int, Annotation>(spanStart, null);
    ArrayList<TextBlock> blocks = new ArrayList<TextBlock>();

    // Locate the first index entry with key >= spanStart.
    // NOTE(review): this relies on mAnnotationIndex being sorted by key, and the key
    // presumably being the annotation's span start - confirm where the index is built.
    int idx = mAnnotationIndex.BinarySearch(key);
    if (idx < 0)
    {
        idx = ~idx; // not found: complement of the search result is the insertion point
    }
    else
    {
        // Found some entry with this key: rewind to the first entry that shares it.
        while (idx >= 0 && mAnnotationIndex[idx].Key == key.Key)
        {
            idx--;
        }
        idx++;
    }

    for (int i = idx; i < mAnnotationIndex.Count; i++)
    {
        Annotation annotation = mAnnotationIndex[i].Dat;
        if (annotation.SpanStart > spanEnd)
        {
            break; // the scan stops at the first annotation that starts after spanEnd
        }
        // NOTE(review): StartsWith(string) is culture-sensitive; an ordinal comparison
        // may be what is intended for type selectors - left unchanged here.
        if (annotation.SpanEnd <= spanEnd && annotation.Type.StartsWith(selector))
        {
            blocks.Add(annotation.GetAnnotatedBlock(mText));
        }
    }
    return blocks.ToArray();
}
/// <summary>
/// Scores a binary example against each row of <paramref name="lambdas"/>: the score
/// of a row is exp(dot product of the row with the example converted to a sparse vector).
/// </summary>
/// <param name="binVec">Example to classify.</param>
/// <param name="lambdas">Weight matrix; each row's index selects its label in <paramref name="idxToLbl"/>.</param>
/// <param name="idxToLbl">Maps a row index to its label.</param>
/// <param name="normalize">When true and the score total is positive, the scores are divided by that total.</param>
/// <returns>One score per row of <paramref name="lambdas"/>, sorted with <c>DescSort</c>.</returns>
public static Prediction<LblT> Classify<LblT>(BinaryVector binVec, SparseMatrix<double>.ReadOnly lambdas, LblT[] idxToLbl, bool normalize)
{
    SparseVector<double> example = ModelUtils.ConvertExample<SparseVector<double>>(binVec);
    Prediction<LblT> result = new Prediction<LblT>();
    double total = 0;
    foreach (IdxDat<SparseVector<double>.ReadOnly> weights in lambdas)
    {
        double activation = DotProductSimilarity.Instance.GetSimilarity(weights.Dat, example);
        double unnormalized = Math.Exp(activation);
        result.Inner.Add(new KeyDat<double, LblT>(unnormalized, idxToLbl[weights.Idx]));
        total += unnormalized;
    }
    bool rescale = normalize && total > 0;
    if (rescale)
    {
        for (int pos = 0; pos < result.Count; pos++)
        {
            KeyDat<double, LblT> raw = result[pos];
            result.Inner[pos] = new KeyDat<double, LblT>(raw.Key / total, raw.Dat);
        }
    }
    result.Inner.Sort(DescSort<KeyDat<double, LblT>>.Instance);
    return result;
}
/// <summary>
/// Scores a binary example against each non-null row of <paramref name="lambdas"/>:
/// the score of a row is exp(sum of the row's weights for the features present in the example).
/// </summary>
/// <param name="binVec">Indices of the features that are present in the example.</param>
/// <param name="lambdas">Per-label weight tables; a null entry means the label is skipped.</param>
/// <param name="idxToLbl">Maps a position in <paramref name="lambdas"/> to its label.</param>
/// <param name="normalize">When true and the score total is positive, the scores are divided by that total.</param>
/// <returns>One score per non-null row, sorted with <c>DescSort</c>.</returns>
public static Prediction<LblT> Classify<LblT>(BinaryVector binVec, Dictionary<int, double>[] lambdas, LblT[] idxToLbl, bool normalize)
{
    Prediction<LblT> result = new Prediction<LblT>();
    double total = 0;
    for (int labelIdx = 0; labelIdx < lambdas.Length; labelIdx++)
    {
        Dictionary<int, double> weights = lambdas[labelIdx];
        if (weights == null)
        {
            continue; // no weight table for this label
        }
        double activation = 0;
        foreach (int featIdx in binVec)
        {
            double w;
            if (weights.TryGetValue(featIdx, out w))
            {
                activation += w;
            }
        }
        double unnormalized = Math.Exp(activation);
        result.Inner.Add(new KeyDat<double, LblT>(unnormalized, idxToLbl[labelIdx]));
        total += unnormalized;
    }
    bool rescale = normalize && total > 0;
    if (rescale)
    {
        for (int pos = 0; pos < result.Count; pos++)
        {
            KeyDat<double, LblT> raw = result[pos];
            result.Inner[pos] = new KeyDat<double, LblT>(raw.Key / total, raw.Dat);
        }
    }
    result.Inner.Sort(DescSort<KeyDat<double, LblT>>.Instance);
    return result;
}
/// <summary>
/// Partitions <paramref name="dataset"/> into mK clusters with a k-means-style loop
/// that uses dot-product similarity, repeated for mTrials trials. The trial with the
/// highest final quality (average similarity of each item to its assigned centroid)
/// is kept in mCentroids and mMedoids and returned.
/// </summary>
/// <param name="dataset">Sparse vectors to cluster; must contain at least mK items.</param>
/// <returns>
/// A result with one root cluster per centroid, or null when no trial runs (mTrials &lt; 1).
/// </returns>
/// <exception cref="ArgumentNullException">Passed to <c>Utils.ThrowException</c> when <paramref name="dataset"/> is null.</exception>
/// <exception cref="ArgumentValueException">Passed to <c>Utils.ThrowException</c> when the dataset has fewer than mK items.</exception>
public ClusteringResult Cluster(IUnlabeledExampleCollection<SparseVector<double>> dataset)
{
    Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
    Utils.ThrowException(dataset.Count < mK ? new ArgumentValueException("dataset") : null);
    mDataset = new UnlabeledDataset<SparseVector<double>>(dataset);
    ClusteringResult clustering = null;
    double globalBestClustQual = 0;
    for (int trial = 1; trial <= mTrials; trial++)
    {
        mLogger.Info("Cluster", "Clustering trial {0} of {1} ...", trial, mTrials);
        ArrayList<CentroidData> centroids = new ArrayList<CentroidData>(mK);
        ArrayList<int> bestSeeds = null;
        for (int i = 0; i < mK; i++)
        {
            centroids.Add(new CentroidData());
        }
        // select seed items: draw three random seed sets and keep the one whose
        // members are least similar to each other
        double minSim = double.MaxValue;
        ArrayList<int> tmp = new ArrayList<int>(mDataset.Count);
        for (int i = 0; i < mDataset.Count; i++)
        {
            tmp.Add(i);
        }
        int seedPairCount = mK * mK - mK; // number of ordered pairs of distinct seeds
        for (int k = 0; k < 3; k++)
        {
            ArrayList<SparseVector<double>> seeds = new ArrayList<SparseVector<double>>(mK);
            tmp.Shuffle(mRnd);
            for (int i = 0; i < mK; i++)
            {
                seeds.Add(mDataset[tmp[i]]);
            }
            // assess quality of seed items
            double simAvg = 0;
            foreach (SparseVector<double> seed1 in seeds)
            {
                foreach (SparseVector<double> seed2 in seeds)
                {
                    if (seed1 != seed2)
                    {
                        simAvg += DotProductSimilarity.Instance.GetSimilarity(seed1, seed2);
                    }
                }
            }
            // With fewer than two seeds there are no pairs; dividing would give
            // 0/0 = NaN, which never compares below minSim.
            if (seedPairCount > 0)
            {
                simAvg /= (double)seedPairCount;
            }
            // Always accept the first candidate so bestSeeds can never remain null.
            if (bestSeeds == null || simAvg < minSim)
            {
                minSim = simAvg;
                bestSeeds = new ArrayList<int>(mK);
                for (int i = 0; i < mK; i++)
                {
                    bestSeeds.Add(tmp[i]);
                }
            }
        }
        ArrayList<KeyDat<double, int>> medoids = new ArrayList<KeyDat<double, int>>(mK);
        for (int i = 0; i < mK; i++)
        {
            centroids[i].Items.Add(bestSeeds[i]);
            centroids[i].Update(mDataset);
            centroids[i].UpdateCentroidLen();
            medoids.Add(new KeyDat<double, int>(-1, bestSeeds[i]));
        }
        double[,] dotProd = new double[mDataset.Count, mK];
        SparseMatrix<double> dsMat = ModelUtils.GetTransposedMatrix(mDataset);
        // main loop
        int iter = 0;
        double bestClustQual = 0;
        double clustQual;
        while (true)
        {
            iter++;
            mLogger.Info("Cluster", "Iteration {0} ...", iter);
            clustQual = 0;
            // assign items to clusters
            int j = 0;
            foreach (CentroidData cen in centroids)
            {
                SparseVector<double> cenVec = cen.GetSparseVector();
                double[] dotProdSimVec = ModelUtils.GetDotProductSimilarity(dsMat, mDataset.Count, cenVec);
                for (int i = 0; i < dotProdSimVec.Length; i++)
                {
                    // Overwrite the cell on every iteration. Non-positive similarities
                    // are stored as 0 so that a positive value from an earlier
                    // iteration cannot survive after the similarity has dropped.
                    dotProd[i, j] = dotProdSimVec[i] > 0 ? dotProdSimVec[i] : 0;
                }
                j++;
            }
            for (int dsInstIdx = 0; dsInstIdx < mDataset.Count; dsInstIdx++)
            {
                double maxSim = double.MinValue;
                ArrayList<int> candidates = new ArrayList<int>();
                for (int cenIdx = 0; cenIdx < mK; cenIdx++)
                {
                    double sim = dotProd[dsInstIdx, cenIdx];
                    if (sim > maxSim)
                    {
                        maxSim = sim;
                        candidates.Clear();
                        candidates.Add(cenIdx);
                    }
                    else if (sim == maxSim)
                    {
                        candidates.Add(cenIdx);
                    }
                }
                // ties between equally similar centroids are broken at random
                if (candidates.Count > 1)
                {
                    candidates.Shuffle(mRnd);
                }
                if (candidates.Count > 0) // *** is this always true?
                {
                    centroids[candidates[0]].Items.Add(dsInstIdx);
                    clustQual += maxSim;
                    // track, per cluster, the item most similar to the centroid
                    if (medoids[candidates[0]].Key < maxSim)
                    {
                        medoids[candidates[0]] = new KeyDat<double, int>(maxSim, dsInstIdx);
                    }
                }
            }
            clustQual /= (double)mDataset.Count;
            mLogger.Info("Cluster", "Quality: {0:0.0000}", clustQual);
            // compute new centroids
            for (int i = 0; i < mK; i++)
            {
                centroids[i].Update(mDataset);
                centroids[i].UpdateCentroidLen();
            }
            // check if done: stop once the quality gain over the previous iteration is at most mEps
            if (iter > 1 && clustQual - bestClustQual <= mEps)
            {
                break;
            }
            bestClustQual = clustQual;
            // reset the medoid similarities (keeping the item indices) for the next pass
            for (int i = 0; i < medoids.Count; i++)
            {
                medoids[i] = new KeyDat<double, int>(-1, medoids[i].Dat);
            }
        }
        if (trial == 1 || clustQual > globalBestClustQual)
        {
            globalBestClustQual = clustQual;
            mCentroids = centroids;
            mMedoids = medoids;
            // save the result
            clustering = new ClusteringResult();
            for (int i = 0; i < mK; i++)
            {
                clustering.AddRoot(new Cluster());
                clustering.Roots.Last.Items.AddRange(centroids[i].Items);
            }
        }
    }
    return clustering;
}
/// <summary>
/// Tutorial walk-through of the basic collection types (ArrayList, Set, Pair, KeyDat,
/// IdxDat and an ArrayList of KeyDat). Each step writes a caption and the resulting
/// value to <c>Output</c>.
/// </summary>
/// <param name="args">Not used by this method.</param>
public override void Run(object[] args)
{
    // ---------------------------------------------------------------- ArrayList
    Output.WriteLine("*** ArrayList ***");
    Output.WriteLine();

    Output.WriteLine("Create an ArrayList ...");
    ArrayList<int> numbers = new ArrayList<int>(new int[] { 1, 2, 3 });
    Output.WriteLine(numbers);

    Output.WriteLine("Add more items ...");
    numbers.AddRange(new int[] { 6, 5, 4 });
    Output.WriteLine(numbers);

    Output.WriteLine("Sort descendingly ...");
    numbers.Sort(DescSort<int>.Instance);
    Output.WriteLine(numbers);

    // fixed seed, so the shuffled order is the same on every run
    Output.WriteLine("Shuffle ...");
    numbers.Shuffle(new Random(1));
    Output.WriteLine(numbers);

    Output.WriteLine("Convert to array of double ...");
    double[] numbersAsDoubles = numbers.ToArray<double>();
    Output.WriteLine(new ArrayList<double>(numbersAsDoubles));

    Output.WriteLine("Convert to ArrayList of string ...");
    ArrayList<string> numbersAsText = new ArrayList<string>(numbers.ToArray<string>());
    Output.WriteLine(numbersAsText);

    Output.WriteLine("Get items ...");
    Output.WriteLine(numbers[0]);
    Output.WriteLine(numbers[1]);

    Output.WriteLine("Set items ...");
    numbers[0] = 3;
    numbers[1] = 2;
    Output.WriteLine(numbers);

    Output.WriteLine("Get length ...");
    Output.WriteLine(numbers.Count);
    Output.WriteLine();

    // ---------------------------------------------------------------- Set
    Output.WriteLine("*** Set ***");
    Output.WriteLine();

    Output.WriteLine("Create Set ...");
    Set<int> firstSet = new Set<int>(new int[] { 1, 2, 3 });
    Output.WriteLine(firstSet);

    Output.WriteLine("Check for items ...");
    Output.WriteLine(firstSet.Contains(1));
    Output.WriteLine(firstSet.Contains(4));

    // 3 is already a member (note the duplicate)
    Output.WriteLine("Add more items ...");
    firstSet.AddRange(new int[] { 6, 5, 4, 3 });
    Output.WriteLine(firstSet);

    Output.WriteLine("Remove some items ...");
    firstSet.RemoveRange(new int[] { 1, 3 });
    firstSet.Remove(5);
    Output.WriteLine(firstSet);

    Output.WriteLine("Create another Set ...");
    Set<int> secondSet = new Set<int>(new int[] { 1, 2, 3, 4, 5 });
    Output.WriteLine(secondSet);

    Output.WriteLine("Compute union ...");
    Output.WriteLine(Set<int>.Union(firstSet, secondSet));

    Output.WriteLine("Compute difference ...");
    Output.WriteLine(Set<int>.Difference(firstSet, secondSet));

    Output.WriteLine("Compute intersection ...");
    Output.WriteLine(Set<int>.Intersection(firstSet, secondSet));

    Output.WriteLine("Compute Jaccard similarity ...");
    Output.WriteLine(Set<int>.JaccardSimilarity(firstSet, secondSet));

    Output.WriteLine("Convert to array ...");
    int[] secondSetItems = secondSet.ToArray();
    Output.WriteLine(new ArrayList<int>(secondSetItems));

    Output.WriteLine("Convert to Set of string ...");
    Set<string> secondSetAsText = new Set<string>(secondSet.ToArray<string>());
    Output.WriteLine(secondSetAsText);

    Output.WriteLine("Get length ...");
    Output.WriteLine(secondSetAsText.Count);
    Output.WriteLine();

    /*
     * // *** BinaryVector ***
     * Output.WriteLine("*** BinaryVector ***");
     * Output.WriteLine();
     * // create BinaryVector
     * Output.WriteLine("Create BinaryVector ...");
     * BinaryVector<char> binVec = new BinaryVector<char>(new char[] { 'a', 'b', 'c' });
     * Output.WriteLine((object) binVec);
     * // check for items
     * Output.WriteLine("Check for items ...");
     * Output.WriteLine((bool) binVec.Contains('a'));
     * Output.WriteLine((bool) binVec.Contains('d'));
     * // add more items (note the duplicate)
     * Output.WriteLine("Add more items ...");
     * binVec.AddRange(new char[] { 'f', 'e', 'd', 'c' });
     * Output.WriteLine((object) binVec);
     * // remove some items
     * Output.WriteLine("Remove some items ...");
     * binVec.RemoveRange(new char[] { 'a', 'c' });
     * binVec.Remove('e');
     * Output.WriteLine((object) binVec);
     * // convert to array
     * Output.WriteLine("Convert to array ...");
     * char[] array3 = binVec.ToArray();
     * Output.WriteLine(new ArrayList<char>(array3));
     * // convert to BinaryVector of string
     * Output.WriteLine("Convert to BinaryVector of string ...");
     * BinaryVector<string> binVec2 = new BinaryVector<string>(binVec.ToArray<string>());
     * Output.WriteLine((object) binVec2);
     * // get items
     * Output.WriteLine("Get items ...");
     * Output.WriteLine((int) binVec2[0]);
     * Output.WriteLine((int) binVec2[1]);
     * // get length
     * Output.WriteLine("Get length ...");
     * Output.WriteLine((int) binVec2.Count);
     * Output.WriteLine();
     */

    // ---------------------------------------------------------------- Pair
    Output.WriteLine("*** Pair ***");
    Output.WriteLine();

    Output.WriteLine("Create Pair ...");
    Pair<int, string> dogPair = new Pair<int, string>(3, "dogs");
    Output.WriteLine(dogPair);

    Output.WriteLine("Create another Pair ...");
    Pair<int, string> catPair = new Pair<int, string>(3, "cats");
    Output.WriteLine(catPair);

    Output.WriteLine("Compare ...");
    Output.WriteLine(dogPair == catPair);

    // after this change both pairs hold (3, "cats")
    Output.WriteLine("Make a change ...");
    dogPair.Second = "cats";
    Output.WriteLine(dogPair);

    Output.WriteLine("Compare again ...");
    Output.WriteLine(dogPair == catPair);
    Output.WriteLine();

    // ---------------------------------------------------------------- KeyDat
    Output.WriteLine("*** KeyDat ***");
    Output.WriteLine();

    Output.WriteLine("Create KeyDat ...");
    KeyDat<int, string> dogEntry = new KeyDat<int, string>(3, "dogs");
    Output.WriteLine(dogEntry);

    Output.WriteLine("Create another KeyDat ...");
    KeyDat<int, string> catEntry = new KeyDat<int, string>(3, "cats");
    Output.WriteLine(catEntry);

    Output.WriteLine("Compare ...");
    Output.WriteLine(dogEntry == catEntry);

    Output.WriteLine("Make a change ...");
    dogEntry.Key = 4;
    Output.WriteLine(dogEntry);

    Output.WriteLine("Compare again ...");
    Output.WriteLine(dogEntry == catEntry);
    Output.WriteLine(dogEntry > catEntry);
    Output.WriteLine();

    // ---------------------------------------------------------------- IdxDat
    Output.WriteLine("*** IdxDat ***");
    Output.WriteLine();

    Output.WriteLine("Create an IdxDat ...");
    IdxDat<string> dogSlot = new IdxDat<string>(3, "dogs");
    Output.WriteLine(dogSlot);

    Output.WriteLine("Create another IdxDat ...");
    IdxDat<string> catSlot = new IdxDat<string>(4, "cats");
    Output.WriteLine(catSlot);

    Output.WriteLine("Compare ...");
    Output.WriteLine(dogSlot == catSlot);

    // make a change: only the data can be replaced
    //dogSlot.Idx = 4; // not possible to change index
    dogSlot.Dat = "cats";
    Output.WriteLine(dogSlot);

    Output.WriteLine("Compare again ...");
    Output.WriteLine(dogSlot == catSlot);
    Output.WriteLine(dogSlot < catSlot);
    Output.WriteLine();

    // ---------------------------------------------------------------- ArrayList of KeyDat
    Output.WriteLine("*** ArrayList of KeyDat ***");
    Output.WriteLine();

    Output.WriteLine("Create an ArrayList of KeyDat ...");
    KeyDat<double, string>[] scoredAnimals = new KeyDat<double, string>[]
    {
        new KeyDat<double, string>(2.4, "cats"),
        new KeyDat<double, string>(3.3, "dogs"),
        new KeyDat<double, string>(4.2, "lizards")
    };
    ArrayList<KeyDat<double, string>> scoreList = new ArrayList<KeyDat<double, string>>(scoredAnimals);
    Output.WriteLine(scoreList);

    Output.WriteLine("Sort descendingly ...");
    scoreList.Sort(DescSort<KeyDat<double, string>>.Instance);
    Output.WriteLine(scoreList);

    // search for one key that is in the list (3.3) and one that is not (3);
    // the second result is written bit-complemented
    Output.WriteLine("Find item with bisection ...");
    int foundAt = scoreList.BinarySearch(new KeyDat<double, string>(3.3), DescSort<KeyDat<double, string>>.Instance);
    Output.WriteLine(foundAt);
    foundAt = scoreList.BinarySearch(new KeyDat<double, string>(3), DescSort<KeyDat<double, string>>.Instance);
    Output.WriteLine(~foundAt);

    Output.WriteLine("Remove item ...");
    scoreList.Remove(new KeyDat<double, string>(3.3));
    Output.WriteLine(scoreList);

    Output.WriteLine("Get first and last item ...");
    Output.WriteLine(scoreList.First);
    Output.WriteLine(scoreList.Last);
}