// TODO: exceptions
        public Vector2D[] Update(int numDequeue, IEnumerable <SparseVector <double> > newInst, bool test, LayoutSettings settings, ref PtInfo[] ptInfo, int _count)
        {
            // clustering
            mLogger.Info("Update", "Clustering ...");
            /*prof*/ StopWatch sw = new StopWatch();

            mKMeans.Eps = mKMeansEps;
            int iter = 0;

            mKMeans.Update(numDequeue, newInst, ref iter);
            /*prof*/ sw.Save("cl.txt", _count, iter.ToString());
            // determine reference instances
            /*prof*/ sw.Reset();
            UnlabeledDataset <SparseVector <double> > dsRefInst = new UnlabeledDataset <SparseVector <double> >();
            UnlabeledDataset <SparseVector <double> > dsNewInst = new UnlabeledDataset <SparseVector <double> >(newInst);

            foreach (SparseVector <double> centroid in mKMeans.GetCentroids())
            {
                dsRefInst.Add(centroid); // dataset of reference instances
                dsNewInst.Add(centroid); // dataset of new instances
            }
            // position reference instances
            mLogger.Info("Update", "Positioning reference instances ...");
            SparseMatrix <double>    simMtx = ModelUtils.GetDotProductSimilarity(dsRefInst, mSimThresh, /*fullMatrix=*/ false);
            StressMajorizationLayout sm     = new StressMajorizationLayout(dsRefInst.Count, new DistFunc(simMtx));

            sm.Random   = mRandom;
            sm.MaxSteps = int.MaxValue;
            sm.MinDiff  = 1E-3;
            mRefPos     = sm.ComputeLayout(/*settings=*/ null, mRefPos /*make this a property!!!*/);
            /*prof*/ sw.Save("sm.txt", _count);
            // k-NN
            /*prof*/ sw.Reset();
            DateTime t = DateTime.Now;

            mLogger.Info("Update", "Computing similarities ...");
            // update list of neighborhoods
            mPatches.RemoveRange(mDataset.Count - mKClust, mKClust);
            mPatches.RemoveRange(0, numDequeue);
            // remove instances from [dataset and] neighborhoods
            foreach (Patch patch in mPatches)
            {
                if (patch.Min != null && (patch.Min.Idx < numDequeue || patch.Max.Idx >= mDataset.Count - mKClust))
                {
                    int oldCount = patch.List.Count;
                    ArrayList <KeyDat <double, Patch> > tmp = new ArrayList <KeyDat <double, Patch> >();
                    foreach (KeyDat <double, Patch> item in patch.List)
                    {
                        if (item.Dat.Idx >= numDequeue && item.Dat.Idx < mDataset.Count - mKClust)
                        {
                            tmp.Add(item);
                        }
                        //else
                        //{
                        //    Console.WriteLine("Remove {0}", item.Dat.Idx - numDequeue);
                        //}
                    }
                    patch.List = tmp;
                    patch.ProcessList();
                    patch.NeedUpdate = patch.List.Count < mKNn && oldCount >= mKNn;
                }
            }
            // update dataset
            mDataset.RemoveRange(mDataset.Count - mKClust, mKClust);
            mDataset.RemoveRange(0, numDequeue);
            // add new instances to dataset
            int preAddCount = mDataset.Count;

            mDataset.AddRange(dsNewInst);
            // precompute transposed matrices
            SparseMatrix <double> trNewInst = ModelUtils.GetTransposedMatrix(dsNewInst);
            SparseMatrix <double> trDataset = ModelUtils.GetTransposedMatrix(mDataset);

            // add new instances to neighborhoods
            for (int i = 0; i < dsNewInst.Count; i++)
            {
                mPatches.Add(new Patch(-1));
                mPatches.Last.NeedUpdate = true;
            }
            for (int i = 0; i < mPatches.Count; i++)
            {
                mPatches[i].Idx = i;
            }
            for (int i = 0; i < mPatches.Count; i++)
            {
                Patch patch = mPatches[i];
                SparseVector <double> vec = mDataset[i];
                if (vec != null)
                {
                    if (patch.NeedUpdate) // full update required
                    {
                        //if (i == 1347) { Console.WriteLine("full update"); }
                        SparseVector <double>             simVec = ModelUtils.GetDotProductSimilarity(trDataset, mDataset.Count, vec, mSimThresh);
                        ArrayList <KeyDat <double, int> > tmp    = new ArrayList <KeyDat <double, int> >();
                        foreach (IdxDat <double> item in simVec)
                        {
                            if (item.Idx != i)
                            {
                                tmp.Add(new KeyDat <double, int>(item.Dat, item.Idx));
                            }
                        }
                        tmp.Sort(new Comparer2());
                        int count = Math.Min(tmp.Count, mKNnExt);
                        patch.List.Clear();
                        for (int j = 0; j < count; j++)
                        {
                            patch.List.Add(new KeyDat <double, Patch>(tmp[j].Key, mPatches[tmp[j].Dat]));
                        }
                        patch.ProcessList();
                        patch.NeedUpdate = false;
                    }
                    else // only new instances need to be considered
                    {
                        //if (i == 1347) { Console.WriteLine("partial update"); }
                        SparseVector <double> simVec = ModelUtils.GetDotProductSimilarity(trNewInst, dsNewInst.Count, vec, mSimThresh);
                        // check if further processing is needed
                        bool needMerge = false;
                        if (test)
                        {
                            foreach (IdxDat <double> item in simVec)
                            {
                                if (item.Dat >= patch.MinSim)
                                {
                                    needMerge = true;
                                    //Console.WriteLine("{0} {1}", item.Dat, patch.MinSim);
                                    break;
                                }
                            }
                        }
                        else
                        {
                            foreach (IdxDat <double> item in simVec)
                            {
                                if (item.Dat > patch.MinSim)
                                {
                                    needMerge = true;
                                    //Console.WriteLine("{0} {1}", item.Dat, patch.MinSim);
                                    break;
                                }
                            }
                        }
                        if (needMerge || patch.List.Count < mKNn)
                        {
                            //if (i == 1347) { Console.WriteLine("merge"); }
                            int oldCount = patch.List.Count;
                            ArrayList <KeyDat <double, Patch> > tmp = new ArrayList <KeyDat <double, Patch> >();
                            foreach (IdxDat <double> item in simVec)
                            {
                                tmp.Add(new KeyDat <double, Patch>(item.Dat, mPatches[item.Idx + preAddCount]));
                            }
                            // merge the two lists
                            // TODO: speed this up
                            patch.List.AddRange(tmp);
                            patch.List.Sort(new Comparer());
                            // trim list to size
                            if (oldCount >= mKNn)
                            {
                                patch.List.RemoveRange(oldCount, patch.List.Count - oldCount);
                            }
                            patch.ProcessList();
                        }
                    }
                }
            }
            /*prof*/ sw.Save("knn.txt", _count);
            // *** Test ***
            sw.Reset();
            ModelUtils.GetDotProductSimilarity(mDataset, mSimThresh, /*fullMatrix=*/ true);
            sw.Save("selfSim.txt", _count, mDataset.Count.ToString());
            if (test)
            {
                simMtx = ModelUtils.GetDotProductSimilarity(mDataset, mSimThresh, /*fullMatrix=*/ true);
                ArrayList <Patch> patches = new ArrayList <Patch>();
                for (int i = 0; i < mDataset.Count; i++)
                {
                    patches.Add(new Patch(i));
                }
                foreach (IdxDat <SparseVector <double> > simMtxRow in simMtx)
                {
                    if (simMtxRow.Dat.Count <= 1)
                    {
                        mLogger.Warn("Update", "Instance #{0} has no neighborhood.", simMtxRow.Idx);
                    }
                    ArrayList <KeyDat <double, int> > knn = new ArrayList <KeyDat <double, int> >(simMtxRow.Dat.Count);
                    foreach (IdxDat <double> item in simMtxRow.Dat)
                    {
                        if (item.Idx != simMtxRow.Idx)
                        {
                            knn.Add(new KeyDat <double, int>(item.Dat, item.Idx));
                        }
                    }
                    knn.Sort(new Comparer2());
                    int count = Math.Min(knn.Count, mKNnExt);
                    for (int i = 0; i < count; i++)
                    {
                        patches[simMtxRow.Idx].List.Add(new KeyDat <double, Patch>(knn[i].Key, patches[knn[i].Dat]));
                    }
                    patches[simMtxRow.Idx].ProcessList();
                }
                // compare
                if (patches.Count != mPatches.Count)
                {
                    throw new Exception("Count mismatch.");
                }
                for (int i = 0; i < mPatches.Count; i++)
                {
                    if (patches[i].List.Count < mKNn && patches[i].List.Count != mPatches[i].List.Count)
                    {
                        Console.WriteLine(mPatches[i].List.Count);
                        Console.WriteLine(patches[i].List.Count);
                        Output(mPatches[i].List);
                        Output(patches[i].List);
                        Console.WriteLine(i);
                        throw new Exception("List count mismatch.");
                    }
                    int count = Math.Min(mPatches[i].List.Count, mKNn);
                    for (int j = 0; j < count; j++)
                    {
                        //Console.WriteLine("{4} {0}-{1} {2}-{3}", mPatches[i].List[j].Key, mPatches[i].List[j].Dat.Idx, patches[i].List[j].Key, patches[i].List[j].Dat.Idx, i);
                        if (mPatches[i].List[j].Key != patches[i].List[j].Key || mPatches[i].List[j].Dat.Idx != patches[i].List[j].Dat.Idx)
                        {
                            Console.WriteLine("i:{4} fast:{0}-{1} slow:{2}-{3}", mPatches[i].List[j].Key, mPatches[i].List[j].Dat.Idx, patches[i].List[j].Key, patches[i].List[j].Dat.Idx, i);
                            int idxFast = mPatches[i].List[j].Dat.Idx;
                            int idxSlow = patches[i].List[j].Dat.Idx;
                            Console.WriteLine("slow @ fast idx: {0}", GetKey(patches[i].List, idxFast));
                            Console.WriteLine("fast @ slow idx: {0}", GetKey(mPatches[i].List, idxSlow));
                            throw new Exception("Patch item mismatch.");
                        }
                    }
                }
            }
            // *** End of test ***
            //Console.WriteLine("Number of patches: {0}", mPatches.Count);
            //int waka = 0;
            //foreach (Patch patch in mPatches)
            //{
            //    waka += patch.List.Count;
            //}
            //Console.WriteLine("Avg list size: {0}", (double)waka / (double)mPatches.Count);
            Console.WriteLine((DateTime.Now - t).TotalMilliseconds);
            /*prof*/ sw.Reset();
            mLogger.Info("Update", "Constructing system of linear equations ...");
            LabeledDataset <double, SparseVector <double> > lsqrDs = new LabeledDataset <double, SparseVector <double> >();

            Vector2D[] layout = new Vector2D[mDataset.Count - mKClust];
            foreach (Patch patch in mPatches)
            {
                int count = Math.Min(patch.List.Count, mKNn);
                SparseVector <double> eq = new SparseVector <double>();
                double wgt = 1.0 / (double)count;
                for (int i = 0; i < count; i++)
                {
                    eq.InnerIdx.Add(patch.List[i].Dat.Idx);
                    eq.InnerDat.Add(-wgt);
                }
                eq.InnerIdx.Sort(); // *** sort only indices
                eq[patch.Idx] = 1;
                lsqrDs.Add(0, eq);
            }
            for (int i = mDataset.Count - mKClust, j = 0; i < mDataset.Count; i++, j++)
            {
                SparseVector <double> eq = new SparseVector <double>(new IdxDat <double>[] { new IdxDat <double>(i, 1) });
                lsqrDs.Add(mRefPos[j].X, eq);
            }
            LSqrModel lsqr = new LSqrModel();

            mSolX.RemoveRange(0, numDequeue);
            double[] aux = new double[mKClust];
            mSolX.CopyTo(mSolX.Count - mKClust, aux, 0, mKClust);
            mSolX.RemoveRange(mSolX.Count - mKClust, mKClust);
            foreach (SparseVector <double> newVec in newInst)
            {
                mSolX.Add(0);
            }
            mSolX.AddRange(aux);
            lsqr.InitialSolution = mSolX.ToArray();
            lsqr.Train(lsqrDs);
            mSolX = lsqr.Solution.GetWritableCopy();
            //for (int i = 0; i < lsqr.InitialSolution.Length; i++)
            //{
            //    Console.WriteLine("{0}\t{1}", lsqr.InitialSolution[i], lsqr.Solution[i]);
            //}
            for (int i = 0; i < layout.Length; i++)
            {
                layout[i].X = lsqr.Solution[i];
            }
            for (int i = lsqrDs.Count - mKClust, j = 0; i < lsqrDs.Count; i++, j++)
            {
                lsqrDs[i].Label = mRefPos[j].Y;
            }
            mSolY.RemoveRange(0, numDequeue);
            aux = new double[mKClust];
            mSolY.CopyTo(mSolY.Count - mKClust, aux, 0, mKClust);
            mSolY.RemoveRange(mSolY.Count - mKClust, mKClust);
            foreach (SparseVector <double> newVec in newInst)
            {
                mSolY.Add(0);
            }
            mSolY.AddRange(aux);
            lsqr.InitialSolution = mSolY.ToArray();
            lsqr.Train(lsqrDs);
            mSolY = lsqr.Solution.GetWritableCopy();
            for (int i = 0; i < layout.Length; i++)
            {
                layout[i].Y = lsqr.Solution[i];
            }
            /*prof*/ sw.Save("lsqr.txt", _count);
            // -----------------------------------------------------------------
            // make ptInfo
            // -----------------------------------------------------------------
            ptInfo = new PtInfo[layout.Length];
            int ii = 0;

            foreach (Vector2D pt in layout)
            {
                ptInfo[ii]     = new PtInfo();
                ptInfo[ii].X   = pt.X;
                ptInfo[ii].Y   = pt.Y;
                ptInfo[ii].Vec = mDataset[ii];
                ii++;
            }
            // -----------------------------------------------------------------
            return(settings == null ? layout : settings.AdjustLayout(layout));
        }
Ejemplo n.º 2
0
        public Vector2D[] ComputeLayout(LayoutSettings settings)
        {
            UnlabeledDataset <SparseVector <double> > dataset = new UnlabeledDataset <SparseVector <double> >(mDataset);

            // clustering
            mLogger.Info("ComputeLayout", "Clustering ...");
            KMeansFast kMeans = new KMeansFast(mKClust);

            kMeans.Eps    = mKMeansEps;
            kMeans.Random = mRandom;
            kMeans.Trials = 1;
            ClusteringResult clustering = kMeans.Cluster(mDataset); // throws ArgumentValueException
            // determine reference instances
            UnlabeledDataset <SparseVector <double> > dsRefInst = new UnlabeledDataset <SparseVector <double> >();

            foreach (Cluster cluster in clustering.Roots)
            {
                SparseVector <double> centroid
                    = cluster.Items.Count > 0 ? cluster.ComputeCentroid(mDataset, CentroidType.NrmL2) : new SparseVector <double>();
                dsRefInst.Add(centroid); // dataset of reference instances
                dataset.Add(centroid);   // add centroids to the main dataset
            }
            // position reference instances
            mLogger.Info("ComputeLayout", "Positioning reference instances ...");
            SparseMatrix <double>    simMtx = ModelUtils.GetDotProductSimilarity(dsRefInst, mSimThresh, /*fullMatrix=*/ false);
            StressMajorizationLayout sm     = new StressMajorizationLayout(dsRefInst.Count, new DistFunc(simMtx));

            sm.Random = mRandom;
            Vector2D[] centrPos = sm.ComputeLayout();
            // k-NN
            mLogger.Info("ComputeLayout", "Computing similarities ...");
            simMtx = ModelUtils.GetDotProductSimilarity(dataset, mSimThresh, /*fullMatrix=*/ true);
            mLogger.Info("ComputeLayout", "Constructing system of linear equations ...");
            LabeledDataset <double, SparseVector <double> > lsqrDs = new LabeledDataset <double, SparseVector <double> >();

            foreach (IdxDat <SparseVector <double> > simMtxRow in simMtx)
            {
                if (simMtxRow.Dat.Count <= 1)
                {
                    mLogger.Warn("ComputeLayout", "Instance #{0} has no neighborhood.", simMtxRow.Idx);
                }
                ArrayList <KeyDat <double, int> > knn = new ArrayList <KeyDat <double, int> >(simMtxRow.Dat.Count);
                foreach (IdxDat <double> item in simMtxRow.Dat)
                {
                    if (item.Idx != simMtxRow.Idx)
                    {
                        knn.Add(new KeyDat <double, int>(item.Dat, item.Idx));
                    }
                }
                knn.Sort(DescSort <KeyDat <double, int> > .Instance);
                int count = Math.Min(knn.Count, mKNN);
                SparseVector <double> eq = new SparseVector <double>();
                double wgt = 1.0 / (double)count;
                for (int i = 0; i < count; i++)
                {
                    eq.InnerIdx.Add(knn[i].Dat);
                    eq.InnerDat.Add(-wgt);
                }
                eq.InnerIdx.Sort(); // *** sort only indices
                eq[simMtxRow.Idx] = 1;
                lsqrDs.Add(0, eq);
            }
            Vector2D[] layout = new Vector2D[dataset.Count - mKClust];
            for (int i = dataset.Count - mKClust, j = 0; i < dataset.Count; i++, j++)
            {
                SparseVector <double> eq = new SparseVector <double>(new IdxDat <double>[] { new IdxDat <double>(i, 1) });
                lsqrDs.Add(centrPos[j].X, eq);
            }
            LSqrModel lsqr = new LSqrModel();

            lsqr.Train(lsqrDs);
            for (int i = 0; i < layout.Length; i++)
            {
                layout[i].X = lsqr.Solution[i];
            }
            for (int i = lsqrDs.Count - mKClust, j = 0; i < lsqrDs.Count; i++, j++)
            {
                lsqrDs[i].Label = centrPos[j].Y;
            }
            lsqr.Train(lsqrDs);
            for (int i = 0; i < layout.Length; i++)
            {
                layout[i].Y = lsqr.Solution[i];
            }
            return(settings == null ? layout : settings.AdjustLayout(layout));
        }
        public Vector2D[] ComputeLayout(LayoutSettings settings)
        {
            // clustering
            mLogger.Info("ComputeLayout", "Clustering ...");
            mKMeans        = new IncrementalKMeans(mKClust);
            mKMeans.Eps    = mKMeansEps;
            mKMeans.Random = mRandom;
            mKMeans.Trials = 3;
            ClusteringResult clustering = mKMeans.Cluster(mDataset); // throws ArgumentValueException
            // determine reference instances
            UnlabeledDataset <SparseVector <double> > dsRefInst = new UnlabeledDataset <SparseVector <double> >();

            foreach (SparseVector <double> centroid in mKMeans.GetCentroids())
            {
                dsRefInst.Add(centroid); // dataset of reference instances
                mDataset.Add(centroid);  // add centroids to the main dataset
            }
            // position reference instances
            mLogger.Info("ComputeLayout", "Positioning reference instances ...");
            SparseMatrix <double>    simMtx = ModelUtils.GetDotProductSimilarity(dsRefInst, mSimThresh, /*fullMatrix=*/ false);
            StressMajorizationLayout sm     = new StressMajorizationLayout(dsRefInst.Count, new DistFunc(simMtx));

            sm.Random   = mRandom;
            sm.MaxSteps = int.MaxValue;
            sm.MinDiff  = 0.00001;
            mRefPos     = sm.ComputeLayout();
            // k-NN
            mLogger.Info("ComputeLayout", "Computing similarities ...");
            simMtx = ModelUtils.GetDotProductSimilarity(mDataset, mSimThresh, /*fullMatrix=*/ true);
            mLogger.Info("ComputeLayout", "Constructing system of linear equations ...");
            LabeledDataset <double, SparseVector <double> > lsqrDs = new LabeledDataset <double, SparseVector <double> >();

            mPatches = new ArrayList <Patch>(mDataset.Count);
            for (int i = 0; i < mDataset.Count; i++)
            {
                mPatches.Add(new Patch(i));
            }
            foreach (IdxDat <SparseVector <double> > simMtxRow in simMtx)
            {
                if (simMtxRow.Dat.Count <= 1)
                {
                    mLogger.Warn("ComputeLayout", "Instance #{0} has no neighborhood.", simMtxRow.Idx);
                }
                ArrayList <KeyDat <double, int> > knn = new ArrayList <KeyDat <double, int> >(simMtxRow.Dat.Count);
                foreach (IdxDat <double> item in simMtxRow.Dat)
                {
                    if (item.Idx != simMtxRow.Idx)
                    {
                        knn.Add(new KeyDat <double, int>(item.Dat, item.Idx));
                    }
                }
                knn.Sort(DescSort <KeyDat <double, int> > .Instance);
                int count = Math.Min(knn.Count, mKNnExt);
                for (int i = 0; i < count; i++)
                {
                    mPatches[simMtxRow.Idx].List.Add(new KeyDat <double, Patch>(knn[i].Key, mPatches[knn[i].Dat]));
                }
                mPatches[simMtxRow.Idx].ProcessList();
                count = Math.Min(knn.Count, mKNn);
                SparseVector <double> eq = new SparseVector <double>();
                double wgt = 1.0 / (double)count;
                for (int i = 0; i < count; i++)
                {
                    eq.InnerIdx.Add(knn[i].Dat);
                    eq.InnerDat.Add(-wgt);
                }
                eq.InnerIdx.Sort(); // *** sort only indices
                eq[simMtxRow.Idx] = 1;
                lsqrDs.Add(0, eq);
            }
            Vector2D[] layout = new Vector2D[mDataset.Count - mKClust];
            for (int i = mDataset.Count - mKClust, j = 0; i < mDataset.Count; i++, j++)
            {
                SparseVector <double> eq = new SparseVector <double>(new IdxDat <double>[] { new IdxDat <double>(i, 1) });
                lsqrDs.Add(mRefPos[j].X, eq);
            }
            LSqrModel lsqr = new LSqrModel();

            lsqr.Train(lsqrDs);
            mSolX = lsqr.Solution.GetWritableCopy();
            for (int i = 0; i < layout.Length; i++)
            {
                layout[i].X = lsqr.Solution[i];
            }
            for (int i = lsqrDs.Count - mKClust, j = 0; i < lsqrDs.Count; i++, j++)
            {
                lsqrDs[i].Label = mRefPos[j].Y;
            }
            lsqr.Train(lsqrDs);
            mSolY = lsqr.Solution.GetWritableCopy();
            for (int i = 0; i < layout.Length; i++)
            {
                layout[i].Y = lsqr.Solution[i];
            }
            return(settings == null ? layout : settings.AdjustLayout(layout));
        }