//Project the covariance matrix A onto Omega: Y <- A * Omega
        //A = X' * X / n, where X = data - mean
        //Note that the covariance matrix is not computed explicitly
        /// <summary>
        /// Makes one pass over the data and accumulates Y = A * Omega, producing one vector of
        /// <paramref name="y"/> per vector of <paramref name="omega"/>. Each row contributes
        /// weight * (omega[i] . features) * features to y[i]; the sums are divided by the total weight.
        /// When centering is requested, the weighted mean is recomputed from scratch during the same pass
        /// and (omega[i] . mean) * mean is subtracted from y[i] afterwards.
        /// </summary>
        /// <param name="host">Host used for the assertions and for the progress channel.</param>
        /// <param name="cursorFactory">Factory from which a single cursor over the rows is created.</param>
        /// <param name="mean">A dense buffer requests centering and holds the weighted feature mean on return;
        /// a non-dense buffer disables centering and is not modified.</param>
        /// <param name="omega">The vectors to project on; must be non-empty.</param>
        /// <param name="y">Output vectors, expected to have the same count as <paramref name="omega"/>.
        /// They are cleared first and then overwritten.</param>
        /// <param name="numBad">Receives the cursor's skipped row count.</param>
        /// <remarks>
        /// Fails the "Empty training data" contract check when the accumulated weight is not positive.
        /// </remarks>
        private static void Project(IHost host, FeatureFloatVectorCursor.Factory cursorFactory, ref VBuffer <Float> mean, VBuffer <Float>[] omega, VBuffer <Float>[] y, out long numBad)
        {
            Contracts.AssertValue(host, "host");
            host.AssertNonEmpty(omega);
            host.Assert(Utils.Size(y) == omega.Length); // Size of Y and Omega: dimension x oversampled rank
            int numCols = omega.Length;

            // y is an accumulator: reset it before summing.
            for (int i = 0; i < y.Length; ++i)
            {
                VBufferUtils.Clear(ref y[i]);
            }

            // Centering is signalled by the caller through the density of the mean buffer.
            bool center = mean.IsDense;
            if (center)
            {
                // mean is an accumulator as well. The same buffer is handed in again on later calls,
                // so without this reset the previous mean would be folded into the new sum and the
                // result would come out as mean * (1 + 1/n) instead of mean.
                VBufferUtils.Clear(ref mean);
            }

            Float n      = 0; // total weight of the rows seen
            long  count  = 0; // number of rows seen, reported as progress

            using (var pch = host.StartProgressChannel("Project covariance matrix"))
                using (var cursor = cursorFactory.Create())
                {
                    pch.SetHeader(new ProgressHeader(new[] { "rows" }), e => e.SetProgress(0, count));
                    while (cursor.MoveNext())
                    {
                        if (center)
                        {
                            // mean += weight * features
                            VectorUtils.AddMult(ref cursor.Features, cursor.Weight, ref mean);
                        }
                        for (int i = 0; i < numCols; i++)
                        {
                            // y[i] += weight * (omega[i] . features) * features
                            VectorUtils.AddMult(
                                ref cursor.Features,
                                cursor.Weight * VectorUtils.DotProduct(ref omega[i], ref cursor.Features),
                                ref y[i]);
                        }
                        n += cursor.Weight;
                        count++;
                    }
                    pch.Checkpoint(count);
                    numBad = cursor.SkippedRowCount;
                }

            Contracts.Check(n > 0, "Empty training data");
            Float invn = 1 / n;

            // Turn the weighted sums into weighted averages.
            for (var i = 0; i < numCols; ++i)
            {
                VectorUtils.ScaleBy(ref y[i], invn);
            }

            if (center)
            {
                VectorUtils.ScaleBy(ref mean, invn);
                // The rows were accumulated uncentered; remove the mean's contribution:
                // y[i] -= (omega[i] . mean) * mean
                for (int i = 0; i < numCols; i++)
                {
                    VectorUtils.AddMult(ref mean, -VectorUtils.DotProduct(ref omega[i], ref mean), ref y[i]);
                }
            }
        }
        /// <summary>
        /// Trains the PCA predictor: projects the covariance matrix onto a random matrix, orthonormalizes
        /// the result, projects again onto that basis, and eigen-decomposes the small matrix B' * B.
        /// </summary>
        /// <param name="ch">Channel used for assertions, the memory notice, the skipped-row warning
        /// and the rank error.</param>
        /// <param name="data">Training data handed to the feature/weight cursor factory.</param>
        /// <param name="dimension">Original feature dimension; the configured rank must not exceed it.</param>
        /// <returns>A predictor built from the host, the rank, the post-processed vectors and the mean.</returns>
        private PcaPredictor TrainCore(IChannel ch, RoleMappedData data, int dimension)
        {
            Host.AssertValue(ch);
            ch.AssertValue(data);

            if (dimension < _rank)
            {
                throw ch.Except("Rank ({0}) cannot be larger than the original dimension ({1})", _rank, dimension);
            }

            // Oversample, but never beyond the original dimension.
            int sketchSize = Math.Min(_rank + _oversampling, dimension);

            // Rough size in GB (bytes / 1e9) of two matrices of dimension * sketchSize floats each.
            Double memoryUsageEstimate = 2.0 * dimension * sketchSize * sizeof(Float) / 1e9;
            bool   isLarge             = memoryUsageEstimate > 2;

            if (isLarge)
            {
                ch.Info("Estimate memory usage: {0:G2} GB. If running out of memory, reduce rank and oversampling factor.", memoryUsageEstimate);
            }

            // Y: receives the first projection.
            var range = Zeros(sketchSize, dimension);

            // A dense mean buffer is what switches centering on; an empty one switches it off.
            VBuffer <Float> mean;
            if (_center)
            {
                mean = VBufferUtils.CreateDense <Float>(dimension);
            }
            else
            {
                mean = VBufferUtils.CreateEmpty <Float>(dimension);
            }

            // Omega: the random matrix, generated from the configured seed.
            var probes = GaussianMatrix(sketchSize, dimension, _seed);

            var  cursorFactory = new FeatureFloatVectorCursor.Factory(data, CursOpt.Features | CursOpt.Weight);
            long skipped;

            // First pass: Y <- A * Omega.
            Project(Host, cursorFactory, ref mean, probes, range, out skipped);
            if (skipped > 0)
            {
                ch.Warning("Skipped {0} instances with missing features/weights during training", skipped);
            }

            // Orthonormalize Y in-place using the stabilized Gram-Schmidt algorithm.
            // Ref: https://en.wikipedia.org/wiki/Gram-Schmidt#Algorithm
            for (int pivot = 0; pivot < sketchSize; pivot++)
            {
                // NOTE(review): the scaling goes through the local `unit`, not range[pivot] itself;
                // presumably both refer to the same underlying storage - confirm.
                var unit   = range[pivot];
                var length = VectorUtils.Norm(range[pivot]);
                VectorUtils.ScaleBy(ref unit, 1 / length);

                // Remove from every later vector its projection on the freshly normalized one.
                for (int later = pivot + 1; later < sketchSize; later++)
                {
                    var overlap = VectorUtils.DotProduct(ref unit, ref range[later]);
                    VectorUtils.AddMult(ref unit, -overlap, ref range[later]);
                }
            }

            // Q of the QR decomposition is the orthonormalized Y.
            var basis = range;

            // B reuses the memory that was allocated for Omega.
            var projected = probes;

            // Second pass: B <- A * Q. The skipped count is overwritten and not reported again.
            Project(Host, cursorFactory, ref mean, basis, projected, out skipped);

            // B2 = B' * B: symmetric, so compute the upper triangle and mirror each entry.
            var gram = new Float[sketchSize * sketchSize];

            for (int r = 0; r < sketchSize; r++)
            {
                for (int c = r; c < sketchSize; c++)
                {
                    Float dot = VectorUtils.DotProduct(ref projected[r], ref projected[c]);
                    gram[c * sketchSize + r] = dot;
                    gram[r * sketchSize + c] = dot;
                }
            }

            // Eigenvalues and eigenvectors of the small matrix B2.
            Float[] eigenvalues;
            Float[] eigenvectors;
            EigenUtils.EigenDecomposition(gram, out eigenvalues, out eigenvectors);
            PostProcess(projected, eigenvalues, eigenvectors, dimension, sketchSize);

            return new PcaPredictor(Host, _rank, projected, ref mean);
        }