/// <summary>
/// Partitions the index range [iStart, iEnd] of the data set around its mean value.
/// When <paramref name="depth"/> equals <paramref name="maxDepth"/> the range is
/// recorded as a cluster in vqClusters; otherwise it is divided at the first element
/// greater than the mean and both halves are processed one level deeper.
/// </summary>
/// <param name="iStart">Index of the first element in the range (inclusive).</param>
/// <param name="iEnd">Index of the last element in the range (inclusive).</param>
/// <param name="depth">Current recursion depth.</param>
/// <param name="maxDepth">Depth at which a range is stored as a cluster instead of being split again.</param>
private void SplitCluster(int iStart, int iEnd, int depth, int maxDepth)
{
    // An empty range yields no cluster.
    if (iStart > iEnd)
    {
        return;
    }

    double mean, variance;
    ComputeVariables(iStart, iEnd, out mean, out variance);

    // Leaf level: store the range as a finished cluster.
    if (depth == maxDepth)
    {
        vqClusters.Add(new VQCluster(mean, variance, iStart, iEnd));
        return;
    }

    // Locate the first element that is greater than the mean. The split relies on
    // the range being in ascending order (BuildCodebook sorts the data beforehand),
    // so everything from that element onwards belongs to the upper half.
    int splitIndex = iStart;
    while (splitIndex <= iEnd && !(dataSet[splitIndex] > mean))
    {
        splitIndex++;
    }

    // Lower half: elements not greater than the mean; upper half: the remainder.
    int nextDepth = depth + 1;
    SplitCluster(iStart, splitIndex - 1, nextDepth, maxDepth);
    SplitCluster(splitIndex, iEnd, nextDepth, maxDepth);
}
/// <summary>
/// Builds a codebook for the current data set. The data is sorted, treated as a
/// single cluster, and the cluster selected by MaxVarianceClusterId is repeatedly
/// split at its mean until <paramref name="vqSize"/> clusters exist or nothing more
/// can be split. Each codebook entry is the mean of one cluster.
/// </summary>
/// <param name="vqSize">Requested number of codebook entries; must not exceed the data set size.</param>
/// <returns>
/// The root-mean-square distance between each data value and the codebook entry of
/// its cluster, or -1 if the data set is empty or <paramref name="vqSize"/> is
/// greater than the data set size.
/// </returns>
public double BuildCodebook(int vqSize)
{
    if (vqSize > dataSetSize)
    {
        Logger.WriteLine(Logger.Level.err, "VQ size should not be greater than data size.");
        return -1;
    }

    // With no data there is nothing to index (dataSet[0]) or to divide by below.
    if (dataSetSize <= 0)
    {
        Logger.WriteLine(Logger.Level.err, "Data set is empty.");
        return -1;
    }

    Logger.WriteLine("Sorting data set (size: {0})...", dataSetSize);
    dataSet.Sort(0, dataSetSize);
    Logger.WriteLine("min={0}, max={1}", dataSet[0], dataSet[dataSetSize - 1]);

    // Set entire data as a single cluster, and then split it
    double mean, var;
    ComputeVariables(0, dataSetSize - 1, out mean, out var);
    vqClusters = new List<VQCluster>();
    vqClusters.Add(new VQCluster(mean, var, 0, dataSetSize - 1));

    // Split clusters according to their variance values
    while (vqClusters.Count < vqSize)
    {
        int maxVarClusterId = MaxVarianceClusterId();
        if (maxVarClusterId < 0)
        {
            break; // no more to split
        }

        int countBeforeSplit = vqClusters.Count;

        // Split the cluster into two and remove the original one. The new clusters
        // are appended, so the index of the original stays valid for RemoveAt.
        SplitCluster(vqClusters[maxVarClusterId].iStart, vqClusters[maxVarClusterId].iEnd, 0, 1);
        vqClusters.RemoveAt(maxVarClusterId);

        // If every element fell on one side of the mean (e.g. equal values whose
        // computed variance is not exactly zero because of rounding), the split
        // only re-created the same range and the cluster count did not grow.
        // Stop here rather than looping on a range that cannot be divided.
        if (vqClusters.Count <= countBeforeSplit)
        {
            Logger.WriteLine("Cluster cannot be split further; stopping at {0} clusters.", vqClusters.Count);
            break;
        }
    }

    // Adjust clusters according to their mean values
    AdjustCluster();

    // Final codebook: one entry per cluster (may be fewer than requested)
    int codebookSize = vqClusters.Count;
    codebook = new double[codebookSize];
    double distortion = 0;
    for (int i = 0; i < codebookSize; i++)
    {
        codebook[i] = vqClusters[i].mean;

        // Accumulate the squared error of every value assigned to this cluster
        for (int j = vqClusters[i].iStart; j <= vqClusters[i].iEnd; j++)
        {
            double diff = dataSet[j] - codebook[i];
            distortion += diff * diff;
        }
    }

    return Math.Sqrt(distortion / dataSetSize);
}