/// <summary>
/// Serializes a cluster to an XML file named after its clusterCode
/// (path + "\" + clusterCode + ".xml").
/// </summary>
/// <param name="cluster">Cluster to serialize; its clusterCode becomes the file name.</param>
/// <param name="path">Directory prefix; joined to the file name with a backslash.</param>
public static void ClusterToXML(Cluster cluster, String path)
{
    XmlSerializer serializer = new XmlSerializer(typeof(Cluster));

    // The using block closes the file even when Serialize throws, so a failed
    // serialization no longer leaks the open file handle.
    using (TextWriter textWriter = new StreamWriter(path + "\\" + cluster.clusterCode + ".xml"))
    {
        serializer.Serialize(textWriter, cluster);
    }
}
/// <summary>
/// Clusters stocks by how often each tick sequence occurs in their price lists,
/// using an iterative nearest-center assignment (log prefix "SSEC").
/// </summary>
/// <param name="stockList">
/// Stocks to cluster. Every tick is cast to GenusTick. Each priceList is trimmed in place:
/// leading ticks are removed until at most <paramref name="minTimeInterval"/> remain.
/// </param>
/// <param name="numberOfClusters">Number of clusters to separate the stocks into.</param>
/// <param name="minTimeInterval">Number of most recent ticks kept per stock.</param>
/// <param name="additionalArgs">
/// Number of ticks in a defined sequence. When it does not parse as an integer,
/// the value is read from the console instead.
/// </param>
/// <returns>
/// One Cluster per cluster index (clusterCode = index + 1) holding its stock codes and its
/// center as FakeTick entries. An empty list is returned when the genus scan finds no
/// positive degreeOfChange.
/// </returns>
/// <exception cref="ArgumentException">
/// More clusters are requested than there are stocks to seed them with.
/// </exception>
/// <exception cref="InvalidOperationException">
/// The assignment has still not settled after the maximum number of iterations.
/// </exception>
public static List<Cluster> Cluster(List<Stock> stockList, int numberOfClusters, int minTimeInterval, string additionalArgs)
{
    const int MaxIterations = 50;

    int numberOfGenus = 0;
    int numberInSequence = 0;

    // check the number of genus: scan stocks until one yields a positive degree of change
    foreach (Stock eachStock in stockList)
    {
        foreach (Tick eachTick in eachStock.priceList)
        {
            int degree = ((GenusTick)eachTick).degreeOfChange;
            if (degree > numberOfGenus)
            {
                numberOfGenus = degree;
            }
        }
        if (numberOfGenus > 0)
        {
            break;
        }
    }
    if (numberOfGenus == 0)
    {
        return new List<Cluster>();
    }
    // presumably one class per degree in each direction plus "no change" - TODO confirm
    numberOfGenus = numberOfGenus * 2 + 1;

    // Every cluster is seeded with a distinct stock; with fewer stocks than clusters the
    // seeding loop below could never finish, so fail fast before mutating any input.
    if (numberOfClusters > stockList.Count)
    {
        throw new ArgumentException("numberOfClusters must not exceed the number of stocks.", "numberOfClusters");
    }

    // eliminate the error of yahoo that extra data points may have been given
    foreach (Stock eachStock in stockList)
    {
        int noOfExtraTicks = eachStock.priceList.Count - minTimeInterval;
        for (int i = 0; i < noOfExtraTicks; i++)
        {
            // removing the front element, not removing the i-th element
            eachStock.priceList.RemoveAt(0);
        }
    }

    // additional question, number of ticks in a defined sequence
    if (!int.TryParse(additionalArgs, out numberInSequence))
    {
        Console.WriteLine("\nNumber of ticks in a denfined sequence: ");
        numberInSequence = int.Parse(Console.ReadLine());
    }

    // determine all possible combination in a sequence
    List<string> possibleSequences = SequenceString(numberOfGenus, numberInSequence);
    List<Dictionary<string, double>> clustersCenterList = null;

    // count the occurrence of sequences of each stock
    Dictionary<Stock, Dictionary<string, int>> masterDict1 = new Dictionary<Stock, Dictionary<string, int>>();
    foreach (Stock eachStock in stockList)
    {
        Dictionary<string, int> stockSequenceDict = SeqListToIntDict(possibleSequences);

        // NOTE(review): the strict '<' skips the window starting at Count - numberInSequence;
        // confirm whether the last full window is meant to be excluded.
        for (int i = 0; i < eachStock.priceList.Count - numberInSequence; i++)
        {
            Tick[] ticks = new Tick[numberInSequence];
            for (int j = 0; j < numberInSequence; j++)
            {
                ticks[j] = eachStock.priceList.ElementAt(i + j);
            }
            stockSequenceDict[SequenceString(ticks)] += 1;
        }
        Console.WriteLine();

        // output the result to sysout or log
        foreach (KeyValuePair<string, int> eachKYP in stockSequenceDict)
        {
            if (eachKYP.Value != 0)
            {
                Console.WriteLine("SSEC: Stock " + eachStock.stockCode + " has the sequence " + eachKYP.Key + " occurred " + eachKYP.Value + " times");
            }
        }

        // add to the master table for future similarity calculations
        masterDict1.Add(eachStock, stockSequenceDict);
    }

    // define the clusters by randomly placing one distinct stock into each of them
    Dictionary<Stock, int> masterDict2 = new Dictionary<Stock, int>(); // -- t
    Dictionary<Stock, int> masterDict3 = null; // -- t-1
    Dictionary<Stock, int> masterDict4 = null; // -- t-2
    Random random = new Random();

    // The number of seeds placed so far is also the index of the cluster being seeded.
    while (masterDict2.Count < numberOfClusters)
    {
        int clusterIndex = masterDict2.Count;
        Stock pointer = stockList[random.Next(stockList.Count)];
        Console.WriteLine("SSEC: Stock " + pointer.stockCode + " randomly selected for cluster " + clusterIndex);
        if (!masterDict2.ContainsKey(pointer))
        {
            masterDict2.Add(pointer, clusterIndex);
        }
    }

    // every remaining stock starts unassigned (-1)
    foreach (Stock eachStock in stockList)
    {
        if (!masterDict2.ContainsKey(eachStock))
        {
            masterDict2.Add(eachStock, -1);
        }
    }

    int minimalDistanceCluster;
    double minimalDistanceClusterDistance;
    double thisRoundDistance;
    bool clusterElementChanged = true;

    // the comparison is performed iteratively until no more element migration between clusters is observed
    for (int iteration = 1; clusterElementChanged; iteration++)
    {
        // initiation - recalculate the means (centers) of each cluster
        clusterElementChanged = false;
        clustersCenterList = CalculateClusterCenter(possibleSequences, masterDict1, masterDict2, numberOfClusters);

        // perform similarity calculations - for each stock in the list
        for (int i = 0; i < stockList.Count; i++)
        {
            minimalDistanceCluster = int.MaxValue;
            minimalDistanceClusterDistance = double.MaxValue;

            // compare with the center of each cluster
            for (int j = 0; j < numberOfClusters; j++)
            {
                thisRoundDistance = CalculateDistance(possibleSequences, clustersCenterList[j], masterDict1[stockList[i]]);
                Console.WriteLine("SSEC: Stock " + stockList[i].stockCode + " vs cluster " + j + ", iteration " + iteration + ", distance " + thisRoundDistance);
                if (thisRoundDistance < minimalDistanceClusterDistance)
                {
                    minimalDistanceCluster = j;
                    minimalDistanceClusterDistance = thisRoundDistance;
                }
            }

            // a different nearest cluster than the master table means the result has changed
            if (masterDict2[stockList[i]] != minimalDistanceCluster)
            {
                masterDict2[stockList[i]] = minimalDistanceCluster;
                clusterElementChanged = true;
            }
        }

        // empty cluster check: refill an empty cluster with a random stock that is not alone in its cluster
        for (int i = 0; i < numberOfClusters; i++)
        {
            if (DirtyHelper.CheckWetherClusterHasNoStock(masterDict2, i))
            {
                while (true)
                {
                    // Reuse the single Random instance: a fresh clock-seeded Random per retry
                    // can return the same index over and over until the clock advances.
                    int tobe = random.Next(stockList.Count);
                    if (!DirtyHelper.CheckWetherThisStockIsALonelyStock(masterDict2, masterDict2[stockList[tobe]]))
                    {
                        masterDict2[stockList[tobe]] = i;
                        break;
                    }
                }
            }
        }

        // infinite loop check: stop when the mapping equals that of one of the last two iterations
        if (CompareDictionary(masterDict2, masterDict3) || CompareDictionary(masterDict2, masterDict4))
        {
            clusterElementChanged = false;
            Console.WriteLine("\nSSEC: Infinite loop detected, will not go into next loop.");
        }
        else
        {
            // if clusters are ok, copy as temp and go to next iteration
            masterDict4 = masterDict3;
            masterDict3 = CloneDictionary(masterDict2);
        }

        // Give up only when another iteration would actually be needed; a run that settles
        // exactly on the last allowed iteration still returns its result.
        if (clusterElementChanged && iteration == MaxIterations)
        {
            throw new InvalidOperationException("Clustering did not converge within " + MaxIterations + " iterations.");
        }

        // **********
        // Note: massive debug logging here, consider refactoring
        // **********
        Console.WriteLine("SSEC: Iteration " + iteration + " done");
        if (iteration % 3 == 0 || !clusterElementChanged)
        {
            Console.WriteLine("\nSSEC: Iteration " + iteration + " result");
            foreach (KeyValuePair<Stock, int> eachKYP in masterDict2)
            {
                Console.Write("SSEC: Stock " + eachKYP.Key.stockCode + ", cluster ");
                if (masterDict4 != null)
                {
                    Console.Write(masterDict4[eachKYP.Key] + "->");
                }
                if (masterDict3 != null)
                {
                    Console.Write(masterDict3[eachKYP.Key] + "->");
                }
                Console.WriteLine(eachKYP.Value);
            }
            for (int i = 0; i < numberOfClusters; i++)
            {
                Console.WriteLine("SSEC: Cluster " + i + " has " + NumberofElements(masterDict2, i) + " stocks.");
            }
        }
        Console.WriteLine();
    }

    // populate a list of clusters in returning format
    List<Cluster> toReturn = new List<Cluster>();
    for (int i = 0; i < numberOfClusters; i++)
    {
        Cluster thisCluster = new Cluster();
        thisCluster.clusterCode = i + 1;
        thisCluster.stockCodeList = new List<int>();
        thisCluster.centroid = new List<Tick>();

        // populate the stock codes
        foreach (KeyValuePair<Stock, int> eachStockClusterKYP in masterDict2)
        {
            if (eachStockClusterKYP.Value == i)
            {
                thisCluster.stockCodeList.Add(eachStockClusterKYP.Key.stockCode);
            }
        }

        // the center is exported as one FakeTick per (sequence, value) pair
        foreach (KeyValuePair<string, double> seqOccuranceKYP in clustersCenterList[i])
        {
            thisCluster.centroid.Add(new FakeTick(seqOccuranceKYP.Key, seqOccuranceKYP.Value));
        }
        toReturn.Add(thisCluster);
    }
    return toReturn;
}
/// <summary>
/// K-mean clustering over the most recent <paramref name="minTimeInterval"/> ticks of each
/// stock (log prefix "K-mean"). One randomly chosen stock seeds each cluster and stays
/// pinned to it; every other stock moves to the cluster with the nearest centroid until
/// the mapping stops changing or repeats one of the previous two mappings.
/// </summary>
/// <param name="clusters">
/// Stock-to-cluster mapping, updated in place. A value of 0 is treated as "unassigned";
/// cluster codes are 1-based.
/// </param>
/// <param name="numberOfClusters">Number of clusters to build.</param>
/// <param name="minTimeInterval">Number of trailing ticks of each price list that are compared.</param>
/// <returns>One Cluster per centroid with its 1-based clusterCode, centroid ticks and member stock codes.</returns>
/// <exception cref="ArgumentException">
/// Fewer unassigned stocks are available than clusters to seed.
/// </exception>
private static List<Cluster> Cluster(Dictionary<Stock, int> clusters, int numberOfClusters, int minTimeInterval)
{
    // Snapshot of the entries: lets us iterate while the dictionary itself is being updated.
    // Only the keys of this snapshot are used later; its values go stale.
    List<KeyValuePair<Stock, int>> stocksList = clusters.ToList();
    List<List<Tick>> centroidsList = new List<List<Tick>>();
    HashSet<int> seedStockCodes = new HashSet<int>();
    Random random = new Random();

    // by blocking the initial assigned element to move between clusters, there may be cases that the
    // algorithm will move other stocks between 2 clusters forever and try to achieve the optimum. By
    // detecting whether the current stock cluster mapping is the same as in the last 2 loops, we can
    // break out if this case is encountered.
    Dictionary<Stock, int> clusters_oneTimeBefore = null;
    Dictionary<Stock, int> clusters_twoTimeBefore = null;

    // Seeding draws random stocks until it hits an unassigned one, so it can only finish
    // when at least one unassigned stock exists per cluster.
    int unassignedCount = 0;
    foreach (KeyValuePair<Stock, int> entry in stocksList)
    {
        if (entry.Value == 0)
        {
            unassignedCount++;
        }
    }
    if (unassignedCount < numberOfClusters)
    {
        throw new ArgumentException("numberOfClusters must not exceed the number of unassigned stocks.", "numberOfClusters");
    }

    // randomly assign one stock as the first element of each cluster
    for (int clusterCode = 1; clusterCode <= numberOfClusters; clusterCode++)
    {
        Stock seedStock;
        int assignedCluster;
        do
        {
            seedStock = stocksList[random.Next(stocksList.Count)].Key;
            clusters.TryGetValue(seedStock, out assignedCluster);
        } while (assignedCluster != 0);

        clusters[seedStock] = clusterCode;
        seedStockCodes.Add(seedStock.stockCode);
        Console.WriteLine("K-mean: Stock " + seedStock.stockCode + " randomly selected for cluster " + clusterCode);

        // copy the historical price of the seed stock to become the initial centroid
        List<Tick> seedCentroid = new List<Tick>();
        for (int j = seedStock.priceList.Count - minTimeInterval; j < seedStock.priceList.Count; j++)
        {
            Tick sourceTick = seedStock.priceList.ElementAt(j);
            NumericTick copy = new NumericTick();
            copy.change = ((NumericTick)sourceTick).change;
            copy.Time = sourceTick.Time;
            seedCentroid.Add(copy);
        }
        centroidsList.Add(seedCentroid);
    }

    // compare each stock with the cluster centroids until nothing moves any more
    bool exit = false;
    int iteration = 0;
    while (!exit)
    {
        iteration++;
        exit = true;

        foreach (KeyValuePair<Stock, int> stockKYP in stocksList)
        {
            Stock stock = stockKYP.Key;
            double minDistance = 0.0;
            int minDistanceCluster = 0; // 0 = no centroid examined yet

            // Distances are computed for seed stocks as well (the original does so for its debug output).
            for (int i = 0; i < centroidsList.Count; i++)
            {
                double distance = Distance(stock.priceList, centroidsList[i], stock.stockCode + "", (i + 1) + "", iteration + "");
                if (minDistanceCluster == 0 || distance < minDistance)
                {
                    minDistance = distance;
                    minDistanceCluster = i + 1;
                }
            }

            int currentCluster;
            clusters.TryGetValue(stock, out currentCluster);

            if (seedStockCodes.Contains(stock.stockCode))
            {
                // seed stocks never leave the cluster they initialised
                Console.WriteLine("K-mean: Initially selected element, distance to debugging purpose only.");
            }
            else if (currentCluster != minDistanceCluster)
            {
                // if any cluster assignment of a stock changed, iterate the loop again
                clusters[stock] = minDistanceCluster;
                exit = false;
            }
        }

        // recalculate centroids as the per-tick average of all member stocks
        centroidsList.Clear();
        for (int i = 0; i < numberOfClusters; i++)
        {
            List<Tick> centroid = new List<Tick>();
            int memberInCluster = 0;
            for (int j = 0; j < minTimeInterval; j++)
            {
                centroid.Add(new NumericTick());
            }

            foreach (KeyValuePair<Stock, int> stockKYP in clusters)
            {
                if (stockKYP.Value != i + 1)
                {
                    continue;
                }

                Stock member = stockKYP.Key;
                memberInCluster++;
                int offset = member.priceList.Count - minTimeInterval;
                for (int j = offset; j < member.priceList.Count; j++)
                {
                    // **********
                    // Note: healthcheck should be made to confirm consistency of date values
                    // **********
                    Tick sourceTick = member.priceList.ElementAt(j);
                    NumericTick sumTick = (NumericTick)centroid[j - offset];
                    sumTick.change += ((NumericTick)sourceTick).change;
                    sumTick.Time = sourceTick.Time;
                }
            }

            // each cluster keeps its pinned seed stock, so memberInCluster is at least 1 here
            foreach (NumericTick eachTick in centroid)
            {
                eachTick.change = eachTick.change / memberInCluster;
            }
            centroidsList.Add(centroid);
        }

        // infinite loop check
        if (CompareDictionary(clusters, clusters_oneTimeBefore) || CompareDictionary(clusters, clusters_twoTimeBefore))
        {
            exit = true;
            Console.WriteLine("\nK-mean: Infinite loop detected, will not go into next loop.");
        }
        else
        {
            // if clusters are ok, copy as temp and go to next iteration
            clusters_twoTimeBefore = clusters_oneTimeBefore;
            clusters_oneTimeBefore = CloneDictionary(clusters);
        }

        // **********
        // Note: massive debug logging here, consider refactoring
        // **********
        Console.WriteLine("\nK-mean: Iteration " + iteration + " result");
        foreach (KeyValuePair<Stock, int> eachKYP in clusters)
        {
            Console.WriteLine("K-mean: Stock " + eachKYP.Key.stockCode + " belongs to cluster " + eachKYP.Value);
        }
        for (int i = 1; i <= numberOfClusters; i++)
        {
            Console.WriteLine("K-mean: Cluster " + i + " has " + NumberofElements(clusters, i) + " stocks.");
        }
        Console.WriteLine();
    }

    // return calculation result
    List<Cluster> clusterList = new List<Cluster>();
    for (int i = 0; i < centroidsList.Count; i++)
    {
        Cluster tempCluster = new Cluster();
        tempCluster.centroid = centroidsList[i];
        tempCluster.stockCodeList = new List<int>();
        tempCluster.clusterCode = i + 1;
        foreach (KeyValuePair<Stock, int> eachKYP in clusters)
        {
            if (eachKYP.Value == tempCluster.clusterCode)
            {
                tempCluster.stockCodeList.Add(eachKYP.Key.stockCode);
            }
        }
        clusterList.Add(tempCluster);
    }
    return clusterList;
}
/// <summary>
/// Clusters stocks with an iterative nearest-center assignment driven by a pre-computed
/// similarity table (log prefix "MOTIF").
/// </summary>
/// <param name="stockList">
/// Stocks to cluster. Every tick is cast to GenusTick. Each priceList is trimmed in place:
/// leading ticks are removed until at most <paramref name="minTimeInterval"/> remain.
/// </param>
/// <param name="numberOfClusters">Number of clusters to separate the stocks into.</param>
/// <param name="minTimeInterval">Number of most recent ticks kept per stock.</param>
/// <param name="additionalArgs">Parsed as a double and stored in SIMILARITY_MULTIPER.</param>
/// <returns>
/// One Cluster per cluster index (clusterCode = index + 1) holding its stock codes and its
/// center as FakeTick entries. An empty list is returned when the genus scan finds no
/// positive degreeOfChange.
/// </returns>
/// <exception cref="ArgumentException">
/// More clusters are requested than there are stocks to seed them with.
/// </exception>
public static List<Cluster> Cluster(List<Stock> stockList, int numberOfClusters, int minTimeInterval, string additionalArgs)
{
    SIMILARITY_MULTIPER = double.Parse(additionalArgs);

    int numberOfGenus = 0;

    // check the number of genus: scan stocks until one yields a positive degree of change
    foreach (Stock eachStock in stockList)
    {
        foreach (Tick eachTick in eachStock.priceList)
        {
            int degree = ((GenusTick)eachTick).degreeOfChange;
            if (degree > numberOfGenus)
            {
                numberOfGenus = degree;
            }
        }
        if (numberOfGenus > 0)
        {
            break;
        }
    }
    if (numberOfGenus == 0)
    {
        return new List<Cluster>();
    }
    // presumably one class per degree in each direction plus "no change" - TODO confirm
    numberOfGenus = numberOfGenus * 2 + 1;

    // Every cluster is seeded with a distinct stock; with fewer stocks than clusters the
    // seeding loop below could never finish, so fail fast before mutating any input.
    if (numberOfClusters > stockList.Count)
    {
        throw new ArgumentException("numberOfClusters must not exceed the number of stocks.", "numberOfClusters");
    }

    // eliminate the error of yahoo that extra data points may have been given
    foreach (Stock eachStock in stockList)
    {
        int noOfExtraTicks = eachStock.priceList.Count - minTimeInterval;
        for (int i = 0; i < noOfExtraTicks; i++)
        {
            // removing the front element, not removing the i-th element
            eachStock.priceList.RemoveAt(0);
        }
    }

    // build up the similarity table, this will be used for kmean
    Dictionary<string, double> similarityTable = GenerateAndDetect(numberOfGenus, minTimeInterval, stockList);

    // select random stocks as cluster centers
    List<Dictionary<int, double>> clustersCenterList = null;
    Dictionary<Stock, int> stockClusterMapping = new Dictionary<Stock, int>();   // -- t
    Dictionary<Stock, int> stockClusterMapping_T1 = null;                         // -- t-1
    Dictionary<Stock, int> stockClusterMapping_T2 = null;                         // -- t-2
    Random random = new Random();

    // The number of seeds placed so far is also the index of the cluster being seeded.
    while (stockClusterMapping.Count < numberOfClusters)
    {
        int clusterIndex = stockClusterMapping.Count;
        Stock pointer = stockList[random.Next(stockList.Count)];
        Console.WriteLine("MOTIF: Stock " + pointer.stockCode + " randomly selected for cluster " + clusterIndex);
        if (!stockClusterMapping.ContainsKey(pointer))
        {
            stockClusterMapping.Add(pointer, clusterIndex);
        }
    }

    // every remaining stock starts unassigned (-1)
    foreach (Stock eachStock in stockList)
    {
        if (!stockClusterMapping.ContainsKey(eachStock))
        {
            stockClusterMapping.Add(eachStock, -1);
        }
    }

    int minimalDistanceCluster;
    double minimalDistanceClusterDistance;
    double thisRoundDistance;
    bool clusterElementChanged = true;

    // the comparison is performed iteratively until no more element migration between clusters is observed
    for (int iteration = 1; clusterElementChanged; iteration++)
    {
        // initiation - recalculate the means (centers) of each cluster
        clusterElementChanged = false;
        clustersCenterList = CalculateClusterCenter(numberOfClusters, stockClusterMapping, similarityTable);

        // perform similarity calculations - for each stock in the list
        for (int i = 0; i < stockList.Count; i++)
        {
            minimalDistanceCluster = int.MaxValue;
            minimalDistanceClusterDistance = double.MaxValue;

            // compare with the center of each cluster
            for (int j = 0; j < numberOfClusters; j++)
            {
                thisRoundDistance = CalculateDistance(stockList[i].stockCode, stockList, clustersCenterList[j], similarityTable);
                Console.WriteLine("MOTIF: Stock " + stockList[i].stockCode + " vs cluster " + j + ", iteration " + iteration + ", distance " + thisRoundDistance);
                if (thisRoundDistance < minimalDistanceClusterDistance)
                {
                    minimalDistanceCluster = j;
                    minimalDistanceClusterDistance = thisRoundDistance;
                }
            }

            // a different nearest cluster than the mapping means the result has changed
            if (stockClusterMapping[stockList[i]] != minimalDistanceCluster)
            {
                stockClusterMapping[stockList[i]] = minimalDistanceCluster;
                clusterElementChanged = true;
            }
        }

        // empty cluster check: refill an empty cluster with a random stock that is not alone in its cluster
        for (int i = 0; i < numberOfClusters; i++)
        {
            if (DirtyHelper.CheckWetherClusterHasNoStock(stockClusterMapping, i))
            {
                while (true)
                {
                    // Reuse the single Random instance: a fresh clock-seeded Random per retry
                    // can return the same index over and over until the clock advances.
                    int tobe = random.Next(stockList.Count);
                    if (!DirtyHelper.CheckWetherThisStockIsALonelyStock(stockClusterMapping, stockClusterMapping[stockList[tobe]]))
                    {
                        stockClusterMapping[stockList[tobe]] = i;
                        break;
                    }
                }
            }
        }

        // infinite loop check: stop when the mapping equals that of one of the last two iterations
        if (CompareDictionary(stockClusterMapping, stockClusterMapping_T1) || CompareDictionary(stockClusterMapping, stockClusterMapping_T2))
        {
            clusterElementChanged = false;
            Console.WriteLine("\nMOTIF: Infinite loop detected, will not go into next loop.");
        }
        else
        {
            // if clusters are ok, copy as temp and go to next iteration
            stockClusterMapping_T2 = stockClusterMapping_T1;
            stockClusterMapping_T1 = CloneDictionary(stockClusterMapping);
        }

        // **********
        // Note: massive debug logging here, consider refactoring
        // **********
        Console.WriteLine("MOTIF: Iteration " + iteration + " done");
        if (iteration % 3 == 0 || !clusterElementChanged)
        {
            Console.WriteLine("\nMOTIF: Iteration " + iteration + " result");
            foreach (KeyValuePair<Stock, int> eachKYP in stockClusterMapping)
            {
                Console.Write("MOTIF: Stock " + eachKYP.Key.stockCode + ", cluster ");
                if (stockClusterMapping_T2 != null)
                {
                    Console.Write(stockClusterMapping_T2[eachKYP.Key] + "->");
                }
                if (stockClusterMapping_T1 != null)
                {
                    Console.Write(stockClusterMapping_T1[eachKYP.Key] + "->");
                }
                Console.WriteLine(eachKYP.Value);
            }
            Console.WriteLine();
            for (int i = 0; i < numberOfClusters; i++)
            {
                Console.WriteLine("MOTIF: Cluster " + i + " has " + NumberofElements(stockClusterMapping, i) + " stocks.");
            }
        }
        Console.WriteLine();
    }

    // populate a list of clusters in returning format
    List<Cluster> toReturn = new List<Cluster>();
    for (int i = 0; i < numberOfClusters; i++)
    {
        Cluster thisCluster = new Cluster();
        thisCluster.clusterCode = i + 1;
        thisCluster.stockCodeList = new List<int>();
        thisCluster.centroid = new List<Tick>();

        // populate the stock codes
        foreach (KeyValuePair<Stock, int> eachStockClusterKYP in stockClusterMapping)
        {
            if (eachStockClusterKYP.Value == i)
            {
                thisCluster.stockCodeList.Add(eachStockClusterKYP.Key.stockCode);
            }
        }

        // the center is exported as one FakeTick per (key, value) pair
        foreach (KeyValuePair<int, double> seqOccuranceKYP in clustersCenterList[i])
        {
            thisCluster.centroid.Add(new FakeTick(seqOccuranceKYP.Key + "", seqOccuranceKYP.Value));
        }
        toReturn.Add(thisCluster);
    }
    return toReturn;
}