/// <summary>
/// Groups the stocks in <paramref name="clusters"/> into <paramref name="numberOfClusters"/> clusters with an
/// iterative k-means style procedure: random seed stocks provide the initial centroids, every stock is then
/// assigned to the centroid with the smallest <c>Distance</c>, and centroids are recomputed as the mean of the
/// members' <c>change</c> values until no assignment changes.
/// </summary>
/// <param name="clusters">
/// Stock-to-cluster map; it is updated in place. A value of 0 marks a stock as not yet assigned (only such
/// stocks can be picked as seeds); assigned cluster codes are 1-based.
/// </param>
/// <param name="numberOfClusters">Number of clusters (and therefore seed stocks) to create.</param>
/// <param name="minTimeInterval">
/// Number of trailing ticks of each stock's price list used to build the centroids. A stock with fewer ticks
/// than this makes the tick lookup fail with an out-of-range exception.
/// </param>
/// <returns>One <c>Cluster</c> per centroid, carrying its code, centroid ticks and member stock codes.</returns>
/// <exception cref="ArgumentException">
/// Thrown when there are fewer unassigned stocks than requested clusters; previously the random seed
/// selection would spin forever in that situation.
/// </exception>
private static List<Cluster> Cluster(Dictionary<Stock, int> clusters, int numberOfClusters, int minTimeInterval)
{
    // Snapshot of the entries so the dictionary can be modified while we walk the stocks.
    List<KeyValuePair<Stock, int>> stocksList = clusters.ToList();
    List<List<Tick>> centroidsList = new List<List<Tick>>();
    // Stock codes of the seed stocks; seeds are never moved to another cluster.
    HashSet<int> seedStockCodes = new HashSet<int>();
    Random random = new Random();

    // Every seed must be a stock that is still mapped to 0, so make sure enough of them exist.
    int unassignedStocks = 0;
    foreach (int assignedCluster in clusters.Values)
    {
        if (assignedCluster == 0)
        {
            unassignedStocks++;
        }
    }
    if (numberOfClusters > unassignedStocks)
    {
        throw new ArgumentException(
            "Cannot seed " + numberOfClusters + " clusters from " + unassignedStocks + " unassigned stocks.",
            "numberOfClusters");
    }

    // Because seed stocks are pinned to their cluster, the remaining stocks may oscillate between two
    // clusters forever. Remembering the assignments of the last two iterations lets us detect that case
    // and stop.
    Dictionary<Stock, int> clusters_oneTimeBefore = null;
    Dictionary<Stock, int> clusters_twoTimeBefore = null;

    // Randomly pick one not-yet-assigned stock as the first element of each cluster.
    for (int i = 0; i < numberOfClusters; i++)
    {
        int seedIndex;
        int currentCluster;
        do
        {
            seedIndex = random.Next(stocksList.Count);
            clusters.TryGetValue(stocksList[seedIndex].Key, out currentCluster);
        } while (currentCluster != 0);

        Stock seed = stocksList[seedIndex].Key;
        clusters[seed] = i + 1;
        seedStockCodes.Add(seed.stockCode);
        Console.WriteLine("K-mean: Stock " + seed.stockCode + " randomly selected for cluster " + (i + 1));

        // Copy the trailing price history of the seed; it becomes the initial centroid of the cluster.
        List<Tick> seedCentroid = new List<Tick>();
        for (int j = seed.priceList.Count - minTimeInterval; j < seed.priceList.Count; j++)
        {
            NumericTick newTick = new NumericTick();
            newTick.change = ((NumericTick)seed.priceList[j]).change;
            newTick.Time = seed.priceList[j].Time;
            seedCentroid.Add(newTick);
        }
        centroidsList.Add(seedCentroid);
    }

    bool converged = false;
    // Iteration counter; only used for logging and passed on to Distance.
    int iteration = 0;

    while (!converged)
    {
        iteration++;
        converged = true;

        // Compare each stock with every cluster centroid and pick the closest one.
        foreach (KeyValuePair<Stock, int> stockEntry in stocksList)
        {
            double minDistance = 0.0;
            int minDistanceCluster = 0;
            for (int i = 0; i < centroidsList.Count; i++)
            {
                double distance = Distance(stockEntry.Key.priceList, centroidsList[i],
                    stockEntry.Key.stockCode + "", (i + 1) + "", iteration + "");
                if (minDistanceCluster == 0 || distance < minDistance)
                {
                    minDistance = distance;
                    minDistanceCluster = i + 1;
                }
            }

            int assigned;
            clusters.TryGetValue(stockEntry.Key, out assigned);
            if (!seedStockCodes.Contains(stockEntry.Key.stockCode))
            {
                // Any changed assignment means another iteration is required.
                if (assigned != minDistanceCluster)
                {
                    clusters[stockEntry.Key] = minDistanceCluster;
                    converged = false;
                }
            }
            else
            {
                Console.WriteLine("K-mean: Initially selected element, distance to debugging purpose only.");
            }
        }

        // Recalculate every centroid as the per-tick mean of its members' change values.
        centroidsList.Clear();
        for (int i = 0; i < numberOfClusters; i++)
        {
            List<Tick> centroid = new List<Tick>();
            int memberInCluster = 0;
            for (int j = 0; j < minTimeInterval; j++)
            {
                centroid.Add(new NumericTick());
            }

            foreach (KeyValuePair<Stock, int> member in clusters)
            {
                if (member.Value != i + 1)
                {
                    continue;
                }

                Stock memberStock = member.Key;
                memberInCluster++;
                int offset = memberStock.priceList.Count - minTimeInterval;
                for (int j = offset; j < memberStock.priceList.Count; j++)
                {
                    // **********
                    // Note: a health check should be made to confirm consistency of date values
                    // **********
                    NumericTick sumTick = (NumericTick)centroid[j - offset];
                    sumTick.change += ((NumericTick)memberStock.priceList[j]).change;
                    sumTick.Time = memberStock.priceList[j].Time;
                }
            }

            // Each cluster keeps at least its pinned seed stock, so memberInCluster is not zero here.
            foreach (NumericTick eachTick in centroid)
            {
                eachTick.change = eachTick.change / memberInCluster;
            }
            centroidsList.Add(centroid);
        }

        // Infinite loop check: stop when the assignment equals one of the two previous iterations.
        if (CompareDictionary(clusters, clusters_oneTimeBefore) || CompareDictionary(clusters, clusters_twoTimeBefore))
        {
            converged = true;
            Console.WriteLine("\nK-mean: Infinite loop detected, will not go into next loop.");
        }
        else
        {
            // Assignments are new: remember them and go to the next iteration.
            clusters_twoTimeBefore = clusters_oneTimeBefore;
            clusters_oneTimeBefore = CloneDictionary(clusters);
        }

        // **********
        // Note: massive debug logging here, consider refactoring
        // **********
        Console.WriteLine("\nK-mean: Iteration " + iteration + " result");
        foreach (KeyValuePair<Stock, int> eachKYP in clusters)
        {
            Console.WriteLine("K-mean: Stock " + eachKYP.Key.stockCode + " belongs to cluster " + eachKYP.Value);
        }
        for (int i = 1; i <= numberOfClusters; i++)
        {
            Console.WriteLine("K-mean: Cluster " + i + " has " + NumberofElements(clusters, i) + " stocks.");
        }
        Console.WriteLine();
    }

    // Package the calculation result: one Cluster object per centroid.
    List<Cluster> clusterList = new List<Cluster>();
    for (int i = 0; i < centroidsList.Count; i++)
    {
        Cluster tempCluster = new Cluster();
        tempCluster.centroid = centroidsList[i];
        tempCluster.stockCodeList = new List<int>();
        tempCluster.clusterCode = i + 1;
        foreach (KeyValuePair<Stock, int> eachKYP in clusters)
        {
            if (eachKYP.Value == tempCluster.clusterCode)
            {
                tempCluster.stockCodeList.Add(eachKYP.Key.stockCode);
            }
        }
        clusterList.Add(tempCluster);
    }
    return clusterList;
}
/// <summary>
/// Entry point: reads every <c>.xml</c> stock file from the input folder, combines its price ticks into
/// larger bars and writes the result to the output folder (which is emptied first).
/// </summary>
/// <param name="args">
/// Either exactly three values - combining mode, input path, output path - or anything else, in which case
/// the values are asked for on the console. Combining mode: a positive number combines that many ticks per
/// bar, -1 combines weekly, -2 monthly, -3 yearly.
/// </param>
static void Main(string[] args)
{
    int ticksCombining;
    string filePath;
    string outPath;

    Console.WriteLine("########################################");
    Console.WriteLine("# Module:\tFYP_TimingStockData");
    Console.WriteLine("# Author:\tThomas Tong");
    // The address is redacted in this copy of the source; the tab escape that preceded it is restored
    // because the redaction left an invalid "\[" escape sequence behind.
    Console.WriteLine("# Email:\t[email protected]");
    Console.WriteLine("########################################\n");

    if (args.Length != 3 || !int.TryParse(args[0], out ticksCombining))
    {
        do
        {
            Console.WriteLine("\nCombining number of ticks (positive number, -1 to combine weekly, -2 monthly, -3 yearly: ");
        } while (!int.TryParse(Console.ReadLine(), out ticksCombining));
    }

    if (args.Length != 3)
    {
        Console.WriteLine("\nEnter input path (e.g. C:\\Temp\\Stock): ");
        filePath = Console.ReadLine();
        Console.WriteLine("\nEnter output path (e.g. C:\\Temp\\Stock): ");
        outPath = Console.ReadLine();
    }
    else
    {
        filePath = args[1];
        outPath = args[2];
    }
    Console.WriteLine("");

    // Reject unsupported modes before touching the output folder; otherwise every existing output file
    // would be deleted and replaced by stocks with empty price lists.
    if (ticksCombining == 0 || ticksCombining < -3)
    {
        Console.WriteLine("Unsupported combining value: " + ticksCombining);
        return;
    }

    // here we start everything - create directory if not exist
    if (!Directory.Exists(outPath))
    {
        Directory.CreateDirectory(outPath);
    }

    // remove all files in the output folder first
    foreach (string existingFile in Directory.GetFiles(outPath))
    {
        File.Delete(existingFile);
    }

    List<Stock> stockList = new List<Stock>();
    foreach (string fileName in Directory.GetFiles(filePath))
    {
        if (fileName.EndsWith(".xml", StringComparison.Ordinal))
        {
            Console.Write("Reading " + fileName + "...");
            Stock stock = XMLHelper.StockFromXML(fileName);
            Console.WriteLine(stock.priceList.Count + " tick");
            stockList.Add(stock);
        }
    }

    Console.WriteLine("\nNumber of stocks to be processed: " + stockList.Count);

    // start processing
    foreach (Stock eachStock in stockList)
    {
        List<Tick> newPriceList;
        if (ticksCombining > 0)
        {
            newPriceList = CombineByTickCount(eachStock.priceList, ticksCombining);
        }
        else if (ticksCombining == -1)
        {
            // weekly: a new bar starts when the day of week goes down
            newPriceList = CombineByCalendarPeriod(eachStock.priceList, t => (int)t.DayOfWeek);
        }
        else if (ticksCombining == -2)
        {
            // monthly: a new bar starts when the day of month goes down
            newPriceList = CombineByCalendarPeriod(eachStock.priceList, t => t.Day);
        }
        else
        {
            // yearly (-3): a new bar starts when the day of year goes down
            newPriceList = CombineByCalendarPeriod(eachStock.priceList, t => t.DayOfYear);
        }

        // Percentage change of each bar relative to the previous bar; the first bar keeps its default.
        for (int i = 1; i < newPriceList.Count; i++)
        {
            NumericTick previous = (NumericTick)newPriceList[i - 1];
            NumericTick current = (NumericTick)newPriceList[i];
            current.change = (current.close - previous.close) * 100 / previous.close;
            current.adjustedChange = (current.adjustedClose - previous.adjustedClose) * 100 / previous.adjustedClose;
        }

        eachStock.priceList = newPriceList;
        Console.WriteLine("Writing stock " + eachStock.stockCode + "...");
        XMLHelper.StockToXML(eachStock, outPath);
    }
}

/// <summary>
/// Combines every <paramref name="ticksPerBar"/> consecutive ticks into one bar; the last bar may hold
/// fewer ticks. Each bar takes the open of its first tick, the close/adjusted close/time of its last
/// tick, the highest high, the lowest low and the summed volume.
/// </summary>
private static List<Tick> CombineByTickCount(IList<Tick> ticks, int ticksPerBar)
{
    List<Tick> bars = new List<Tick>();
    NumericTick bar = null;
    UInt64 volume = 0;

    for (int i = 1; i <= ticks.Count; i++)
    {
        NumericTick current = (NumericTick)ticks[i - 1];
        if (bar == null)
        {
            bar = new NumericTick();
            bar.open = current.open;
            bar.low = current.low;
        }
        if (current.high > bar.high)
        {
            bar.high = current.high;
        }
        if (current.low < bar.low)
        {
            bar.low = current.low;
        }
        volume += UInt64.Parse(current.volume);

        // end of chunk / end of list - close the bar in the current iteration
        if (i % ticksPerBar == 0 || i == ticks.Count)
        {
            bar.close = current.close;
            bar.adjustedClose = current.adjustedClose;
            bar.volume = volume + "";
            bar.id = Identifier.N;
            bar.Time = current.Time;
            bars.Add(bar);
            bar = null;
            volume = 0;
        }
    }
    return bars;
}

/// <summary>
/// Combines ticks into one bar per calendar period. <paramref name="periodPosition"/> maps a tick time to
/// its position inside the period (day of week, day of month, day of year); a position lower than the
/// previous tick's position starts a new bar. Returns an empty list for an empty input.
/// </summary>
private static List<Tick> CombineByCalendarPeriod(IList<Tick> ticks, Func<DateTime, int> periodPosition)
{
    List<Tick> bars = new List<Tick>();
    if (ticks.Count == 0)
    {
        return bars;
    }

    NumericTick bar = null;
    UInt64 volume = 0;
    int previousPosition = periodPosition(ticks[0].Time);

    for (int i = 0; i < ticks.Count; i++)
    {
        NumericTick current = (NumericTick)ticks[i];
        int position = periodPosition(current.Time);

        // Period rolled over: close the running bar. It is stamped with the time of its own last tick
        // (the previous one), so it cannot share a timestamp with the bar that starts here.
        if (position < previousPosition)
        {
            bar.id = Identifier.N;
            bar.Time = ticks[i - 1].Time;
            bar.volume = volume + "";
            bars.Add(bar);
            bar = null;
            volume = 0;
        }

        if (bar == null)
        {
            bar = new NumericTick();
            bar.open = current.open;
            bar.low = current.low;
        }
        if (current.high > bar.high)
        {
            bar.high = current.high;
        }
        if (current.low < bar.low)
        {
            bar.low = current.low;
        }
        bar.close = current.close;
        bar.adjustedClose = current.adjustedClose;
        previousPosition = position;
        volume += UInt64.Parse(current.volume);
    }

    // end of list - close the last running bar
    bar.id = Identifier.N;
    bar.Time = ticks[ticks.Count - 1].Time;
    bar.volume = volume + "";
    bars.Add(bar);
    return bars;
}
/// <summary>
/// Downloads the daily price history of <paramref name="stock"/> for the given date range as CSV, and
/// replaces <c>stock.priceList</c> with the parsed ticks in ascending date order. Calendar days missing
/// between two records are filled with zero-volume dummy ticks, and the percentage change of every tick
/// relative to the previous one is calculated.
/// </summary>
/// <param name="stock">Stock to download; stock code 0 is requested as the ^HSI index.</param>
/// <param name="d_from">Start day of month.</param>
/// <param name="m_from">Start month, 1-based (the URL receives the value minus one).</param>
/// <param name="y_from">Start year.</param>
/// <param name="d_to">End day of month.</param>
/// <param name="m_to">End month, 1-based (the URL receives the value minus one).</param>
/// <param name="y_to">End year.</param>
static void DownloadStockPrice(Stock stock, int d_from, int m_from, int y_from, int d_to, int m_to, int y_to)
{
    // The CSV uses '.' as decimal separator, so parsing must not depend on the machine's culture.
    IFormatProvider invariant = System.Globalization.CultureInfo.InvariantCulture;

    // **********
    // Note: price tick in yahoo finance is in reverse order
    // **********
    String url = String.Format(
        "http://ichart.yahoo.com/table.csv?s={0:0000}.HK&a={1:00}&b={2}&c={3:0000}&d={4:00}&e={5}&f={6:0000}&g=d&ignore=.csv",
        stock.stockCode,
        m_from - 1, d_from, y_from,
        m_to - 1, d_to, y_to
    );

    // ^HSI special handling
    if (stock.stockCode == 0)
    {
        url = url.Replace("0000.HK", "^HSI");
    }

    String text = FetchWebsite(url);
    stock.priceList = new List<Tick>();

    // **********
    // Note: csv reader library by Sebastien Lorion: http://www.codeproject.com/KB/database/CsvReader.aspx
    // **********
    NumericTick lastTick = new NumericTick();

    // read the downloaded text as a CSV document with a header row
    using (CsvReader csv = new CsvReader(new StringReader(text), true))
    {
        while (csv.ReadNextRecord())
        {
            string tempDate = csv[0];
            string tempOpen = csv[1];
            string tempHigh = csv[2];
            string tempLow = csv[3];
            string tempClose = csv[4];
            string volume = csv[5];
            string adjClose = csv[6];

            NumericTick newTick = new NumericTick();
            // the date is read from fixed positions: yyyy at 0, MM at 5, dd at 8
            newTick.Time = new DateTime(
                int.Parse(tempDate.Substring(0, 4), invariant),
                int.Parse(tempDate.Substring(5, 2), invariant),
                int.Parse(tempDate.Substring(8, 2), invariant));
            newTick.change = 0;
            newTick.adjustedChange = 0;
            newTick.high = double.Parse(tempHigh, invariant);
            newTick.low = double.Parse(tempLow, invariant);
            newTick.open = double.Parse(tempOpen, invariant);
            newTick.close = double.Parse(tempClose, invariant);
            newTick.adjustedClose = double.Parse(adjClose, invariant);
            newTick.volume = volume;
            newTick.id = Identifier.N;

            // records without a closing price are ignored
            if (newTick.close == 0)
            {
                continue;
            }

            // Add dummy ticks when the collected prices are not on consecutive days. Records arrive newest
            // first, so the gap is walked backwards from the previously added (newer) tick, and the dummy
            // ticks carry the prices of the older record being added now.
            if (stock.priceList.Count != 0)
            {
                for (int i = lastTick.Time.Subtract(newTick.Time).Days; i > 1; i--)
                {
                    NumericTick dummyTick = new NumericTick();
                    dummyTick.Time = lastTick.Time.AddDays(-1);
                    dummyTick.change = 0;
                    dummyTick.adjustedChange = 0;
                    dummyTick.volume = "0";
                    dummyTick.high = newTick.close;
                    dummyTick.low = newTick.close;
                    dummyTick.open = newTick.close;
                    dummyTick.close = newTick.close;
                    dummyTick.adjustedClose = newTick.adjustedClose;
                    stock.priceList.Add(dummyTick);
                    lastTick = dummyTick;
                }
            }

            stock.priceList.Add(newTick);
            lastTick = newTick;
        }
    }

    // reverse the tick order to ascending and calculate the change
    stock.priceList.Reverse();
    double lastPrice = 0.0;
    double lastAdjPrice = 0.0;
    foreach (NumericTick eachTick in stock.priceList)
    {
        // Without a previous price (first tick) the change is 0, not a division by zero.
        eachTick.change = lastPrice != 0
            ? (eachTick.close - lastPrice) * 100 / lastPrice
            : 0;
        eachTick.adjustedChange = lastAdjPrice != 0
            ? (eachTick.adjustedClose - lastAdjPrice) * 100 / lastAdjPrice
            : 0;

        lastPrice = eachTick.close;
        lastAdjPrice = eachTick.adjustedClose;
    }
}