Exemple #1
0
 /// <summary>
 /// Serializes a cluster to an XML file located in <paramref name="path"/> and named after the
 /// cluster's clusterCode with an ".xml" extension.
 /// </summary>
 /// <param name="cluster">Cluster to serialize; its clusterCode becomes the file name.</param>
 /// <param name="path">Directory the file is written to; joined to the file name with a backslash.</param>
 public static void ClusterToXML(Cluster cluster, String path)
 {
     XmlSerializer serializer = new XmlSerializer(typeof(Cluster));

     // The using block releases the file handle even when Serialize throws;
     // the previous explicit Close() was skipped on that path.
     using (TextWriter textWriter = new StreamWriter(path + "\\" + cluster.clusterCode + ".xml"))
     {
         serializer.Serialize(textWriter, cluster);
     }
 }
Exemple #2
0
        /// <summary>
        /// Groups stocks into clusters according to how often each possible fixed-length sequence of
        /// tick genus values occurs in their price lists (console log prefix "SSEC").
        /// Stocks are repeatedly reassigned to the cluster with the smallest distance until no stock
        /// moves, the assignment repeats one of the two previous passes, or the iteration limit is hit.
        /// </summary>
        /// <param name="stockList">Stocks to cluster. Every tick of every priceList is cast to GenusTick.
        /// Each priceList is trimmed in place, from the front, to at most
        /// <paramref name="minTimeInterval"/> ticks.</param>
        /// <param name="numberOfClusters">Number of clusters to separate the stocks into.</param>
        /// <param name="minTimeInterval">Maximum number of ticks kept per stock.</param>
        /// <param name="additionalArgs">Number of ticks in one sequence. When this is not an integer the
        /// value is read from the console instead.</param>
        /// <returns>One Cluster per cluster index (clusterCode is index + 1) holding the member stock codes
        /// and the cluster centre, or an empty list when no inspected tick has a positive degreeOfChange.</returns>
        /// <exception cref="ArgumentException"><paramref name="numberOfClusters"/> is larger than the number
        /// of distinct stocks, so the clusters cannot each be seeded with a different stock.</exception>
        /// <exception cref="InvalidOperationException">The assignment is still changing after the
        /// iteration limit.</exception>
        public static List<Cluster> Cluster(List<Stock> stockList, int numberOfClusters, int minTimeInterval, string additionalArgs)
        {
            // Upper bound on reassignment passes before the run is declared non-converging.
            const int maxIterations = 50;

            int numberOfGenus = 0;
            int numberInSequence = 0;

            // check the number of genus: the largest degreeOfChange of the first stock that has a positive one
            foreach (Stock eachStock in stockList)
            {
                foreach (Tick eachTick in eachStock.priceList)
                {
                    numberOfGenus = Math.Max(numberOfGenus, ((GenusTick)eachTick).degreeOfChange);
                }

                if (numberOfGenus > 0)
                {
                    break;
                }
            }

            if (numberOfGenus == 0)
            {
                return new List<Cluster>();
            }

            // Every cluster is seeded with a different stock. With fewer distinct stocks than clusters
            // the random seeding loop below could never finish, so fail fast instead of hanging.
            int distinctStocks = stockList.Distinct().Count();
            if (numberOfClusters > distinctStocks)
            {
                throw new ArgumentException(
                    "numberOfClusters (" + numberOfClusters + ") exceeds the number of distinct stocks (" + distinctStocks + ").",
                    "numberOfClusters");
            }

            // NOTE(review): presumably the genus values range from -max to +max, hence 2 * max + 1 symbols - confirm.
            numberOfGenus = numberOfGenus * 2 + 1;

            // eliminate the error of yahoo that extra data points may have been given
            foreach (Stock eachStock in stockList)
            {
                int noOfExtraTicks = eachStock.priceList.Count - minTimeInterval;
                for (int i = 0; i < noOfExtraTicks; i++)
                {
                    // always drop the front element, not the i-th one
                    eachStock.priceList.RemoveAt(0);
                }
            }

            // additional question, number of ticks in a defined sequence
            if (!int.TryParse(additionalArgs, out numberInSequence))
            {
                Console.WriteLine("\nNumber of ticks in a denfined sequence: ");
                numberInSequence = int.Parse(Console.ReadLine());
            }

            // determine all possible combinations in a sequence
            List<string> possibleSequences = SequenceString(numberOfGenus, numberInSequence);
            List<Dictionary<string, double>> clustersCenterList = null;

            // stock -> (sequence -> number of occurrences)
            Dictionary<Stock, Dictionary<string, int>> masterDict1 = new Dictionary<Stock, Dictionary<string, int>>();

            foreach (Stock eachStock in stockList)
            {
                Dictionary<string, int> stockSequenceDict = SeqListToIntDict(possibleSequences);

                // count the occurrences of each sequence for this stock
                // NOTE(review): the bound skips the window starting at Count - numberInSequence;
                // confirm whether leaving out the last window is intended.
                for (int i = 0; i < eachStock.priceList.Count - numberInSequence; i++)
                {
                    Tick[] ticks = new Tick[numberInSequence];
                    for (int j = 0; j < numberInSequence; j++)
                    {
                        ticks[j] = eachStock.priceList.ElementAt(i + j);
                    }

                    stockSequenceDict[SequenceString(ticks)] += 1;
                }

                Console.WriteLine();

                // output the result to sysout or log
                foreach (KeyValuePair<string, int> eachKYP in stockSequenceDict)
                {
                    if (eachKYP.Value != 0)
                    {
                        Console.WriteLine("SSEC: Stock " + eachStock.stockCode +
                            " has the sequence " + eachKYP.Key + " occurred " + eachKYP.Value + " times");
                    }
                }

                // add to the master table for future similarity calculations
                masterDict1.Add(eachStock, stockSequenceDict);
            }

            // define the clusters by randomly placing one distinct stock into each of them
            Dictionary<Stock, int> masterDict2 = new Dictionary<Stock, int>(); // -- t
            Dictionary<Stock, int> masterDict3 = null; // -- t-1
            Dictionary<Stock, int> masterDict4 = null; // -- t-2
            Random random = new Random();

            for (int i = 0; i < numberOfClusters; i++)
            {
                Stock pointer = stockList[random.Next(stockList.Count)];
                Console.WriteLine("SSEC: Stock " + pointer.stockCode + " randomly selected for cluster " + i);

                if (masterDict2.ContainsKey(pointer))
                {
                    // this stock already seeds another cluster: draw again for the same cluster index
                    i--;
                    continue;
                }

                masterDict2.Add(pointer, i);
            }

            // every remaining stock starts unassigned (-1)
            foreach (Stock eachStock in stockList)
            {
                if (!masterDict2.ContainsKey(eachStock))
                {
                    masterDict2.Add(eachStock, -1);
                }
            }

            int minimalDistanceCluster;
            double minimalDistanceClusterDistance;
            double thisRoundDistance;
            bool clusterElementChanged = true;

            // the comparison is performed iteratively until no more element migration between clusters is observed
            for (int iteration = 1; clusterElementChanged; iteration++)
            {
                // initiation - recalculate the means (centers) of each cluster
                clusterElementChanged = false;
                clustersCenterList = CalculateClusterCenter(possibleSequences, masterDict1, masterDict2, numberOfClusters);

                // perform similarity calculations - for each stock in the list
                for (int i = 0; i < stockList.Count; i++)
                {
                    minimalDistanceCluster = int.MaxValue;
                    minimalDistanceClusterDistance = double.MaxValue;

                    // compare with the center of each cluster
                    for (int j = 0; j < numberOfClusters; j++)
                    {
                        thisRoundDistance = CalculateDistance(possibleSequences, clustersCenterList[j], masterDict1[stockList[i]]);
                        Console.WriteLine("SSEC: Stock " + stockList[i].stockCode + " vs cluster " + j + ", iteration " + iteration + ", distance " + thisRoundDistance);

                        if (thisRoundDistance < minimalDistanceClusterDistance)
                        {
                            minimalDistanceCluster = j;
                            minimalDistanceClusterDistance = thisRoundDistance;
                        }
                    }

                    // a result different from the master table means the assignment changed
                    if (masterDict2[stockList[i]] != minimalDistanceCluster)
                    {
                        masterDict2[stockList[i]] = minimalDistanceCluster;
                        clusterElementChanged = true;
                    }
                }

                // empty cluster check
                for (int i = 0; i < numberOfClusters; i++)
                {
                    if (DirtyHelper.CheckWetherClusterHasNoStock(masterDict2, i))
                    {
                        while (true)
                        {
                            // Reuse the method-level generator: a fresh time-seeded Random created on every
                            // pass of this tight loop can keep returning the same index.
                            int tobe = random.Next(stockList.Count);
                            if (!DirtyHelper.CheckWetherThisStockIsALonelyStock(masterDict2, masterDict2[stockList[tobe]]))
                            {
                                masterDict2[stockList[tobe]] = i;
                                break;
                            }
                        }
                    }
                }

                // infinite loop check: stop when the assignment equals that of one of the two previous passes
                if (CompareDictionary(masterDict2, masterDict3) || CompareDictionary(masterDict2, masterDict4))
                {
                    clusterElementChanged = false;
                    Console.WriteLine("\nSSEC: Infinite loop detected, will not go into next loop.");
                }
                else
                {
                    // if clusters are ok, copy as temp and go to next iteration
                    masterDict4 = masterDict3;
                    masterDict3 = CloneDictionary(masterDict2);
                }

                // Give up only when yet another pass would be required; a run that settles exactly on the
                // last allowed iteration is still a valid result.
                if (iteration >= maxIterations && clusterElementChanged)
                {
                    throw new InvalidOperationException(
                        "SSEC clustering did not converge within " + maxIterations + " iterations.");
                }

                // **********
                // Note: massive debug logging here, consider refactoring
                // **********

                Console.WriteLine("SSEC: Iteration " + iteration + " done");

                if (iteration % 3 == 0 || !clusterElementChanged)
                {
                    Console.WriteLine("\nSSEC: Iteration " + iteration + " result");

                    foreach (KeyValuePair<Stock, int> eachKYP in masterDict2)
                    {
                        Console.Write("SSEC: Stock " + eachKYP.Key.stockCode + ", cluster ");

                        if (masterDict4 != null)
                        {
                            Console.Write(masterDict4[eachKYP.Key] + "->");
                        }

                        if (masterDict3 != null)
                        {
                            Console.Write(masterDict3[eachKYP.Key] + "->");
                        }

                        Console.WriteLine(eachKYP.Value);
                    }

                    for (int i = 0; i < numberOfClusters; i++)
                    {
                        Console.WriteLine("SSEC: Cluster " + i + " has " + NumberofElements(masterDict2, i) + " stocks.");
                    }
                }

                Console.WriteLine();
            }

            // populate a list of clusters in returning format
            List<Cluster> toReturn = new List<Cluster>();

            for (int i = 0; i < numberOfClusters; i++)
            {
                Cluster thisCluster = new Cluster();
                thisCluster.clusterCode = i + 1;
                thisCluster.stockCodeList = new List<int>();
                thisCluster.centroid = new List<Tick>();

                // populate the stock codes
                foreach (KeyValuePair<Stock, int> eachStockClusterKYP in masterDict2)
                {
                    if (eachStockClusterKYP.Value == i)
                    {
                        thisCluster.stockCodeList.Add(eachStockClusterKYP.Key.stockCode);
                    }
                }

                // the centre is exported as one FakeTick per (sequence, value) entry
                foreach (KeyValuePair<string, double> seqOccuranceKYP in clustersCenterList[i])
                {
                    thisCluster.centroid.Add(new FakeTick(seqOccuranceKYP.Key, seqOccuranceKYP.Value));
                }

                toReturn.Add(thisCluster);
            }

            return toReturn;
        }
        /// <summary>
        /// K-means style clustering over the last <paramref name="minTimeInterval"/> ticks of each stock
        /// (console log prefix "K-mean"). One randomly chosen stock seeds each cluster and stays in it;
        /// every other stock is moved to the cluster whose centroid has the smallest Distance until no
        /// stock moves or the assignment repeats one of the two previous passes.
        /// </summary>
        /// <param name="clusters">Stock to cluster-code map, updated in place. Seeds are drawn only from
        /// stocks whose value is 0; assigned cluster codes are 1-based.</param>
        /// <param name="numberOfClusters">Number of clusters to build.</param>
        /// <param name="minTimeInterval">Number of trailing ticks of each priceList that are compared.
        /// Ticks in that range are cast to NumericTick.</param>
        /// <returns>One Cluster per centroid, carrying clusterCode (index + 1), the final centroid and the
        /// codes of the stocks assigned to it.</returns>
        /// <exception cref="ArgumentException">Fewer than <paramref name="numberOfClusters"/> stocks in
        /// <paramref name="clusters"/> have the value 0, so not every cluster can be seeded.</exception>
        private static List<Cluster> Cluster(Dictionary<Stock, int> clusters, int numberOfClusters, int minTimeInterval)
        {
            // Snapshot of the entries so the dictionary can be updated while the stocks are walked.
            List<KeyValuePair<Stock, int>> stocksList = clusters.ToList();
            List<List<Tick>> centroidsList = new List<List<Tick>>();
            HashSet<int> seedStockCodes = new HashSet<int>();
            Random random = new Random();

            // by blocking the initially assigned element from moving between clusters, there may be cases where the
            // algorithm moves other stocks between 2 clusters forever while trying to reach the optimum. By
            // detecting whether the current stock cluster mapping equals one of the last 2 loops, we can break
            // out of the program if this case is encountered.
            Dictionary<Stock, int> clusters_oneTimeBefore = null;
            Dictionary<Stock, int> clusters_twoTimeBefore = null;

            // The seeding loop keeps drawing until it hits a stock whose value is 0. Without enough such
            // stocks it could never finish, so fail fast instead of hanging.
            int seedCandidates = clusters.Values.Count(code => code == 0);
            if (numberOfClusters > seedCandidates)
            {
                throw new ArgumentException(
                    "numberOfClusters (" + numberOfClusters + ") exceeds the number of unassigned stocks (" + seedCandidates + ").",
                    "numberOfClusters");
            }

            // randomly assign one stock as the first element of each cluster
            for (int i = 0; i < numberOfClusters; i++)
            {
                Stock seed;
                do
                {
                    seed = stocksList[random.Next(stocksList.Count)].Key;
                } while (clusters[seed] != 0);

                clusters[seed] = i + 1;
                seedStockCodes.Add(seed.stockCode);
                Console.WriteLine("K-mean: Stock " + seed.stockCode + " randomly selected for cluster " + (i + 1));

                List<Tick> seedCentroid = new List<Tick>();

                // copy the historical price of the stock to become the centroid of the cluster
                for (int j = seed.priceList.Count - minTimeInterval; j < seed.priceList.Count; j++)
                {
                    NumericTick newTick = new NumericTick();
                    newTick.change = ((NumericTick)seed.priceList.ElementAt(j)).change;
                    newTick.Time = seed.priceList.ElementAt(j).Time;
                    seedCentroid.Add(newTick);
                }

                centroidsList.Add(seedCentroid);
            }

            // compare each stock with the cluster centroids until the assignment is stable
            bool exit = false;

            // pass counter, used for logging only
            int iteration = 0;

            while (!exit)
            {
                iteration++;
                exit = true;

                foreach (KeyValuePair<Stock, int> stockKYP in stocksList)
                {
                    double minDistance = 0.0;
                    int minDistanceCluster = 0;

                    for (int i = 0; i < centroidsList.Count; i++)
                    {
                        double distance = Distance(stockKYP.Key.priceList, centroidsList[i],
                            stockKYP.Key.stockCode + "", (i + 1) + "", iteration + "");

                        // minDistanceCluster == 0 means no centroid has been accepted yet
                        if (minDistanceCluster == 0 || distance < minDistance)
                        {
                            minDistance = distance;
                            minDistanceCluster = i + 1;
                        }
                    }

                    if (seedStockCodes.Contains(stockKYP.Key.stockCode))
                    {
                        // seeds never leave their cluster
                        Console.WriteLine("K-mean: Initially selected element, distance to debugging purpose only.");
                    }
                    else if (clusters[stockKYP.Key] != minDistanceCluster)
                    {
                        // if any cluster assignment of a stock changed, iterate the loop again
                        clusters[stockKYP.Key] = minDistanceCluster;
                        exit = false;
                    }
                }

                // recalculate the centroids as the per-tick mean of the member stocks
                centroidsList.Clear();

                for (int i = 0; i < numberOfClusters; i++)
                {
                    List<Tick> ticksList = new List<Tick>();
                    int memberInCluster = 0;

                    for (int j = 0; j < minTimeInterval; j++)
                    {
                        ticksList.Add(new NumericTick());
                    }

                    foreach (KeyValuePair<Stock, int> stockKYP in clusters)
                    {
                        if (stockKYP.Value != (i + 1))
                        {
                            continue;
                        }

                        Stock member = stockKYP.Key;
                        memberInCluster++;

                        int firstTick = member.priceList.Count - minTimeInterval;
                        for (int j = firstTick; j < member.priceList.Count; j++)
                        {
                            // **********
                            // Note: healthcheck should be made to confirm consistency of date values
                            // **********

                            NumericTick sumTick = (NumericTick)ticksList[j - firstTick];
                            sumTick.change += ((NumericTick)member.priceList.ElementAt(j)).change;
                            sumTick.Time = member.priceList.ElementAt(j).Time;
                        }
                    }

                    // each cluster keeps its seed stock, so memberInCluster is at least 1 here
                    foreach (NumericTick eachTick in ticksList)
                    {
                        eachTick.change = eachTick.change / memberInCluster;
                    }

                    centroidsList.Add(ticksList);
                }

                // infinite loop check
                if (CompareDictionary(clusters, clusters_oneTimeBefore) || CompareDictionary(clusters, clusters_twoTimeBefore))
                {
                    exit = true;
                    Console.WriteLine("\nK-mean: Infinite loop detected, will not go into next loop.");
                }
                else
                {
                    // if clusters are ok, copy as temp and go to next iteration
                    clusters_twoTimeBefore = clusters_oneTimeBefore;
                    clusters_oneTimeBefore = CloneDictionary(clusters);
                }

                // **********
                // Note: massive debug logging here, consider refactoring
                // **********

                Console.WriteLine("\nK-mean: Iteration " + iteration + " result");

                foreach (KeyValuePair<Stock, int> eachKYP in clusters)
                {
                    Console.WriteLine("K-mean: Stock " + eachKYP.Key.stockCode + " belongs to cluster " + eachKYP.Value);
                }

                for (int i = 1; i <= numberOfClusters; i++)
                {
                    Console.WriteLine("K-mean: Cluster " + i + " has " + NumberofElements(clusters, i) + " stocks.");
                }

                Console.WriteLine();
            }

            // return calculation result
            List<Cluster> clusterList = new List<Cluster>();

            for (int i = 0; i < centroidsList.Count; i++)
            {
                Cluster tempCluster = new Cluster();
                tempCluster.centroid = centroidsList[i];
                tempCluster.stockCodeList = new List<int>();
                tempCluster.clusterCode = i + 1;

                foreach (KeyValuePair<Stock, int> eachKYP in clusters)
                {
                    if (eachKYP.Value == tempCluster.clusterCode)
                    {
                        tempCluster.stockCodeList.Add(eachKYP.Key.stockCode);
                    }
                }

                clusterList.Add(tempCluster);
            }

            return clusterList;
        }
        /// <summary>
        /// Groups stocks into clusters using a similarity table built by GenerateAndDetect
        /// (console log prefix "MOTIF"). Stocks are repeatedly reassigned to the cluster with the
        /// smallest distance until no stock moves or the assignment repeats one of the two previous passes.
        /// </summary>
        /// <param name="stockList">Stocks to cluster. Every tick of every priceList is cast to GenusTick.
        /// Each priceList is trimmed in place, from the front, to at most
        /// <paramref name="minTimeInterval"/> ticks.</param>
        /// <param name="numberOfClusters">Number of clusters to separate the stocks into.</param>
        /// <param name="minTimeInterval">Maximum number of ticks kept per stock; also passed to GenerateAndDetect.</param>
        /// <param name="additionalArgs">Parsed with double.Parse and stored in SIMILARITY_MULTIPER.</param>
        /// <returns>One Cluster per cluster index (clusterCode is index + 1) holding the member stock codes
        /// and the cluster centre, or an empty list when no inspected tick has a positive degreeOfChange.</returns>
        /// <exception cref="ArgumentException"><paramref name="numberOfClusters"/> is larger than the number
        /// of distinct stocks, so the clusters cannot each be seeded with a different stock.</exception>
        public static List<Cluster> Cluster(List<Stock> stockList, int numberOfClusters, int minTimeInterval, string additionalArgs)
        {
            SIMILARITY_MULTIPER = double.Parse(additionalArgs);
            int numberOfGenus = 0;

            // check the number of genus: the largest degreeOfChange of the first stock that has a positive one
            foreach (Stock eachStock in stockList)
            {
                foreach (Tick eachTick in eachStock.priceList)
                {
                    numberOfGenus = Math.Max(numberOfGenus, ((GenusTick)eachTick).degreeOfChange);
                }

                if (numberOfGenus > 0)
                {
                    break;
                }
            }

            if (numberOfGenus == 0)
            {
                return new List<Cluster>();
            }

            // Every cluster is seeded with a different stock. With fewer distinct stocks than clusters
            // the random seeding loop below could never finish, so fail fast instead of hanging.
            int distinctStocks = stockList.Distinct().Count();
            if (numberOfClusters > distinctStocks)
            {
                throw new ArgumentException(
                    "numberOfClusters (" + numberOfClusters + ") exceeds the number of distinct stocks (" + distinctStocks + ").",
                    "numberOfClusters");
            }

            // NOTE(review): presumably the genus values range from -max to +max, hence 2 * max + 1 symbols - confirm.
            numberOfGenus = numberOfGenus * 2 + 1;

            // eliminate the error of yahoo that extra data points may have been given
            foreach (Stock eachStock in stockList)
            {
                int noOfExtraTicks = eachStock.priceList.Count - minTimeInterval;
                for (int i = 0; i < noOfExtraTicks; i++)
                {
                    // always drop the front element, not the i-th one
                    eachStock.priceList.RemoveAt(0);
                }
            }

            // build up the similarity table, this will be used for kmean
            Dictionary<string, double> similarityTable = GenerateAndDetect(numberOfGenus, minTimeInterval, stockList);

            // select random stocks as cluster centers
            List<Dictionary<int, double>> clustersCenterList = null;
            Dictionary<Stock, int> stockClusterMapping = new Dictionary<Stock, int>();
            Dictionary<Stock, int> stockClusterMapping_T1 = null; // previous pass
            Dictionary<Stock, int> stockClusterMapping_T2 = null; // the pass before that
            Random random = new Random();

            for (int i = 0; i < numberOfClusters; i++)
            {
                Stock pointer = stockList[random.Next(stockList.Count)];
                Console.WriteLine("MOTIF: Stock " + pointer.stockCode + " randomly selected for cluster " + i);

                if (stockClusterMapping.ContainsKey(pointer))
                {
                    // this stock already seeds another cluster: draw again for the same cluster index
                    i--;
                    continue;
                }

                stockClusterMapping.Add(pointer, i);
            }

            // every remaining stock starts unassigned (-1)
            foreach (Stock eachStock in stockList)
            {
                if (!stockClusterMapping.ContainsKey(eachStock))
                {
                    stockClusterMapping.Add(eachStock, -1);
                }
            }

            int minimalDistanceCluster;
            double minimalDistanceClusterDistance;
            double thisRoundDistance;
            bool clusterElementChanged = true;

            // the comparison is performed iteratively until no more element migration between clusters is observed
            for (int iteration = 1; clusterElementChanged; iteration++)
            {
                // initiation - recalculate the means (centers) of each cluster
                clusterElementChanged = false;
                clustersCenterList = CalculateClusterCenter(numberOfClusters, stockClusterMapping, similarityTable);

                // perform similarity calculations - for each stock in the list
                for (int i = 0; i < stockList.Count; i++)
                {
                    minimalDistanceCluster = int.MaxValue;
                    minimalDistanceClusterDistance = double.MaxValue;

                    // compare with the center of each cluster
                    for (int j = 0; j < numberOfClusters; j++)
                    {
                        thisRoundDistance = CalculateDistance(stockList[i].stockCode, stockList, clustersCenterList[j], similarityTable);
                        Console.WriteLine("MOTIF: Stock " + stockList[i].stockCode + " vs cluster " + j + ", iteration " + iteration + ", distance " + thisRoundDistance);

                        if (thisRoundDistance < minimalDistanceClusterDistance)
                        {
                            minimalDistanceCluster = j;
                            minimalDistanceClusterDistance = thisRoundDistance;
                        }
                    }

                    // a result different from the master table means the assignment changed
                    if (stockClusterMapping[stockList[i]] != minimalDistanceCluster)
                    {
                        stockClusterMapping[stockList[i]] = minimalDistanceCluster;
                        clusterElementChanged = true;
                    }
                }

                // empty cluster check
                for (int i = 0; i < numberOfClusters; i++)
                {
                    if (DirtyHelper.CheckWetherClusterHasNoStock(stockClusterMapping, i))
                    {
                        while (true)
                        {
                            // Reuse the method-level generator: a fresh time-seeded Random created on every
                            // pass of this tight loop can keep returning the same index.
                            int tobe = random.Next(stockList.Count);
                            if (!DirtyHelper.CheckWetherThisStockIsALonelyStock(stockClusterMapping, stockClusterMapping[stockList[tobe]]))
                            {
                                stockClusterMapping[stockList[tobe]] = i;
                                break;
                            }
                        }
                    }
                }

                // infinite loop check: stop when the assignment equals that of one of the two previous passes
                if (CompareDictionary(stockClusterMapping, stockClusterMapping_T1) || CompareDictionary(stockClusterMapping, stockClusterMapping_T2))
                {
                    clusterElementChanged = false;
                    Console.WriteLine("\nMOTIF: Infinite loop detected, will not go into next loop.");
                }
                else
                {
                    // if clusters are ok, copy as temp and go to next iteration
                    stockClusterMapping_T2 = stockClusterMapping_T1;
                    stockClusterMapping_T1 = CloneDictionary(stockClusterMapping);
                }

                // **********
                // Note: massive debug logging here, consider refactoring
                // **********

                Console.WriteLine("MOTIF: Iteration " + iteration + " done");

                if (iteration % 3 == 0 || !clusterElementChanged)
                {
                    Console.WriteLine("\nMOTIF: Iteration " + iteration + " result");

                    foreach (KeyValuePair<Stock, int> eachKYP in stockClusterMapping)
                    {
                        Console.Write("MOTIF: Stock " + eachKYP.Key.stockCode + ", cluster ");

                        if (stockClusterMapping_T2 != null)
                        {
                            Console.Write(stockClusterMapping_T2[eachKYP.Key] + "->");
                        }

                        if (stockClusterMapping_T1 != null)
                        {
                            Console.Write(stockClusterMapping_T1[eachKYP.Key] + "->");
                        }

                        Console.WriteLine(eachKYP.Value);
                    }

                    Console.WriteLine();

                    for (int i = 0; i < numberOfClusters; i++)
                    {
                        Console.WriteLine("MOTIF: Cluster " + i + " has " + NumberofElements(stockClusterMapping, i) + " stocks.");
                    }
                }

                Console.WriteLine();
            }

            // populate a list of clusters in returning format
            List<Cluster> toReturn = new List<Cluster>();

            for (int i = 0; i < numberOfClusters; i++)
            {
                Cluster thisCluster = new Cluster();
                thisCluster.clusterCode = i + 1;
                thisCluster.stockCodeList = new List<int>();
                thisCluster.centroid = new List<Tick>();

                // populate the stock codes
                foreach (KeyValuePair<Stock, int> eachStockClusterKYP in stockClusterMapping)
                {
                    if (eachStockClusterKYP.Value == i)
                    {
                        thisCluster.stockCodeList.Add(eachStockClusterKYP.Key.stockCode);
                    }
                }

                // the centre is exported as one FakeTick per (key, value) entry, with the key rendered as text
                foreach (KeyValuePair<int, double> seqOccuranceKYP in clustersCenterList[i])
                {
                    thisCluster.centroid.Add(new FakeTick(seqOccuranceKYP.Key + "", seqOccuranceKYP.Value));
                }

                toReturn.Add(thisCluster);
            }

            return toReturn;
        }