Beispiel #1
0
 // input the tick objects, convert them into strings
 /// <summary>
 /// Builds the textual signature of a tick sequence by concatenating the signed
 /// degree of change of every tick; for example the values 1, -2, 0 yield "+1-2+0".
 /// </summary>
 /// <param name="ticks">Ticks to encode. Every element is cast to <c>GenusTick</c>.</param>
 /// <returns>The concatenated signed values; an empty string when <paramref name="ticks"/> is empty.</returns>
 public static string SequenceString(Tick[] ticks)
 {
     // A builder avoids re-copying the accumulated string on every tick,
     // which the previous "+=" concatenation did.
     System.Text.StringBuilder signature = new System.Text.StringBuilder();
     foreach (Tick tick in ticks)
     {
         // Cast once per tick instead of once per use.
         GenusTick genusTick = (GenusTick)tick;
         var change = genusTick.degreeOfChange;

         // Non-negative values get an explicit '+'; negative values already carry their sign.
         if (change >= 0)
         {
             signature.Append("+");
         }
         signature.Append(change);
     }
     return signature.ToString();
 }
Beispiel #2
0
 // assisting recursive method for the above public method interface
 /// <summary>
 /// Recursive helper that enumerates every sequence of <paramref name="numberInSequence"/>
 /// degree-of-change values and appends the string form of each one to <paramref name="resultList"/>.
 /// </summary>
 /// <param name="numberOfGenus">
 /// Number of distinct values per position. Position values are
 /// <c>i - floor(numberOfGenus / 2)</c> for <c>i</c> in <c>[0, numberOfGenus)</c>.
 /// </param>
 /// <param name="numberInSequence">Length of every generated sequence.</param>
 /// <param name="seqID">Index of the position filled by this call; the initial call presumably passes 0.</param>
 /// <param name="intermidiate">
 /// Working buffer of at least <paramref name="numberInSequence"/> elements holding the values chosen
 /// so far. It is overwritten in place and shared by all recursion levels.
 /// </param>
 /// <param name="resultList">Receives one string per complete sequence, in enumeration order.</param>
 private static void SequenceString(int numberOfGenus, int numberInSequence, int seqID, int[] intermidiate, List<string> resultList)
 {
     if (seqID == numberInSequence)
     {
         // Every position is filled: materialise the ticks and record their string form.
         Tick[] ticks = new Tick[numberInSequence];
         for (int i = 0; i < numberInSequence; i++)
         {
             ticks[i] = new GenusTick(intermidiate[i]);
         }
         resultList.Add(SequenceString(ticks));
         return;
     }

     // Loop-invariant shift that centres the candidate values around zero.
     int offset = (int)Math.Floor(numberOfGenus / 2.0);

     for (int i = 0; i < numberOfGenus; i++)
     {
         intermidiate[seqID] = i - offset;

         // The buffer is shared rather than cloned: deeper levels only write to indices
         // greater than seqID, and the leaf reads the buffer before any sibling branch
         // overwrites it, so a per-call copy is unnecessary.
         SequenceString(numberOfGenus, numberInSequence, seqID + 1, intermidiate, resultList);
     }
 }
Beispiel #3
0
        // stockList is the list of stocks to be processed by this algorithm; stocks which have too few price ticks are omitted
        // numberOfClusters is the number of clusters into which the stocks are to be separated
        // minTimeInterval is the number of ticks each stock in stockList is expected to have; surplus ticks are removed from the front of its price list
        // additionalArgs - number of ticks in a defined sequence
        /// <summary>
        /// Groups stocks into clusters. Each stock is described by how often every possible sequence of
        /// consecutive degree-of-change values occurs in its price list. Clusters are seeded with randomly
        /// chosen stocks; the centers are then recomputed and every stock is reassigned to its nearest
        /// center until the assignment stops changing or a repeating assignment is detected.
        /// </summary>
        /// <param name="stockList">
        /// Stocks to cluster. Every tick in each <c>priceList</c> is cast to <c>GenusTick</c>.
        /// The price lists are trimmed in place (see <paramref name="minTimeInterval"/>).
        /// </param>
        /// <param name="numberOfClusters">Number of clusters to produce; must not exceed the number of stocks.</param>
        /// <param name="minTimeInterval">
        /// Number of ticks kept per stock; surplus ticks are removed from the front of the price list.
        /// </param>
        /// <param name="additionalArgs">
        /// Number of ticks in one sequence. When this is not a valid integer the value is read from the console.
        /// </param>
        /// <returns>
        /// One <c>Cluster</c> per cluster index (with <c>clusterCode</c> = index + 1), or an empty list when
        /// no scanned tick has a positive degree of change.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="numberOfClusters"/> is larger than the number of stocks.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// The assignment is still changing after the maximum number of iterations.
        /// </exception>
        public static List<Cluster> Cluster(List<Stock> stockList, int numberOfClusters, int minTimeInterval, string additionalArgs)
        {
            // Upper bound on refinement passes before giving up.
            const int maxIterations = 50;

            int numberOfGenus = 0;
            int numberInSequence = 0;

            // check the number of genus: the largest positive degree of change
            // NOTE(review): the scan stops at the first stock that shows a positive change, so this
            // presumably relies on all stocks sharing the same range of values - confirm.
            foreach (Stock eachStock in stockList)
            {
                foreach (Tick eachTick in eachStock.priceList)
                {
                    int degree = ((GenusTick)eachTick).degreeOfChange;
                    if (degree > numberOfGenus)
                    {
                        numberOfGenus = degree;
                    }
                }
                if (numberOfGenus > 0) break;
            }

            if (numberOfGenus == 0)
            {
                return new List<Cluster>();
            }

            // the value range is symmetric around zero: -max .. +max
            numberOfGenus = numberOfGenus * 2 + 1;

            // eliminate the error of yahoo that extra data points may have been given
            foreach (Stock eachStock in stockList)
            {
                int noOfExtraTicks = eachStock.priceList.Count - minTimeInterval;
                for (int i = 0; i < noOfExtraTicks; i++)
                {
                    // removing the front element, not removing the i-th element
                    eachStock.priceList.RemoveAt(0);
                }
            }

            // additional question, number of ticks in a defined sequence
            if (!int.TryParse(additionalArgs, out numberInSequence))
            {
                Console.WriteLine("\nNumber of ticks in a denfined sequence: ");
                numberInSequence = int.Parse(Console.ReadLine());
            }

            // determine all possible combination in a sequence
            List<string> possibleSequences = SequenceString(numberOfGenus, numberInSequence);
            List<Dictionary<string, double>> clustersCenterList = null;

            // occurrence count of every sequence, per stock
            Dictionary<Stock, Dictionary<string, int>> masterDict1 = new Dictionary<Stock, Dictionary<string, int>>();

            foreach (Stock eachStock in stockList)
            {
                Dictionary<string, int> stockSequenceDict = SeqListToIntDict(possibleSequences);

                // count the occurrence of sequences of this stock with a sliding window
                // NOTE(review): the bound excludes the window starting at Count - numberInSequence,
                // i.e. the last full window is never counted - confirm whether "<=" was intended.
                for (int i = 0; i < eachStock.priceList.Count - numberInSequence; i++)
                {
                    Tick[] ticks = new Tick[numberInSequence];
                    for (int j = 0; j < numberInSequence; j++)
                    {
                        ticks[j] = eachStock.priceList.ElementAt(i + j);
                    }

                    stockSequenceDict[SequenceString(ticks)] += 1;
                }

                Console.WriteLine();

                // output the result to sysout or log
                foreach (KeyValuePair<string, int> eachKYP in stockSequenceDict)
                {
                    if (eachKYP.Value != 0)
                    {
                        Console.WriteLine("SSEC: Stock " + eachStock.stockCode +
                            " has the sequence " + eachKYP.Key + " occurred " + eachKYP.Value + " times");
                    }
                }

                // add to the master table for future similarity calculations
                masterDict1.Add(eachStock, stockSequenceDict);
            }

            // define the clusters with randomly defining a stock into each of them
            Dictionary<Stock, int> masterDict2 = new Dictionary<Stock, int>(); // -- t
            Dictionary<Stock, int> masterDict3 = null; // -- t-1
            Dictionary<Stock, int> masterDict4 = null; // -- t-2
            Random random = new Random();

            // Seeding needs one distinct stock per cluster; without this check the loop below
            // would retry forever once every stock has already been picked.
            if (numberOfClusters > stockList.Count)
            {
                throw new ArgumentOutOfRangeException("numberOfClusters",
                    "The number of clusters must not exceed the number of stocks.");
            }

            int seeded = 0;
            while (seeded < numberOfClusters)
            {
                Stock pointer = stockList[random.Next(stockList.Count)];
                Console.WriteLine("SSEC: Stock " + pointer.stockCode + " randomly selected for cluster " + seeded);

                // a stock that already seeds a cluster is rejected and another one is drawn
                if (masterDict2.ContainsKey(pointer))
                {
                    continue;
                }

                masterDict2.Add(pointer, seeded);
                seeded++;
            }

            // every stock that is not a seed starts unassigned (-1)
            foreach (Stock eachStock in stockList)
            {
                if (!masterDict2.ContainsKey(eachStock))
                {
                    masterDict2.Add(eachStock, -1);
                }
            }

            bool clusterElementChanged = true;

            // the comparison is performed iteratively until no more element migration between clusters are observed
            for (int iteration = 1; clusterElementChanged; iteration++)
            {
                // initiation - to recalculate the means (centers) of each clusters
                clusterElementChanged = false;
                clustersCenterList = CalculateClusterCenter(possibleSequences, masterDict1, masterDict2, numberOfClusters);

                // perform similarity calculations - for each stocks in the list
                for (int i = 0; i < stockList.Count; i++)
                {
                    int minimalDistanceCluster = int.MaxValue;
                    double minimalDistanceClusterDistance = double.MaxValue;

                    // compare with the center of each clusters
                    for (int j = 0; j < numberOfClusters; j++)
                    {
                        double thisRoundDistance = CalculateDistance(possibleSequences, clustersCenterList[j], masterDict1[stockList[i]]);
                        Console.WriteLine("SSEC: Stock " + stockList[i].stockCode + " vs cluster " + j + ", iteration " + iteration + ", distance " + thisRoundDistance);

                        if (thisRoundDistance < minimalDistanceClusterDistance)
                        {
                            minimalDistanceCluster = j;
                            minimalDistanceClusterDistance = thisRoundDistance;
                        }
                    }

                    // if the result is different from the master table, that means the results have been changed
                    if (masterDict2[stockList[i]] != minimalDistanceCluster)
                    {
                        masterDict2[stockList[i]] = minimalDistanceCluster;
                        clusterElementChanged = true;
                    }
                }

                // empty cluster check: move a randomly drawn stock into each cluster that has none
                for (int i = 0; i < numberOfClusters; i++)
                {
                    if (DirtyHelper.CheckWetherClusterHasNoStock(masterDict2, i))
                    {
                        while (true)
                        {
                            // Reuse the shared generator: a fresh clock-seeded Random per pass can
                            // keep returning the same index and spin on a rejected stock.
                            int tobe = random.Next(stockList.Count);
                            if (!DirtyHelper.CheckWetherThisStockIsALonelyStock(masterDict2, masterDict2[stockList[tobe]]))
                            {
                                masterDict2[stockList[tobe]] = i;
                                break;
                            }
                        }
                    }
                }

                // infinite loop check: the assignment equals the one from one or two iterations ago
                if (CompareDictionary(masterDict2, masterDict3) || CompareDictionary(masterDict2, masterDict4))
                {
                    clusterElementChanged = false;
                    Console.WriteLine("\nSSEC: Infinite loop detected, will not go into next loop.");
                }
                else
                {
                    // if clusters are ok, copy as temp and go to next iteration
                    masterDict4 = masterDict3;
                    masterDict3 = CloneDictionary(masterDict2);
                }

                // Give up only when another pass would actually be needed; a result that settles
                // exactly on the last allowed pass is still returned.
                if (clusterElementChanged && iteration == maxIterations)
                {
                    throw new InvalidOperationException(
                        "Clustering did not converge within " + maxIterations + " iterations.");
                }

                // **********
                // Note: massive debug logging here, consider refactoring
                // **********

                Console.WriteLine("SSEC: Iteration " + iteration + " done");

                if (iteration % 3 == 0 || !clusterElementChanged)
                {
                    Console.WriteLine("\nSSEC: Iteration " + iteration + " result");

                    foreach (KeyValuePair<Stock, int> eachKYP in masterDict2)
                    {
                        Console.Write("SSEC: Stock " + eachKYP.Key.stockCode + ", cluster ");

                        if (masterDict4 != null)
                        {
                            Console.Write(masterDict4[eachKYP.Key] + "->");
                        }

                        if (masterDict3 != null)
                        {
                            Console.Write(masterDict3[eachKYP.Key] + "->");
                        }

                        Console.WriteLine(eachKYP.Value);
                    }

                    for (int i = 0; i < numberOfClusters; i++)
                    {
                        Console.WriteLine("SSEC: Cluster " + i + " has " + NumberofElements(masterDict2, i) + " stocks.");
                    }
                }

                Console.WriteLine();
            }

            // populate a list of clusters in returning format
            List<Cluster> toReturn = new List<Cluster>();

            for (int i = 0; i < numberOfClusters; i++)
            {
                Cluster thisCluster = new Cluster();
                thisCluster.clusterCode = i + 1; // cluster codes are 1-based
                thisCluster.stockCodeList = new List<int>();
                thisCluster.centroid = new List<Tick>();

                // populate the stock codes
                foreach (KeyValuePair<Stock, int> eachStockClusterKYP in masterDict2)
                {
                    if (eachStockClusterKYP.Value == i)
                    {
                        thisCluster.stockCodeList.Add(eachStockClusterKYP.Key.stockCode);
                    }
                }

                // the centroid is exposed as one FakeTick per sequence / center value pair
                foreach (KeyValuePair<string, double> seqOccuranceKYP in clustersCenterList[i])
                {
                    thisCluster.centroid.Add(new FakeTick(seqOccuranceKYP.Key, seqOccuranceKYP.Value));
                }

                toReturn.Add(thisCluster);
            }

            return toReturn;
        }