Example #1
        public static void Main(string[] args)
        {
            if (args.Length == 3)
            {
                ROOT_DIR       = args[0];
                NUM_TIMESERIES = int.Parse(args[1]);
                NEWSPLITPOLICY = bool.Parse(args[2]);
            }
            InitalizeGlobalSettings();
            //BaseIndex();

            // Test 1 - Reading from GeneratedRawDataIndex

            /*Index<RawDataFormat> testIndex = Index<RawDataFormat>.Load(Globals.IndexRootDir);
             * Double[] query = Util.RandomWalk(2);
             * IndexEntry result = testIndex.ApproximateSearch(query);
             * List<RawDataFormat> timeSeries = testIndex.ReturnDataFormatFromTermEntry((TermEntry)result);*/


            // Test 2 - Reading from FileRawDataLoader
            //const string DATA_FILE = @"/Users/ppo/Documents/Thesis/test";
            const string DATA_FILE = @"/Users/ppo/Dropbox/Thesis/datasets/SonyAIBORobotSurfaceII/SonyAIBORobotSurfaceII_TRAIN_SHAPELET_32_space";
            Index<RawShapeletFormat> anotherIndex = new Index<RawShapeletFormat>(0, new IndexOptions("root"));
            DataLoader dl = new FromFileRawShapeletLoader(anotherIndex, DATA_FILE, 10000);

            InsertTimeSeries(dl);
            Index<RawShapeletFormat>.Save(Globals.IndexRootDir, anotherIndex);

            Index<RawShapeletFormat> loadBack = Index<RawShapeletFormat>.Load(Globals.IndexRootDir);

            OutputIndex(loadBack, "root");
            using (StreamWriter sw = new StreamWriter(Path.Combine(Globals.IndexRootDir, "indexOutput.csv")))
            {
                sw.Write(output);
            }

            /*foreach (IndexEntry i in anotherIndex.GetIndexEntries()) {
             *  if (i is TermEntry) {
             *      ((TermEntry)i).ToString();
             *  } else {
             *      foreach (IndexEntry j in ((SplitEntry<RawShapeletFormat>)i).GetIndexEntries())
             *      {
             *          if (j is TermEntry)
             *          {
             *              ((TermEntry)j).ToString();
             *          } else {
             *              ((SplitEntry<RawShapeletFormat>)j).GetIndexEntries();
             *          }
             *      }
             *  }
             * }*/

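            // Test 3 - Approximate search with random-walk queries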
            for (int i = 0; i < 100; i++)
            {
                Double[]   query  = Util.RandomWalk(4);
                IndexEntry result = anotherIndex.ApproximateSearch(query);
                List<RawShapeletFormat> timeSeries = anotherIndex.ReturnDataFormatFromTermEntry((TermEntry)result);
            }
            //Console.WriteLine("Press Enter to exit program.");
            //Console.ReadLine();
        }
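The approximate-search loop at the end of Main discards its results. As a minimal sketch, reusing only calls already shown in this listing (the cast to TermEntry mirrors the line above, and FileName is the same member the later examples read), the loop could report what each query returned:

            for (int i = 0; i < 100; i++)
            {
                Double[] query = Util.RandomWalk(4);
                TermEntry result = (TermEntry)anotherIndex.ApproximateSearch(query);
                List<RawShapeletFormat> timeSeries = anotherIndex.ReturnDataFormatFromTermEntry(result);

                // Report which terminal node answered the query and how many entries it holds.
                Console.WriteLine("Query {0}: node {1}, {2} entries", i, result.FileName, timeSeries.Count);
            }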
Example #2
        /// <summary>
        /// Builds an index from the TinyImages dataset
        /// </summary>
        public static void TinyImagesIndex()
        {
            DateTime     startTime = DateTime.Now;
            const string DATA_DIR  = @"D:\jin\TinyImagesBinary";

            Index<Meta1DataFormat> si = new Index<Meta1DataFormat>(0, new IndexOptions("root"));
            DataLoader dl = new TinyImagesDataLoader(si, DATA_DIR);

            InsertTimeSeries(dl);
            Index<Meta1DataFormat>.Save(Globals.IndexRootDir, si);

            Index<Meta1DataFormat> si2 = Index<Meta1DataFormat>.Load(Globals.IndexRootDir);

            DateTime endConstructionTime = DateTime.Now;

            Console.WriteLine("Index Construction Time: {0}", endConstructionTime - startTime);
        }
Example #3
        /// <summary>
        /// Builds an index from the DNA dataset
        /// </summary>
        public static void DnaIndex()
        {
            DateTime startTime = DateTime.Now;

            Util.NormalizationHandler = new Util.Normalize(Util.MeanZero_Normalization);
            const string DATA_FILE = @"M:\Datasets\DNA\Dna2Ts\isax2.0experiment\16.mat.dat";

            Index<Meta2DataFormat> si = new Index<Meta2DataFormat>(0, new IndexOptions("root"));
            DataLoader dl = new DnaDataLoader(si, DATA_FILE);

            InsertTimeSeries(dl);
            Index<Meta2DataFormat>.Save(Globals.IndexRootDir, si);

            Index<Meta2DataFormat> si2 = Index<Meta2DataFormat>.Load(Globals.IndexRootDir);

            DateTime endConstructionTime = DateTime.Now;

            Console.WriteLine("Index Construction Time: {0}", endConstructionTime - startTime);
        }
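DnaIndex swaps the global normalization handler to Util.MeanZero_Normalization before loading the data, but the normalizer itself is not shown in this listing. A minimal sketch of mean-zero (zero-centering) normalization, assuming the Util.Normalize delegate maps a double[] to a double[]:

        // Hypothetical mean-zero normalizer: subtracts the series mean so the
        // output averages to zero. The name and signature are assumptions; the
        // real Util.MeanZero_Normalization implementation is not shown here.
        public static double[] MeanZeroNormalize(double[] series)
        {
            double mean = 0;
            foreach (double value in series)
            {
                mean += value;
            }
            mean /= series.Length;

            double[] normalized = new double[series.Length];
            for (int i = 0; i < series.Length; i++)
            {
                normalized[i] = series[i] - mean;
            }
            return normalized;
        }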
Example #4
        /// <summary>
        /// Builds an index with randomly generated time series
        /// </summary>
        public static void BaseIndex()
        {
            DateTime startTime = DateTime.Now;
            // index construction
            Index<RawDataFormat> si = new Index<RawDataFormat>(0, new IndexOptions("root"));
            DataLoader           dl = new GeneratedRawDataLoader(si, Globals.TimeSeriesLength, NUM_TIMESERIES, SEED);

            InsertTimeSeries(dl);
            Console.WriteLine();
            Console.WriteLine("Sequential Disk Accesses: " + DiskCost.seqcost);
            Console.WriteLine("Random Disk Accesses: " + DiskCost.rancost);
            Console.WriteLine("Read Disk Accesses: " + DiskCost.readcost);
            Console.WriteLine("Saved cost in buffer: " + DiskCost.savedcost);
            Console.WriteLine();
            Index<RawDataFormat>.Save(Globals.IndexRootDir, si);

            Index<RawDataFormat> si2 = Index<RawDataFormat>.Load(Globals.IndexRootDir);

            DateTime endConstructionTime = DateTime.Now;

            Console.WriteLine("Index Construction Time: {0}", endConstructionTime - startTime);

            // generate some test queries
            const int      NUM_QUERIES = 10;
            List<double[]> queries     = new List<double[]>(NUM_QUERIES);

            for (int i = 0; i < NUM_QUERIES; i++)
            {
                queries.Add(Util.RandomWalk(Globals.TimeSeriesLength));
            }

            // full sequential scan
            Console.WriteLine("Performing full sequential scan.");
            Console.WriteLine("--------------------------------");
            List<IndexFileDist[]> nnInfo = si.KNearestNeighborSequentialScan(10, queries);

            Console.WriteLine();

            // query results
            Console.WriteLine("Performing exact and approximate search.");
            Console.WriteLine("----------------------------------------");
            int counter = 0;

            for (int i = 0; i < NUM_QUERIES; i++)
            {
                IndexFileDist exactResult;
                si.ExactSearch(queries[i], out exactResult);

                IndexFileDist approxResult = Index<RawDataFormat>.MinFileEucDist(
                    queries[i], si.ApproximateSearch(queries[i]).FileName);

                Assert.IsTrue(exactResult == nnInfo[i][0]);

                if (approxResult == exactResult)
                {
                    counter++;
                    Console.WriteLine(approxResult);
                }
            }
            Console.WriteLine("{0} approximate results == exact results.", counter);
            Console.WriteLine();
        }
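The sequential-scan results in nnInfo are only consulted by the assertion above. As a small follow-up sketch that could be appended to the end of BaseIndex, using only the locals the method already declares (an IndexFileDist is printed directly, just as the example does with approxResult):

            // Print the true nearest neighbor found by the sequential scan for each query.
            for (int i = 0; i < NUM_QUERIES; i++)
            {
                Console.WriteLine("Query {0}: sequential-scan 1-NN = {1}", i, nnInfo[i][0]);
            }

            // Overall hit rate of approximate search against exact search.
            Console.WriteLine("Approximate search accuracy: {0:P0}", counter / (double)NUM_QUERIES);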
Example #5
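        /// <summary>
        /// Builds an index from generated time series and measures exact vs. approximate search quality
        /// </summary>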
        public static void SearchQualityExperiment()
        {
            DateTime startTime = DateTime.Now;

            // index construction
            Index<RawDataFormat> si = new Index<RawDataFormat>(0, new IndexOptions("root"));
            DataLoader           dl = new GeneratedRawDataLoader(si, Globals.TimeSeriesLength, NUM_TIMESERIES, SEED);

            InsertTimeSeries(dl);
            Console.WriteLine();
            Console.WriteLine("Sequential Disk Accesses: " + DiskCost.seqcost);
            Console.WriteLine("Random Disk Accesses: " + DiskCost.rancost);
            Console.WriteLine("Read Disk Accesses: " + DiskCost.readcost);
            Console.WriteLine("Saved cost in buffer: " + DiskCost.savedcost);
            Console.WriteLine();
            Index<RawDataFormat>.Save(Globals.IndexRootDir, si);

            Index<RawDataFormat> si2 = Index<RawDataFormat>.Load(Globals.IndexRootDir);

            DateTime endConstructionTime = DateTime.Now;

            Console.WriteLine("Index Construction Time: {0}", endConstructionTime - startTime);

            // avg over queries
            const int      NUM_QUERIES = 100;
            List<double[]> queries     = new List<double[]>(NUM_QUERIES);

            for (int i = 0; i < NUM_QUERIES; i++)
            {
                queries.Add(Util.RandomWalk(Globals.TimeSeriesLength));
            }


            // measured metrics
            double      approxSearchDist     = 0;
            double      approxSearchNodeDist = 0;
            double      approxSearchNodeSize = 0;
            CostCounter exactSearchCosts     = new CostCounter();

            for (int i = 0; i < queries.Count; ++i)
            {
                // exact search
                IndexFileDist eRes;
                exactSearchCosts += si.ExactSearch(queries[i], out eRes);

                // approximate search
                TermEntry approxNode = si.ApproximateSearch(queries[i]);

                double mDist = double.MaxValue;
                List<RawDataFormat> nodeEntries = si.ReturnDataFormatFromTermEntry(approxNode);
                double sumDists = 0;
                foreach (RawDataFormat rd in nodeEntries)
                {
                    double dist = Util.EuclideanDistance(queries[i], rd.GetTimeSeries());
                    sumDists += dist;
                    if (dist < mDist)
                    {
                        mDist = dist;
                    }
                }
                approxSearchDist     += mDist;
                approxSearchNodeDist += sumDists / nodeEntries.Count;
                approxSearchNodeSize += nodeEntries.Count;
            }

            approxSearchDist     /= queries.Count;
            approxSearchNodeDist /= queries.Count;
            approxSearchNodeSize /= queries.Count;
            using (StreamWriter sw = new StreamWriter(Path.Combine(ROOT_DIR, "searchQuality.txt")))
            {
                string baseFormat = string.Format("{0}:NumTs_{1}:Th_{2}:Wl_{3}:NewPolicy", NUM_TIMESERIES, Globals.IndexNumMaxEntries, Globals.TimeSeriesLength, Globals.NewSplitPolicy);
                sw.WriteLine(baseFormat);
                sw.WriteLine("ExactSearchNumIO {0}", exactSearchCosts.IO / (double)queries.Count);
                sw.WriteLine("ExactSearchNumCalcuations {0}", exactSearchCosts.distance / (double)queries.Count);
                sw.WriteLine("ApproxSearchDistance {0}", approxSearchDist);
                sw.WriteLine("ApproxSearchAverageNodeDistance {0}", approxSearchNodeDist);
                sw.WriteLine("ApproxSearchAverageNodeSize {0}", approxSearchNodeSize);

                sw.WriteLine("ValidationString ");
                foreach (double[] query in queries)
                {
                    sw.Write("{0} ", query[1]);
                }
                sw.WriteLine();
            }
        }
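CostCounter itself is not part of this listing. Judging only from the usage above (ExactSearch returns one, += accumulates them, and the IO and distance members are divided by the query count), a minimal compatible sketch might look like the following; the library's actual type may differ:

        // Hypothetical sketch of a CostCounter compatible with the usage above.
        // The member names IO and distance come from the example; the struct
        // layout and the + operator are assumptions, not the library's actual type.
        public struct CostCounter
        {
            public int IO;        // node/page accesses charged to a search
            public int distance;  // distance computations charged to a search

            public CostCounter(int io, int dist)
            {
                IO = io;
                distance = dist;
            }

            public static CostCounter operator +(CostCounter a, CostCounter b)
            {
                return new CostCounter(a.IO + b.IO, a.distance + b.distance);
            }
        }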