Esempio n. 1
0
        /// <summary>
        /// Program entry point. Applies optional command-line overrides, feeds a shapelet
        /// data file through <c>InsertTimeSeries</c>, saves the index and loads it back,
        /// writes the <c>output</c> member to <c>indexOutput.csv</c>, and finally issues
        /// 100 approximate searches with random-walk queries of length 4.
        /// </summary>
        /// <param name="args">
        /// Either ignored, or exactly three values: root directory, number of time series
        /// (parsed as <c>int</c>) and the new-split-policy flag (parsed as <c>bool</c>).
        /// </param>
        public static void Main(string[] args)
        {
            // Overrides are applied only when exactly three arguments are supplied.
            bool hasOverrides = args.Length == 3;
            if (hasOverrides)
            {
                ROOT_DIR       = args[0];
                NUM_TIMESERIES = int.Parse(args[1]);
                NEWSPLITPOLICY = bool.Parse(args[2]);
            }

            InitalizeGlobalSettings();

            // Test 2 - reading shapelets from a file-backed loader.
            const string shapeletFile = @"/Users/ppo/Dropbox/Thesis/datasets/SonyAIBORobotSurfaceII/SonyAIBORobotSurfaceII_TRAIN_SHAPELET_32_space";
            Index<RawShapeletFormat> shapeletIndex = new Index<RawShapeletFormat>(0, new IndexOptions("root"));
            DataLoader loader = new FromFileRawShapeletLoader(shapeletIndex, shapeletFile, 10000);
            InsertTimeSeries(loader);

            // Persist the index, then read it back from the same directory.
            Index<RawShapeletFormat>.Save(Globals.IndexRootDir, shapeletIndex);
            Index<RawShapeletFormat> reloaded = Index<RawShapeletFormat>.Load(Globals.IndexRootDir);

            // NOTE(review): presumably OutputIndex fills the 'output' member written below - confirm.
            OutputIndex(reloaded, "root");
            string csvPath = Path.Combine(Globals.IndexRootDir, "indexOutput.csv");
            using (StreamWriter writer = new StreamWriter(csvPath))
            {
                writer.Write(output);
            }

            // Exercise the in-memory index; the retrieved entries are not used further.
            int remaining = 100;
            while (remaining-- > 0)
            {
                Double[]   walk = Util.RandomWalk(4);
                IndexEntry hit  = shapeletIndex.ApproximateSearch(walk);
                shapeletIndex.ReturnDataFormatFromTermEntry((TermEntry)hit);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Loads the index from <c>Globals.IndexRootDir</c>, reads the query file, and for
        /// every query (down-sampled, then passed through <c>Util.NormalizationHandler</c>)
        /// prints the file name of the approximate-search result and the entry of that
        /// result with the smallest Euclidean distance to the query.
        /// </summary>
        /// <exception cref="ApplicationException">
        /// A prepared query does not have <c>Globals.TimeSeriesLength</c> values.
        /// </exception>
        public static void TinyImagesExperiment()
        {
            Index<Meta1DataFormat> index = Index<Meta1DataFormat>.Load(Globals.IndexRootDir);

            List<double[]> queries = Util.ReadFiletoDoubleList(
                @"F:\Exp\TinyImages_256Len_8Word_2KThreshold\_queries\queries.txt", false);

            for (int q = 0; q < queries.Count; ++q)
            {
                // The prepared query replaces the raw one in the list.
                queries[q] = Util.NormalizationHandler(Util.DownSample(queries[q], TinyImagesDataLoader.DOWNSAMPLERATE));
                double[] query = queries[q];
                if (query.Length != Globals.TimeSeriesLength)
                {
                    throw new ApplicationException("queries[i].Length != Globals.TimeSeriesLength");
                }

                TermEntry node = index.ApproximateSearch(query);
                Console.WriteLine("Query:{0} FileName:{1}", q, node.FileName);

                // Linear scan of the returned entries; strict '<' keeps the first minimum.
                List<Meta1DataFormat> candidates = index.ReturnDataFormatFromTermEntry(node);
                double          bestDist = Double.MaxValue;
                Meta1DataFormat best     = new Meta1DataFormat();
                for (int c = 0; c < candidates.Count; ++c)
                {
                    Meta1DataFormat candidate = candidates[c];
                    double          distance  = Util.EuclideanDistance(candidate.GetTimeSeries(), query);
                    if (distance < bestDist)
                    {
                        bestDist = distance;
                        best     = candidate;
                    }
                }

                Console.WriteLine("BsfDist:{0} LocMeta:{1}", bestDist, best.meta);
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Loads the index from <c>Globals.IndexRootDir</c>, reads the insect query file and,
        /// for each query, prints the approximate-search result and its closest entry
        /// (Euclidean distance). The query and the closest entry's time series are also
        /// written, one per line, to <c>C:\Temp\insect\output.txt</c>.
        /// </summary>
        /// <exception cref="ApplicationException">
        /// A query does not have <c>Globals.TimeSeriesLength</c> values.
        /// </exception>
        public static void InsectExperiment()
        {
            Index<Meta3DataFormat> index = Index<Meta3DataFormat>.Load(Globals.IndexRootDir);

            List<double[]> queries = Util.ReadFiletoDoubleList(@"C:\Temp\insect\queries.txt", true);

            using (StreamWriter report = new StreamWriter(@"C:\Temp\insect\output.txt"))
            {
                for (int q = 0; q < queries.Count; ++q)
                {
                    double[] query = queries[q];
                    if (query.Length != Globals.TimeSeriesLength)
                    {
                        throw new ApplicationException("queries[i].Length != Globals.TimeSeriesLength");
                    }

                    TermEntry node = index.ApproximateSearch(query);
                    Console.WriteLine("Query:{0} FileName:{1}", q, node.FileName);

                    // Linear scan of the returned entries; strict '<' keeps the first minimum.
                    List<Meta3DataFormat> candidates = index.ReturnDataFormatFromTermEntry(node);
                    double          bestDist = Double.MaxValue;
                    Meta3DataFormat best     = new Meta3DataFormat();
                    for (int c = 0; c < candidates.Count; ++c)
                    {
                        Meta3DataFormat candidate = candidates[c];
                        double          distance  = Util.EuclideanDistance(candidate.GetTimeSeries(), query);
                        if (distance < bestDist)
                        {
                            bestDist = distance;
                            best     = candidate;
                        }
                    }

                    Console.WriteLine("BsfDist:{0} Meta1:{1} Meta2:{2}", bestDist, best.meta1, best.meta2);

                    // Query first, then its best match, so the file holds consecutive pairs.
                    report.WriteLine(Util.ArrayToString(query));
                    report.WriteLine(Util.ArrayToString(best.GetTimeSeries()));
                }
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Builds an index from generated time series, prints the disk-cost counters,
        /// saves and reloads the index, then compares exact search, approximate search
        /// and a sequential 10-nearest-neighbour scan on ten random-walk queries.
        /// </summary>
        public static void BaseIndex()
        {
            DateTime buildStart = DateTime.Now;

            // Index construction from generated data.
            Index<RawDataFormat> index  = new Index<RawDataFormat>(0, new IndexOptions("root"));
            DataLoader           loader = new GeneratedRawDataLoader(index, Globals.TimeSeriesLength, NUM_TIMESERIES, SEED);
            InsertTimeSeries(loader);

            Console.WriteLine();
            Console.WriteLine("Sequential Disk Accesses: " + DiskCost.seqcost);
            Console.WriteLine("Random Disk Accesses: " + DiskCost.rancost);
            Console.WriteLine("Read Disk Accesses: " + DiskCost.readcost);
            Console.WriteLine("Saved cost in buffer: " + DiskCost.savedcost);
            Console.WriteLine();

            // Save, then load back; the reloaded instance is not used afterwards, but the
            // load is part of the measured construction time.
            Index<RawDataFormat>.Save(Globals.IndexRootDir, index);
            Index<RawDataFormat>.Load(Globals.IndexRootDir);

            TimeSpan buildTime = DateTime.Now - buildStart;
            Console.WriteLine("Index Construction Time: {0}", buildTime);

            // Generate the test queries.
            const int queryCount = 10;
            List<double[]> queries = new List<double[]>(queryCount);
            while (queries.Count < queryCount)
            {
                queries.Add(Util.RandomWalk(Globals.TimeSeriesLength));
            }

            // Sequential scan result, used as the reference for the exact search below.
            Console.WriteLine("Performing full sequential scan.");
            Console.WriteLine("--------------------------------");
            List<IndexFileDist[]> scanResults = index.KNearestNeighborSequentialScan(10, queries);
            Console.WriteLine();

            Console.WriteLine("Performing exact and approximate search.");
            Console.WriteLine("----------------------------------------");
            int matches = 0;
            for (int q = 0; q < queryCount; q++)
            {
                double[] query = queries[q];

                IndexFileDist exact;
                index.ExactSearch(query, out exact);

                string        approxFile = index.ApproximateSearch(query).FileName;
                IndexFileDist approx     = Index<RawDataFormat>.MinFileEucDist(query, approxFile);

                // The exact search must agree with the first entry of the sequential scan.
                Assert.IsTrue(exact == scanResults[q][0]);

                if (approx == exact)
                {
                    matches++;
                    Console.WriteLine(approx);
                }
            }

            Console.WriteLine("{0} approximate results == exact results.", matches);
            Console.WriteLine();
        }
Esempio n. 5
0
        /// <summary>
        /// Builds an index from generated time series, runs 100 random-walk queries through
        /// both exact and approximate search, and writes the averaged metrics to
        /// <c>searchQuality.txt</c> under <c>ROOT_DIR</c>.
        /// </summary>
        public static void SearchQualityExperiment()
        {
            DateTime buildStart = DateTime.Now;

            // Index construction from generated data.
            Index<RawDataFormat> index  = new Index<RawDataFormat>(0, new IndexOptions("root"));
            DataLoader           loader = new GeneratedRawDataLoader(index, Globals.TimeSeriesLength, NUM_TIMESERIES, SEED);
            InsertTimeSeries(loader);

            Console.WriteLine();
            Console.WriteLine("Sequential Disk Accesses: " + DiskCost.seqcost);
            Console.WriteLine("Random Disk Accesses: " + DiskCost.rancost);
            Console.WriteLine("Read Disk Accesses: " + DiskCost.readcost);
            Console.WriteLine("Saved cost in buffer: " + DiskCost.savedcost);
            Console.WriteLine();

            // Save, then load back; the reloaded instance is not used afterwards, but the
            // load is part of the measured construction time.
            Index<RawDataFormat>.Save(Globals.IndexRootDir, index);
            Index<RawDataFormat>.Load(Globals.IndexRootDir);

            TimeSpan buildTime = DateTime.Now - buildStart;
            Console.WriteLine("Index Construction Time: {0}", buildTime);

            // Queries the metrics are averaged over.
            const int queryCount = 100;
            List<double[]> queries = new List<double[]>(queryCount);
            while (queries.Count < queryCount)
            {
                queries.Add(Util.RandomWalk(Globals.TimeSeriesLength));
            }

            // Running totals; turned into per-query averages after the loop.
            double      totalBestDist = 0;
            double      totalNodeDist = 0;
            double      totalNodeSize = 0;
            CostCounter exactCosts    = new CostCounter();

            foreach (double[] query in queries)
            {
                // Exact search: only its cost counters are accumulated.
                IndexFileDist exact;
                exactCosts += index.ExactSearch(query, out exact);

                // Approximate search: inspect every entry of the returned node.
                TermEntry           node    = index.ApproximateSearch(query);
                List<RawDataFormat> entries = index.ReturnDataFormatFromTermEntry(node);

                double nearest  = double.MaxValue;
                double distSum  = 0;
                foreach (RawDataFormat entry in entries)
                {
                    double distance = Util.EuclideanDistance(query, entry.GetTimeSeries());
                    distSum += distance;
                    if (distance < nearest)
                    {
                        nearest = distance;
                    }
                }

                totalBestDist += nearest;
                totalNodeDist += distSum / entries.Count;
                totalNodeSize += entries.Count;
            }

            double numQueries = queries.Count;
            totalBestDist /= numQueries;
            totalNodeDist /= numQueries;
            totalNodeSize /= numQueries;

            string reportPath = Path.Combine(ROOT_DIR, "searchQuality.txt");
            using (StreamWriter report = new StreamWriter(reportPath))
            {
                string header = string.Format("{0}:NumTs_{1}:Th_{2}:Wl_{3}:NewPolicy", NUM_TIMESERIES, Globals.IndexNumMaxEntries, Globals.TimeSeriesLength, Globals.NewSplitPolicy);
                report.WriteLine(header);
                report.WriteLine("ExactSearchNumIO {0}", exactCosts.IO / numQueries);
                report.WriteLine("ExactSearchNumCalcuations {0}", exactCosts.distance / numQueries);
                report.WriteLine("ApproxSearchDistance {0}", totalBestDist);
                report.WriteLine("ApproxSearchAverageNodeDistance {0}", totalNodeDist);
                report.WriteLine("ApproxSearchAverageNodeSize {0}", totalNodeSize);

                // The second value of every query is written out after the metrics.
                report.WriteLine("ValidationString ");
                foreach (double[] query in queries)
                {
                    report.Write("{0} ", query[1]);
                }
                report.WriteLine();
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Queries a previously built DNA index with sliding windows taken from every
        /// <c>*.dat</c> file in the human chromosome folder. Each window is mean-centred and
        /// searched twice (as-is and reversed); the closest entry of each approximate-search
        /// result is recorded and all results are written to <c>queryOutput.txt</c> in
        /// <c>Globals.IndexRootDir</c>.
        /// </summary>
        public static void DnaExperiment()
        {
            Util.NormalizationHandler = new Util.Normalize(Util.MeanZero_Normalization);
            // in-memory data, referenced by the index
            const string DATAFOLDER = @"K:\Datasets\DNA\Dna2Ts\Monkey_Binary";

            // load index
            Index<Meta2DataFormat> si = Index<Meta2DataFormat>.Load(Globals.IndexRootDir);

            // populate in-memory data
            DnaDataLoader.LoadDnaToMetaBuffer(DATAFOLDER);

            // generate queries
            DateTime queryStart = DateTime.Now;
            int      numQueries = 0;

            string[] humanChrs = Directory.GetFiles(@"K:\Datasets\DNA\Dna2Ts\Human_Binary", "*.dat");
            Array.Sort(humanChrs, new NaturalStringComparer());
            Dictionary<string, DnaChrResult> queryResult = new Dictionary<string, DnaChrResult>(humanChrs.Length);

            for (int chrNo = 0; chrNo < humanChrs.Length; ++chrNo)
            {
                string chrFile = humanChrs[chrNo];
                GC.Collect();
                using (BinaryReader br = new BinaryReader(new FileStream(chrFile, FileMode.Open, FileAccess.Read)))
                {
                    List<DnaSearchResult> qResults = new List<DnaSearchResult>();

                    // Number of 32-bit values stored in the file.
                    long fileLength = br.BaseStream.Length / sizeof(int);

                    // Shift by quarters of a window; clamp to 1 so the window loop below
                    // always advances, even for a series length below 4.
                    int posShift = Math.Max(1, Globals.TimeSeriesLength / 4);

                    // fileLength is already a value count, so it must not be divided by
                    // sizeof(int) again; doing so down-sampled only the first quarter of
                    // each chromosome file.
                    double[] dnaChr = new double[(int)Math.Floor(fileLength / (double)DnaDataLoader.SAMPLERATE)];
                    Console.WriteLine("F:{0} OrigLen:{1} newLen:{2} Shift:{3}", chrFile, fileLength, dnaChr.Length, posShift);

                    // downsample: every output value is the mean of SAMPLERATE consecutive inputs
                    for (int i = 0; i < dnaChr.Length; ++i)
                    {
                        double sum = 0;
                        for (int count = 0; count < DnaDataLoader.SAMPLERATE; ++count)
                        {
                            sum += br.ReadInt32();
                        }
                        dnaChr[i] = sum / DnaDataLoader.SAMPLERATE;
                    }

                    // Scratch buffer reused for every window.
                    double[] ts = new double[Globals.TimeSeriesLength];
                    for (int pos = 0; pos < dnaChr.Length - Globals.TimeSeriesLength; pos += posShift)
                    {
                        numQueries += 2;
                        Array.Copy(dnaChr, pos, ts, 0, Globals.TimeSeriesLength);
                        double mean = Util.Mean(ts, 0, ts.Length - 1);
                        for (int k = 0; k < ts.Length; ++k)
                        {
                            ts[k] = ts[k] - mean;
                        }

                        // forward
                        qResults.Add(FindBestDnaMatch(si, ts, chrNo, pos));

                        // reverse: done in place, the buffer is overwritten by the next window
                        Array.Reverse(ts);
                        qResults.Add(FindBestDnaMatch(si, ts, chrNo, pos));
                    }

                    queryResult.Add(chrFile, new DnaChrResult()
                    {
                        results = qResults
                    });
                }
            }
            DateTime queryStop = DateTime.Now;

            Console.WriteLine("{0} Queries, {1} TimeElapsed.", numQueries, queryStop - queryStart);

            // print results
            using (StreamWriter sw = new StreamWriter(Path.Combine(Globals.IndexRootDir, "queryOutput.txt")))
            {
                foreach (KeyValuePair<string, DnaChrResult> kvp in queryResult)
                {
                    foreach (DnaSearchResult sr in kvp.Value.results)
                    {
                        sw.WriteLine(sr.ToString());
                    }
                }
            }
        }

        /// <summary>
        /// Runs one approximate search for <paramref name="ts"/> and returns the entry of the
        /// resulting node with the smallest Euclidean distance to the query.
        /// </summary>
        /// <param name="si">Index to search.</param>
        /// <param name="ts">Query time series.</param>
        /// <param name="chrNo">Query chromosome number, copied into the result.</param>
        /// <param name="pos">Query position, copied into the result.</param>
        /// <returns>
        /// The best match found. If the node has no entries, the distance is
        /// <c>Double.MaxValue</c> and the match fields come from a default
        /// <c>Meta2DataFormat</c>.
        /// </returns>
        private static DnaSearchResult FindBestDnaMatch(Index<Meta2DataFormat> si, double[] ts, int chrNo, int pos)
        {
            TermEntry             tEntry          = si.ApproximateSearch(ts);
            List<Meta2DataFormat> termNodeEntries = si.ReturnDataFormatFromTermEntry(tEntry);

            double          bsfDist = Double.MaxValue;
            Meta2DataFormat bsfMeta = new Meta2DataFormat();
            foreach (Meta2DataFormat m in termNodeEntries)
            {
                // Stored series are passed through the normalization handler before comparison.
                double dist = Util.EuclideanDistance(Util.NormalizationHandler(m.GetTimeSeries()), ts);
                if (dist < bsfDist)
                {
                    bsfDist = dist;
                    bsfMeta = m;
                }
            }

            return new DnaSearchResult()
            {
                dist        = bsfDist,
                matchingChr = bsfMeta._chrNo,
                matchingPos = bsfMeta._pos,
                queryChr    = chrNo,
                queryPos    = pos,
            };
        }