Ejemplo n.º 1
0
        /// <summary>
        /// Searches the index for the stored entry closest to <paramref name="ts"/>.
        /// The answer is seeded from the file chosen by <c>ApproximateSearch</c>, then index
        /// entries are visited in ascending order of their <c>Sax.MinDistPAAToiSAX</c> value
        /// until that value is no longer below the best distance found so far.
        /// </summary>
        /// <param name="ts">Query time series.</param>
        /// <param name="bsf">Receives the best (smallest-distance) match found.</param>
        /// <returns>
        /// Counter holding the number of files examined (<c>IO</c>) and the accumulated
        /// <c>NumTimeSeries</c> of the entries whose files were examined (<c>distance</c>).
        /// </returns>
        public CostCounter ExactSearch(double[] ts, out IndexFileDist bsf)
        {
            CostCounter cost = new CostCounter(0, 0);
            IntervalHeap<IndexEntryDist> candidates = new IntervalHeap<IndexEntryDist>(NumIndexEntries);

            // Seed the best-so-far result from the file picked by the approximate search.
            TermEntry seedLeaf = ApproximateSearch(ts);
            bsf = Index<DATAFORMAT>.MinFileEucDist(ts, seedLeaf.FileName);
            cost.IO++;
            cost.distance += seedLeaf.NumTimeSeries;

            // Queue every entry stored directly under the root.
            foreach (IndexEntry rootChild in index.Values)
            {
                candidates.Add(
                    new IndexEntryDist(
                        rootChild,
                        Sax.MinDistPAAToiSAX(Sax.SaxStrToSaxVals(rootChild.SaxWord), options.SaxOpts, ts)));
            }

            while (!candidates.IsEmpty)
            {
                IndexEntryDist nearest = candidates.DeleteMin();

                // The queue is drained smallest-first, so once the head cannot beat the
                // current best there is nothing left worth opening.
                if (nearest.dist >= bsf.distance)
                {
                    break;
                }

                IndexEntry current = nearest.entry;

                if (current is TermEntry)
                {
                    // Terminal entry: scan its file and keep the result if it improves on bsf.
                    IndexFileDist fileBest = Index<DATAFORMAT>.MinFileEucDist(ts, ((TermEntry)current).FileName);
                    cost.IO++;
                    cost.distance += current.NumTimeSeries;

                    if (fileBest.distance < bsf.distance)
                    {
                        bsf = fileBest;
                    }

                    continue;
                }

                if (!(current is SplitEntry<DATAFORMAT>))
                {
                    continue;
                }

                // Split entry: queue its children, measured with the split node's own SAX options.
                SplitEntry<DATAFORMAT> split = (SplitEntry<DATAFORMAT>)current;
                foreach (IndexEntry child in split.GetIndexEntries())
                {
                    candidates.Add(
                        new IndexEntryDist(
                            child,
                            Sax.MinDistPAAToiSAX(Sax.SaxStrToSaxVals(child.SaxWord), split.Options.SaxOpts, ts)));
                }
            }

            return cost;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Brute-force k-nearest-neighbour search: reads every file matching "*.*.txt" under
        /// <c>Globals.IndexRootDir</c> and, for each query in <paramref name="tsList"/>, keeps the
        /// <paramref name="k"/> records with the smallest Euclidean distance to that query.
        /// Progress and a final summary are written to the console.
        /// </summary>
        /// <param name="k">
        /// Number of neighbours wanted per query. Values larger than <c>NumTimeSeries</c> are
        /// clamped to it (a notice is printed).
        /// </param>
        /// <param name="tsList">Query time series; every record read is compared against each one.</param>
        /// <returns>
        /// One array per query, in the same order as <paramref name="tsList"/>, filled in the order
        /// the heap's <c>DeleteMin</c> yields its items. Each array holds <paramref name="k"/>
        /// entries, or fewer when the scanned files contain fewer than <paramref name="k"/> records.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="tsList"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="k"/> is negative.</exception>
        public List<IndexFileDist[]> KNearestNeighborSequentialScan(int k, List<double[]> tsList)
        {
            if (tsList == null)
            {
                throw new ArgumentNullException("tsList");
            }

            if (k < 0)
            {
                throw new ArgumentOutOfRangeException("k", "k must not be negative.");
            }

            if (k > NumTimeSeries)
            {
                Console.WriteLine("K > number of time series, setting K to number of time series.");
                k = NumTimeSeries;
            }

            // One bounded heap per query; capacity k + 1 because an item is added before the
            // current worst one is evicted.
            List<IntervalHeap<IndexFileDist>> neighbors =
                new List<IntervalHeap<IndexFileDist>>(tsList.Count);

            for (int q = 0; q < tsList.Count; q++)
            {
                neighbors.Add(new IntervalHeap<IndexFileDist>(k + 1));
            }

            Console.Write("Retreiving All Index Files:");
            string[] indexFiles = Directory.GetFiles(Globals.IndexRootDir,
                                                     "*.*.txt", SearchOption.AllDirectories);
            Console.WriteLine(" {0} files.", indexFiles.Length);

            // Print progress roughly every 10% of the files (every file when there are fewer than 10).
            int progressStep = Math.Max(1, indexFiles.Length / 10);

            // Size of one serialized record; constant for the whole scan, so computed once.
            int recordSize = SaxData.ByteLength(typeof(DATAFORMAT));

            int srchFiles = 0;
            int srchTs    = 0;

            foreach (string f in indexFiles)
            {
                if (srchFiles % progressStep == 0)
                {
                    Console.Write("\r{0}", srchFiles);
                }

                srchFiles++;

                using (FileStream stream = new FileStream(f, FileMode.Open, FileAccess.Read))
                using (BinaryReader reader = new BinaryReader(stream))
                {
                    // Kept as long so files larger than int.MaxValue bytes are not truncated.
                    long length = stream.Length;
                    int  line   = 0;

                    for (long pos = 0; pos < length; pos += recordSize)
                    {
                        srchTs++;

                        byte[]   raw    = reader.ReadBytes(recordSize);
                        SaxData  record = SaxData.Parse<DATAFORMAT>(raw);
                        double[] data   = record.dl.GetTimeSeries();

                        // Compare this record against every query.
                        for (int query = 0; query < tsList.Count; query++)
                        {
                            double dist = Util.EuclideanDistance(data, tsList[query]);
                            IntervalHeap<IndexFileDist> heap = neighbors[query];

                            // Record positions are reported 1-based.
                            heap.Add(new IndexFileDist(f, line + 1, dist));

                            // Keep only the k best: drop the current maximum once over capacity.
                            if (heap.Count > k)
                            {
                                heap.DeleteMax();
                            }
                        }

                        line++;
                    }
                }
            }

            Console.WriteLine();
            Console.WriteLine("{0} files {1} entries searched.", srchFiles, srchTs);

            List<IndexFileDist[]> result = new List<IndexFileDist[]>(tsList.Count);

            foreach (IntervalHeap<IndexFileDist> heap in neighbors)
            {
                // A heap may hold fewer than k items when the files contained fewer than k
                // records; size the array to what is really there instead of draining an
                // empty heap.
                int found = Math.Min(k, heap.Count);
                IndexFileDist[] nearest = new IndexFileDist[found];

                for (int i = 0; i < found; i++)
                {
                    nearest[i] = heap.DeleteMin();
                }

                result.Add(nearest);
            }

            return result;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Builds an index from generated data, saves and reloads it, then runs 100 random-walk
        /// queries through both exact and approximate search. Averaged metrics are written to
        /// "searchQuality.txt" under <c>ROOT_DIR</c>; disk-cost counters and the construction
        /// time are printed to the console.
        /// </summary>
        public static void SearchQualityExperiment()
        {
            DateTime startTime = DateTime.Now;

            // Build the index from generated time series.
            Index<RawDataFormat> builtIndex = new Index<RawDataFormat>(0, new IndexOptions("root"));
            DataLoader loader = new GeneratedRawDataLoader(builtIndex, Globals.TimeSeriesLength, NUM_TIMESERIES, SEED);
            InsertTimeSeries(loader);

            Console.WriteLine();
            Console.WriteLine("Sequential Disk Accesses: " + DiskCost.seqcost);
            Console.WriteLine("Random Disk Accesses: " + DiskCost.rancost);
            Console.WriteLine("Read Disk Accesses: " + DiskCost.readcost);
            Console.WriteLine("Saved cost in buffer: " + DiskCost.savedcost);
            Console.WriteLine();

            // Persist the index and load it back. The reloaded instance is not used further;
            // all searches below run against the in-memory index built above.
            Index<RawDataFormat>.Save(Globals.IndexRootDir, builtIndex);
            Index<RawDataFormat>.Load(Globals.IndexRootDir);

            Console.WriteLine("Index Construction Time: {0}", DateTime.Now - startTime);

            // Generate the query workload.
            const int QueryTotal = 100;
            List<double[]> queries = new List<double[]>(QueryTotal);
            for (int n = 0; n < QueryTotal; n++)
            {
                queries.Add(Util.RandomWalk(Globals.TimeSeriesLength));
            }

            // Running totals; turned into per-query averages after the loop.
            double      bestDistTotal = 0;
            double      meanDistTotal = 0;
            double      nodeSizeTotal = 0;
            CostCounter exactTotals   = new CostCounter();

            foreach (double[] probe in queries)
            {
                // Exact search: only its cost counters are kept.
                IndexFileDist exactBest;
                exactTotals += builtIndex.ExactSearch(probe, out exactBest);

                // Approximate search: measure the node it lands in.
                TermEntry leaf = builtIndex.ApproximateSearch(probe);
                List<RawDataFormat> members = builtIndex.ReturnDataFormatFromTermEntry(leaf);

                double closest = double.MaxValue;
                double total   = 0;
                foreach (RawDataFormat member in members)
                {
                    double d = Util.EuclideanDistance(probe, member.GetTimeSeries());
                    total += d;
                    closest = d < closest ? d : closest;
                }

                bestDistTotal += closest;
                meanDistTotal += total / members.Count;
                nodeSizeTotal += members.Count;
            }

            int answered = queries.Count;
            bestDistTotal /= answered;
            meanDistTotal /= answered;
            nodeSizeTotal /= answered;

            using (StreamWriter report = new StreamWriter(Path.Combine(ROOT_DIR, "searchQuality.txt")))
            {
                string header = string.Format("{0}:NumTs_{1}:Th_{2}:Wl_{3}:NewPolicy", NUM_TIMESERIES, Globals.IndexNumMaxEntries, Globals.TimeSeriesLength, Globals.NewSplitPolicy);
                report.WriteLine(header);
                report.WriteLine("ExactSearchNumIO {0}", exactTotals.IO / (double)answered);
                report.WriteLine("ExactSearchNumCalcuations {0}", exactTotals.distance / (double)answered);
                report.WriteLine("ApproxSearchDistance {0}", bestDistTotal);
                report.WriteLine("ApproxSearchAverageNodeDistance {0}", meanDistTotal);
                report.WriteLine("ApproxSearchAverageNodeSize {0}", nodeSizeTotal);

                // One value (index 1) from each query, so separate runs can be checked for
                // having used the same query set.
                report.WriteLine("ValidationString ");
                foreach (double[] probe in queries)
                {
                    report.Write("{0} ", probe[1]);
                }
                report.WriteLine();
            }
        }