Пример #1
0
        /// <summary>
        /// Reads every fixed-size record from the file backing a terminal entry and returns
        /// the <c>dl</c> payload of each parsed record, in file order.
        /// </summary>
        /// <param name="e">Terminal entry whose <c>FileName</c> is read. It must be on disk
        /// (<c>OnDisk</c>) and have <c>NBuf == -1</c>.</param>
        /// <returns>One payload per record found in the file.</returns>
        /// <exception cref="ApplicationException">
        /// The entry fails the precondition above, the file size is not a whole multiple of the
        /// record size, or a record could not be read in full.
        /// </exception>
        public List<DATAFORMAT> ReturnDataFormatFromTermEntry(TermEntry e)
        {
            if (e.NBuf != -1 || !e.OnDisk)
            {
                throw new ApplicationException("e.NBuf!= -1 || ~e.OnDisk");
            }

            List<DATAFORMAT> records = new List<DATAFORMAT>();

            using (BinaryReader br = new BinaryReader(new FileStream(e.FileName, FileMode.Open, FileAccess.Read)))
            {
                long length     = br.BaseStream.Length;
                int  recordSize = SaxData.ByteLength(typeof(DATAFORMAT));

                // Integer modulus instead of Math.IEEERemainder: no long -> double conversion,
                // and a non-positive record size is rejected up front instead of producing NaN.
                if (recordSize <= 0 || length % recordSize != 0)
                {
                    throw new ApplicationException("Math.IEEERemainder(br.BaseStream.Length, bytesToRead) != 0");
                }

                // The cursor is a long: an int cursor wraps around for files of 2 GiB or more,
                // which made the loop run past the end of the file.
                for (long pos = 0; pos < length; pos += recordSize)
                {
                    byte[] raw = br.ReadBytes(recordSize);
                    if (raw.Length != recordSize)
                    {
                        throw new ApplicationException("temp.Length != bytesToRead");
                    }

                    records.Add((DATAFORMAT)SaxData.Parse<DATAFORMAT>(raw).dl);
                }
            }

            return records;
        }
Пример #2
0
        /// <summary>
        /// Walks an index depth-first. For every entry it prints a summary line to the console
        /// and appends a ';'-separated row (terminated by "\n") to <c>output</c>.
        /// </summary>
        /// <param name="index">Index level whose entries are reported.</param>
        /// <param name="parent">Value written in the "parent" column for every entry of this
        /// level; recursive calls pass the SAX word of the split entry being descended into.</param>
        public static void OutputIndex(Index<RawShapeletFormat> index, String parent)
        {
            int depth = index.SplitDepth;

            foreach (IndexEntry current in index.GetIndexEntries())
            {
                TermEntry leaf = current as TermEntry;
                if (leaf != null)
                {
                    String leafSax    = leaf.SaxWord;
                    String leafISax   = leaf.iSaxWord;
                    String leafFile   = leaf.FileName;
                    int    leafNodes  = leaf.NumNodes;
                    int    leafSeries = leaf.NumTimeSeries;

                    System.Console.WriteLine("SAX word: {0}, iSax word: {1}, file name: {2}, parent: {3}, split depth: {4}, num nodes: {5}, num TS: {6}", leafSax, leafISax, leafFile, parent, depth, leafNodes, leafSeries);

                    // Columns: sax; isax; kind; dir; file; parent; depth; mask; nodes; series
                    String[] leafRow =
                    {
                        leafSax, leafISax, "TermEntry", "", leafFile, parent,
                        depth.ToString(), "", leafNodes.ToString(), leafSeries.ToString()
                    };
                    output += String.Join(";", leafRow) + "\n";
                    continue;
                }

                // Anything that is not a terminal entry is treated as a split (internal) entry.
                SplitEntry<RawShapeletFormat> inner = (SplitEntry<RawShapeletFormat>)current;
                String innerSax    = inner.SaxWord;
                String innerISax   = inner.iSaxWord;
                String innerDir    = inner.Options.BaseDir;
                String innerMask   = inner.Options.maskValue();
                int    innerNodes  = inner.NumNodes;
                int    innerSeries = inner.NumTimeSeries;

                System.Console.WriteLine("SAX word: {0}, iSax word: {1}, dir: {2}, mask: {3}, split depth: {4}, num nodes: {5}, num TS: {6}", innerSax, innerISax, innerDir, innerMask, depth, innerNodes, innerSeries);

                String[] innerRow =
                {
                    innerSax, innerISax, "SplitEntry", innerDir, "", parent,
                    depth.ToString(), innerMask, innerNodes.ToString(), innerSeries.ToString()
                };
                output += String.Join(";", innerRow) + "\n";

                // Descend into the child index, reporting this entry's SAX word as the parent.
                OutputIndex(inner.GetIndex(), innerSax);
            }
        }
Пример #3
0
        /// <summary>
        /// Loads the saved index and the TinyImages query file, then for each query:
        /// down-samples and normalizes it, finds a terminal node by approximate search, and
        /// scans that node's records for the smallest Euclidean distance. Results go to the console.
        /// </summary>
        /// <exception cref="ApplicationException">A prepared query does not contain
        /// <c>Globals.TimeSeriesLength</c> values.</exception>
        public static void TinyImagesExperiment()
        {
            const string QueryPath = @"F:\Exp\TinyImages_256Len_8Word_2KThreshold\_queries\queries.txt";

            Index<Meta1DataFormat> si      = Index<Meta1DataFormat>.Load(Globals.IndexRootDir);
            List<double[]>         queries = Util.ReadFiletoDoubleList(QueryPath, false);

            for (int q = 0; q < queries.Count; ++q)
            {
                // The prepared query replaces the raw one in the list, as well as being used below.
                double[] query = Util.NormalizationHandler(Util.DownSample(queries[q], TinyImagesDataLoader.DOWNSAMPLERATE));
                queries[q] = query;

                if (query.Length != Globals.TimeSeriesLength)
                {
                    throw new ApplicationException("queries[i].Length != Globals.TimeSeriesLength");
                }

                TermEntry node = si.ApproximateSearch(query);
                Console.WriteLine("Query:{0} FileName:{1}", q, node.FileName);

                // Linear scan of the node; the first record with the smallest distance wins.
                double          closestDist = Double.MaxValue;
                Meta1DataFormat closest     = new Meta1DataFormat();
                foreach (Meta1DataFormat candidate in si.ReturnDataFormatFromTermEntry(node))
                {
                    double d = Util.EuclideanDistance(candidate.GetTimeSeries(), query);
                    if (d < closestDist)
                    {
                        closestDist = d;
                        closest     = candidate;
                    }
                }

                Console.WriteLine("BsfDist:{0} LocMeta:{1}", closestDist, closest.meta);
            }
        }
Пример #4
0
 /// <summary>
 /// Creates a terminal entry from a file name (delegating to the single-argument
 /// constructor) and verifies that the supplied SAX word matches the one parsed
 /// from that file name.
 /// </summary>
 /// <param name="saxWord">SAX word the caller expects the file to belong to.</param>
 /// <param name="fileName">File name handed to the single-argument constructor.</param>
 /// <exception cref="ApplicationException"><paramref name="saxWord"/> differs from the
 /// word parsed out of <paramref name="fileName"/>.</exception>
 public TermEntry(string saxWord, string fileName)
     : this(fileName) // debug
 {
     bool wordMatchesFile = saxWord == TermEntry.FileNameParseSaxStr(fileName);

     if (!wordMatchesFile)
     {
         throw new ApplicationException("TermEntry inconsistency saxWord does not match fileName.");
     }
 }
Пример #5
0
        /// <summary>
        /// Creates a buffer bound to <paramref name="node"/>, stores <paramref name="entry"/>
        /// in it and appends the buffer to <c>TBuffer</c>.
        /// </summary>
        /// <param name="entry">First entry placed in the new buffer.</param>
        /// <param name="node">Terminal entry the buffer is attached to.</param>
        /// <returns>Index of the last element of <c>TBuffer</c> after the buffer was added.</returns>
        public static int CreateNewBuffer(SaxData entry, TermEntry node)
        {
            Buf created = new Buf();
            created.Initialization();

            created.setnode(node);
            created.InsertInBuffer(entry);   // the new buffer starts out holding this one entry

            TBuffer.Add(created);

            int lastSlot = TBuffer.Count - 1;
            return lastSlot;
        }
Пример #6
0
        /// <summary>
        /// Best-first search for the stored series closest to <paramref name="ts"/>.
        /// An approximate search supplies the initial best-so-far answer; entries are then
        /// visited in order of their lower-bound distance until that bound is no longer
        /// smaller than the best-so-far distance.
        /// </summary>
        /// <param name="ts">Query time series.</param>
        /// <param name="bsf">Receives the best file/distance result found.</param>
        /// <returns>
        /// A cost counter: <c>IO</c> is incremented once per terminal file scanned and
        /// <c>distance</c> grows by the <c>NumTimeSeries</c> of each scanned entry.
        /// </returns>
        public CostCounter ExactSearch(double[] ts, out IndexFileDist bsf)
        {
            CostCounter cost = new CostCounter(0, 0);
            IntervalHeap<IndexEntryDist> queue = new IntervalHeap<IndexEntryDist>(NumIndexEntries);

            // Seed the best-so-far answer from the approximate search result.
            TermEntry seed = ApproximateSearch(ts);
            bsf = Index<DATAFORMAT>.MinFileEucDist(ts, seed.FileName);
            cost.IO++;
            cost.distance += seed.NumTimeSeries;

            // Queue every root-level entry keyed by its lower-bound distance to the query.
            foreach (IndexEntry rootEntry in index.Values)
            {
                double rootBound = Sax.MinDistPAAToiSAX(Sax.SaxStrToSaxVals(rootEntry.SaxWord), options.SaxOpts, ts);
                queue.Add(new IndexEntryDist(rootEntry, rootBound));
            }

            while (!queue.IsEmpty)
            {
                IndexEntryDist head      = queue.DeleteMin();
                IndexEntry     candidate = head.entry;

                // The smallest remaining bound cannot beat the current answer: stop.
                if (head.dist >= bsf.distance)
                {
                    break;
                }

                TermEntry leaf = candidate as TermEntry;
                if (leaf != null)
                {
                    IndexFileDist leafBest = Index<DATAFORMAT>.MinFileEucDist(ts, leaf.FileName);
                    cost.IO++;
                    cost.distance += candidate.NumTimeSeries;

                    if (leafBest.distance < bsf.distance)
                    {
                        bsf = leafBest;
                    }
                    continue;
                }

                SplitEntry<DATAFORMAT> inner = candidate as SplitEntry<DATAFORMAT>;
                if (inner != null)
                {
                    // Expand the internal node: queue its children using the node's own SAX options.
                    foreach (IndexEntry child in inner.GetIndexEntries())
                    {
                        double childBound = Sax.MinDistPAAToiSAX(Sax.SaxStrToSaxVals(child.SaxWord), inner.Options.SaxOpts, ts);
                        queue.Add(new IndexEntryDist(child, childBound));
                    }
                }
            }

            return cost;
        }
Пример #7
0
        /// <summary>
        /// Tells whether adding one more series to <paramref name="entry"/> would push it past
        /// <c>Globals.IndexNumMaxEntries</c>.
        /// </summary>
        /// <param name="entry">Terminal entry about to receive an insertion.</param>
        /// <returns><c>true</c> when the entry's series count plus one exceeds the limit.</returns>
        private bool SplitEntry(TermEntry entry)
        {
            int countAfterInsert = entry.NumTimeSeries + 1;
            return countAfterInsert > Globals.IndexNumMaxEntries;
        }
Пример #8
0
        /// <summary>
        /// Loads the saved index and the insect query file, then for each query finds a terminal
        /// node by approximate search and scans it for the record with the smallest Euclidean
        /// distance. The query and its best match are written as consecutive lines to the output file.
        /// </summary>
        /// <exception cref="ApplicationException">A query does not contain
        /// <c>Globals.TimeSeriesLength</c> values.</exception>
        public static void InsectExperiment()
        {
            const string QueryPath  = @"C:\Temp\insect\queries.txt";
            const string ResultPath = @"C:\Temp\insect\output.txt";

            Index<Meta3DataFormat> si      = Index<Meta3DataFormat>.Load(Globals.IndexRootDir);
            List<double[]>         queries = Util.ReadFiletoDoubleList(QueryPath, true);

            using (StreamWriter results = new StreamWriter(ResultPath))
            {
                for (int q = 0; q < queries.Count; ++q)
                {
                    double[] query = queries[q];
                    if (query.Length != Globals.TimeSeriesLength)
                    {
                        throw new ApplicationException("queries[i].Length != Globals.TimeSeriesLength");
                    }

                    TermEntry node = si.ApproximateSearch(query);
                    Console.WriteLine("Query:{0} FileName:{1}", q, node.FileName);

                    // Linear scan of the node; the first record with the smallest distance wins.
                    double          closestDist = Double.MaxValue;
                    Meta3DataFormat closest     = new Meta3DataFormat();
                    foreach (Meta3DataFormat candidate in si.ReturnDataFormatFromTermEntry(node))
                    {
                        double d = Util.EuclideanDistance(candidate.GetTimeSeries(), query);
                        if (d < closestDist)
                        {
                            closestDist = d;
                            closest     = candidate;
                        }
                    }

                    Console.WriteLine("BsfDist:{0} Meta1:{1} Meta2:{2}", closestDist, closest.meta1, closest.meta2);

                    // Two lines per query: the query itself, then its best match.
                    results.WriteLine(Util.ArrayToString(query));
                    results.WriteLine(Util.ArrayToString(closest.GetTimeSeries()));
                }
            }
        }
Пример #9
0
        /// <summary>
        /// Inserts one entry into this index level.
        /// </summary>
        /// <remarks>
        /// While <c>splitDepth == 0</c> and <c>flush</c> is false the entry is only grouped by
        /// SAX word in <c>buffer</c>. Otherwise it is routed through <c>index</c>:
        /// an unseen SAX word gets a new terminal entry; an internal (split) entry receives the
        /// insertion recursively; a terminal entry either buffers the entry or, when
        /// <c>SplitEntry</c> reports it would overflow, is replaced by a new split entry into
        /// which the new entry, the buffered entries and any records read back from the
        /// terminal's file are re-inserted. Files read back are deleted.
        /// </remarks>
        /// <param name="input">Entry to insert.</param>
        /// <exception cref="ApplicationException">A terminal file's size is not a whole
        /// multiple of the record size, or a record could not be read in full.</exception>
        public void Insert(SaxData input)
        {
            string saxString = Sax.SaxDataRepToSaxStr(input, options.SaxOpts);

            if (splitDepth == 0 && flush == false)
            {
                if (!buffer.ContainsKey(saxString))
                {
                    buffer.Add(saxString, new List<SaxData>());
                }
                buffer[saxString].Add(input);
                return;
            }

            if (!index.ContainsKey(saxString))
            {
                // saxString has not been seen before, create new file and entry
                string    newfile  = Path.Combine(WorkingFolder, string.Concat(BuildMaskedFileStem(saxString), ".0.txt"));
                TermEntry newEntry = new TermEntry(saxString, newfile);
                newEntry.InsertToBuffer(input);
                index.Add(saxString, newEntry);
                return;
            }

            IndexEntry entry  = index[saxString];
            TermEntry  tentry = entry as TermEntry;

            if (tentry == null)
            {
                // internal node: delegate the insertion downwards
                SplitEntry<DATAFORMAT> inner = entry as SplitEntry<DATAFORMAT>;
                if (inner != null)
                {
                    inner.Insert(input);
                }
                return;
            }

            // terminal node: the search path ends here
            if (SplitEntry(tentry) == false) // bucket still has room
            {
                tentry.InsertToBuffer(input);
                return;
            }

            // Bucket would overflow: gather everything it holds and redistribute.
            List<SaxData> B = tentry.getbuffer();
            if (B == null)
            {
                B = new List<SaxData>();
            }
            DiskCost.increasesavedcost(B.Count);

            string[] files = Directory.GetFiles(WorkingFolder,
                                                string.Concat(BuildMaskedFileStem(saxString), "*.txt"));

            // An on-disk terminal is expected to own exactly one file, an in-memory one none.
            if (tentry.OnDisk == true)
            {
                Assert.AreEqual(files.Length, 1);
            }
            else
            {
                Assert.AreEqual(files.Length, 0);
            }

            int recordSize = SaxData.ByteLength(typeof(DATAFORMAT));
            foreach (string f in files)
            {
                AppendRecordsFromFile(f, recordSize, B);
                File.Delete(f);
            }

            // The two calls are kept separate so the literal null argument is passed unchanged.
            SplitEntry<DATAFORMAT> newSplit;
            if (Globals.NewSplitPolicy)
            {
                newSplit = new SplitEntry<DATAFORMAT>(saxString, UpdateOptions(B), (byte)(1 + splitDepth));
            }
            else
            {
                newSplit = new SplitEntry<DATAFORMAT>(saxString, UpdateOptions(null), (byte)(1 + splitDepth));
            }

            newSplit.Insert(input);
            foreach (SaxData S in B)
            {
                newSplit.Insert(S);
            }

            // update index entry from terminal to split
            index[saxString] = newSplit;
        }

        // Builds the "<value>.<mask>_<value>.<mask>..." file-name stem for a SAX word,
        // pairing each SAX value with the corresponding element of this level's mask.
        private string BuildMaskedFileStem(string saxString)
        {
            ushort[] mask    = this.options.MaskCopy;
            ushort[] saxVals = Sax.SaxStrToSaxVals(saxString);
            string   stem    = "";
            for (int i = 0; i < mask.Length; i++)
            {
                stem = stem + saxVals[i].ToString() + "." + mask[i].ToString() + "_";
            }
            return stem.Substring(0, stem.Length - 1); // drop the trailing '_'
        }

        // Parses every fixed-size record of one file into 'sink', charging DiskCost for the
        // random access (non-empty files only) and for each record read.
        private void AppendRecordsFromFile(string path, int recordSize, List<SaxData> sink)
        {
            using (BinaryReader br = new BinaryReader(new FileStream(path, FileMode.Open, FileAccess.Read)))
            {
                long length = br.BaseStream.Length;
                if (length != 0)
                {
                    DiskCost.increaserandomcost();
                }

                // Integer modulus instead of Math.IEEERemainder on doubles.
                if (recordSize <= 0 || length % recordSize != 0)
                {
                    throw new ApplicationException("Math.IEEERemainder(br.BaseStream.Length, bytesToRead) != 0");
                }

                // The cursor starts at zero for every file and is a long; the previous shared
                // int cursor was never reset between files and could wrap on very large files.
                for (long pos = 0; pos < length; pos += recordSize)
                {
                    byte[] raw = br.ReadBytes(recordSize);
                    if (raw.Length != recordSize)
                    {
                        throw new ApplicationException("temp.Length != bytesToRead");
                    }

                    sink.Add(SaxData.Parse<DATAFORMAT>(raw));
                    DiskCost.increasereadcost();
                }
            }
        }
Пример #10
0
 /// <summary>Stores <paramref name="node"/> in <c>Node</c>.</summary>
 /// <param name="node">Terminal entry to associate with this object.</param>
 public void setnode(TermEntry node)
 {
     this.Node = node;
 }
Пример #11
0
        /// <summary>Entries currently held by this buffer (time series, per the original note).</summary>
        public List <SaxData> BL;          // List of TS (time series) in the buffer

        /// <summary>
        /// Puts the buffer into its empty state: a fresh entry list, zero utilization
        /// and no associated node.
        /// </summary>
        public void Initialization()
        {
            this.BL          = new List<SaxData>();
            this.Utilization = 0;
            this.Node        = null;
        }
Пример #12
0
        /// <summary>
        /// Builds an index from generated data, saves and reloads it, then runs a fixed number
        /// of random-walk queries through both exact and approximate search on the in-memory
        /// index. Disk-cost counters and the build time are printed; the averaged search
        /// metrics are written to "searchQuality.txt" under <c>ROOT_DIR</c>.
        /// </summary>
        public static void SearchQualityExperiment()
        {
            DateTime buildStarted = DateTime.Now;

            // --- index construction ---
            Index<RawDataFormat> si     = new Index<RawDataFormat>(0, new IndexOptions("root"));
            DataLoader           loader = new GeneratedRawDataLoader(si, Globals.TimeSeriesLength, NUM_TIMESERIES, SEED);
            InsertTimeSeries(loader);

            Console.WriteLine();
            Console.WriteLine("Sequential Disk Accesses: " + DiskCost.seqcost);
            Console.WriteLine("Random Disk Accesses: " + DiskCost.rancost);
            Console.WriteLine("Read Disk Accesses: " + DiskCost.readcost);
            Console.WriteLine("Saved cost in buffer: " + DiskCost.savedcost);
            Console.WriteLine();

            Index<RawDataFormat>.Save(Globals.IndexRootDir, si);

            // The index is loaded back, but the queries below still run against 'si';
            // the reloaded instance is not used afterwards.
            Index<RawDataFormat>.Load(Globals.IndexRootDir);

            DateTime buildFinished = DateTime.Now;
            Console.WriteLine("Index Construction Time: {0}", buildFinished - buildStarted);

            // --- query generation ---
            const int      QueryCount = 100;
            List<double[]> queries    = new List<double[]>(QueryCount);
            while (queries.Count < QueryCount)
            {
                queries.Add(Util.RandomWalk(Globals.TimeSeriesLength));
            }

            // --- measured metrics (summed here, averaged after the loop) ---
            double      nearestDistSum = 0;
            double      nodeAvgDistSum = 0;
            double      nodeSizeSum    = 0;
            CostCounter exactCosts     = new CostCounter();

            foreach (double[] query in queries)
            {
                // exact search: only its cost is recorded
                IndexFileDist exactResult;
                exactCosts += si.ExactSearch(query, out exactResult);

                // approximate search: examine every record of the node it lands on
                TermEntry           node    = si.ApproximateSearch(query);
                List<RawDataFormat> records = si.ReturnDataFormatFromTermEntry(node);

                double nearest = double.MaxValue;
                double total   = 0;
                foreach (RawDataFormat record in records)
                {
                    double d = Util.EuclideanDistance(query, record.GetTimeSeries());
                    total += d;
                    if (d < nearest)
                    {
                        nearest = d;
                    }
                }

                nearestDistSum += nearest;
                nodeAvgDistSum += total / records.Count;
                nodeSizeSum    += records.Count;
            }

            double approxSearchDist     = nearestDistSum / queries.Count;
            double approxSearchNodeDist = nodeAvgDistSum / queries.Count;
            double approxSearchNodeSize = nodeSizeSum / queries.Count;

            using (StreamWriter report = new StreamWriter(Path.Combine(ROOT_DIR, "searchQuality.txt")))
            {
                string header = string.Format("{0}:NumTs_{1}:Th_{2}:Wl_{3}:NewPolicy", NUM_TIMESERIES, Globals.IndexNumMaxEntries, Globals.TimeSeriesLength, Globals.NewSplitPolicy);
                report.WriteLine(header);

                report.WriteLine("ExactSearchNumIO {0}", exactCosts.IO / (double)queries.Count);
                report.WriteLine("ExactSearchNumCalcuations {0}", exactCosts.distance / (double)queries.Count);
                report.WriteLine("ApproxSearchDistance {0}", approxSearchDist);
                report.WriteLine("ApproxSearchAverageNodeDistance {0}", approxSearchNodeDist);
                report.WriteLine("ApproxSearchAverageNodeSize {0}", approxSearchNodeSize);

                // The second value of every query is written out as a validation string.
                report.WriteLine("ValidationString ");
                foreach (double[] query in queries)
                {
                    report.Write("{0} ", query[1]);
                }
                report.WriteLine();
            }
        }
Пример #13
0
        /// <summary>
        /// Queries a saved DNA index with windows taken from every human chromosome file.
        /// </summary>
        /// <remarks>
        /// Each "*.dat" file is read as a sequence of 32-bit integers and downsampled by
        /// averaging <c>DnaDataLoader.SAMPLERATE</c> consecutive values. A window of
        /// <c>Globals.TimeSeriesLength</c> points is slid over the result in quarter-window
        /// steps; every window is mean-centred and searched twice (as is, and reversed).
        /// One line per search result is written to "queryOutput.txt" under
        /// <c>Globals.IndexRootDir</c>.
        /// A per-chromosome "top 10 hits grouped by matching chromosome" report
        /// ("queryOutputTop.txt") previously existed here only as commented-out code.
        /// </remarks>
        public static void DnaExperiment()
        {
            Util.NormalizationHandler = new Util.Normalize(Util.MeanZero_Normalization);

            // in-memory data, referenced by the index
            const string DATAFOLDER = @"K:\Datasets\DNA\Dna2Ts\Monkey_Binary";

            // load index
            Index<Meta2DataFormat> si = Index<Meta2DataFormat>.Load(Globals.IndexRootDir);

            // populate in-memory data
            DnaDataLoader.LoadDnaToMetaBuffer(DATAFOLDER);

            // generate queries
            DateTime queryStart = DateTime.Now;
            int      numQueries = 0;

            string[] humanChrs = Directory.GetFiles(@"K:\Datasets\DNA\Dna2Ts\Human_Binary", "*.dat");
            Array.Sort(humanChrs, new NaturalStringComparer());
            Dictionary<string, DnaChrResult> queryResult = new Dictionary<string, DnaChrResult>(humanChrs.Length);

            int posShift = Globals.TimeSeriesLength / 4; // shift by quarters

            for (int chrNo = 0; chrNo < humanChrs.Length; ++chrNo)
            {
                string chrFile = humanChrs[chrNo];

                // NOTE(review): explicit collection between chromosomes is kept from the original;
                // presumably it releases the previous chromosome's arrays - confirm it is still needed.
                GC.Collect();

                double[] dnaChr;
                using (BinaryReader br = new BinaryReader(new FileStream(chrFile, FileMode.Open, FileAccess.Read)))
                {
                    // Number of 32-bit samples in the file.
                    long fileLength = br.BaseStream.Length / sizeof(int);

                    // fileLength is already a sample count. The previous code divided it by
                    // sizeof(int) a second time here, so only a quarter of each chromosome
                    // was downsampled and queried.
                    dnaChr = new double[(int)Math.Floor(fileLength / (double)DnaDataLoader.SAMPLERATE)];
                    Console.WriteLine("F:{0} OrigLen:{1} newLen:{2} Shift:{3}", chrFile, fileLength, dnaChr.Length, posShift);

                    // downsample: each output point is the mean of SAMPLERATE consecutive samples
                    for (int i = 0; i < dnaChr.Length; ++i)
                    {
                        double sum = 0;
                        for (int read = 0; read < DnaDataLoader.SAMPLERATE; ++read)
                        {
                            sum += br.ReadInt32();
                        }
                        dnaChr[i] = sum / DnaDataLoader.SAMPLERATE;
                    }
                } // the file is no longer needed once downsampled, so it is closed before querying

                List<DnaSearchResult> qResults = new List<DnaSearchResult>();
                double[] ts = new double[Globals.TimeSeriesLength];

                for (int pos = 0; pos < dnaChr.Length - Globals.TimeSeriesLength; pos += posShift)
                {
                    numQueries += 2; // one forward and one reversed query per window

                    Array.Copy(dnaChr, pos, ts, 0, Globals.TimeSeriesLength);
                    double mean = Util.Mean(ts, 0, ts.Length - 1);
                    for (int k = 0; k < ts.Length; ++k)
                    {
                        ts[k] = ts[k] - mean;
                    }

                    // forward
                    qResults.Add(FindBestDnaMatch(si, ts, chrNo, pos));

                    // reverse
                    double[] reversed = ts.Reverse().ToArray();
                    qResults.Add(FindBestDnaMatch(si, reversed, chrNo, pos));
                }

                queryResult.Add(chrFile, new DnaChrResult()
                {
                    results = qResults
                });
            }
            DateTime queryStop = DateTime.Now;

            Console.WriteLine("{0} Queries, {1} TimeElapsed.", numQueries, queryStop - queryStart);

            // print results
            using (StreamWriter sw = new StreamWriter(Path.Combine(Globals.IndexRootDir, "queryOutput.txt")))
            {
                foreach (KeyValuePair<string, DnaChrResult> kvp in queryResult)
                {
                    foreach (DnaSearchResult sr in kvp.Value.results)
                    {
                        sw.WriteLine(sr.ToString());
                    }
                }
            }
        }

        // Runs an approximate search for 'query' and scans the terminal node it lands on for the
        // record whose normalized series is closest (Euclidean); the first minimum wins.
        private static DnaSearchResult FindBestDnaMatch(Index<Meta2DataFormat> si, double[] query, int queryChr, int queryPos)
        {
            TermEntry             tEntry          = si.ApproximateSearch(query);
            List<Meta2DataFormat> termNodeEntries = si.ReturnDataFormatFromTermEntry(tEntry);

            double          bsfDist = Double.MaxValue;
            Meta2DataFormat bsfMeta = new Meta2DataFormat();
            foreach (Meta2DataFormat m in termNodeEntries)
            {
                double dist = Util.EuclideanDistance(Util.NormalizationHandler(m.GetTimeSeries()), query);
                if (dist < bsfDist)
                {
                    bsfDist = dist;
                    bsfMeta = m;
                }
            }

            return new DnaSearchResult()
            {
                dist        = bsfDist,
                matchingChr = bsfMeta._chrNo,
                matchingPos = bsfMeta._pos,
                queryChr    = queryChr,
                queryPos    = queryPos,
            };
        }