Example #1
0
        /// <summary>
        /// Fallback for a query whose SAX string is not a key of <c>index</c>.
        /// When the index holds more than one entry, the first key whose symbol
        /// agrees with the query at the chosen mask position is searched;
        /// otherwise (or when no key agrees) the first index entry is searched.
        /// </summary>
        /// <param name="dr">The SAX representation of the query.</param>
        /// <returns>The result of <c>SearchHandler</c> on the selected entry.</returns>
        private TermEntry MismatchHandler(SaxData dr)
        {
            if (NumIndexEntries > 1)
            {
                string saxString = Sax.SaxDataRepToSaxStr(dr, options.SaxOpts);

                // Choose the last position that holds the largest mask value
                // ("<=" makes later ties win).
                ReadOnlyCollection<ushort> mask = options.Mask;
                int pos = 0;
                for (int i = 0; i < mask.Count; i++)
                {
                    if (mask[pos] <= mask[i])
                    {
                        pos = i;
                    }
                }

                // The query symbol at that position does not change while scanning
                // the keys, so decode the query string once instead of once per key.
                ushort querySymbol = Sax.SaxStrToSaxVals(saxString)[pos];

                // Search for the first key agreeing with the query at that position.
                foreach (string entrySaxString in index.Keys)
                {
                    if (Sax.SaxStrToSaxVals(entrySaxString)[pos] == querySymbol)
                    {
                        return(SearchHandler(index[entrySaxString], dr));
                    }
                }
            }

            // No match (or at most one entry): fall back to the first index entry.
            return(SearchHandler(ReturnFirstIndexEntry(), dr));
        }
        /// <summary>
        /// Loads the files <c>i1.dat</c> .. <c>iN.dat</c> from <c>_dataDir</c>, cuts each
        /// file into consecutive records of <c>ORIGINAL_LENGTH</c> doubles, down-samples
        /// and normalizes every record and inserts it into <c>_si</c>.
        /// </summary>
        /// <exception cref="ApplicationException">
        /// The number of <c>*.dat</c> files differs from <c>NUMFILES</c>, an expected file
        /// is missing, or a file does not contain exactly <c>EXPECTEDBYTES</c> bytes.
        /// </exception>
        public override void LoadIndex()
        {
            ushort     maskval = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
            SaxOptions opts    = new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, maskval));

            int numFiles = Directory.GetFiles(_dataDir, "*.dat").Length;

            if (numFiles != NUMFILES)
            {
                throw new ApplicationException("numFiles != NUMFILES");
            }

            // Size in bytes of one raw record inside a file.
            int recordBytes = ORIGINAL_LENGTH * sizeof(double);

            for (int i = 1; i <= numFiles; ++i)
            {
                string file = Path.Combine(_dataDir, string.Format("i{0}.dat", i));
                if (!File.Exists(file))
                {
                    throw new ApplicationException("!File.Exists(file)");
                }

                using (BinaryReader br = new BinaryReader(new FileStream(file, FileMode.Open, FileAccess.Read)))
                {
                    if (br.BaseStream.Length != EXPECTEDBYTES)
                    {
                        throw new ApplicationException("br.BaseStream.Length != ORIGINAL_LENGTH * NUMTSPERFILE * sizeof(double)");
                    }

                    // A single Read call may legally return fewer bytes than requested,
                    // so keep reading until the buffer is full or the stream ends.
                    int bytesRead = 0;
                    while (bytesRead < EXPECTEDBYTES)
                    {
                        int chunk = br.Read(imageBuffer, bytesRead, EXPECTEDBYTES - bytesRead);
                        if (chunk <= 0)
                        {
                            break;
                        }
                        bytesRead += chunk;
                    }
                    if (bytesRead != EXPECTEDBYTES)
                    {
                        throw new ApplicationException("EXPECTEDBYTES");
                    }

                    for (int pos = 0; pos < EXPECTEDBYTES; pos += recordBytes)
                    {
                        Buffer.BlockCopy(imageBuffer, pos, tsBuffer, 0, recordBytes);
                        double[] ts = Util.NormalizationHandler(Util.DownSample(Util.ByteArrayToDoubleArray(tsBuffer), DOWNSAMPLERATE));

                        // The running counter is passed to the data format along with the series.
                        IDataFormat dl = new Meta1DataFormat(processed, ts);
                        _si.Insert(new SaxData(dl, Sax.ArrayToSaxVals(ts, opts)));
                        processed++;

                        // Flush every Globals.FlushTsVal insertions.
                        if (processed % Globals.FlushTsVal == 0)
                        {
                            _si.FlushEntries();
                        }
                    }
                    Console.WriteLine("{0} read. TsNum:{1}", Path.GetFileName(file), processed);
                }
            }

            // Final flush for whatever was inserted since the last periodic flush.
            _si.FlushEntries();
        }
Example #3
0
        /// <summary>
        /// Convenience overload: converts a raw series to SAX values and forwards
        /// to the <c>SaxData</c> overload of <c>ApproximateSearch</c>.
        /// </summary>
        /// <param name="ts">The query series.</param>
        /// <returns>Whatever the <c>SaxData</c> overload returns for the converted query.</returns>
        public TermEntry ApproximateSearch(double[] ts)
        {
            // Difference of the base-2 logs of the two cardinalities; the cast truncates it.
            double log2Gap = Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2);

            SaxOptions saxOptions = new SaxOptions(
                Util.UnsignedShortArray(Globals.SaxWordLength, (ushort)log2Gap));

            SaxData query = new SaxData(Sax.ArrayToSaxVals(ts, saxOptions));
            return ApproximateSearch(query);
        }
        /// <summary>
        /// Reads <c>dataFile</c> line by line, normalizes everything after the first
        /// value of each line, and inserts the result into <c>si</c>. Lines are staged
        /// in <c>buffer</c> in batches of up to <c>bufferSize</c> before insertion.
        /// </summary>
        /// <exception cref="ApplicationException">
        /// A line yields a series whose length differs from the first one read.
        /// </exception>
        public override void LoadIndex()
        {
            ushort     maskval = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
            SaxOptions opts    = new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, maskval));

            this.sr = new StreamReader(this.dataFile);

            // try/finally guarantees the reader is closed even when a malformed
            // line makes the loop throw.
            try
            {
                while (!(this.allRead && this.buffer.Count == 0))
                {
                    if (this.buffer.Count == 0)
                    {
                        // Refill phase: pull lines until the buffer is full or the file ends.
                        Console.WriteLine(this.processed);
                        string line;
                        while ((line = this.sr.ReadLine()) != null)
                        {
                            double[] line_data = Util.StringToArray(line);

                            // Normalize everything after the first value and write it back
                            // in place; the first value is kept untouched
                            // (presumably a class label - confirm against the data format).
                            double[] ts = Util.NormalizationHandler(line_data.Skip(1).ToArray());
                            ts.CopyTo(line_data, 1);

                            if (!this.tsLength.HasValue)
                            {
                                this.tsLength = (uint)ts.Length;
                            }
                            else if (this.tsLength.Value != ts.Length)
                            {
                                throw new ApplicationException("Inconsistent length when reading from file.");
                            }

                            this.buffer.Enqueue(line_data);
                            if (this.buffer.Count == this.bufferSize)
                            {
                                break;
                            }
                        }

                        // line is null only when ReadLine hit end of file.
                        if (line == null)
                        {
                            this.allRead = true;
                        }
                    }
                    else
                    {
                        // Drain phase: insert one buffered row per iteration.
                        double[]    tmp         = this.buffer.Dequeue();
                        double[]    ts          = tmp.Skip(1).ToArray();
                        double      shapelet_ts = tmp.First();
                        IDataFormat dl          = new RawShapeletFormat(ts, shapelet_ts);
                        this.si.Insert(new SaxData(dl, Sax.ArrayToSaxVals(ts, opts)));
                        this.processed++;
                    }
                }
            }
            finally
            {
                this.sr.Close();
            }

            this.si.FlushEntries();
            Console.WriteLine("Total: {0}", this.processed);
        }
Example #5
0
        /// <summary>
        /// Best-first search over the index. An approximate search seeds the
        /// best-so-far answer; entries are then taken from a priority queue in
        /// order of their <c>Sax.MinDistPAAToiSAX</c> value until that value is no
        /// smaller than the best-so-far distance.
        /// </summary>
        /// <param name="ts">The query series.</param>
        /// <param name="bsf">Receives the best (smallest-distance) result found.</param>
        /// <returns>Counters: <c>IO</c> is incremented per file examined and
        /// <c>distance</c> grows by the examined entry's <c>NumTimeSeries</c>.</returns>
        public CostCounter ExactSearch(double[] ts, out IndexFileDist bsf)
        {
            CostCounter cost = new CostCounter(0, 0);
            IntervalHeap<IndexEntryDist> queue = new IntervalHeap<IndexEntryDist>(NumIndexEntries);

            // Seed the best-so-far answer from the approximate search result.
            TermEntry seed = ApproximateSearch(ts);
            bsf = Index<DATAFORMAT>.MinFileEucDist(ts, seed.FileName);
            cost.IO++;
            cost.distance += seed.NumTimeSeries;

            // Start the queue with every entry stored directly in this index.
            foreach (IndexEntry root in index.Values)
            {
                double bound = Sax.MinDistPAAToiSAX(Sax.SaxStrToSaxVals(root.SaxWord), options.SaxOpts, ts);
                queue.Add(new IndexEntryDist(root, bound));
            }

            while (!queue.IsEmpty)
            {
                IndexEntryDist head = queue.DeleteMin();
                IndexEntry     node = head.entry;

                // The smallest remaining value cannot beat the current best: stop.
                if (head.dist >= bsf.distance)
                {
                    break;
                }

                TermEntry leaf = node as TermEntry;
                if (leaf != null)
                {
                    // Terminal entry: examine its file and keep the better answer.
                    IndexFileDist candidate = Index<DATAFORMAT>.MinFileEucDist(ts, leaf.FileName);

                    cost.IO++;
                    cost.distance += node.NumTimeSeries;

                    if (candidate.distance < bsf.distance)
                    {
                        bsf = candidate;
                    }
                    continue;
                }

                SplitEntry<DATAFORMAT> inner = node as SplitEntry<DATAFORMAT>;
                if (inner == null)
                {
                    continue;
                }

                // Split entry: enqueue its children, scored with the split's own options.
                foreach (IndexEntry child in inner.GetIndexEntries())
                {
                    double bound = Sax.MinDistPAAToiSAX(Sax.SaxStrToSaxVals(child.SaxWord), inner.Options.SaxOpts, ts);
                    queue.Add(new IndexEntryDist(child, bound));
                }
            }

            return cost;
        }
Example #6
0
        /// <summary>
        /// Looks up the query's SAX string in <c>index</c>. On a hit the matching
        /// entry is handed to <c>SearchHandler</c>; on a miss the query goes to
        /// <c>MismatchHandler</c>.
        /// </summary>
        /// <param name="dr">The SAX representation of the query.</param>
        /// <returns>The entry produced by the chosen handler.</returns>
        public TermEntry ApproximateSearch(SaxData dr)
        {
            string key = Sax.SaxDataRepToSaxStr(dr, options.SaxOpts);

            // Miss: no entry is stored under this SAX string.
            if (!index.ContainsKey(key))
            {
                return MismatchHandler(dr);
            }

            return SearchHandler(index[key], dr);
        }
        /// <summary>
        /// Fills the index with series produced by <c>Util.RandomWalk</c> until
        /// <c>processed</c> reaches <c>numTs</c>, flushing every
        /// <c>Globals.FlushTsVal</c> insertions and once more at the end.
        /// </summary>
        public override void LoadIndex()
        {
            Util.SeedGenerator(seed);

            // Difference of the base-2 logs of the two cardinalities; the cast truncates it.
            double     log2Gap = Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2);
            SaxOptions saxOpts = new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, (ushort)log2Gap));

            while (this.processed < this.numTs)
            {
                double[]    series  = Util.RandomWalk(this.tsLength);
                IDataFormat payload = new RawDataFormat(series);

                // Every generated series is inserted; writing out happens in FlushEntries below.
                this.si.Insert(new SaxData(payload, Sax.ArrayToSaxVals(series, saxOpts)));
                this.processed++;

                // "\r" rewrites the same console line with the running count.
                Console.Write("\r{0}", this.processed);

                bool flushDue = this.processed % Globals.FlushTsVal == 0;
                if (flushDue)
                {
                    this.si.FlushEntries();
                }
            }

            // Final flush, then end the progress line.
            this.si.FlushEntries();
            Console.WriteLine();
        }
Example #8
0
        /// <summary>
        /// Inserts one SAX-encoded series into this node.
        /// <list type="bullet">
        /// <item>At split depth 0 while not flushing, the series is only appended
        /// to the in-memory <c>buffer</c> under its SAX string.</item>
        /// <item>Otherwise, an unseen SAX string gets a new terminal entry; an
        /// existing split entry receives the series recursively; an existing
        /// terminal entry either buffers it or, when <c>SplitEntry(...)</c> says so,
        /// is replaced by a new split entry holding its old contents.</item>
        /// </list>
        /// </summary>
        /// <param name="input">The series to insert.</param>
        /// <exception cref="ApplicationException">
        /// A file read back during a split does not consist of whole records.
        /// </exception>
        public void Insert(SaxData input)
        {
            string saxString = Sax.SaxDataRepToSaxStr(input, options.SaxOpts);

            // First-level buffering: just group the inputs by SAX string.
            if (splitDepth == 0 && flush == false)
            {
                if (!buffer.ContainsKey(saxString))
                {
                    buffer.Add(saxString, new List<SaxData>());
                }
                buffer[saxString].Add(input);
                return;
            }

            // saxString has not been seen before: create a new file name and terminal entry.
            if (!index.ContainsKey(saxString))
            {
                string    newfile  = Path.Combine(WorkingFolder, string.Concat(BuildEntryFileStem(saxString), ".0.txt"));
                TermEntry newEntry = new TermEntry(saxString, newfile);
                newEntry.InsertToBuffer(input);
                index.Add(saxString, newEntry);
                return;
            }

            IndexEntry entry = index[saxString];
            if (entry is TermEntry) // terminal: the search path ends here
            {
                TermEntry tentry = (TermEntry)entry;
                if (SplitEntry(tentry) == false) // no split required
                {
                    tentry.InsertToBuffer(input);
                    return;
                }

                // Collect everything the terminal entry currently holds in memory ...
                List<SaxData> B = tentry.getbuffer();
                if (B == null)
                {
                    B = new List<SaxData>();
                }
                DiskCost.increasesavedcost(B.Count);

                // ... and whatever it already wrote to disk.
                string[] files = Directory.GetFiles(WorkingFolder,
                                                    string.Concat(BuildEntryFileStem(saxString), "*.txt"));

                // Exactly one file is expected when the entry reports OnDisk, none otherwise.
                if (tentry.OnDisk == true)
                {
                    Assert.AreEqual(files.Length, 1);
                }
                else
                {
                    Assert.AreEqual(files.Length, 0);
                }

                int bytesToRead = SaxData.ByteLength(typeof(DATAFORMAT));
                foreach (string f in files)
                {
                    using (BinaryReader br = new BinaryReader(new FileStream(f, FileMode.Open, FileAccess.Read)))
                    {
                        long length = br.BaseStream.Length;
                        if (length != 0)
                        {
                            DiskCost.increaserandomcost();
                        }
                        if (Math.IEEERemainder(length, bytesToRead) != 0)
                        {
                            throw new ApplicationException("Math.IEEERemainder(br.BaseStream.Length, bytesToRead) != 0");
                        }

                        // The read position starts at 0 for every file, so each file
                        // is read back completely.
                        for (long pos = 0; pos < length; pos += bytesToRead)
                        {
                            byte[] temp = br.ReadBytes(bytesToRead);
                            if (temp.Length != bytesToRead)
                            {
                                throw new ApplicationException("temp.Length != bytesToRead");
                            }

                            B.Add(SaxData.Parse<DATAFORMAT>(temp));
                            DiskCost.increasereadcost();
                        }
                    }

                    // The records now live in B; the old file is no longer needed.
                    File.Delete(f);
                }

                SplitEntry<DATAFORMAT> newSplit;
                if (Globals.NewSplitPolicy)
                {
                    newSplit = new SplitEntry<DATAFORMAT>(saxString, UpdateOptions(B), (byte)(1 + splitDepth));
                }
                else
                {
                    newSplit = new SplitEntry<DATAFORMAT>(saxString, UpdateOptions(null), (byte)(1 + splitDepth));
                }

                // The new input goes in first, followed by the old contents.
                newSplit.Insert(input);
                foreach (SaxData S in B)
                {
                    newSplit.Insert(S);
                }

                // Update the index entry from terminal to split.
                index[saxString] = newSplit;
            }
            else if (entry is SplitEntry<DATAFORMAT>) // internal node
            {
                ((SplitEntry<DATAFORMAT>)entry).Insert(input);
            }
        }

        /// <summary>
        /// Builds the file-name stem for a SAX string: one "symbol.maskValue" pair
        /// per position of this node's mask, joined with '_'.
        /// </summary>
        private string BuildEntryFileStem(string saxString)
        {
            ushort[] mask    = this.options.MaskCopy;
            ushort[] symbols = Sax.SaxStrToSaxVals(saxString);

            string[] parts = new string[mask.Length];
            for (int i = 0; i < mask.Length; i++)
            {
                parts[i] = symbols[i].ToString() + "." + mask[i].ToString();
            }
            return string.Join("_", parts);
        }
        /// <summary>
        /// Loads every <c>*.dat</c> file in <c>_dataDir</c> (in natural sort order) as an
        /// array of doubles, slides a window of <c>Globals.TimeSeriesLength</c> over it,
        /// drops windows whose standard deviation is at most <c>FILTERVAL</c>,
        /// z-normalizes the rest and inserts them into <c>_si</c>.
        /// A file-to-number mapping and the counters are printed at the end.
        /// </summary>
        /// <exception cref="ApplicationException">
        /// The file count differs from <c>NUMFILES</c>, a file is missing, or a file's
        /// size is not a multiple of <c>sizeof(double)</c>.
        /// </exception>
        public override void LoadIndex()
        {
            ushort     maskval = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
            SaxOptions opts    = new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, maskval));

            int discarded = 0;

            string[] files = Directory.GetFiles(_dataDir, "*.dat");
            Array.Sort(files, new NaturalStringComparer());

            if (files.Length != NUMFILES)
            {
                throw new ApplicationException("numFiles != NUMFILES");
            }

            StringBuilder sb = new StringBuilder();

            sb.AppendLine("File => Number Mapping");

            for (int i = 0; i < NUMFILES; ++i)
            {
                string file = files[i];

                // One mapping per line; without the line break all mappings ran together.
                sb.AppendFormat("{0} => {1}", Path.GetFileNameWithoutExtension(file), i).AppendLine();
                Console.WriteLine("Processed:{2} Discarded:{0} AtFile:{1}", discarded, file, processed);

                if (!File.Exists(file))
                {
                    throw new ApplicationException("!File.Exists(file)");
                }

                // Read the whole data file into memory.
                double[] dataBuffer;
                using (BinaryReader br = new BinaryReader(new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.None, 100000000)))
                {
                    if (Math.IEEERemainder(br.BaseStream.Length, sizeof(double)) != 0)
                    {
                        throw new ApplicationException("Math.IEEERemainder( br.BaseStream.Length, sizeof(double)) != 0");
                    }

                    dataBuffer = new double[br.BaseStream.Length / sizeof(double)];
                    for (int k = 0; k < dataBuffer.Length; ++k)
                    {
                        dataBuffer[k] = br.ReadDouble();
                    }
                }

                // Sliding window: extract every subsequence of the configured length.
                // "<=" includes the last full window, which starts at
                // dataBuffer.Length - Globals.TimeSeriesLength.
                for (int pos = 0; pos <= dataBuffer.Length - Globals.TimeSeriesLength; ++pos)
                {
                    double[] ts = new double[Globals.TimeSeriesLength];
                    Array.Copy(dataBuffer, pos, ts, 0, Globals.TimeSeriesLength);

                    // Filter: skip windows whose standard deviation is at most FILTERVAL
                    // and jump ahead by half a window.
                    double std = Util.StdDev(ts);
                    if (std <= FILTERVAL)
                    {
                        discarded += (int)Math.Ceiling(Globals.TimeSeriesLength / 2.0) + 1;
                        pos       += (int)Math.Ceiling(Globals.TimeSeriesLength / 2.0);
                        continue;
                    }

                    // Normalize: subtract the mean and divide by the standard deviation.
                    double mean = Util.Mean(ts, 0, ts.Length - 1);
                    for (int j = 0; j < ts.Length; ++j)
                    {
                        ts[j] = (ts[j] - mean) / std;
                    }

                    // The file number and window offset travel with the series.
                    IDataFormat dl = new Meta3DataFormat(i, pos, ts);
                    _si.Insert(new SaxData(dl, Sax.ArrayToSaxVals(ts, opts)));
                    processed++;

                    if (processed % Globals.FlushTsVal == 0)
                    {
                        _si.FlushEntries();
                    }
                }

                GC.Collect();
            }
            _si.FlushEntries();

            Console.WriteLine();
            Console.WriteLine(sb.ToString());
            Console.WriteLine();
            Console.WriteLine("Processed:{0} {1}", processed, _si.NumTimeSeries);
            Console.WriteLine("Discarded:{0}", discarded);
            Console.WriteLine();
        }
        /// <summary>
        /// Loads the DNA data via <c>LoadDnaToMetaBuffer</c>, then walks each buffer
        /// in <c>Meta2DataFormat.dnaBuffer</c> in steps of <c>SHIFT</c>. Each window is
        /// mean-centered; it is inserted into <c>_si</c> when its number of slope
        /// sign changes exceeds <c>NUMSIGNCHANGE</c>, and counted as discarded otherwise.
        /// </summary>
        public override void LoadIndex()
        {
            // Difference of the base-2 logs of the two cardinalities; the cast truncates it.
            double     log2Gap = Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2);
            SaxOptions saxOpts = new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, (ushort)log2Gap));

            // Load the DNA data into memory.
            LoadDnaToMetaBuffer(_dataFolder);

            // Visit every buffer and every window start within it.
            for (int chrNo = 0; chrNo < Meta2DataFormat.dnaBuffer.Count; ++chrNo)
            {
                for (int pos = 0; pos <= Meta2DataFormat.dnaBuffer[chrNo].Length - Globals.TimeSeriesLength; pos += SHIFT)
                {
                    IDataFormat window = new Meta2DataFormat(chrNo, pos);
                    double[]    series = window.GetTimeSeries();

                    double mean = Util.Mean(series, 0, series.Length - 1);

                    // Count how often the sign of consecutive differences flips.
                    int    flips     = 0;
                    double prevDelta = series[1] - series[0];
                    for (int idx = 2; idx < series.Length; ++idx)
                    {
                        double delta = series[idx] - series[idx - 1];
                        if (Math.Sign(prevDelta) != Math.Sign(delta))
                        {
                            flips++;
                        }
                        prevDelta = delta;
                    }

                    // Center the series on zero (the values are not rescaled).
                    for (int idx = 0; idx < series.Length; ++idx)
                    {
                        series[idx] = series[idx] - mean;
                    }

                    // Filter: windows with too few sign changes are only counted.
                    if (!(flips > NUMSIGNCHANGE))
                    {
                        discarded++;
                        continue;
                    }

                    _si.Insert(new SaxData(window, Sax.ArrayToSaxVals(series, saxOpts)));
                    processed++;

                    if (processed % Globals.FlushTsVal == 0)
                    {
                        _si.FlushEntries();
                    }
                }
                GC.Collect();
            }

            _si.FlushEntries();
        }