예제 #1
0
        /// <summary>
        /// Reads the files <c>i1.dat</c> .. <c>i{NUMFILES}.dat</c> from <c>_dataDir</c> and inserts
        /// every time series they contain into the index.
        /// </summary>
        /// <remarks>
        /// Each file must be exactly <c>EXPECTEDBYTES</c> long. It is consumed in chunks of
        /// <c>ORIGINAL_LENGTH</c> doubles; each chunk is down-sampled by <c>DOWNSAMPLERATE</c>,
        /// passed through <c>Util.NormalizationHandler</c> and inserted together with its SAX word.
        /// Entries are flushed every <c>Globals.FlushTsVal</c> insertions and once more at the end.
        /// </remarks>
        /// <exception cref="ApplicationException">
        /// The number of <c>*.dat</c> files is not <c>NUMFILES</c>, an expected file is missing,
        /// or a file does not have (or does not yield) exactly <c>EXPECTEDBYTES</c> bytes.
        /// </exception>
        public override void LoadIndex()
        {
            ushort     maskval = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
            SaxOptions opts    = new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, maskval));

            // Size in bytes of one raw (not yet down-sampled) time series.
            int tsBytes = ORIGINAL_LENGTH * sizeof(double);

            int numFiles = Directory.GetFiles(_dataDir, "*.dat").Length;

            if (numFiles != NUMFILES)
            {
                throw new ApplicationException("numFiles != NUMFILES");
            }

            // Files are addressed by name (i1.dat, i2.dat, ...), not by directory listing order.
            for (int i = 1; i <= numFiles; ++i)
            {
                string file = Path.Combine(_dataDir, string.Format("i{0}.dat", i));
                if (!File.Exists(file))
                {
                    throw new ApplicationException("!File.Exists(file)");
                }

                using (BinaryReader br = new BinaryReader(new FileStream(file, FileMode.Open, FileAccess.Read)))
                {
                    if (br.BaseStream.Length != EXPECTEDBYTES)
                    {
                        throw new ApplicationException("br.BaseStream.Length != ORIGINAL_LENGTH * NUMTSPERFILE * sizeof(double)");
                    }

                    // A single Read call is allowed to return fewer bytes than requested,
                    // so keep reading until the buffer is full or the stream ends.
                    int bytesRead = 0;
                    while (bytesRead < EXPECTEDBYTES)
                    {
                        int chunk = br.Read(imageBuffer, bytesRead, EXPECTEDBYTES - bytesRead);
                        if (chunk == 0)
                        {
                            break;
                        }
                        bytesRead += chunk;
                    }

                    if (bytesRead != EXPECTEDBYTES)
                    {
                        throw new ApplicationException("EXPECTEDBYTES");
                    }

                    for (int pos = 0; pos < EXPECTEDBYTES; pos += tsBytes)
                    {
                        Buffer.BlockCopy(imageBuffer, pos, tsBuffer, 0, tsBytes);
                        double[] ts = Util.NormalizationHandler(Util.DownSample(Util.ByteArrayToDoubleArray(tsBuffer), DOWNSAMPLERATE));

                        // The running counter is passed to the data format before it is incremented.
                        IDataFormat dl = new Meta1DataFormat(processed, ts);
                        _si.Insert(new SaxData(dl, Sax.ArrayToSaxVals(ts, opts)));
                        processed++;

                        if (processed % Globals.FlushTsVal == 0)
                        {
                            _si.FlushEntries();
                        }
                    }
                    Console.WriteLine("{0} read. TsNum:{1}", Path.GetFileName(file), processed);
                }
            }
            _si.FlushEntries();
        }
예제 #2
0
        /// <summary>
        /// Converts a raw time series to its SAX representation and delegates to the
        /// <c>ApproximateSearch</c> overload that takes a <c>SaxData</c>.
        /// </summary>
        /// <param name="ts">The query time series.</param>
        /// <returns>Whatever the <c>SaxData</c> overload returns for the converted query.</returns>
        public TermEntry ApproximateSearch(double[] ts)
        {
            // Difference between log2 of the maximum and the base cardinality, truncated to ushort.
            ushort cardinalityBits = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));

            SaxOptions saxOptions = new SaxOptions(
                Util.UnsignedShortArray(Globals.SaxWordLength, cardinalityBits));

            SaxData query = new SaxData(Sax.ArrayToSaxVals(ts, saxOptions));

            return ApproximateSearch(query);
        }
        /// <summary>
        /// Reads <c>dataFile</c> line by line and inserts every time series it contains into the index.
        /// </summary>
        /// <remarks>
        /// Lines are parsed with <c>Util.StringToArray</c>. The first value of each line is kept as is
        /// and later handed to <c>RawShapeletFormat</c>; the remaining values are passed through
        /// <c>Util.NormalizationHandler</c>. Up to <c>bufferSize</c> parsed lines are queued at a time;
        /// the queue is then drained one entry per loop iteration before more lines are read.
        /// The reader is closed even if parsing or insertion throws.
        /// </remarks>
        /// <exception cref="ApplicationException">
        /// A line yields a series whose length differs from the first one read.
        /// </exception>
        public override void LoadIndex()
        {
            ushort     maskval = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
            SaxOptions opts    = new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, maskval));

            this.sr = new StreamReader(this.dataFile);

            try
            {
                // Done once the file is exhausted AND the queue has been fully drained.
                while (!(this.allRead && this.buffer.Count == 0))
                {
                    if (this.buffer.Count == 0)
                    {
                        // Refill phase: read lines until the queue is full or the file ends.
                        Console.WriteLine(this.processed);
                        string line;
                        while ((line = this.sr.ReadLine()) != null)
                        {
                            double[] line_data = Util.StringToArray(line);

                            // Normalize everything after the first value and write it back in place,
                            // leaving element 0 untouched.
                            double[] ts = Util.NormalizationHandler(line_data.Skip(1).ToArray());
                            ts.CopyTo(line_data, 1);

                            if (!this.tsLength.HasValue)
                            {
                                this.tsLength = (uint)ts.Length;
                            }
                            else
                            if (this.tsLength.Value != ts.Length)
                            {
                                throw new ApplicationException("Inconsistent length when reading from file.");
                            }

                            this.buffer.Enqueue(line_data);
                            if (this.buffer.Count == this.bufferSize)
                            {
                                break;
                            }
                        }

                        // line is null only when ReadLine hit end of file (not when the queue filled up).
                        if (line == null)
                        {
                            this.allRead = true;
                        }
                    }
                    else
                    {
                        // Drain phase: split the queued row back into its first value and the series.
                        double[]    tmp         = this.buffer.Dequeue();
                        double[]    ts          = tmp.Skip(1).ToArray();
                        double      shapelet_ts = tmp.First();
                        IDataFormat dl          = new RawShapeletFormat(ts, shapelet_ts);
                        this.si.Insert(new SaxData(dl, Sax.ArrayToSaxVals(ts, opts)));
                        this.processed++;
                    }
                }
            }
            finally
            {
                // Release the file handle on both the success and the failure path.
                this.sr.Close();
            }

            this.si.FlushEntries();
            Console.WriteLine("Total: {0}", this.processed);
        }
예제 #4
0
        /// <summary>
        /// Fills the index with generated time series until <c>processed</c> reaches <c>numTs</c>.
        /// </summary>
        /// <remarks>
        /// The generator is seeded with <c>seed</c> first. Each series comes from
        /// <c>Util.RandomWalk(tsLength)</c>, is wrapped in a <c>RawDataFormat</c> and inserted with its
        /// SAX word. Entries are flushed every <c>Globals.FlushTsVal</c> insertions and once at the end.
        /// The running count is rewritten on the current console line after every insertion.
        /// </remarks>
        public override void LoadIndex()
        {
            Util.SeedGenerator(seed);

            ushort cardinalityBits = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
            SaxOptions saxOptions = new SaxOptions(
                Util.UnsignedShortArray(Globals.SaxWordLength, cardinalityBits));

            while (this.processed < this.numTs)
            {
                double[]    series  = Util.RandomWalk(this.tsLength);
                IDataFormat payload = new RawDataFormat(series);
                SaxData     entry   = new SaxData(payload, Sax.ArrayToSaxVals(series, saxOptions));

                // Insert into the index; flushing happens separately below.
                this.si.Insert(entry);
                this.processed++;

                Console.Write("\r{0}", this.processed);

                // Flush whenever the count hits a multiple of the configured interval.
                bool flushDue = this.processed % Globals.FlushTsVal == 0;
                if (flushDue)
                {
                    this.si.FlushEntries();
                }
            }

            // Final flush for whatever was inserted since the last periodic one.
            this.si.FlushEntries();
            Console.WriteLine();
        }
예제 #5
0
 /// <summary>
 /// Creates index options from the given SAX options and base folder.
 /// </summary>
 /// <param name="saxOpts">SAX options stored in <c>saxOpts</c>.</param>
 /// <param name="baseFolder">Folder path stored in <c>baseDir</c>.</param>
 public IndexOptions(SaxOptions saxOpts, string baseFolder)
 {
     // "this." is required here: the parameter has the same name as the member.
     this.saxOpts = saxOpts;
     this.baseDir = baseFolder;
 }
예제 #6
0
        /// <summary>
        /// Loads every <c>*.dat</c> file in <c>_dataDir</c>, slides a window of
        /// <c>Globals.TimeSeriesLength</c> over its values and inserts each accepted,
        /// z-normalized subsequence into the index.
        /// </summary>
        /// <remarks>
        /// Files are ordered with <c>NaturalStringComparer</c>; the position in that order is the
        /// file number stored in each <c>Meta3DataFormat</c>. A window whose standard deviation is
        /// at most <c>FILTERVAL</c> is discarded and the window start is advanced by an extra
        /// half window length. Entries are flushed every <c>Globals.FlushTsVal</c> insertions and
        /// once at the end. A file-to-number mapping and the totals are printed when done.
        /// </remarks>
        /// <exception cref="ApplicationException">
        /// The number of files is not <c>NUMFILES</c>, a listed file no longer exists, or a
        /// file's length is not a multiple of <c>sizeof(double)</c>.
        /// </exception>
        public override void LoadIndex()
        {
            ushort     maskval = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
            SaxOptions opts    = new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, maskval));

            int discarded = 0;

            string[] files = Directory.GetFiles(_dataDir, "*.dat");
            Array.Sort(files, new NaturalStringComparer());

            if (files.Length != NUMFILES)
            {
                throw new ApplicationException("numFiles != NUMFILES");
            }

            StringBuilder sb = new StringBuilder();

            sb.AppendLine("File => Number Mapping");

            // How far the window start jumps ahead after a rejected window.
            int skip = (int)Math.Ceiling(Globals.TimeSeriesLength / 2.0);

            for (int i = 0; i < NUMFILES; ++i)
            {
                string file = files[i];

                // One mapping entry per line; AppendFormat alone does not add a line break.
                sb.AppendFormat("{0} => {1}", Path.GetFileNameWithoutExtension(file), i);
                sb.AppendLine();
                Console.WriteLine("Processed:{2} Discarded:{0} AtFile:{1}", discarded, file, processed);

                if (!File.Exists(file))
                {
                    throw new ApplicationException("!File.Exists(file)");
                }

                // read data file into memory
                double[] dataBuffer;
                using (BinaryReader br = new BinaryReader(new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.None, 100000000)))
                {
                    long byteLength = br.BaseStream.Length;
                    if (byteLength % sizeof(double) != 0)
                    {
                        throw new ApplicationException("Math.IEEERemainder( br.BaseStream.Length, sizeof(double)) != 0");
                    }

                    dataBuffer = new double[byteLength / sizeof(double)];

                    // Iterate by element index so the counter cannot overflow on large files.
                    for (int k = 0; k < dataBuffer.Length; ++k)
                    {
                        dataBuffer[k] = br.ReadDouble();
                    }
                }

                // sliding window and extract time series subsequences
                // (<= so that the final full-length window at the end of the buffer is included)
                for (int pos = 0; pos <= dataBuffer.Length - Globals.TimeSeriesLength; ++pos)
                {
                    double[] ts = new double[Globals.TimeSeriesLength];
                    Array.Copy(dataBuffer, pos, ts, 0, Globals.TimeSeriesLength);

                    // filter
                    double std = Util.StdDev(ts);
                    if (std <= FILTERVAL)
                    {
                        // Count this window plus the ones jumped over; the loop's ++pos adds one more step.
                        discarded += skip + 1;
                        pos       += skip;
                        continue;
                    }

                    // normalize
                    double mean = Util.Mean(ts, 0, ts.Length - 1);
                    for (int j = 0; j < ts.Length; ++j)
                    {
                        ts[j] = (ts[j] - mean) / std;
                    }

                    IDataFormat dl = new Meta3DataFormat(i, pos, ts);
                    _si.Insert(new SaxData(dl, Sax.ArrayToSaxVals(ts, opts)));
                    processed++;

                    if (processed % Globals.FlushTsVal == 0)
                    {
                        _si.FlushEntries();
                    }
                }

                // NOTE(review): presumably forces release of this file's buffer before the next
                // file is loaded; kept as in the original.
                GC.Collect();
            }
            _si.FlushEntries();

            Console.WriteLine();
            Console.WriteLine(sb.ToString());
            Console.WriteLine();
            Console.WriteLine("Processed:{0} {1}", processed, _si.NumTimeSeries);
            Console.WriteLine("Discarded:{0}", discarded);
            Console.WriteLine();
        }
예제 #7
0
        /// <summary>
        /// Loads the DNA data from <c>_dataFolder</c> and inserts mean-centered subsequences of
        /// every buffer in <c>Meta2DataFormat.dnaBuffer</c> into the index.
        /// </summary>
        /// <remarks>
        /// Window start positions advance by <c>SHIFT</c>. For each window the number of times the
        /// sign of the first difference changes is counted on the raw values; only windows with
        /// more than <c>NUMSIGNCHANGE</c> changes are inserted, the rest increment <c>discarded</c>.
        /// Entries are flushed every <c>Globals.FlushTsVal</c> insertions and once at the end.
        /// </remarks>
        public override void LoadIndex()
        {
            ushort cardinalityBits = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
            SaxOptions saxOptions = new SaxOptions(
                Util.UnsignedShortArray(Globals.SaxWordLength, cardinalityBits));

            // load dna into memory
            LoadDnaToMetaBuffer(_dataFolder);

            // iterate through each chr and insert
            for (int chrNo = 0; chrNo < Meta2DataFormat.dnaBuffer.Count; ++chrNo)
            {
                for (int pos = 0; pos <= Meta2DataFormat.dnaBuffer[chrNo].Length - Globals.TimeSeriesLength; pos += SHIFT)
                {
                    IDataFormat entry  = new Meta2DataFormat(chrNo, pos);
                    double[]    series = entry.GetTimeSeries();

                    double mean = Util.Mean(series, 0, series.Length - 1);

                    // Count sign changes between consecutive first differences,
                    // using the values as they are before centering.
                    int    signChanges = 0;
                    double prevDelta   = series[1] - series[0];
                    for (int k = 2; k < series.Length; ++k)
                    {
                        double curDelta = series[k] - series[k - 1];
                        if (Math.Sign(prevDelta) != Math.Sign(curDelta))
                        {
                            signChanges++;
                        }
                        prevDelta = curDelta;
                    }

                    // Center the series on zero (in place).
                    for (int k = 0; k < series.Length; ++k)
                    {
                        series[k] -= mean;
                    }

                    // filter: too few sign changes means the window is rejected
                    if (signChanges <= NUMSIGNCHANGE)
                    {
                        discarded++;
                        continue;
                    }

                    _si.Insert(new SaxData(entry, Sax.ArrayToSaxVals(series, saxOptions)));
                    processed++;

                    if (processed % Globals.FlushTsVal == 0)
                    {
                        _si.FlushEntries();
                    }
                }
                GC.Collect();
            }

            _si.FlushEntries();
        }