public override void LoadIndex()
{
    ushort maskval = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
    SaxOptions opts = new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, maskval));

    double[] ts;
    IDataFormat dl;

    int numFiles = Directory.GetFiles(_dataDir, "*.dat").Length;
    if (numFiles != NUMFILES)
    {
        throw new ApplicationException("numFiles != NUMFILES");
    }

    for (int i = 1; i <= numFiles; ++i)
    {
        string file = Path.Combine(_dataDir, string.Format("i{0}.dat", i));
        if (!File.Exists(file))
        {
            throw new ApplicationException("!File.Exists(file)");
        }

        using (BinaryReader br = new BinaryReader(new FileStream(file, FileMode.Open, FileAccess.Read)))
        {
            if (br.BaseStream.Length != EXPECTEDBYTES)
            {
                throw new ApplicationException("br.BaseStream.Length != EXPECTEDBYTES");
            }

            int bytesRead = br.Read(imageBuffer, 0, EXPECTEDBYTES);
            if (bytesRead != EXPECTEDBYTES)
            {
                throw new ApplicationException("bytesRead != EXPECTEDBYTES");
            }

            // walk the file buffer one raw time series at a time
            int pos = 0;
            while (pos < EXPECTEDBYTES)
            {
                Buffer.BlockCopy(imageBuffer, pos, tsBuffer, 0, ORIGINAL_LENGTH * sizeof(double));
                ts = Util.NormalizationHandler(Util.DownSample(Util.ByteArrayToDoubleArray(tsBuffer), DOWNSAMPLERATE));

                dl = new Meta1DataFormat(processed, ts);
                _si.Insert(new SaxData(dl, Sax.ArrayToSaxVals(ts, opts)));
                processed++;

                if (processed % Globals.FlushTsVal == 0)
                {
                    _si.FlushEntries();
                }

                pos += ORIGINAL_LENGTH * sizeof(double);
            }

            Console.WriteLine("{0} read. TsNum:{1}", Path.GetFileName(file), processed);
        }
    }

    _si.FlushEntries();
}
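// The maskval/opts setup above is repeated in every LoadIndex override below. A minimal
// sketch of factoring it into a shared helper (hypothetical name; assumes the same
// Globals, SaxOptions and Util members used above):
private static SaxOptions BuildSaxOptions()
{
    // e.g. with SaxMaxCard = 256 and SaxBaseCard = 2: log2(256) - log2(2) = 8 - 1 = 7
    ushort maskval = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
    return new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, maskval));
}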
public TermEntry ApproximateSearch(double[] ts)
{
    ushort maskval = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
    SaxOptions opts = new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, maskval));

    SaxData dr = new SaxData(Sax.ArrayToSaxVals(ts, opts));
    return ApproximateSearch(dr);
}
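// A hypothetical call site (not part of the original code): `index` stands for whatever
// object exposes ApproximateSearch, and the query is assumed to be z-normalized with
// Util.NormalizationHandler so it matches how LoadIndex prepares the indexed data.
//
//   double[] query = Util.NormalizationHandler(rawQuery);
//   TermEntry result = index.ApproximateSearch(query);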
public override void LoadIndex()
{
    ushort maskval = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
    SaxOptions opts = new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, maskval));

    this.sr = new StreamReader(this.dataFile);

    // Alternate between refilling the buffer from the file and draining it into the index.
    while (!(this.allRead && this.buffer.Count == 0))
    {
        if (this.buffer.Count == 0)
        {
            Console.WriteLine(this.processed);

            // Each line is: <class/shapelet label> <v1> <v2> ...
            string line;
            while ((line = this.sr.ReadLine()) != null)
            {
                double[] line_data = Util.StringToArray(line);
                double[] ts = Util.NormalizationHandler(line_data.Skip(1).ToArray());

                // write the normalized values back after the label
                ts.CopyTo(line_data, 1);

                if (!this.tsLength.HasValue)
                {
                    this.tsLength = (uint)ts.Length;
                }
                else if (this.tsLength.Value != ts.Length)
                {
                    throw new ApplicationException("Inconsistent length when reading from file.");
                }

                this.buffer.Enqueue(line_data);
                if (this.buffer.Count == this.bufferSize)
                {
                    break;
                }
            }

            if (line == null)
            {
                this.allRead = true;
            }
        }
        else
        {
            double[] tmp = this.buffer.Dequeue();
            double[] ts = tmp.Skip(1).ToArray();
            double shapelet_ts = tmp.First();

            IDataFormat dl = new RawShapeletFormat(ts, shapelet_ts);
            this.si.Insert(new SaxData(dl, Sax.ArrayToSaxVals(ts, opts)));
            this.processed++;
        }
    }

    this.sr.Close();
    this.si.FlushEntries();
    Console.WriteLine("Total: {0}", this.processed);
}
public override void LoadIndex()
{
    Util.SeedGenerator(seed);

    ushort maskval = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
    SaxOptions opts = new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, maskval));

    double[] ts;
    IDataFormat dl;

    while (this.processed < this.numTs)
    {
        ts = Util.RandomWalk(this.tsLength);
        dl = new RawDataFormat(ts);

        // continuously insert into the first level of buffers (no threshold)
        this.si.Insert(new SaxData(dl, Sax.ArrayToSaxVals(ts, opts)));
        this.processed++;
        Console.Write("\r{0}", this.processed);

        // flush to disk whenever the configured threshold is reached
        if (this.processed % Globals.FlushTsVal == 0)
        {
            this.si.FlushEntries();
        }
    }

    this.si.FlushEntries();
    Console.WriteLine();
}
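// A minimal sketch of a random-walk generator like the one used above (the actual
// Util.RandomWalk implementation may differ): the cumulative sum of standard-normal steps.
private static double[] RandomWalkSketch(int length, Random rng)
{
    double[] ts = new double[length];
    double value = 0.0;
    for (int i = 0; i < length; ++i)
    {
        // Box-Muller transform: two uniform samples give one standard-normal step
        double step = Math.Sqrt(-2.0 * Math.Log(1.0 - rng.NextDouble())) * Math.Cos(2.0 * Math.PI * rng.NextDouble());
        value += step;
        ts[i] = value;
    }
    return ts;
}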
public IndexOptions(SaxOptions saxOpts, string baseFolder)
{
    this.baseDir = baseFolder;
    this.saxOpts = saxOpts;
}
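// A hypothetical construction example (not part of the original code; the folder path is
// a placeholder), reusing the SaxOptions setup seen in the LoadIndex methods above:
//
//   ushort maskval = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
//   SaxOptions saxOpts = new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, maskval));
//   IndexOptions idxOpts = new IndexOptions(saxOpts, @"C:\indexes\run1");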
public override void LoadIndex()
{
    ushort maskval = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
    SaxOptions opts = new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, maskval));

    double[] dataBuffer;
    IDataFormat dl;
    double[] ts;
    int discarded = 0;

    string[] files = Directory.GetFiles(_dataDir, "*.dat");
    Array.Sort(files, new NaturalStringComparer());
    if (files.Length != NUMFILES)
    {
        throw new ApplicationException("files.Length != NUMFILES");
    }

    StringBuilder sb = new StringBuilder();
    sb.AppendLine("File => Number Mapping");

    for (int i = 0; i < NUMFILES; ++i)
    {
        string file = files[i];
        sb.AppendFormat("{0} => {1}", Path.GetFileNameWithoutExtension(file), i).AppendLine();
        Console.WriteLine("Processed:{2} Discarded:{0} AtFile:{1}", discarded, file, processed);

        if (!File.Exists(file))
        {
            throw new ApplicationException("!File.Exists(file)");
        }

        // read data file into memory
        using (BinaryReader br = new BinaryReader(new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.None, 100000000)))
        {
            if (br.BaseStream.Length % sizeof(double) != 0)
            {
                throw new ApplicationException("br.BaseStream.Length % sizeof(double) != 0");
            }

            dataBuffer = new double[br.BaseStream.Length / sizeof(double)];
            int offset = 0;
            for (int pos = 0; pos < br.BaseStream.Length; pos += sizeof(double))
            {
                dataBuffer[offset++] = br.ReadDouble();
            }
        }

        // sliding window: extract time series subsequences
        for (int pos = 0; pos < dataBuffer.Length - Globals.TimeSeriesLength; ++pos)
        {
            ts = new double[Globals.TimeSeriesLength];
            Array.Copy(dataBuffer, pos, ts, 0, Globals.TimeSeriesLength);

            // filter: skip half a window past low-variance regions and count the skipped windows as discarded
            double std = Util.StdDev(ts);
            if (std <= FILTERVAL)
            {
                discarded += (int)Math.Ceiling(Globals.TimeSeriesLength / 2.0) + 1;
                pos += (int)Math.Ceiling(Globals.TimeSeriesLength / 2.0);
                continue;
            }
            else
            {
                // normalize
                double mean = Util.Mean(ts, 0, ts.Length - 1);
                for (int j = 0; j < ts.Length; ++j)
                {
                    ts[j] = (ts[j] - mean) / std;
                }

                dl = new Meta3DataFormat(i, pos, ts);
                _si.Insert(new SaxData(dl, Sax.ArrayToSaxVals(ts, opts)));
                processed++;

                if (processed % Globals.FlushTsVal == 0)
                {
                    _si.FlushEntries();
                }
            }
        }

        GC.Collect();
    }

    _si.FlushEntries();

    Console.WriteLine();
    Console.WriteLine(sb.ToString());
    Console.WriteLine();
    Console.WriteLine("Processed:{0} {1}", processed, _si.NumTimeSeries);
    Console.WriteLine("Discarded:{0}", discarded);
    Console.WriteLine();
}
public override void LoadIndex()
{
    ushort maskval = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
    SaxOptions opts = new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, maskval));

    double[] ts;
    IDataFormat dl;

    // load dna into memory
    LoadDnaToMetaBuffer(_dataFolder);

    // iterate through each chr and insert
    // invariant: _si.NumTimeSeries == processed - discarded - Index<Meta2DataFormat>.discarded
    double mean = 0;
    int signChange = 0;
    double delta = 0;
    double lastVal = 0;

    for (int chrNo = 0; chrNo < Meta2DataFormat.dnaBuffer.Count; ++chrNo)
    {
        for (int pos = 0; pos <= Meta2DataFormat.dnaBuffer[chrNo].Length - Globals.TimeSeriesLength; pos += SHIFT)
        {
            dl = new Meta2DataFormat(chrNo, pos);
            ts = dl.GetTimeSeries();

            // count sign changes of the successive differences, then mean-center
            mean = Util.Mean(ts, 0, ts.Length - 1);
            signChange = 0;
            lastVal = ts[1] - ts[0];
            for (int k = 2; k < ts.Length; ++k)
            {
                delta = ts[k] - ts[k - 1];
                if (Math.Sign(lastVal) != Math.Sign(delta))
                {
                    signChange++;
                }
                lastVal = delta;
            }
            for (int k = 0; k < ts.Length; ++k)
            {
                ts[k] = ts[k] - mean;
            }

            // filter: keep only windows with enough sign changes
            if (signChange > NUMSIGNCHANGE)
            {
                _si.Insert(new SaxData(dl, Sax.ArrayToSaxVals(ts, opts)));
                processed++;

                if (processed % Globals.FlushTsVal == 0)
                {
                    _si.FlushEntries();
                }
            }
            else
            {
                discarded++;
            }
        }

        GC.Collect();
    }

    _si.FlushEntries();
}
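// A minimal standalone sketch of the sign-change filter used above (hypothetical helper
// name; same counting logic as the inner loop in LoadIndex, and it likewise assumes
// ts.Length >= 2):
private static int CountSignChanges(double[] ts)
{
    int signChange = 0;
    double lastVal = ts[1] - ts[0];
    for (int k = 2; k < ts.Length; ++k)
    {
        double delta = ts[k] - ts[k - 1];
        if (Math.Sign(lastVal) != Math.Sign(delta))
        {
            signChange++;
        }
        lastVal = delta;
    }
    // e.g. for {1, 3, 2, 4} the deltas are {2, -1, 2}, so this returns 2; a window is
    // kept only when the result exceeds NUMSIGNCHANGE
    return signChange;
}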