/// <summary>
/// Handles an approximate-search miss: the SAX word of <paramref name="dr"/> has no exact
/// entry at the root, so fall back to the first root entry that agrees with the query on the
/// most recently promoted symbol position (the last position carrying the maximum mask value),
/// or to the first index entry when nothing matches.
/// </summary>
/// <param name="dr">SAX representation of the query time series.</param>
/// <returns>The terminal entry reached by continuing the search from the chosen root entry.</returns>
private TermEntry MismatchHandler(SaxData dr)
{
    if (NumIndexEntries > 1)
    {
        string saxString = Sax.SaxDataRepToSaxStr(dr, options.SaxOpts);

        // Find the LAST position holding the maximum mask value (<= keeps later ties),
        // i.e. the most recently promoted symbol position.
        int pos = 0;
        ReadOnlyCollection<ushort> mask = options.Mask;
        for (int i = 0; i < mask.Count; i++)
        {
            if (mask[pos] <= mask[i])
            {
                pos = i;
            }
        }

        // Hoisted out of the loop: the query's symbol at 'pos' is loop-invariant, so parse
        // the query SAX word once instead of re-parsing it for every candidate entry.
        ushort querySymbol = Sax.SaxStrToSaxVals(saxString)[pos];

        // Search for a root entry whose symbol matches the query at the promoted position.
        foreach (string entrySaxString in index.Keys)
        {
            if (Sax.SaxStrToSaxVals(entrySaxString)[pos] == querySymbol)
            {
                return(SearchHandler(index[entrySaxString], dr));
            }
        }
    }

    // No match found (or only one entry exists): descend from the first index entry.
    return(SearchHandler(ReturnFirstIndexEntry(), dr));
}
/// <summary>
/// Loads the on-disk image files (i1.dat .. iN.dat) into the index. Each file must be exactly
/// EXPECTEDBYTES long and is read in one shot into <c>imageBuffer</c>; every ORIGINAL_LENGTH-double
/// slice is down-sampled, normalized, SAX-encoded and inserted, flushing every Globals.FlushTsVal
/// insertions to bound memory use.
/// </summary>
/// <exception cref="ApplicationException">
/// Thrown when the file count, a file's existence, its length, or the bytes read do not match
/// the expected constants.
/// </exception>
public override void LoadIndex()
{
    // Number of cardinality-doubling promotions available between base and max SAX cardinality.
    ushort maskval = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
    SaxOptions opts = new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, maskval));

    // NOTE: removed a dead local (double[] tmp = new double[ORIGINAL_LENGTH]) that was
    // allocated but never used.
    double[] ts;
    IDataFormat dl;

    int numFiles = Directory.GetFiles(_dataDir, "*.dat").Length;
    if (numFiles != NUMFILES)
    {
        throw new ApplicationException("numFiles != NUMFILES");
    }

    // Files are named i1.dat .. iN.dat (1-based).
    for (int i = 1; i <= numFiles; ++i)
    {
        string file = Path.Combine(_dataDir, string.Format("i{0}.dat", i));
        if (!File.Exists(file))
        {
            throw new ApplicationException("!File.Exists(file)");
        }

        using (BinaryReader br = new BinaryReader(new FileStream(file, FileMode.Open, FileAccess.Read)))
        {
            if (br.BaseStream.Length != EXPECTEDBYTES)
            {
                throw new ApplicationException("br.BaseStream.Length != ORIGINAL_LENGTH * NUMTSPERFILE * sizeof(double)");
            }

            // Read the whole file into the shared image buffer in a single call.
            int bytesRead = br.Read(imageBuffer, 0, EXPECTEDBYTES);
            if (bytesRead != EXPECTEDBYTES)
            {
                throw new ApplicationException("EXPECTEDBYTES");
            }

            // Walk the image buffer one raw time series (ORIGINAL_LENGTH doubles) at a time.
            int pos = 0;
            while (pos < EXPECTEDBYTES)
            {
                Buffer.BlockCopy(imageBuffer, pos, tsBuffer, 0, ORIGINAL_LENGTH * sizeof(double));
                ts = Util.NormalizationHandler(Util.DownSample(Util.ByteArrayToDoubleArray(tsBuffer), DOWNSAMPLERATE));
                dl = new Meta1DataFormat(processed, ts);
                _si.Insert(new SaxData(dl, Sax.ArrayToSaxVals(ts, opts)));
                processed++;

                // Periodic flush keeps the in-memory buffers bounded.
                if (processed % Globals.FlushTsVal == 0)
                {
                    _si.FlushEntries();
                }
                pos += ORIGINAL_LENGTH * sizeof(double);
            }
            Console.WriteLine("{0} read. TsNum:{1}", Path.GetFileName(file), processed);
        }
    }
    // Final flush for the tail that did not hit the FlushTsVal boundary.
    _si.FlushEntries();
}
/// <summary>
/// Convenience overload: converts a raw time series to its SAX representation using the
/// globally configured word length and cardinality, then runs the approximate search on it.
/// </summary>
/// <param name="ts">Raw time series to search for.</param>
/// <returns>The terminal entry the approximate search descends to.</returns>
public TermEntry ApproximateSearch(double[] ts)
{
    // Promotions available between the base and the maximum SAX cardinality.
    ushort promotions = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
    SaxOptions saxOptions = new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, promotions));
    return(ApproximateSearch(new SaxData(Sax.ArrayToSaxVals(ts, saxOptions))));
}
/// <summary>
/// Loads the index from a delimited text file of labelled time series. Lines are read in
/// batches of up to <c>bufferSize</c> into a queue; the queue is then drained one row at a
/// time, splitting each row into its label (first value) and its normalized series, which is
/// wrapped in a RawShapeletFormat, SAX-encoded and inserted.
/// </summary>
/// <exception cref="ApplicationException">
/// Thrown when a line's series length differs from the first line's length.
/// </exception>
public override void LoadIndex()
{
    // Number of cardinality-doubling promotions between base and max SAX cardinality.
    ushort maskval = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
    SaxOptions opts = new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, maskval));

    this.sr = new StreamReader(this.dataFile);

    // Alternate between refilling the buffer from the file and draining it,
    // until the file is exhausted AND the buffer is empty.
    while (!(this.allRead && this.buffer.Count == 0))
    {
        if (this.buffer.Count == 0)
        {
            // Refill phase: read up to bufferSize lines.
            Console.WriteLine(this.processed);
            string line;
            while ((line = this.sr.ReadLine()) != null)
            {
                double[] line_data = Util.StringToArray(line);
                //double ts_class = line_data.First();
                double[] ts = Util.NormalizationHandler(line_data.Skip(1).ToArray());
                //double[] together = new double[ts.Length + 1];
                //together[0] = ts_class;

                // Writes the normalized values back into line_data starting at index 1,
                // so the queued row is [label, normalized series...].
                ts.CopyTo(line_data, 1);
                if (!this.tsLength.HasValue)
                {
                    // First line fixes the expected series length.
                    this.tsLength = (uint)ts.Length;
                }
                else if (this.tsLength.Value != ts.Length)
                {
                    throw new ApplicationException("Inconsistent length when reading from file.");
                }
                this.buffer.Enqueue(line_data);
                if (this.buffer.Count == this.bufferSize)
                {
                    break;
                }
            }
            // ReadLine returned null => end of file reached.
            if (line == null)
            {
                this.allRead = true;
            }
        }
        else
        {
            // Drain phase: insert one buffered row.
            double[] tmp = this.buffer.Dequeue();
            double[] ts = tmp.Skip(1).ToArray();
            double shapelet_ts = tmp.First();
            IDataFormat dl = new RawShapeletFormat(ts, shapelet_ts);
            this.si.Insert(new SaxData(dl, Sax.ArrayToSaxVals(ts, opts)));
            this.processed++;
        }
    }
    this.sr.Close();
    //this.si.ForceFlushBuffers();
    this.si.FlushEntries();
    Console.WriteLine("Total: {0}", this.processed);
}
/// <summary>
/// Exact nearest-neighbor search: seeds the best-so-far distance with the approximate-search
/// leaf, then performs a best-first traversal of the index ordered by the iSAX lower-bound
/// distance, pruning any entry whose lower bound cannot beat the current best.
/// </summary>
/// <param name="ts">Query time series.</param>
/// <param name="bsf">Receives the best (smallest) file-level Euclidean distance found.</param>
/// <returns>IO and distance-computation costs accumulated during the search.</returns>
public CostCounter ExactSearch(double[] ts, out IndexFileDist bsf)
{
    CostCounter costs = new CostCounter(0, 0);
    IntervalHeap <IndexEntryDist> heap = new IntervalHeap <IndexEntryDist>(NumIndexEntries);

    // Seed best-so-far with the leaf reached by the approximate search.
    TermEntry approxLeaf = ApproximateSearch(ts);
    bsf = Index <DATAFORMAT> .MinFileEucDist(ts, approxLeaf.FileName);
    costs.IO++;
    costs.distance += approxLeaf.NumTimeSeries;

    // Prime the priority queue with every root-level entry, keyed by its lower bound.
    foreach (IndexEntry rootEntry in index.Values)
    {
        double rootBound = Sax.MinDistPAAToiSAX(Sax.SaxStrToSaxVals(rootEntry.SaxWord), options.SaxOpts, ts);
        heap.Add(new IndexEntryDist(rootEntry, rootBound));
    }

    // Best-first traversal: stop as soon as the smallest lower bound cannot improve bsf.
    while (!heap.IsEmpty)
    {
        IndexEntryDist best = heap.DeleteMin();
        if (best.dist >= bsf.distance)
        {
            break;
        }

        TermEntry leaf = best.entry as TermEntry;
        if (leaf != null)
        {
            // Terminal node: scan its file and update best-so-far if it improves.
            IndexFileDist leafBest = Index <DATAFORMAT> .MinFileEucDist(ts, leaf.FileName);
            costs.IO++;
            costs.distance += leaf.NumTimeSeries;
            if (leafBest.distance < bsf.distance)
            {
                bsf = leafBest;
            }
        }
        else
        {
            // Internal node: push its children with bounds computed under ITS options.
            SplitEntry <DATAFORMAT> split = best.entry as SplitEntry <DATAFORMAT>;
            if (split != null)
            {
                foreach (IndexEntry child in split.GetIndexEntries())
                {
                    double childBound = Sax.MinDistPAAToiSAX(Sax.SaxStrToSaxVals(child.SaxWord), split.Options.SaxOpts, ts);
                    heap.Add(new IndexEntryDist(child, childBound));
                }
            }
        }
    }
    return(costs);
}
/// <summary>
/// Approximate search: if the query's SAX word has an entry at the root, descend through it;
/// otherwise delegate to MismatchHandler to choose the closest root entry.
/// </summary>
/// <param name="dr">SAX representation of the query time series.</param>
/// <returns>The terminal entry reached by the search.</returns>
public TermEntry ApproximateSearch(SaxData dr)
{
    string saxString = Sax.SaxDataRepToSaxStr(dr, options.SaxOpts);

    // Single TryGetValue instead of ContainsKey followed by the indexer (one hash lookup
    // instead of two).
    IndexEntry entry;
    if (index.TryGetValue(saxString, out entry))
    {
        return(SearchHandler(entry, dr));
    }
    return(MismatchHandler(dr));
}
/// <summary>
/// Populates the index with <c>numTs</c> random-walk time series of length <c>tsLength</c>,
/// seeding the generator first, flushing to disk every Globals.FlushTsVal insertions and
/// once more at the end.
/// </summary>
public override void LoadIndex()
{
    Util.SeedGenerator(seed);

    // Promotions available between the base and the maximum SAX cardinality.
    ushort promotions = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
    SaxOptions saxOptions = new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, promotions));

    while (this.processed < this.numTs)
    {
        double[] walk = Util.RandomWalk(this.tsLength);
        IDataFormat payload = new RawDataFormat(walk);

        // Continuous insertion on the first level of buffers (no threshold applied here).
        this.si.Insert(new SaxData(payload, Sax.ArrayToSaxVals(walk, saxOptions)));
        this.processed++;
        Console.Write("\r{0}", this.processed);

        // Flush to disk whenever the configured batch size is reached.
        if (this.processed % Globals.FlushTsVal == 0)
        {
            this.si.FlushEntries();
        }
    }
    this.si.FlushEntries();
    Console.WriteLine();
}
/// <summary>
/// Inserts one SAX-encoded record into this node. At the root (splitDepth == 0, not flushing)
/// records are staged in an in-memory buffer keyed by SAX word. Otherwise the record is routed
/// to the matching index entry: appended to a terminal entry's buffer, pushed down a split
/// entry, or — when the terminal entry must split — merged with that entry's buffered and
/// on-disk records into a new child SplitEntry that replaces it.
/// </summary>
/// <param name="input">Record to insert.</param>
/// <exception cref="ApplicationException">
/// Thrown when an on-disk bucket file's length is not a whole multiple of the record size,
/// or a partial record is read back.
/// </exception>
public void Insert(SaxData input)
{
    string saxString = Sax.SaxDataRepToSaxStr(input, options.SaxOpts);
    if (splitDepth == 0 && flush == false)
    {
        // Root-level staging: accumulate in memory until a flush is forced.
        if (!buffer.ContainsKey(saxString))
        {
            buffer.Add(saxString, new List <SaxData>());
        }
        buffer[saxString].Add(input);
    }
    else
    {
        if (index.ContainsKey(saxString))
        {
            IndexEntry entry = index[saxString];
            if (entry is TermEntry)// if terminal, then search path terminates here
            {
                TermEntry tentry = (TermEntry)entry;
                // NOTE(review): oldFileName is captured but never used below — candidate for removal.
                string oldFileName = tentry.FileName;
                if (SplitEntry(tentry) == false) // check bucket requires a split
                {
                    tentry.InsertToBuffer(input);
                }
                else
                {
                    // Split path: gather the entry's buffered records...
                    List <SaxData> B = tentry.getbuffer();
                    if (B == null)
                    {
                        B = new List <SaxData>();
                    }
                    DiskCost.increasesavedcost(B.Count);

                    // Rebuild the bucket's file-name stem "<val>.<mask>_..." from the SAX word
                    // and mask, then locate its on-disk file(s).
                    ushort[] newMask = this.options.MaskCopy;
                    ushort[] newSaxString = Sax.SaxStrToSaxVals(saxString);
                    string newName = "";
                    for (int i = 0; i < newMask.Length; i++)
                    {
                        newName = newName + newSaxString[i].ToString() + "." + newMask[i].ToString() + "_";
                    }
                    newName = newName.Substring(0, newName.Length - 1); // drop trailing '_'
                    string[] files = Directory.GetFiles(WorkingFolder, string.Concat(newName, "*.txt"));
                    //string[] files = Directory.GetFiles(tentry.FileName);

                    // Sanity: exactly one file iff the entry has been written to disk.
                    if (tentry.OnDisk == true)
                    {
                        Assert.AreEqual(files.Length, 1);
                    }
                    else
                    {
                        Assert.AreEqual(files.Length, 0);
                    }

                    // ...then read all on-disk records back into B and delete the file(s).
                    byte[] temp;
                    int pos = 0;
                    long length = -1;
                    int bytesToRead = SaxData.ByteLength(typeof(DATAFORMAT));
                    foreach (string f in files)
                    {
                        using (BinaryReader br = new BinaryReader(new FileStream(f, FileMode.Open, FileAccess.Read)))
                        {
                            length = br.BaseStream.Length;
                            if (length != 0)
                            {
                                DiskCost.increaserandomcost();
                            }
                            // File must hold a whole number of fixed-size records.
                            if (Math.IEEERemainder(length, bytesToRead) != 0)
                            {
                                throw new ApplicationException("Math.IEEERemainder(br.BaseStream.Length, bytesToRead) != 0");
                            }
                            while (pos < length)
                            {
                                temp = br.ReadBytes(bytesToRead);
                                if (temp.Length != bytesToRead)
                                {
                                    throw new ApplicationException("temp.Length != bytesToRead");
                                }
                                B.Add(SaxData.Parse <DATAFORMAT>(temp));
                                DiskCost.increasereadcost();
                                pos += bytesToRead;
                            }
                        }
                        File.Delete(f);
                    }

                    // Build the replacement split node one level deeper; the new-split policy
                    // decides whether the gathered records inform the promoted position.
                    SplitEntry <DATAFORMAT> newSplit;
                    if (Globals.NewSplitPolicy)
                    {
                        newSplit = new SplitEntry <DATAFORMAT>(saxString, UpdateOptions(B), (byte)(1 + splitDepth));
                    }
                    else
                    {
                        newSplit = new SplitEntry <DATAFORMAT>(saxString, UpdateOptions(null), (byte)(1 + splitDepth));
                    }

                    // Re-insert the incoming record plus everything gathered from the old bucket.
                    newSplit.Insert(input);
                    foreach (SaxData S in B)
                    {
                        newSplit.Insert(S);
                    }

                    // update index entry from terminal to split
                    index[saxString] = newSplit;
                }
            }
            else if (entry is SplitEntry <DATAFORMAT> ) // internal node
            {
                ((SplitEntry <DATAFORMAT>)entry).Insert(input);
            }
        }
        else // saxString has not been seen before, create new file and entry
        {
            // Derive the bucket file name "<val>.<mask>_..." and register a fresh terminal entry.
            ushort[] newMask = this.options.MaskCopy;
            ushort[] newSaxString = Sax.SaxStrToSaxVals(saxString);
            string newName = "";
            for (int i = 0; i < newMask.Length; i++)
            {
                newName = newName + newSaxString[i].ToString() + "." + newMask[i].ToString() + "_";
            }
            newName = newName.Substring(0, newName.Length - 1); // drop trailing '_'
            string newfile = Path.Combine(WorkingFolder, string.Concat(newName, ".0.txt"));
            TermEntry newEntry = new TermEntry(saxString, newfile);
            newEntry.InsertToBuffer(input);
            index.Add(saxString, newEntry);
        }
    }
}
/// <summary>
/// Builds the index from raw .dat files of doubles. Each file (sorted into a stable natural
/// order) is read fully into memory; a sliding window then extracts every subsequence of
/// Globals.TimeSeriesLength points, discards near-constant windows (std-dev &lt;= FILTERVAL,
/// skipping ahead half a window), z-normalizes the rest, and inserts them with periodic flushes.
/// A file-name => file-number mapping report is printed at the end.
/// </summary>
/// <exception cref="ApplicationException">
/// Thrown when the file count is wrong, a file is missing, or a file's byte length is not a
/// multiple of sizeof(double).
/// </exception>
public override void LoadIndex()
{
    ushort maskval = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
    SaxOptions opts = new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, maskval));

    double[] dataBuffer;
    IDataFormat dl;
    double[] ts;
    int discarded = 0;

    string[] files = Directory.GetFiles(_dataDir, "*.dat");
    Array.Sort(files, new NaturalStringComparer());
    if (files.Length != NUMFILES)
    {
        throw new ApplicationException("numFiles != NUMFILES");
    }

    StringBuilder sb = new StringBuilder();
    sb.AppendLine("File => Number Mapping");
    for (int i = 0; i < NUMFILES; ++i)
    {
        string file = files[i];
        // Bug fix: terminate each mapping entry with a newline — previously every entry was
        // concatenated onto a single unreadable line.
        sb.AppendFormat("{0} => {1}", Path.GetFileNameWithoutExtension(file), i);
        sb.AppendLine();
        Console.WriteLine("Processed:{2} Discarded:{0} AtFile:{1}", discarded, file, processed);
        if (!File.Exists(file))
        {
            throw new ApplicationException("!File.Exists(file)");
        }

        // Read the whole data file into memory.
        using (BinaryReader br = new BinaryReader(new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.None, 100000000)))
        {
            // Modulo is the correct integral-multiple test here; Math.IEEERemainder uses
            // round-to-nearest semantics and is intended for floating point.
            if (br.BaseStream.Length % sizeof(double) != 0)
            {
                throw new ApplicationException("Math.IEEERemainder( br.BaseStream.Length, sizeof(double)) != 0");
            }
            dataBuffer = new double[br.BaseStream.Length / sizeof(double)];
            int offset = 0;
            // long cursor: an int byte position would overflow on files larger than 2 GB.
            for (long bytePos = 0; bytePos < br.BaseStream.Length; bytePos += sizeof(double))
            {
                dataBuffer[offset++] = br.ReadDouble();
            }
        }

        // Sliding window: extract time series subsequences.
        for (int pos = 0; pos < dataBuffer.Length - Globals.TimeSeriesLength; ++pos)
        {
            ts = new double[Globals.TimeSeriesLength];
            Array.Copy(dataBuffer, pos, ts, 0, Globals.TimeSeriesLength);

            // Filter: a near-constant window cannot be z-normalized; count it (and the
            // half-window we jump over) as discarded and skip ahead.
            double std = Util.StdDev(ts);
            if (std <= FILTERVAL)
            {
                discarded += (int)Math.Ceiling(Globals.TimeSeriesLength / 2.0) + 1;
                pos += (int)Math.Ceiling(Globals.TimeSeriesLength / 2.0);
                continue;
            }
            else
            {
                // z-normalize: subtract mean, divide by standard deviation.
                double mean = Util.Mean(ts, 0, ts.Length - 1);
                for (int j = 0; j < ts.Length; ++j)
                {
                    ts[j] = (ts[j] - mean) / std;
                }
                dl = new Meta3DataFormat(i, pos, ts);
                _si.Insert(new SaxData(dl, Sax.ArrayToSaxVals(ts, opts)));
                processed++;
                if (processed % Globals.FlushTsVal == 0)
                {
                    _si.FlushEntries();
                }
            }
        }
        // NOTE(review): explicit collection after dropping each large dataBuffer; GC.Collect
        // in production code is usually unnecessary — confirm it is actually needed here.
        GC.Collect();
    }
    _si.FlushEntries();
    Console.WriteLine();
    Console.WriteLine(sb.ToString());
    Console.WriteLine();
    Console.WriteLine("Processed:{0} {1}", processed, _si.NumTimeSeries);
    Console.WriteLine("Discarded:{0}", discarded);
    Console.WriteLine();
}
/// <summary>
/// Builds the index from DNA data. The chromosome buffers are loaded into
/// Meta2DataFormat.dnaBuffer, then a window of Globals.TimeSeriesLength is slid over each
/// chromosome in SHIFT-sized steps; windows are mean-centered and kept only if their
/// first-difference sign changes more than NUMSIGNCHANGE times (a flatness filter),
/// otherwise they are counted as discarded.
/// </summary>
public override void LoadIndex()
{
    // Number of cardinality-doubling promotions between base and max SAX cardinality.
    ushort maskval = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
    SaxOptions opts = new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, maskval));

    double[] ts;
    IDataFormat dl;

    // load dna into memory
    LoadDnaToMetaBuffer(_dataFolder);

    // iterate through each chr and insert
    double mean = 0;
    int signChange = 0;
    double delta = 0;
    double lastVal = 0;
    for (int chrNo = 0; chrNo < Meta2DataFormat.dnaBuffer.Count; ++chrNo)
    {
        // Console.WriteLine("ChrNo:{0} Processed:{1} Discarded:{2} IndexDiscarded:{3}", chrNo, processed, discarded, Index<Meta2DataFormat>.discarded);
        // if (_si.NumTimeSeries != processed - discarded - Index<Meta2DataFormat>.discarded)
        //     throw new ApplicationException();

        // Slide a window over this chromosome in SHIFT-sized steps.
        for (int pos = 0; pos <= Meta2DataFormat.dnaBuffer[chrNo].Length - Globals.TimeSeriesLength; pos += SHIFT)
        {
            // Meta2DataFormat materializes the window from (chrNo, pos).
            dl = new Meta2DataFormat(chrNo, pos);
            ts = dl.GetTimeSeries();

            // Count sign changes of the first differences (assumes ts.Length >= 2 —
            // ts[1] is read unconditionally; TimeSeriesLength is presumably larger).
            mean = Util.Mean(ts, 0, ts.Length - 1);
            signChange = 0;
            lastVal = ts[1] - ts[0];
            for (int k = 2; k < ts.Length; ++k)
            {
                delta = ts[k] - ts[k - 1];
                if (Math.Sign(lastVal) != Math.Sign(delta))
                {
                    signChange++;
                }
                lastVal = delta;
            }
            // Mean-center only (no division by std-dev here).
            for (int k = 0; k < ts.Length; ++k)
            {
                ts[k] = ts[k] - mean;
            }

            // filter: keep only sufficiently "wiggly" windows.
            if (signChange > NUMSIGNCHANGE)
            {
                _si.Insert(new SaxData(dl, Sax.ArrayToSaxVals(ts, opts)));
                processed++;
                if (processed % Globals.FlushTsVal == 0)
                {
                    _si.FlushEntries();
                }
            }
            else
            {
                discarded++;
            }
        }
        GC.Collect();
    }
    // Console.WriteLine("Processed:{0} Discarded:{1} IndexDiscarded:{2}", processed, discarded, Index<Meta2DataFormat>.discarded);
    _si.FlushEntries();
}