/// <summary>
/// Fallback search used when the query's exact SAX string is absent from the index.
/// Tries to find an entry that agrees with the query at the most recently promoted
/// symbol position; if none matches, descends through the first index entry.
/// </summary>
/// <param name="dr">The query record in SAX data representation.</param>
/// <returns>The terminal entry reached by the approximate descent.</returns>
private TermEntry MismatchHandler(SaxData dr)
{
    if (NumIndexEntries > 1)
    {
        string saxString = Sax.SaxDataRepToSaxStr(dr, options.SaxOpts);

        // Find the last promoted position: the final index holding the maximal mask value.
        int pos = 0;
        ReadOnlyCollection <ushort> mask = options.Mask;
        for (int i = 0; i < mask.Count; i++)
        {
            if (mask[pos] <= mask[i])
            {
                pos = i;
            }
        }

        // The query's symbol at 'pos' is loop-invariant; the original recomputed
        // Sax.SaxStrToSaxVals(saxString)[pos] on every iteration of the search below.
        ushort queryVal = Sax.SaxStrToSaxVals(saxString)[pos];

        // Search for an index entry that matches the query at the promoted position.
        foreach (string entrySaxString in index.Keys)
        {
            if (Sax.SaxStrToSaxVals(entrySaxString)[pos] == queryVal)
            {
                return SearchHandler(index[entrySaxString], dr);
            }
        }
    }
    // No match (or only one entry): fall back to the first index entry.
    return SearchHandler(ReturnFirstIndexEntry(), dr);
}
/// <summary>
/// Reads every serialized DATAFORMAT record from a terminal entry's on-disk file.
/// </summary>
/// <param name="e">Terminal entry; must have no attached buffer (NBuf == -1) and must be on disk.</param>
/// <returns>All records stored in the entry's file, in file order.</returns>
/// <exception cref="ApplicationException">
/// Thrown when the entry still has a buffer / is not on disk, when the file length is not a
/// whole number of records, or when a partial record is read.
/// </exception>
public List <DATAFORMAT> ReturnDataFormatFromTermEntry(TermEntry e)
{
    if (e.NBuf != -1 || !e.OnDisk)
    {
        throw new ApplicationException("e.NBuf!= -1 || ~e.OnDisk");
    }

    List <DATAFORMAT> tmp = new List <DATAFORMAT>();
    using (BinaryReader br = new BinaryReader(new FileStream(e.FileName, FileMode.Open, FileAccess.Read)))
    {
        long length = br.BaseStream.Length;
        int bytesToRead = SaxData.ByteLength(typeof(DATAFORMAT));
        // Integer modulo is the correct divisibility test for byte counts.
        // The original used Math.IEEERemainder, a floating-point remainder intended
        // for IEEE-754 arithmetic, not for validating record alignment.
        if (length % bytesToRead != 0)
        {
            throw new ApplicationException("length % bytesToRead != 0");
        }

        long pos = 0;
        while (pos < length)
        {
            byte[] temp = br.ReadBytes(bytesToRead);
            if (temp.Length != bytesToRead)
            {
                throw new ApplicationException("temp.Length != bytesToRead");
            }
            tmp.Add((DATAFORMAT)SaxData.Parse <DATAFORMAT>(temp).dl);
            pos += bytesToRead;
        }
    }
    return tmp;
}
/// <summary>
/// Approximate search over a raw time series: converts it to a SAX representation
/// using the globally configured word length and cardinalities, then delegates
/// to the SaxData overload.
/// </summary>
/// <param name="ts">The query time series.</param>
public TermEntry ApproximateSearch(double[] ts)
{
    // Number of promotion steps available between the base and maximum cardinality.
    ushort maskval = (ushort)(Math.Log(Globals.SaxMaxCard, 2) - Math.Log(Globals.SaxBaseCard, 2));
    SaxOptions opts = new SaxOptions(Util.UnsignedShortArray(Globals.SaxWordLength, maskval));
    return ApproximateSearch(new SaxData(Sax.ArrayToSaxVals(ts, opts)));
}
/// <summary>
/// Adds a record to this node's buffer, flushing first when the buffer
/// has reached its configured capacity.
/// </summary>
/// <param name="entry">The record to buffer.</param>
public void InsertInBuffer(SaxData entry)
{
    bool atCapacity = Utilization >= TermBuffer.SingleBufferSize;
    if (atCapacity)
    {
        FlushBuffer();
    }
    BL.Add(entry);
    Utilization++;
}
public static int CreateNewBuffer(SaxData entry, TermEntry node) { Buf B = new Buf(); B.Initialization(); B.setnode(node); B.InsertInBuffer(entry); // Insert the Time series in the new Buffer TBuffer.Add(B); return(TBuffer.Count - 1); }
/// <summary>
/// Resolves an index entry during a descent: terminal entries end the search,
/// split entries are searched recursively.
/// </summary>
/// <param name="entry">The entry to resolve (terminal or split).</param>
/// <param name="dr">The query record, forwarded into recursive searches.</param>
private static TermEntry SearchHandler(IndexEntry entry, SaxData dr)
{
    // Pattern matching replaces the original explicit type-check-then-cast.
    if (entry is TermEntry term)
    {
        return term;
    }
    return ((SplitEntry <DATAFORMAT>)entry).ApproximateSearch(dr);
}
/// <summary>
/// Buffers an incoming record for this terminal node, creating the backing
/// buffer on first use.
/// </summary>
/// <param name="dr">The record to buffer.</param>
public void InsertToBuffer(SaxData dr)
{
    numTimeSeries++;
    if (NBuf != -1)
    {
        // A buffer is already attached to this node; append to it.
        TermBuffer.InsertInBuffer(dr, NBuf);
        return;
    }
    // First record for this node: allocate a buffer and remember its id.
    NBuf = TermBuffer.CreateNewBuffer(dr, this);
}
/// <summary>
/// Approximate search over a SAX record: an exact SAX-string hit descends
/// directly; otherwise the mismatch handler picks a nearby entry.
/// </summary>
/// <param name="dr">The query record in SAX data representation.</param>
public TermEntry ApproximateSearch(SaxData dr)
{
    string saxString = Sax.SaxDataRepToSaxStr(dr, options.SaxOpts);
    // TryGetValue avoids the original ContainsKey + indexer double hash lookup.
    if (index.TryGetValue(saxString, out var entry))
    {
        return SearchHandler(entry, dr);
    }
    return MismatchHandler(dr);
}
/// <summary>
/// Sequentially scans every serialized record in an index file and returns the
/// entry whose time series is closest (Euclidean distance) to the query.
/// </summary>
/// <param name="ts">The query time series (assumed already normalized — TODO confirm against callers).</param>
/// <param name="file">Path to the index file of serialized DATAFORMAT records.</param>
/// <returns>The closest record's (file, line number, distance) descriptor.</returns>
public static IndexFileDist MinFileEucDist(double[] ts, string file)
{
    // Removed unused locals from the original (val, dd, wlen) and the redundant
    // explicit Close() calls — disposal is handled by the using blocks, including
    // on exception paths (the original leaked the reader on a throw).
    IndexFileDist? best = null;
    int lineNum = 1;
    int recordLength = SaxData.ByteLength(typeof(DATAFORMAT)); // loop-invariant, hoisted

    using (FileStream sr = new FileStream(file, FileMode.Open, FileAccess.Read))
    using (BinaryReader r = new BinaryReader(sr))
    {
        long length = r.BaseStream.Length; // long: the original's (int) cast truncated >2GB files
        long pos = 0;
        while (pos < length)
        {
            byte[] temp = r.ReadBytes(recordLength);
            SaxData tmp = SaxData.Parse <DATAFORMAT>(temp);
            pos += recordLength;

            double[] fileTs = Util.NormalizationHandler(tmp.dl.GetTimeSeries());
            double dist = Util.EuclideanDistance(fileTs, ts);

            // Keep the smallest distance seen so far.
            if (best == null || best.Value.distance > dist)
            {
                best = new IndexFileDist(file, lineNum, dist);
            }
            lineNum++;
        }
    }
    return best.Value;
}
// Insertion for split nodes below the first level of the index:
// delegate straight to the nested index structure.
public void Insert(SaxData dr) => index.Insert(dr);
// Delegate the approximate search to the nested index structure.
public TermEntry ApproximateSearch(SaxData dr) => index.ApproximateSearch(dr);
/// <summary>
/// Exact k-nearest-neighbor search for several queries at once, by sequentially
/// scanning every index file under the index root.
/// </summary>
/// <param name="k">Neighbors per query; clamped to the number of indexed series.</param>
/// <param name="tsList">The query time series.</param>
/// <returns>For each query, its k nearest entries in increasing distance order.</returns>
public List <IndexFileDist[]> KNearestNeighborSequentialScan(int k, List <double[]> tsList)
{
    CostCounter counter = new CostCounter(0, 0);
    if (k > NumTimeSeries)
    {
        Console.WriteLine("K > number of time series, setting K to number of time series.");
        k = NumTimeSeries;
    }

    // One bounded candidate heap per query; capacity k + 1 so we can insert
    // first and then evict the current worst.
    List <IntervalHeap <IndexFileDist> > neighbors = new List <IntervalHeap <IndexFileDist> >(tsList.Count);
    for (int l = 0; l < tsList.Count; l++)
    {
        neighbors.Add(new IntervalHeap <IndexFileDist>(k + 1));
    }

    Console.Write("Retreiving All Index Files:");
    string[] indexFiles = Directory.GetFiles(Globals.IndexRootDir, "*.*.txt", SearchOption.AllDirectories);
    Console.WriteLine(" {0} files.", indexFiles.Length);

    int frac = indexFiles.Length / 10;
    int srchFiles = 0;
    int srchTs = 0;
    // Record size is loop-invariant; the original recomputed it twice per record.
    int recordLength = SaxData.ByteLength(typeof(DATAFORMAT));

    foreach (string f in indexFiles)
    {
        // Progress display roughly every 10% of files.
        if (srchFiles % (frac == 0 ? 1 : frac) == 0)
        {
            Console.Write("\r{0}", srchFiles);
        }
        srchFiles++;
        counter.IO++;

        using (FileStream sr = new FileStream(f, FileMode.Open, FileAccess.Read))
        using (BinaryReader r = new BinaryReader(sr)) // now disposed even on exceptions
        {
            long length = r.BaseStream.Length;
            long pos = 0;
            int line = 0;
            while (pos < length)
            {
                srchTs++;
                byte[] temp = r.ReadBytes(recordLength);
                SaxData tmp = SaxData.Parse <DATAFORMAT>(temp);
                double[] data = tmp.dl.GetTimeSeries();

                // Fold this record into every query's candidate heap.
                for (int query = 0; query < tsList.Count; query++)
                {
                    double dist = Util.EuclideanDistance(data, tsList[query]);
                    neighbors[query].Add(new IndexFileDist(f, line + 1, dist));
                    if (neighbors[query].Count > k)
                    {
                        neighbors[query].DeleteMax(); // evict the current worst candidate
                    }
                }
                counter.distance += tsList.Count;
                line++;
                pos += recordLength;
            }
        }
    }
    Console.WriteLine();
    Console.WriteLine("{0} files {1} entries searched.", srchFiles, srchTs);

    // Drain each heap smallest-first into the per-query result arrays.
    List <IndexFileDist[]> result = new List <IndexFileDist[]>(tsList.Count);
    for (int l = 0; l < tsList.Count; l++)
    {
        result.Add(new IndexFileDist[k]);
    }
    for (int t = 0; t < tsList.Count; t++)
    {
        for (int i = 0; i < k; i++)
        {
            result[t][i] = neighbors[t].DeleteMin();
        }
    }
    return result;
}
/// <summary>
/// Top-level insertion into the index. At split depth 0 (before a flush) records are
/// staged in an in-memory buffer keyed by SAX string. Otherwise the record descends
/// into the index: it is buffered in the matching terminal entry, triggers a node
/// split when the terminal is full, or is forwarded into an existing split entry.
/// </summary>
/// <param name="input">The record to insert, in SAX data representation.</param>
public void Insert(SaxData input)
{
    string saxString = Sax.SaxDataRepToSaxStr(input, options.SaxOpts);
    if (splitDepth == 0 && flush == false)
    {
        // First-level staging: accumulate records per SAX string until flushed.
        if (!buffer.ContainsKey(saxString))
        {
            buffer.Add(saxString, new List <SaxData>());
        }
        buffer[saxString].Add(input);
    }
    else
    {
        if (index.ContainsKey(saxString))
        {
            IndexEntry entry = index[saxString];
            if (entry is TermEntry) // if terminal, then search path terminates here
            {
                TermEntry tentry = (TermEntry)entry;
                string oldFileName = tentry.FileName;
                if (SplitEntry(tentry) == false) // check bucket requires a split
                {
                    tentry.InsertToBuffer(input);
                }
                else
                {
                    // Split: gather the terminal's buffered records plus everything
                    // already written to its disk file, then redistribute them into
                    // a new split entry one level deeper.
                    List <SaxData> B = tentry.getbuffer();
                    if (B == null)
                    {
                        B = new List <SaxData>();
                    }
                    DiskCost.increasesavedcost(B.Count);

                    // Rebuild the node's file-name stem "<sym>.<mask>_..." from its
                    // SAX symbols and mask values (trailing '_' trimmed below).
                    ushort[] newMask = this.options.MaskCopy;
                    ushort[] newSaxString = Sax.SaxStrToSaxVals(saxString);
                    string newName = "";
                    for (int i = 0; i < newMask.Length; i++)
                    {
                        newName = newName + newSaxString[i].ToString() + "." + newMask[i].ToString() + "_";
                    }
                    newName = newName.Substring(0, newName.Length - 1);

                    string[] files = Directory.GetFiles(WorkingFolder, string.Concat(newName, "*.txt"));
                    // A terminal that is on disk must have exactly one backing file.
                    if (tentry.OnDisk == true)
                    {
                        Assert.AreEqual(files.Length, 1);
                    }
                    else
                    {
                        Assert.AreEqual(files.Length, 0);
                    }

                    byte[] temp;
                    // NOTE(review): pos is not reset inside the foreach below, so a
                    // second file would be read starting at the first file's length.
                    // The asserts above guarantee at most one file, so this does not
                    // trigger today — confirm before relaxing that invariant.
                    int pos = 0;
                    long length = -1;
                    int bytesToRead = SaxData.ByteLength(typeof(DATAFORMAT));
                    foreach (string f in files)
                    {
                        using (BinaryReader br = new BinaryReader(new FileStream(f, FileMode.Open, FileAccess.Read)))
                        {
                            length = br.BaseStream.Length;
                            if (length != 0)
                            {
                                DiskCost.increaserandomcost();
                            }
                            // NOTE(review): Math.IEEERemainder is a floating-point
                            // remainder; an integer modulo would be the conventional
                            // divisibility check here.
                            if (Math.IEEERemainder(length, bytesToRead) != 0)
                            {
                                throw new ApplicationException("Math.IEEERemainder(br.BaseStream.Length, bytesToRead) != 0");
                            }
                            while (pos < length)
                            {
                                temp = br.ReadBytes(bytesToRead);
                                if (temp.Length != bytesToRead)
                                {
                                    throw new ApplicationException("temp.Length != bytesToRead");
                                }
                                B.Add(SaxData.Parse <DATAFORMAT>(temp));
                                DiskCost.increasereadcost();
                                pos += bytesToRead;
                            }
                        }
                        // The old terminal file's contents now live in B; remove it.
                        File.Delete(f);
                    }

                    // Build the replacement split entry one level deeper; the split
                    // policy decides whether the gathered records inform its options.
                    SplitEntry <DATAFORMAT> newSplit;
                    if (Globals.NewSplitPolicy)
                    {
                        newSplit = new SplitEntry <DATAFORMAT>(saxString, UpdateOptions(B), (byte)(1 + splitDepth));
                    }
                    else
                    {
                        newSplit = new SplitEntry <DATAFORMAT>(saxString, UpdateOptions(null), (byte)(1 + splitDepth));
                    }
                    // Re-insert the new record and all gathered records into the split.
                    newSplit.Insert(input);
                    foreach (SaxData S in B)
                    {
                        newSplit.Insert(S);
                    }
                    // update index entry from terminal to split
                    index[saxString] = newSplit;
                }
            }
            else if (entry is SplitEntry <DATAFORMAT> ) // internal node
            {
                ((SplitEntry <DATAFORMAT>)entry).Insert(input);
            }
        }
        else // saxString has not been seen before, create new file and entry
        {
            // Derive the new terminal's file name from its SAX symbols and mask.
            ushort[] newMask = this.options.MaskCopy;
            ushort[] newSaxString = Sax.SaxStrToSaxVals(saxString);
            string newName = "";
            for (int i = 0; i < newMask.Length; i++)
            {
                newName = newName + newSaxString[i].ToString() + "." + newMask[i].ToString() + "_";
            }
            newName = newName.Substring(0, newName.Length - 1);
            string newfile = Path.Combine(WorkingFolder, string.Concat(newName, ".0.txt"));
            TermEntry newEntry = new TermEntry(saxString, newfile);
            newEntry.InsertToBuffer(input);
            index.Add(saxString, newEntry);
        }
    }
}
// Append a record to the buffer with the given identifier in the global buffer table.
public static void InsertInBuffer(SaxData entry, int i) => TBuffer[i].InsertInBuffer(entry);