/// <summary>
/// Reads every fixed-size record from the file backing a terminal entry and returns
/// the <c>dl</c> member of each parsed record, in file order.
/// </summary>
/// <param name="e">Terminal entry to read; it must satisfy <c>NBuf == -1</c> and <c>OnDisk</c>.</param>
/// <returns>One <c>DATAFORMAT</c> value per record found in <c>e.FileName</c>.</returns>
/// <exception cref="ApplicationException">
/// Thrown when the entry precondition fails, when the file length is not a whole
/// multiple of the record size, or when a record is read short.
/// </exception>
public List<DATAFORMAT> ReturnDataFormatFromTermEntry(TermEntry e)
{
    if (e.NBuf != -1 || !e.OnDisk)
    {
        throw new ApplicationException("e.NBuf!= -1 || ~e.OnDisk");
    }

    List<DATAFORMAT> records = new List<DATAFORMAT>();
    using (BinaryReader br = new BinaryReader(new FileStream(e.FileName, FileMode.Open, FileAccess.Read)))
    {
        long length = br.BaseStream.Length;
        int bytesToRead = SaxData.ByteLength(typeof(DATAFORMAT));

        // Integer remainder instead of Math.IEEERemainder: the latter converts to double and
        // loses precision on very large lengths. A zero record size is rejected explicitly so
        // it still surfaces as ApplicationException rather than DivideByZeroException.
        if (bytesToRead == 0 || length % bytesToRead != 0)
        {
            throw new ApplicationException("Math.IEEERemainder(br.BaseStream.Length, bytesToRead) != 0");
        }

        // The offset is a long so that files larger than int.MaxValue bytes do not overflow it.
        for (long pos = 0; pos < length; pos += bytesToRead)
        {
            byte[] chunk = br.ReadBytes(bytesToRead);
            if (chunk.Length != bytesToRead)
            {
                throw new ApplicationException("temp.Length != bytesToRead");
            }

            records.Add((DATAFORMAT)SaxData.Parse<DATAFORMAT>(chunk).dl);
        }
    }

    return records;
}
/// <summary>
/// Scans every fixed-size record in an index file and returns the record whose
/// normalized time series has the smallest Euclidean distance to <paramref name="ts"/>.
/// </summary>
/// <param name="ts">Query time series passed to <c>Util.EuclideanDistance</c>.</param>
/// <param name="file">Path of the index file to scan.</param>
/// <returns>
/// An <c>IndexFileDist</c> holding the file path, the 1-based record number and the
/// distance of the closest record. On ties the earliest record wins.
/// </returns>
/// <exception cref="InvalidOperationException">Thrown when the file contains no records.</exception>
public static IndexFileDist MinFileEucDist(double[] ts, string file)
{
    IndexFileDist? best = null;
    int lineNum = 1;

    using (FileStream sr = new FileStream(file, FileMode.Open, FileAccess.Read))
    using (BinaryReader r = new BinaryReader(sr))
    {
        // Record size does not change while scanning, so look it up once.
        int recordSize = SaxData.ByteLength(typeof(DATAFORMAT));

        // Keep length and offset as long: casting the stream length to int would
        // truncate for files larger than int.MaxValue bytes.
        long length = sr.Length;
        for (long pos = 0; pos < length; pos += recordSize)
        {
            byte[] raw = r.ReadBytes(recordSize);
            SaxData parsed = SaxData.Parse<DATAFORMAT>(raw);

            double[] fileTs = Util.NormalizationHandler(parsed.dl.GetTimeSeries());
            double dist = Util.EuclideanDistance(fileTs, ts);

            // Strict comparison: only a strictly smaller distance replaces the current best.
            if (!best.HasValue || best.Value.distance > dist)
            {
                best = new IndexFileDist(file, lineNum, dist);
            }

            lineNum++;
        }
    }

    if (!best.HasValue)
    {
        // Same exception type that Nullable<T>.Value raised before, with an actionable message.
        throw new InvalidOperationException("Index file contains no entries: " + file);
    }

    return best.Value;
}
/// <summary>
/// Exact k-nearest-neighbor search by brute force: reads every record of every index file
/// under <c>Globals.IndexRootDir</c> and, for each query, keeps the k records with the
/// smallest Euclidean distance.
/// </summary>
/// <param name="k">Number of neighbors per query; clamped to <c>NumTimeSeries</c> when larger.</param>
/// <param name="tsList">Query time series.</param>
/// <returns>
/// One array per query (same order as <paramref name="tsList"/>), each holding k
/// <c>IndexFileDist</c> entries taken from the heap in <c>DeleteMin</c> order. Record
/// numbers in the entries are 1-based.
/// </returns>
/// <remarks>
/// NOTE(review): result extraction calls <c>DeleteMin</c> exactly k times per query, so it
/// assumes at least k records were actually found on disk — confirm <c>NumTimeSeries</c>
/// always matches the files.
/// </remarks>
public List<IndexFileDist[]> KNearestNeighborSequentialScan(int k, List<double[]> tsList)
{
    CostCounter counter = new CostCounter(0, 0);

    if (k > NumTimeSeries)
    {
        Console.WriteLine("K > number of time series, setting K to number of time series.");
        k = NumTimeSeries;
    }

    // One bounded heap per query; capacity k + 1 because an entry is added before the
    // current maximum is evicted.
    List<IntervalHeap<IndexFileDist>> neighbors = new List<IntervalHeap<IndexFileDist>>(tsList.Count);
    for (int q = 0; q < tsList.Count; q++)
    {
        neighbors.Add(new IntervalHeap<IndexFileDist>(k + 1));
    }

    Console.Write("Retreiving All Index Files:");
    string[] indexFiles = Directory.GetFiles(Globals.IndexRootDir, "*.*.txt", SearchOption.AllDirectories);
    Console.WriteLine(" {0} files.", indexFiles.Length);

    // Progress is printed roughly every tenth of the files (every file when there are fewer than ten).
    int frac = indexFiles.Length / 10;
    int progressStep = frac == 0 ? 1 : frac;

    // Record size is loop-invariant; look it up once instead of twice per record.
    int recordSize = SaxData.ByteLength(typeof(DATAFORMAT));

    int srchFiles = 0;
    int srchTs = 0;

    foreach (string f in indexFiles)
    {
        if (srchFiles % progressStep == 0)
        {
            Console.Write("\r{0}", srchFiles);
        }

        srchFiles++;
        counter.IO++;

        using (FileStream sr = new FileStream(f, FileMode.Open, FileAccess.Read))
        using (BinaryReader r = new BinaryReader(sr))
        {
            // long offsets: an (int) cast of the stream length truncates for very large files.
            long length = sr.Length;
            int line = 0;

            for (long pos = 0; pos < length; pos += recordSize)
            {
                srchTs++;

                byte[] raw = r.ReadBytes(recordSize);
                SaxData parsed = SaxData.Parse<DATAFORMAT>(raw);
                double[] data = parsed.dl.GetTimeSeries();

                // Compare this record against every query and keep only the k closest per query.
                for (int query = 0; query < tsList.Count; query++)
                {
                    double dist = Util.EuclideanDistance(data, tsList[query]);
                    neighbors[query].Add(new IndexFileDist(f, line + 1, dist));
                    if (neighbors[query].Count > k)
                    {
                        neighbors[query].DeleteMax();
                    }
                }

                counter.distance += tsList.Count;
                line++;
            }
        }
    }

    Console.WriteLine();
    Console.WriteLine("{0} files {1} entries searched.", srchFiles, srchTs);

    // Drain each heap from its minimum into that query's result array.
    List<IndexFileDist[]> result = new List<IndexFileDist[]>(tsList.Count);
    for (int t = 0; t < tsList.Count; t++)
    {
        IndexFileDist[] nearest = new IndexFileDist[k];
        for (int i = 0; i < k; i++)
        {
            nearest[i] = neighbors[t].DeleteMin();
        }

        result.Add(nearest);
    }

    return result;
}
/// <summary>
/// Inserts a record into this node.
/// <para>
/// When <c>splitDepth == 0</c> and <c>flush</c> is false the record is only appended to the
/// in-memory <c>buffer</c> under its SAX string. Otherwise it is routed through <c>index</c>:
/// an unseen SAX string gets a new terminal entry; an existing split entry receives the
/// record recursively; an existing terminal entry either buffers the record or, when
/// <c>SplitEntry(tentry)</c> reports a split is needed, is replaced by a new split node that
/// is re-populated with the new record plus the terminal's buffered and on-disk records.
/// </para>
/// </summary>
/// <param name="input">Record to insert.</param>
/// <exception cref="ApplicationException">
/// Thrown while promoting a terminal entry when its file length is not a whole multiple of
/// the record size, or when a record is read short.
/// </exception>
public void Insert(SaxData input)
{
    string saxString = Sax.SaxDataRepToSaxStr(input, options.SaxOpts);

    if (splitDepth == 0 && flush == false)
    {
        if (!buffer.ContainsKey(saxString))
        {
            buffer.Add(saxString, new List<SaxData>());
        }

        buffer[saxString].Add(input);
        return;
    }

    if (!index.ContainsKey(saxString))
    {
        // saxString has not been seen before: create a new terminal entry and its file name.
        string newfile = Path.Combine(WorkingFolder, string.Concat(BuildEntryFileStem(saxString), ".0.txt"));
        TermEntry newEntry = new TermEntry(saxString, newfile);
        newEntry.InsertToBuffer(input);
        index.Add(saxString, newEntry);
        return;
    }

    IndexEntry entry = index[saxString];

    if (entry is SplitEntry<DATAFORMAT> && !(entry is TermEntry))
    {
        // Internal node: delegate to the child.
        ((SplitEntry<DATAFORMAT>)entry).Insert(input);
        return;
    }

    if (!(entry is TermEntry))
    {
        return;
    }

    // Terminal node: the search path ends here.
    TermEntry tentry = (TermEntry)entry;
    if (SplitEntry(tentry) == false)
    {
        tentry.InsertToBuffer(input);
        return;
    }

    // The terminal must be split: gather everything it holds (buffer first, then disk).
    List<SaxData> B = tentry.getbuffer();
    if (B == null)
    {
        B = new List<SaxData>();
    }

    DiskCost.increasesavedcost(B.Count);

    string newName = BuildEntryFileStem(saxString);
    string[] files = Directory.GetFiles(WorkingFolder, string.Concat(newName, "*.txt"));
    if (tentry.OnDisk == true)
    {
        Assert.AreEqual(files.Length, 1);
    }
    else
    {
        Assert.AreEqual(files.Length, 0);
    }

    int bytesToRead = SaxData.ByteLength(typeof(DATAFORMAT));
    foreach (string f in files)
    {
        using (BinaryReader br = new BinaryReader(new FileStream(f, FileMode.Open, FileAccess.Read)))
        {
            long length = br.BaseStream.Length;
            if (length != 0)
            {
                DiskCost.increaserandomcost();
            }

            // Integer remainder instead of Math.IEEERemainder (which goes through double);
            // a zero record size is rejected explicitly so it still raises ApplicationException.
            if (bytesToRead == 0 || length % bytesToRead != 0)
            {
                throw new ApplicationException("Math.IEEERemainder(br.BaseStream.Length, bytesToRead) != 0");
            }

            // The offset is scoped to this file and typed long, so it restarts at zero for
            // every file and cannot overflow on large files.
            for (long pos = 0; pos < length; pos += bytesToRead)
            {
                byte[] temp = br.ReadBytes(bytesToRead);
                if (temp.Length != bytesToRead)
                {
                    throw new ApplicationException("temp.Length != bytesToRead");
                }

                B.Add(SaxData.Parse<DATAFORMAT>(temp));
                DiskCost.increasereadcost();
            }
        }

        // The file's records now live in B; the split node will re-store them.
        File.Delete(f);
    }

    SplitEntry<DATAFORMAT> newSplit;
    if (Globals.NewSplitPolicy)
    {
        newSplit = new SplitEntry<DATAFORMAT>(saxString, UpdateOptions(B), (byte)(1 + splitDepth));
    }
    else
    {
        newSplit = new SplitEntry<DATAFORMAT>(saxString, UpdateOptions(null), (byte)(1 + splitDepth));
    }

    newSplit.Insert(input);
    foreach (SaxData S in B)
    {
        newSplit.Insert(S);
    }

    // Update the index entry from terminal to split.
    index[saxString] = newSplit;
}

/// <summary>
/// Builds the extension-less file name stem for a SAX string: one
/// <c>value.mask</c> pair per mask position, joined with underscores.
/// </summary>
private string BuildEntryFileStem(string saxString)
{
    ushort[] mask = this.options.MaskCopy;
    ushort[] saxVals = Sax.SaxStrToSaxVals(saxString);

    string[] parts = new string[mask.Length];
    for (int i = 0; i < mask.Length; i++)
    {
        parts[i] = saxVals[i].ToString() + "." + mask[i].ToString();
    }

    return string.Join("_", parts);
}