// Walks every entry of `index`, printing one console line per entry and appending
// a matching semicolon-separated record (terminated by "\n") to `output`.
// Split entries are descended into recursively.
//
// index  - index whose entries are reported; its SplitDepth is written on every row.
// parent - value written in the "parent" column of each record; recursive calls
//          pass the SAX word of the split entry being descended into.
//
// Record columns, in order: SAX word, iSAX word, entry kind, base dir, file name,
// parent, split depth, mask value, node count, time-series count. Columns that do
// not apply to an entry kind are left empty.
public static void OutputIndex(Index<RawShapeletFormat> index, String parent)
{
    int depth = index.SplitDepth;

    foreach (IndexEntry entry in index.GetIndexEntries())
    {
        TermEntry leaf = entry as TermEntry;
        if (leaf != null)
        {
            // Terminal entry: report the backing file; it has no dir/mask columns.
            String leafSax = leaf.SaxWord;
            String leafISax = leaf.iSaxWord;
            String leafFile = leaf.FileName;
            int leafNodes = leaf.NumNodes;
            int leafSeries = leaf.NumTimeSeries;

            System.Console.WriteLine(
                "SAX word: {0}, iSax word: {1}, file name: {2}, parent: {3}, split depth: {4}, num nodes: {5}, num TS: {6}",
                leafSax, leafISax, leafFile, parent, depth, leafNodes, leafSeries);

            String[] leafRecord =
            {
                leafSax, leafISax, "TermEntry", "", leafFile, parent,
                depth.ToString(), "", leafNodes.ToString(), leafSeries.ToString()
            };
            output += String.Join(";", leafRecord) + "\n";
            continue;
        }

        // Anything that is not a terminal entry is treated as a split entry
        // (hard cast, so an unexpected entry type fails loudly).
        SplitEntry<RawShapeletFormat> node = (SplitEntry<RawShapeletFormat>)entry;
        String nodeSax = node.SaxWord;
        String nodeISax = node.iSaxWord;
        String nodeDir = node.Options.BaseDir;
        String nodeMask = node.Options.maskValue();
        int nodeNodes = node.NumNodes;
        int nodeSeries = node.NumTimeSeries;

        System.Console.WriteLine(
            "SAX word: {0}, iSax word: {1}, dir: {2}, mask: {3}, split depth: {4}, num nodes: {5}, num TS: {6}",
            nodeSax, nodeISax, nodeDir, nodeMask, depth, nodeNodes, nodeSeries);

        String[] nodeRecord =
        {
            nodeSax, nodeISax, "SplitEntry", nodeDir, "", parent,
            depth.ToString(), nodeMask, nodeNodes.ToString(), nodeSeries.ToString()
        };
        output += String.Join(";", nodeRecord) + "\n";

        // Recurse into the child index, using this entry's SAX word as the parent label.
        OutputIndex(node.GetIndex(), nodeSax);
    }
}
// Searches the index for the stored series closest to `ts`.
//
// ts  - query time series.
// bsf - receives the best match found ("best so far" when the search stops).
//
// Returns a CostCounter in which IO is incremented once per terminal file scanned
// and `distance` is increased by the NumTimeSeries of each scanned entry.
//
// The search is seeded with the result of ApproximateSearch, then entries are
// visited in ascending order of the value returned by Sax.MinDistPAAToiSAX
// (presumably a lower bound on the true distance - confirm against Sax). It stops
// as soon as the smallest queued value is not below the current best distance.
public CostCounter ExactSearch(double[] ts, out IndexFileDist bsf)
{
    CostCounter cost = new CostCounter(0, 0);
    IntervalHeap<IndexEntryDist> queue = new IntervalHeap<IndexEntryDist>(NumIndexEntries);

    // Seed the best-so-far answer from the approximate search.
    TermEntry seed = ApproximateSearch(ts);
    bsf = Index<DATAFORMAT>.MinFileEucDist(ts, seed.FileName);
    cost.IO++;
    cost.distance += seed.NumTimeSeries;

    // Start the queue with every entry stored at this node.
    foreach (IndexEntry rootEntry in index.Values)
    {
        ushort[] rootSax = Sax.SaxStrToSaxVals(rootEntry.SaxWord);
        queue.Add(new IndexEntryDist(rootEntry, Sax.MinDistPAAToiSAX(rootSax, options.SaxOpts, ts)));
    }

    while (!queue.IsEmpty)
    {
        IndexEntryDist head = queue.DeleteMin();
        IndexEntry current = head.entry;

        // Nothing left in the queue can beat the current best match.
        if (head.dist >= bsf.distance)
        {
            break;
        }

        TermEntry leaf = current as TermEntry;
        if (leaf != null)
        {
            // Terminal entry: scan its file and keep the result only if it improves.
            IndexFileDist candidate = Index<DATAFORMAT>.MinFileEucDist(ts, leaf.FileName);
            cost.IO++;
            cost.distance += current.NumTimeSeries;

            if (candidate.distance < bsf.distance)
            {
                bsf = candidate;
            }
            continue;
        }

        SplitEntry<DATAFORMAT> inner = current as SplitEntry<DATAFORMAT>;
        if (inner == null)
        {
            // Neither terminal nor split: ignored, as before.
            continue;
        }

        // Split entry: queue its children, measured with the split's own SAX options.
        foreach (IndexEntry child in inner.GetIndexEntries())
        {
            ushort[] childSax = Sax.SaxStrToSaxVals(child.SaxWord);
            queue.Add(new IndexEntryDist(child, Sax.MinDistPAAToiSAX(childSax, inner.Options.SaxOpts, ts)));
        }
    }

    return cost;
}
// Inserts one SAX record into this index node.
//
// input - the record to insert; its SAX string (computed with this node's SAX
//         options) selects the entry it is routed to.
//
// Behaviour:
//  * At split depth 0 while `flush` is false, the record is only grouped into the
//    in-memory `buffer` under its SAX string; the index itself is not touched.
//  * Otherwise, if the SAX string is unknown, a new terminal entry backed by
//    "<stem>.0.txt" in WorkingFolder is created and the record is buffered in it.
//  * If the SAX string maps to a terminal entry, the record is buffered there
//    unless SplitEntry(tentry) reports that a split is needed. On a split, the
//    entry's buffered records plus any records stored in its file are collected,
//    the file is deleted, and everything (including `input`) is re-inserted into a
//    new SplitEntry one level deeper, which replaces the terminal entry.
//  * If the SAX string maps to a split entry, the insert is delegated to it.
//
// Throws ApplicationException when an entry file's length is not a multiple of the
// record size, or when a record cannot be read in full.
public void Insert(SaxData input)
{
    string saxString = Sax.SaxDataRepToSaxStr(input, options.SaxOpts);

    if (splitDepth == 0 && flush == false)
    {
        if (!buffer.ContainsKey(saxString))
        {
            buffer.Add(saxString, new List<SaxData>());
        }
        buffer[saxString].Add(input);
        return;
    }

    IndexEntry entry;
    if (!index.TryGetValue(saxString, out entry))
    {
        // saxString has not been seen before: create a new file name and entry.
        string newfile = Path.Combine(WorkingFolder, string.Concat(BuildEntryFileStem(saxString), ".0.txt"));
        TermEntry newEntry = new TermEntry(saxString, newfile);
        newEntry.InsertToBuffer(input);
        index.Add(saxString, newEntry);
        return;
    }

    if (entry is TermEntry) // terminal: the search path ends here
    {
        TermEntry tentry = (TermEntry)entry;

        if (SplitEntry(tentry) == false) // bucket does not require a split
        {
            tentry.InsertToBuffer(input);
            return;
        }

        // Collect everything the terminal entry currently holds in memory.
        List<SaxData> B = tentry.getbuffer();
        if (B == null)
        {
            B = new List<SaxData>();
        }
        DiskCost.increasesavedcost(B.Count);

        string[] files = Directory.GetFiles(WorkingFolder, string.Concat(BuildEntryFileStem(saxString), "*.txt"));

        // An entry flagged as on disk must have exactly one file; otherwise none.
        if (tentry.OnDisk == true)
        {
            Assert.AreEqual(files.Length, 1);
        }
        else
        {
            Assert.AreEqual(files.Length, 0);
        }

        // Pull the on-disk records into B and remove the old file(s).
        DrainEntryFiles(files, B);

        SplitEntry<DATAFORMAT> newSplit;
        if (Globals.NewSplitPolicy)
        {
            newSplit = new SplitEntry<DATAFORMAT>(saxString, UpdateOptions(B), (byte)(1 + splitDepth));
        }
        else
        {
            newSplit = new SplitEntry<DATAFORMAT>(saxString, UpdateOptions(null), (byte)(1 + splitDepth));
        }

        newSplit.Insert(input);
        foreach (SaxData S in B)
        {
            newSplit.Insert(S);
        }

        // Update the index entry from terminal to split.
        index[saxString] = newSplit;
    }
    else if (entry is SplitEntry<DATAFORMAT>) // internal node
    {
        ((SplitEntry<DATAFORMAT>)entry).Insert(input);
    }
}

// Builds the file-name stem for `saxString` under this node's mask:
// "<sax0>.<mask0>_<sax1>.<mask1>_..." with no trailing underscore.
private string BuildEntryFileStem(string saxString)
{
    ushort[] mask = this.options.MaskCopy;
    ushort[] saxVals = Sax.SaxStrToSaxVals(saxString);

    System.Text.StringBuilder stem = new System.Text.StringBuilder();
    for (int i = 0; i < mask.Length; i++)
    {
        stem.Append(saxVals[i].ToString()).Append('.').Append(mask[i].ToString()).Append('_');
    }

    // Drop the trailing '_'. An empty mask still raises ArgumentOutOfRangeException
    // here, which keeps an empty stem from ever being used as a file pattern.
    return stem.ToString(0, stem.Length - 1);
}

// Reads every fixed-size record stored in `files` into `target`, charging the
// disk-cost counters as it goes, and deletes each file once it has been read.
private void DrainEntryFiles(string[] files, List<SaxData> target)
{
    int bytesToRead = SaxData.ByteLength(typeof(DATAFORMAT));

    foreach (string f in files)
    {
        using (BinaryReader br = new BinaryReader(new FileStream(f, FileMode.Open, FileAccess.Read)))
        {
            long length = br.BaseStream.Length;
            if (length != 0)
            {
                DiskCost.increaserandomcost();
            }

            if (Math.IEEERemainder(length, bytesToRead) != 0)
            {
                throw new ApplicationException("Math.IEEERemainder(br.BaseStream.Length, bytesToRead) != 0");
            }

            // The cursor is per file and 64-bit, so each file is read from its
            // start and large files cannot overflow the position.
            for (long pos = 0; pos < length; pos += bytesToRead)
            {
                byte[] temp = br.ReadBytes(bytesToRead);
                if (temp.Length != bytesToRead)
                {
                    throw new ApplicationException("temp.Length != bytesToRead");
                }

                target.Add(SaxData.Parse<DATAFORMAT>(temp));
                DiskCost.increasereadcost();
            }
        }

        // The reader is closed at this point, so the file can be removed.
        File.Delete(f);
    }
}