// Reads every serialized DATAFORMAT record from a terminal node's file.
public List<DATAFORMAT> ReturnDataFormatFromTermEntry(TermEntry e)
{
    if (e.NBuf != -1 || !e.OnDisk)
    {
        throw new ApplicationException("e.NBuf != -1 || !e.OnDisk");
    }
    List<DATAFORMAT> tmp = new List<DATAFORMAT>();
    using (BinaryReader br = new BinaryReader(new FileStream(e.FileName, FileMode.Open, FileAccess.Read)))
    {
        long length = br.BaseStream.Length;
        int bytesToRead = SaxData.ByteLength(typeof(DATAFORMAT));
        if (length % bytesToRead != 0)
        {
            throw new ApplicationException("File length is not a multiple of the record size.");
        }
        int pos = 0;
        byte[] temp;
        while (pos < length)
        {
            temp = br.ReadBytes(bytesToRead);
            if (temp.Length != bytesToRead)
            {
                throw new ApplicationException("temp.Length != bytesToRead");
            }
            tmp.Add((DATAFORMAT)SaxData.Parse<DATAFORMAT>(temp).dl);
            pos += bytesToRead;
        }
    }
    return tmp;
}
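// Hedged usage sketch (not part of the original source): "NearestInNode" is a
// hypothetical helper showing the pattern the experiment methods below all follow --
// ApproximateSearch descends to a single terminal node, ReturnDataFormatFromTermEntry
// materializes that node's records, and a linear scan finds the best match.
private static double NearestInNode(Index<Meta1DataFormat> si, double[] query)
{
    TermEntry node = si.ApproximateSearch(query);
    double bsf = Double.MaxValue; // best-so-far distance
    foreach (Meta1DataFormat m in si.ReturnDataFormatFromTermEntry(node))
    {
        bsf = Math.Min(bsf, Util.EuclideanDistance(m.GetTimeSeries(), query));
    }
    return bsf;
}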
public static void OutputIndex(Index<RawShapeletFormat> index, String parent)
{
    int splitDepth = index.SplitDepth;
    foreach (IndexEntry i in index.GetIndexEntries())
    {
        if (i is TermEntry)
        {
            TermEntry term = (TermEntry)i;
            String saxWord = term.SaxWord;
            String iSaxWord = term.iSaxWord;
            String fileName = term.FileName;
            int numNodes = term.NumNodes;
            int numTimeSeries = term.NumTimeSeries;
            System.Console.WriteLine("SAX word: {0}, iSax word: {1}, file name: {2}, parent: {3}, split depth: {4}, num nodes: {5}, num TS: {6}",
                saxWord, iSaxWord, fileName, parent, splitDepth, numNodes, numTimeSeries);
            String[] line = { saxWord, iSaxWord, "TermEntry", "", fileName, parent,
                              splitDepth.ToString(), "", numNodes.ToString(), numTimeSeries.ToString() };
            output = output + String.Join(";", line) + "\n";
        }
        else
        {
            SplitEntry<RawShapeletFormat> split = (SplitEntry<RawShapeletFormat>)i;
            String saxWord = split.SaxWord;
            String iSaxWord = split.iSaxWord;
            String baseDir = split.Options.BaseDir;
            String maskValue = split.Options.maskValue();
            int numNodes = split.NumNodes;
            int numTimeSeries = split.NumTimeSeries;
            System.Console.WriteLine("SAX word: {0}, iSax word: {1}, dir: {2}, mask: {3}, split depth: {4}, num nodes: {5}, num TS: {6}",
                saxWord, iSaxWord, baseDir, maskValue, splitDepth, numNodes, numTimeSeries);
            String[] line = { saxWord, iSaxWord, "SplitEntry", baseDir, "", parent,
                              splitDepth.ToString(), maskValue, numNodes.ToString(), numTimeSeries.ToString() };
            output = output + String.Join(";", line) + "\n";
            Index<RawShapeletFormat> splitIndex = split.GetIndex();
            OutputIndex(splitIndex, saxWord);
        }
    }
}
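// Hedged usage sketch (not in the original source): "rootIndex" is a hypothetical
// already-loaded index, and "output" is the static string field OutputIndex appends
// to (assumed, since its declaration is not shown above). A full dump walks the tree
// from the root with a sentinel parent label, then writes the accumulated
// ';'-separated rows to disk.
public static void DumpIndex(Index<RawShapeletFormat> rootIndex, string path)
{
    output = "";
    OutputIndex(rootIndex, "root");
    File.WriteAllText(path, output); // one row per index entry
}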
public static void TinyImagesExperiment()
{
    Index<Meta1DataFormat> si = Index<Meta1DataFormat>.Load(Globals.IndexRootDir);
    string queryFile = @"F:\Exp\TinyImages_256Len_8Word_2KThreshold\_queries\queries.txt";
    List<double[]> queries = Util.ReadFiletoDoubleList(queryFile, false);
    for (int i = 0; i < queries.Count; ++i)
    {
        queries[i] = Util.NormalizationHandler(Util.DownSample(queries[i], TinyImagesDataLoader.DOWNSAMPLERATE));
        if (queries[i].Length != Globals.TimeSeriesLength)
        {
            throw new ApplicationException("queries[i].Length != Globals.TimeSeriesLength");
        }
        TermEntry res = si.ApproximateSearch(queries[i]);
        Console.WriteLine("Query:{0} FileName:{1}", i, res.FileName);
        List<Meta1DataFormat> metas = si.ReturnDataFormatFromTermEntry(res);
        double bsf = Double.MaxValue;
        Meta1DataFormat bsfMeta = new Meta1DataFormat();
        foreach (Meta1DataFormat m in metas)
        {
            double dist = Util.EuclideanDistance(m.GetTimeSeries(), queries[i]);
            if (dist < bsf)
            {
                bsf = dist;
                bsfMeta = m;
            }
        }
        Console.WriteLine("BsfDist:{0} LocMeta:{1}", bsf, bsfMeta.meta);
    }
}
public TermEntry(string saxWord, string fileName)
    : this(fileName) // debug
{
    if (saxWord != TermEntry.FileNameParseSaxStr(fileName))
    {
        throw new ApplicationException("TermEntry inconsistency: saxWord does not match fileName.");
    }
}
public static int CreateNewBuffer(SaxData entry, TermEntry node)
{
    Buf B = new Buf();
    B.Initialization();
    B.setnode(node);
    B.InsertInBuffer(entry); // insert the time series into the new buffer
    TBuffer.Add(B);
    return TBuffer.Count - 1;
}
public CostCounter ExactSearch(double[] ts, out IndexFileDist bsf)
{
    CostCounter meas = new CostCounter(0, 0);
    IntervalHeap<IndexEntryDist> pq = new IntervalHeap<IndexEntryDist>(NumIndexEntries);

    // approximate search seeds the best-so-far answer
    TermEntry approx = ApproximateSearch(ts);
    bsf = Index<DATAFORMAT>.MinFileEucDist(ts, approx.FileName);
    meas.IO++;
    meas.distance += approx.NumTimeSeries;

    // initialize pq with IndexEntries at root node
    foreach (IndexEntry e in index.Values)
    {
        pq.Add(new IndexEntryDist(e, Sax.MinDistPAAToiSAX(
            Sax.SaxStrToSaxVals(e.SaxWord), options.SaxOpts, ts)));
    }

    while (!pq.IsEmpty)
    {
        IndexEntryDist minInfo = pq.DeleteMin();
        IndexEntry minEntry = minInfo.entry;

        // the smallest lower bound already exceeds the best answer: stop
        if (minInfo.dist >= bsf.distance)
        {
            break;
        }
        if (minEntry is TermEntry)
        {
            IndexFileDist posMin = Index<DATAFORMAT>.MinFileEucDist(ts, ((TermEntry)minEntry).FileName);
            meas.IO++;
            meas.distance += minEntry.NumTimeSeries;
            // update bsf
            if (posMin.distance < bsf.distance)
            {
                bsf = posMin;
            }
        }
        else if (minEntry is SplitEntry<DATAFORMAT>)
        {
            SplitEntry<DATAFORMAT> sEntry = minEntry as SplitEntry<DATAFORMAT>;
            foreach (IndexEntry e in sEntry.GetIndexEntries())
            {
                pq.Add(new IndexEntryDist(e, Sax.MinDistPAAToiSAX(
                    Sax.SaxStrToSaxVals(e.SaxWord), sEntry.Options.SaxOpts, ts)));
            }
        }
    }
    return meas;
}
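// Hedged usage sketch ("si" and "query" are hypothetical stand-ins, not part of the
// original source): ExactSearch reports the true nearest neighbor through the out
// parameter and returns the search cost, so exact and approximate answers can be
// compared directly. Fields are used exactly as in ExactSearch above.
public static void ReportExactSearch(Index<RawDataFormat> si, double[] query)
{
    IndexFileDist exact;
    CostCounter cost = si.ExactSearch(query, out exact);
    Console.WriteLine("Exact dist: {0} ({1} IOs, {2} distance computations)",
        exact.distance, cost.IO, cost.distance);
}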
private bool SplitEntry(TermEntry entry)
{
    // The node must split when admitting one more entry would exceed the threshold.
    int numEntries = entry.NumTimeSeries + 1;
    return numEntries > Globals.IndexNumMaxEntries;
}
public static void InsectExperiment()
{
    Index<Meta3DataFormat> si = Index<Meta3DataFormat>.Load(Globals.IndexRootDir);
    string queryFile = @"C:\Temp\insect\queries.txt";
    List<double[]> queries = Util.ReadFiletoDoubleList(queryFile, true);
    using (StreamWriter sw = new StreamWriter(@"C:\Temp\insect\output.txt"))
    {
        for (int i = 0; i < queries.Count; ++i)
        {
            if (queries[i].Length != Globals.TimeSeriesLength)
            {
                throw new ApplicationException("queries[i].Length != Globals.TimeSeriesLength");
            }
            TermEntry res = si.ApproximateSearch(queries[i]);
            Console.WriteLine("Query:{0} FileName:{1}", i, res.FileName);
            List<Meta3DataFormat> metas = si.ReturnDataFormatFromTermEntry(res);
            double bsf = Double.MaxValue;
            Meta3DataFormat bsfMeta = new Meta3DataFormat();
            foreach (Meta3DataFormat m in metas)
            {
                double dist = Util.EuclideanDistance(m.GetTimeSeries(), queries[i]);
                if (dist < bsf)
                {
                    bsf = dist;
                    bsfMeta = m;
                }
            }
            Console.WriteLine("BsfDist:{0} Meta1:{1} Meta2:{2}", bsf, bsfMeta.meta1, bsfMeta.meta2);
            sw.WriteLine(Util.ArrayToString(queries[i]));
            sw.WriteLine(Util.ArrayToString(bsfMeta.GetTimeSeries()));
        }
    }
}
public void Insert(SaxData input)
{
    string saxString = Sax.SaxDataRepToSaxStr(input, options.SaxOpts);
    if (splitDepth == 0 && !flush)
    {
        // Root level: batch incoming entries in the in-memory buffer until a flush.
        if (!buffer.ContainsKey(saxString))
        {
            buffer.Add(saxString, new List<SaxData>());
        }
        buffer[saxString].Add(input);
    }
    else
    {
        if (index.ContainsKey(saxString))
        {
            IndexEntry entry = index[saxString];
            if (entry is TermEntry) // terminal node: the search path ends here
            {
                TermEntry tentry = (TermEntry)entry;
                if (SplitEntry(tentry) == false) // bucket still has room
                {
                    tentry.InsertToBuffer(input);
                }
                else // bucket is full: split the terminal node
                {
                    List<SaxData> B = tentry.getbuffer();
                    if (B == null)
                    {
                        B = new List<SaxData>();
                    }
                    DiskCost.increasesavedcost(B.Count);

                    // Rebuild the node's file-name stem from its SAX word and mask.
                    ushort[] newMask = this.options.MaskCopy;
                    ushort[] newSaxString = Sax.SaxStrToSaxVals(saxString);
                    string newName = "";
                    for (int i = 0; i < newMask.Length; i++)
                    {
                        newName = newName + newSaxString[i].ToString() + "." + newMask[i].ToString() + "_";
                    }
                    newName = newName.Substring(0, newName.Length - 1);
                    string[] files = Directory.GetFiles(WorkingFolder, string.Concat(newName, "*.txt"));
                    if (tentry.OnDisk == true)
                    {
                        Assert.AreEqual(files.Length, 1);
                    }
                    else
                    {
                        Assert.AreEqual(files.Length, 0);
                    }

                    // Read every record already on disk back into B, then delete the file.
                    int bytesToRead = SaxData.ByteLength(typeof(DATAFORMAT));
                    foreach (string f in files)
                    {
                        using (BinaryReader br = new BinaryReader(new FileStream(f, FileMode.Open, FileAccess.Read)))
                        {
                            long length = br.BaseStream.Length;
                            if (length != 0)
                            {
                                DiskCost.increaserandomcost();
                            }
                            if (length % bytesToRead != 0)
                            {
                                throw new ApplicationException("File length is not a multiple of the record size.");
                            }
                            int pos = 0; // reset per file
                            while (pos < length)
                            {
                                byte[] temp = br.ReadBytes(bytesToRead);
                                if (temp.Length != bytesToRead)
                                {
                                    throw new ApplicationException("temp.Length != bytesToRead");
                                }
                                B.Add(SaxData.Parse<DATAFORMAT>(temp));
                                DiskCost.increasereadcost();
                                pos += bytesToRead;
                            }
                        }
                        File.Delete(f);
                    }

                    // Replace the terminal entry with a split node one level deeper,
                    // then reinsert the new entry plus everything the old bucket held.
                    SplitEntry<DATAFORMAT> newSplit;
                    if (Globals.NewSplitPolicy)
                    {
                        newSplit = new SplitEntry<DATAFORMAT>(saxString, UpdateOptions(B), (byte)(1 + splitDepth));
                    }
                    else
                    {
                        newSplit = new SplitEntry<DATAFORMAT>(saxString, UpdateOptions(null), (byte)(1 + splitDepth));
                    }
                    newSplit.Insert(input);
                    foreach (SaxData S in B)
                    {
                        newSplit.Insert(S);
                    }
                    index[saxString] = newSplit;
                }
            }
            else if (entry is SplitEntry<DATAFORMAT>) // internal node: recurse
            {
                ((SplitEntry<DATAFORMAT>)entry).Insert(input);
            }
        }
        else // saxString has not been seen before: create a new file and terminal entry
        {
            ushort[] newMask = this.options.MaskCopy;
            ushort[] newSaxString = Sax.SaxStrToSaxVals(saxString);
            string newName = "";
            for (int i = 0; i < newMask.Length; i++)
            {
                newName = newName + newSaxString[i].ToString() + "." + newMask[i].ToString() + "_";
            }
            newName = newName.Substring(0, newName.Length - 1);
            string newfile = Path.Combine(WorkingFolder, string.Concat(newName, ".0.txt"));
            TermEntry newEntry = new TermEntry(saxString, newfile);
            newEntry.InsertToBuffer(input);
            index.Add(saxString, newEntry);
        }
    }
}
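// Hedged illustration (not in the original source): both branches of Insert build a
// node file-name stem inline; a helper like this could replace that duplication.
// Each word position contributes "<saxValue>.<maskBits>" joined by underscores, so a
// word {3,1,2} under mask {1,1,1} yields "3.1_1.1_2.1", and the first bucket file
// for that node is "3.1_1.1_2.1.0.txt".
private static string NodeFileStem(ushort[] saxVals, ushort[] mask)
{
    string[] parts = new string[saxVals.Length];
    for (int i = 0; i < saxVals.Length; i++)
    {
        parts[i] = saxVals[i] + "." + mask[i]; // value.mask for this position
    }
    return string.Join("_", parts);
}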
public void setnode(TermEntry node) { Node = node; }
public List<SaxData> BL; // list of time series held in this buffer

public void Initialization()
{
    BL = new List<SaxData>();
    Utilization = 0;
    Node = null;
}
public static void SearchQualityExperiment()
{
    DateTime startTime = DateTime.Now;

    // index construction
    Index<RawDataFormat> si = new Index<RawDataFormat>(0, new IndexOptions("root"));
    DataLoader dl = new GeneratedRawDataLoader(si, Globals.TimeSeriesLength, NUM_TIMESERIES, SEED);
    InsertTimeSeries(dl);
    Console.WriteLine();
    Console.WriteLine("Sequential Disk Accesses: " + DiskCost.seqcost);
    Console.WriteLine("Random Disk Accesses: " + DiskCost.rancost);
    Console.WriteLine("Read Disk Accesses: " + DiskCost.readcost);
    Console.WriteLine("Saved cost in buffer: " + DiskCost.savedcost);
    Console.WriteLine();
    Index<RawDataFormat>.Save(Globals.IndexRootDir, si);
    Index<RawDataFormat> si2 = Index<RawDataFormat>.Load(Globals.IndexRootDir);
    DateTime endConstructionTime = DateTime.Now;
    Console.WriteLine("Index Construction Time: {0}", endConstructionTime - startTime);

    // avg over queries
    const int NUM_QUERIES = 100;
    List<double[]> queries = new List<double[]>(NUM_QUERIES);
    for (int i = 0; i < NUM_QUERIES; i++)
    {
        queries.Add(Util.RandomWalk(Globals.TimeSeriesLength));
    }

    // measured metrics
    double approxSearchDist = 0;
    double approxSearchNodeDist = 0;
    double approxSearchNodeSize = 0;
    CostCounter exactSearchCosts = new CostCounter();

    for (int i = 0; i < queries.Count; ++i)
    {
        // exact search
        IndexFileDist eRes;
        exactSearchCosts += si.ExactSearch(queries[i], out eRes);

        // approximate search
        TermEntry approxNode = si.ApproximateSearch(queries[i]);
        double mDist = double.MaxValue;
        List<RawDataFormat> nodeEntries = si.ReturnDataFormatFromTermEntry(approxNode);
        double sumDists = 0;
        foreach (RawDataFormat rd in nodeEntries)
        {
            double dist = Util.EuclideanDistance(queries[i], rd.GetTimeSeries());
            sumDists += dist;
            if (dist < mDist)
            {
                mDist = dist;
            }
        }
        approxSearchDist += mDist;
        approxSearchNodeDist += sumDists / nodeEntries.Count;
        approxSearchNodeSize += nodeEntries.Count;
    }
    approxSearchDist /= queries.Count;
    approxSearchNodeDist /= queries.Count;
    approxSearchNodeSize /= queries.Count;

    using (StreamWriter sw = new StreamWriter(Path.Combine(ROOT_DIR, "searchQuality.txt")))
    {
        string baseFormat = string.Format("{0}:NumTs_{1}:Th_{2}:Wl_{3}:NewPolicy",
            NUM_TIMESERIES, Globals.IndexNumMaxEntries, Globals.TimeSeriesLength, Globals.NewSplitPolicy);
        sw.WriteLine(baseFormat);
        sw.WriteLine("ExactSearchNumIO {0}", exactSearchCosts.IO / (double)queries.Count);
        sw.WriteLine("ExactSearchNumCalculations {0}", exactSearchCosts.distance / (double)queries.Count);
        sw.WriteLine("ApproxSearchDistance {0}", approxSearchDist);
        sw.WriteLine("ApproxSearchAverageNodeDistance {0}", approxSearchNodeDist);
        sw.WriteLine("ApproxSearchAverageNodeSize {0}", approxSearchNodeSize);
        sw.WriteLine("ValidationString ");
        foreach (double[] query in queries)
        {
            sw.Write("{0} ", query[1]);
        }
        sw.WriteLine();
    }
}
public static void DnaExperiment()
{
    Util.NormalizationHandler = new Util.Normalize(Util.MeanZero_Normalization);

    // in-memory data, referenced by the index
    const string DATAFOLDER = @"K:\Datasets\DNA\Dna2Ts\Monkey_Binary";

    // load index
    Index<Meta2DataFormat> si = Index<Meta2DataFormat>.Load(Globals.IndexRootDir);

    // populate in-memory data
    DnaDataLoader.LoadDnaToMetaBuffer(DATAFOLDER);

    // generate queries
    DateTime queryStart = DateTime.Now;
    int numQueries = 0;
    string[] humanChrs = Directory.GetFiles(@"K:\Datasets\DNA\Dna2Ts\Human_Binary", "*.dat");
    Array.Sort(humanChrs, new NaturalStringComparer());
    Dictionary<string, DnaChrResult> queryResult = new Dictionary<string, DnaChrResult>(humanChrs.Length);

    for (int chrNo = 0; chrNo < humanChrs.Length; ++chrNo)
    {
        string chrFile = humanChrs[chrNo];
        GC.Collect();
        using (BinaryReader br = new BinaryReader(new FileStream(chrFile, FileMode.Open, FileAccess.Read)))
        {
            List<DnaSearchResult> qResults = new List<DnaSearchResult>();
            long fileLength = br.BaseStream.Length / sizeof(int); // number of int samples
            int posShift = Globals.TimeSeriesLength / 4;          // shift queries by quarters
            // fileLength is already a count of int samples, so it is divided by the
            // sample rate only (the original divided by sizeof(int) a second time).
            double[] dnaChr = new double[(int)Math.Floor(fileLength / (double)DnaDataLoader.SAMPLERATE)];
            Console.WriteLine("F:{0} OrigLen:{1} newLen:{2} Shift:{3}", chrFile, fileLength, dnaChr.Length, posShift);

            // downsample
            for (int i = 0; i < dnaChr.Length; ++i)
            {
                double sum = 0;
                int count = 0;
                while (count < DnaDataLoader.SAMPLERATE)
                {
                    sum += br.ReadInt32();
                    count++;
                }
                dnaChr[i] = sum / DnaDataLoader.SAMPLERATE;
            }

            double[] ts = new double[Globals.TimeSeriesLength];
            for (int pos = 0; pos < dnaChr.Length - Globals.TimeSeriesLength; pos += posShift)
            {
                numQueries += 2; // one forward query, one reversed query

                Array.Copy(dnaChr, pos, ts, 0, Globals.TimeSeriesLength);
                double mean = Util.Mean(ts, 0, ts.Length - 1);
                for (int k = 0; k < ts.Length; ++k)
                {
                    ts[k] = ts[k] - mean;
                }

                TermEntry tEntry = si.ApproximateSearch(ts);
                List<Meta2DataFormat> termNodeEntries = si.ReturnDataFormatFromTermEntry(tEntry);
                double bsfDist = Double.MaxValue;
                Meta2DataFormat bsfMeta = new Meta2DataFormat();
                foreach (Meta2DataFormat m in termNodeEntries)
                {
                    double dist = Util.EuclideanDistance(Util.NormalizationHandler(m.GetTimeSeries()), ts);
                    if (dist < bsfDist)
                    {
                        bsfDist = dist;
                        bsfMeta = m;
                    }
                }
                qResults.Add(new DnaSearchResult()
                {
                    dist = bsfDist,
                    matchingChr = bsfMeta._chrNo,
                    matchingPos = bsfMeta._pos,
                    queryChr = chrNo,
                    queryPos = pos,
                });

                // reverse
                ts = ts.Reverse().ToArray();
                tEntry = si.ApproximateSearch(ts);
                termNodeEntries = si.ReturnDataFormatFromTermEntry(tEntry);
                bsfDist = Double.MaxValue;
                bsfMeta = new Meta2DataFormat();
                foreach (Meta2DataFormat m in termNodeEntries)
                {
                    double dist = Util.EuclideanDistance(Util.NormalizationHandler(m.GetTimeSeries()), ts);
                    if (dist < bsfDist)
                    {
                        bsfDist = dist;
                        bsfMeta = m;
                    }
                }
                qResults.Add(new DnaSearchResult()
                {
                    dist = bsfDist,
                    matchingChr = bsfMeta._chrNo,
                    matchingPos = bsfMeta._pos,
                    queryChr = chrNo,
                    queryPos = pos,
                });
            }
            queryResult.Add(chrFile, new DnaChrResult() { results = qResults });
        }
    }

    DateTime queryStop = DateTime.Now;
    Console.WriteLine("{0} Queries, {1} TimeElapsed.", numQueries, queryStop - queryStart);

    // print results
    using (StreamWriter sw = new StreamWriter(Path.Combine(Globals.IndexRootDir, "queryOutput.txt")))
    {
        foreach (KeyValuePair<string, DnaChrResult> kvp in queryResult)
        {
            foreach (DnaSearchResult sr in kvp.Value.results)
            {
                sw.WriteLine(sr.ToString());
            }
        }
    }
}