/// <summary>
/// Service entry point: forwards the request to <c>BermudaMapReduce.Instance.GetData</c> and
/// always hands back a <see cref="BermudaResult"/>, even when the query fails.
/// </summary>
/// <param name="domain">Forwarded unchanged to the map-reduce engine.</param>
/// <param name="query">Forwarded unchanged to the map-reduce engine.</param>
/// <param name="mapreduce">Forwarded unchanged to the map-reduce engine.</param>
/// <param name="merge">Forwarded unchanged to the map-reduce engine.</param>
/// <param name="paging">Forwarded unchanged to the map-reduce engine.</param>
/// <param name="remdepth">Forwarded unchanged to the map-reduce engine.</param>
/// <param name="command">Forwarded unchanged to the map-reduce engine.</param>
/// <param name="cursor">Forwarded unchanged to the map-reduce engine.</param>
/// <param name="paging2">Forwarded unchanged to the map-reduce engine.</param>
/// <returns>
/// The engine's result, or a result carrying only <c>Error</c> when an exception was thrown.
/// In both cases <c>Metadata.OperationTime</c> holds the time spent in this call.
/// </returns>
public BermudaResult GetData(string domain, string query, string mapreduce, string merge, string paging, int remdepth, string command, string cursor, string paging2)
{
    BermudaResult response = null;
    BermudaNodeStatistic statistic = null;
    var timer = Stopwatch.StartNew();

    try
    {
        response = BermudaMapReduce.Instance.GetData(domain, query, mapreduce, merge, paging, remdepth, command, cursor, paging2);
        statistic = response.Metadata;
    }
    catch (BermudaException bermudaFailure)
    {
        // Bermuda errors are reported with their message only.
        response = new BermudaResult { Error = bermudaFailure.Message };
    }
    catch (Exception unexpectedFailure)
    {
        // Anything else is reported with the full exception text.
        response = new BermudaResult { Error = unexpectedFailure.ToString() };
    }

    timer.Stop();

    // Error results (and results without metadata) get a fresh statistic to carry the timing.
    statistic = statistic ?? new BermudaNodeStatistic();
    statistic.OperationTime = timer.Elapsed;
    response.Metadata = statistic;

    return response;
}
/// <summary>
/// Runs the request through <c>BermudaMapReduce.Instance.GetData</c>, turning any exception into
/// an error result and recording how long the call took in <c>Metadata.OperationTime</c>.
/// </summary>
/// <returns>
/// The engine's result on success; otherwise a <see cref="BermudaResult"/> whose <c>Error</c> is
/// the exception message (for <c>BermudaException</c>) or the full exception text (for anything else).
/// </returns>
public BermudaResult GetData(string domain, string query, string mapreduce, string merge, string paging, int remdepth, string command, string cursor, string paging2)
{
    BermudaResult outcome = null;
    BermudaNodeStatistic nodeStats = null;
    var elapsed = Stopwatch.StartNew();

    try
    {
        outcome = BermudaMapReduce.Instance.GetData(domain, query, mapreduce, merge, paging, remdepth, command, cursor, paging2);
        nodeStats = outcome.Metadata;
    }
    catch (BermudaException knownError)
    {
        outcome = new BermudaResult { Error = knownError.Message };
    }
    catch (Exception unknownError)
    {
        outcome = new BermudaResult { Error = unknownError.ToString() };
    }

    elapsed.Stop();

    // A failed call (or a result without metadata) still needs somewhere to put the timing.
    if (nodeStats == null)
    {
        nodeStats = new BermudaNodeStatistic();
    }

    nodeStats.OperationTime = elapsed.Elapsed;
    outcome.Metadata = nodeStats;
    return outcome;
}
/// <summary>
/// Service entry point without a depth argument: forwards the request to
/// <c>BermudaMapReduce.Instance.GetData</c> with a remaining depth of 1 and always returns a
/// <see cref="BermudaResult"/>, even when the query fails.
/// </summary>
/// <returns>
/// The engine's result, or a result carrying only <c>Error</c> when an exception was thrown.
/// In both cases <c>Metadata.LinqExecutionTime</c> holds the time spent in this call.
/// </returns>
public BermudaResult GetData(string domain, string query, string mapreduce, string merge, string paging, string command, string cursor, string paging2)
{
    BermudaResult response = null;
    BermudaNodeStatistic statistic = null;
    var timer = Stopwatch.StartNew();

    try
    {
        response = BermudaMapReduce.Instance.GetData(domain, query, mapreduce, merge, paging, 1, command, cursor, paging2);
        statistic = response.Metadata;
    }
    catch (BermudaException bermudaFailure)
    {
        // Bermuda errors are reported with their message only.
        response = new BermudaResult { Error = bermudaFailure.Message };
    }
    catch (Exception unexpectedFailure)
    {
        // Anything else is reported with the full exception text.
        response = new BermudaResult { Error = unexpectedFailure.ToString() };
    }

    timer.Stop();

    // Error results (and results without metadata) get a fresh statistic to carry the timing.
    statistic = statistic ?? new BermudaNodeStatistic();
    statistic.LinqExecutionTime = timer.Elapsed;
    response.Metadata = statistic;

    return response;
}
/// <summary>
/// Runs the request through <c>BermudaMapReduce.Instance.GetData</c> using a remaining depth of 1,
/// turning any exception into an error result and recording how long the call took in
/// <c>Metadata.LinqExecutionTime</c>.
/// </summary>
/// <returns>
/// The engine's result on success; otherwise a <see cref="BermudaResult"/> whose <c>Error</c> is
/// the exception message (for <c>BermudaException</c>) or the full exception text (for anything else).
/// </returns>
public BermudaResult GetData(string domain, string query, string mapreduce, string merge, string paging, string command, string cursor, string paging2)
{
    BermudaResult outcome = null;
    BermudaNodeStatistic nodeStats = null;
    var elapsed = Stopwatch.StartNew();

    try
    {
        outcome = BermudaMapReduce.Instance.GetData(domain, query, mapreduce, merge, paging, 1, command, cursor, paging2);
        nodeStats = outcome.Metadata;
    }
    catch (BermudaException knownError)
    {
        outcome = new BermudaResult { Error = knownError.Message };
    }
    catch (Exception unknownError)
    {
        outcome = new BermudaResult { Error = unknownError.ToString() };
    }

    elapsed.Stop();

    // A failed call (or a result without metadata) still needs somewhere to put the timing.
    if (nodeStats == null)
    {
        nodeStats = new BermudaNodeStatistic();
    }

    nodeStats.LinqExecutionTime = elapsed.Elapsed;
    outcome.Metadata = nodeStats;
    return outcome;
}
/// <summary>
/// Builds a result page from a cached cursor: obtains the paging function for
/// <paramref name="paging2"/>, invokes it on the cached entry's <c>OriginalData</c>, and
/// serializes what it returns.
/// </summary>
/// <param name="paging2">Paging expression handed to <c>GetPagingFunc</c>.</param>
/// <param name="cachedDatapoints">Cached result whose <c>OriginalData</c>, <c>DataType</c> and <c>CacheKey</c> are read.</param>
/// <param name="dataType">Element type passed to <c>GetPagingFunc</c> and <c>ToArrayCollection</c>.</param>
/// <returns>A new result noted as "Cursor_Hit_1" that carries the cached entry's type key and cache key.</returns>
/// <exception cref="BermudaException">No paging function could be obtained for <paramref name="paging2"/>.</exception>
private BermudaResult GetCursorData(string paging2, BermudaResult cachedDatapoints, Type dataType)
{
    var cursorPager = GetPagingFunc(paging2, dataType);
    if (cursorPager == null)
    {
        throw new BermudaException("Cursor Paging Function required");
    }

    // The pager is called through its Invoke method via reflection.
    var invoker = cursorPager.GetType().GetMethod("Invoke");
    var pagedItems = invoker.Invoke(cursorPager, new object[] { cachedDatapoints.OriginalData });

    var pageArray = ToArrayCollection(pagedItems, dataType);
    var pageJson = JsonConvert.SerializeObject(pageArray);

    return new BermudaResult
    {
        DataType = cachedDatapoints.DataType,
        Data = pageJson,
        Metadata = new BermudaNodeStatistic { Notes = "Cursor_Hit_1" },
        CacheKey = cachedDatapoints.CacheKey
    };
}
/// <summary>
/// Executes a query and returns its serialized result. When <paramref name="remdepth"/> is greater
/// than zero the work is fanned out to peers and the partial results are merged here; otherwise the
/// query is evaluated against the bucket loaded for <paramref name="domain"/> on this node.
/// </summary>
/// <remarks>
/// Fan-out path (remdepth &gt; 0):
///   1. Looks up CachedData under the query key; a fresh hit is returned as "Cache_Hit_1", or routed
///      through GetCursorData when an existing cursor was supplied.
///   2. On a miss, starts one long-running Task per peer from
///      HostEnvironment.Instance.GetAvailablePeerConnections(). Each task checks a per-peer cache key,
///      then either recurses into this method (peer equals Endpoint) or calls the peer through a
///      service client, both with remdepth - 1. Each sub-result is stamped with timing metadata and
///      stored in CachedData; any exception is captured as a "[Failed Node]" error result.
///   3. After Task.WaitAll, the per-peer JSON arrays are concatenated into one array, deserialized,
///      passed through the merge function and then the paging function (each only when non-null),
///      re-serialized, cached under the query key and returned.
/// Local path (remdepth &lt;= 0):
///   1. Requires exactly one bucket interface for the domain.
///   2. Checks CachedData ("Cache_Hit_3"), otherwise reads the bucket's data and applies the filter,
///      map-reduce and paging functions in that order (each only when non-null), then the merge function.
///   3. Serializes the result, caches it under the query key and returns it.
/// Command switches read here: "-nocache" skips every cache lookup (results are still written to the
/// cache); "-nocount" skips the GetCount call that fills stats.TotalItems.
/// When <paramref name="cursor"/> equals MakeCursorToken the unserialized array is kept on the cached
/// result as OriginalData so later cursor requests can page over it.
///
/// NOTE(review): in the fan-out path the key is `cursor ?? GetQueryHash(...)`, so any non-null cursor
///   (including MakeCursorToken and blank strings) is itself used as the cache key -- confirm intended.
/// NOTE(review): the useCursor branch inside the per-peer task looks unreachable: on a cache miss the
///   method throws "Cursor ... not found" before any task is created.
/// NOTE(review): sw3 is started inside each task but never read.
/// NOTE(review): Data.Trim('[', ']') strips every leading/trailing bracket, which presumably breaks
///   payloads whose first or last element is itself an array -- verify against real payloads.
/// NOTE(review): in the local path a "Cache_Hit_4" leaves datapoints null, so the method returns
///   "No Results" instead of the cached JSON; this seems reachable only if the entry appears between
///   the two lookups -- confirm.
/// NOTE(review): cache age is computed from DateTime.Now.Ticks (local time).
/// </remarks>
/// <param name="domain">Data set name; used in the cache key, to find the bucket interface, and in error messages.</param>
/// <param name="query">Filter expression source passed to GetFilterFunc and GetCollections.</param>
/// <param name="mapreduce">Map-reduce expression source passed to GetMapReduceFunc, GetMergeFunc and GetCollections.</param>
/// <param name="merge">Merge expression source passed to GetMergeFunc.</param>
/// <param name="paging">Paging expression source passed to GetPagingFunc.</param>
/// <param name="remdepth">Remaining fan-out depth; values above zero select the fan-out path.</param>
/// <param name="command">Switch string parsed by ParseCommand.</param>
/// <param name="cursor">MakeCursorToken to create a cursor, a non-blank value to read an existing one, otherwise unused.</param>
/// <param name="paging2">Paging expression applied by GetCursorData when serving from a cursor.</param>
/// <returns>
/// The computed or cached result. A metadata-only result ("No Data" / "No Results") is returned when
/// nothing was produced, and a result with only Error set when no peer supplied type information.
/// </returns>
/// <exception cref="BermudaException">
/// A peer failed, peers returned differing data types, no bucket or more than one bucket is loaded
/// for the domain, or the query names more than one collection.
/// </exception>
/// <exception cref="Exception">
/// The supplied cursor is not in the cache (fan-out path), no peer connections are available,
/// or a cached cursor entry has null OriginalData (local path).
/// </exception>
public BermudaResult GetData(string domain, string query, string mapreduce, string merge, string paging, int remdepth, string command, string cursor, string paging2) { var args = ParseCommand(command); bool noCache = args.Contains("-nocache"); bool makeCursor = cursor == MakeCursorToken; bool useCursor = !makeCursor && !string.IsNullOrWhiteSpace(cursor); DateTime minDate = DateTime.MinValue; DateTime maxDate = DateTime.MaxValue; if (remdepth > 0) { //map var queryHash = cursor ?? GetQueryHash(domain, query, mapreduce, merge, paging, null); //reduce BermudaResult cachedDatapoints; if (!noCache && CachedData.TryGetValue(queryHash, out cachedDatapoints) && (DateTime.Now.Ticks - cachedDatapoints.CreatedOn) < CacheLifetime) { #if DEBUG if (CacheTraceMessageLevel < 3) Trace.WriteLine("returned CACHED BLOBS DATAPOINTS results FOR ENTIRE BLOB SET [REMDEPTH:" + remdepth + "]"); #endif if (useCursor) { var dataType = LinqRuntimeTypeBuilder.GetTypeFromTypeKey(cachedDatapoints.DataType); return GetCursorData(paging2, cachedDatapoints, dataType); } else { return new BermudaResult { DataType = cachedDatapoints.DataType, Data = cachedDatapoints.Data, Metadata = new BermudaNodeStatistic { Notes = "Cache_Hit_1" }, CacheKey = cachedDatapoints.CacheKey }; } } else { if (useCursor) throw new Exception("Cursor " + cursor + " not found"); //var assignments = PartitionBlobs(domain, blobInterfaces, minDate, maxDate, false, true); var reducers = HostEnvironment.Instance.GetAvailablePeerConnections(); if (!reducers.Any()) throw new Exception("Specified dataset not loaded: " + domain); ConcurrentDictionary<PeerInfo, BermudaResult> results = new ConcurrentDictionary<PeerInfo, BermudaResult>(); Stopwatch sw = new Stopwatch(); sw.Start(); List<Task> tasks = new List<Task>(); foreach (var reducer in reducers) { Task t = new Task((peerObj) => { var peerInfo = peerObj as PeerInfo; var initiated = DateTime.Now; var subqueryHash = GetQueryHash(domain, query, mapreduce, merge, paging, 
peerInfo.ToString()); Stopwatch sw3 = new Stopwatch(); sw3.Start(); //see if the cache contains a matching result and return it if it's not outdated BermudaResult cachedDatapoints2; if (!noCache && CachedData.TryGetValue(subqueryHash, out cachedDatapoints2) && (DateTime.Now.Ticks - cachedDatapoints2.CreatedOn) < CacheLifetime) { if (CacheTraceMessageLevel < 2) Trace.WriteLine("returned CACHED BLOB DATAPOINT results FOR BLOB SUBSET [REMDEPTH:" + remdepth + "]"); BermudaResult res = null; if (useCursor) { var dataType2 = LinqRuntimeTypeBuilder.GetTypeFromTypeKey(cachedDatapoints2.DataType); res = GetCursorData(paging2, cachedDatapoints2, dataType2); } else { res = new BermudaResult { DataType = cachedDatapoints2.DataType, Data = cachedDatapoints2.Data, Metadata = new BermudaNodeStatistic { Notes = "Cache_Hit_2" } }; } results[peerInfo] = res; } else { try { Stopwatch sw2 = new Stopwatch(); sw2.Start(); BermudaResult subresult = null; if (peerInfo.Equals(Endpoint)) { subresult = GetData(domain, query, mapreduce, merge, paging, remdepth - 1, command, cursor, paging2); } else { using (var client = HostEnvironment.GetServiceClient(peerInfo)) { subresult = client.GetData(domain, query, mapreduce, merge, paging, remdepth - 1, command, cursor, paging2); } //subresult = GetDataFromPeer(domain, query, mapreduce, merge, minDate, maxDate, remdepth - 1, command, assignment.PeerEndpoint.Endpoint); } sw2.Stop(); subresult.CreatedOn = DateTime.Now.Ticks; subresult.Metadata.Initiated = initiated; subresult.Metadata.Completed = DateTime.Now; subresult.Metadata.OperationTime = sw2.Elapsed; results[peerInfo] = CachedData[subqueryHash] = subresult; } catch (Exception ex) { results[peerInfo] = new BermudaResult { Error = "[Failed Node] " + ex }; } } }, reducer, TaskCreationOptions.LongRunning); tasks.Add(t); t.Start(); } Task.WaitAll(tasks.ToArray()); sw.Stop(); #if DEBUG Trace.WriteLine("Join Time:" + sw.Elapsed); #endif if (results.Any(x => x.Value.Error != null)) throw new 
BermudaException("Some nodes failed:\r\n" + string.Join("\r\n", results.Select(x => x.Value.Error))); if (results.All(x => x.Value.Data == null)) return new BermudaResult { Metadata = new BermudaNodeStatistic { Notes = "No Data" } }; //if all results are not the same time throw an error if (results.GroupBy(x => x.Value.DataType).Count() > 1) throw new BermudaException("Subresults must all return the same type"); var dataTypeDescriptor = results.Select(x => x.Value.DataType).FirstOrDefault(x => x != null); if (dataTypeDescriptor == null) return new BermudaResult { Error = "Could not determine the merge type, none of the nodes provided type info" }; //use the passed combine espression to make multiple datapoint sets into one var dataType = LinqRuntimeTypeBuilder.GetTypeFromTypeKey(dataTypeDescriptor); //allItems = results.Values.SelectMany(x => x.DataObject) var totalJson = "[" + string.Join(",", results.Values.Where(x => !string.IsNullOrWhiteSpace(x.Data)).Select(x => x.Data.Trim('[', ']')).Where(x => !string.IsNullOrWhiteSpace(x))) + "]"; var allItems = LinqRuntimeTypeBuilder.DeserializeJson(totalJson, dataTypeDescriptor, true); //var aaa = new JavaScriptSerializer().Deserialize<Datapoint[]>(totalJson); //var ggc = aaa.GroupBy(x => new { x.Id, x.Id2 }).Count(); //InvokeSelectManyViaReflectionTheKilla(results.Values.Select(x => x.DataObject), dataType); var mergeFunc = GetMergeFunc(merge, mapreduce, dataType, dataType); if (mergeFunc != null) { //var dataType = "kdsajkdsa"; var mergeInvokeMethod = mergeFunc.GetType().GetMethod("Invoke"); allItems = mergeInvokeMethod.Invoke(mergeFunc, new object[] { allItems }); // MergeDatapoints(results.Values.Where(x => x.Data != null).SelectMany(x => x.Data), mergeFunc); } var pagingFunc = GetPagingFunc(paging, dataType); if (pagingFunc != null) { var pagingInvokeMethod = pagingFunc.GetType().GetMethod("Invoke"); allItems = pagingInvokeMethod.Invoke(pagingFunc, new object[] { allItems }); } //figure out the metadata var 
finalMetadata = new BermudaNodeStatistic { Notes = "Merged Datapoints in " + sw.Elapsed, NodeId = HostEnvironment.Instance.CurrentInstanceId, ChildNodes = results.Values.Select(x => x.Metadata).ToArray() }; var arraylol = ToArrayCollection(allItems, dataType); var json = JsonConvert.SerializeObject(arraylol); //var json = JsonConvert.SerializeObject(allItems); var originalData = makeCursor ? arraylol : null; var finalResult = new BermudaResult { DataType = dataTypeDescriptor, OriginalData = originalData, Data = json, CreatedOn = DateTime.Now.Ticks, Metadata = finalMetadata, CacheKey = queryHash }; CachedData[queryHash] = finalResult; return finalResult; } } else { ConcurrentDictionary<string, BermudaResult> results = new ConcurrentDictionary<string, BermudaResult>(); BermudaNodeStatistic stats = new BermudaNodeStatistic(); var bucketInterfaces = HostEnvironment.Instance.GetBucketInterfacesForDomain(domain); if (!bucketInterfaces.Any()) throw new BermudaException("Data not loaded for: " + domain); if (bucketInterfaces.Count() > 1) throw new BermudaException("Multiple buckets not supported by BermudaMapReduce"); var queryHash = GetQueryHash(domain, query, mapreduce, merge, paging, Endpoint.ToString()); BermudaResult cachedDatapoints; if (!noCache && CachedData.TryGetValue(queryHash, out cachedDatapoints) && (DateTime.Now.Ticks - cachedDatapoints.CreatedOn) < CacheLifetime) { if (CacheTraceMessageLevel < 2) Trace.WriteLine("returned CACHED BLOB SET DATAPOINT results [REMDEPTH:" + remdepth + "]"); if (useCursor) { var dataType = LinqRuntimeTypeBuilder.GetTypeFromTypeKey(cachedDatapoints.DataType); return GetCursorData(paging2, cachedDatapoints, dataType); } else { return new BermudaResult { DataType = cachedDatapoints.DataType, Data = cachedDatapoints.Data, Metadata = new BermudaNodeStatistic { Notes = "Cache_Hit_3" }, CacheKey = queryHash }; } } else { //Chad: short circuiting to test WCF response time in Azure //return new DatapointResult() { Datapoints = new 
List<Datapoint>(), CreatedOn = DateTime.Now.Ticks, Metadata = new BermudaNodeStatistic() }; //IEnumerable<Datapoint> datapoints = null; object datapoints = null; Stopwatch sw = new Stopwatch(); sw.Start(); Type itemType = null; Type resultType = null; string json = null; foreach (var bucketInterface in bucketInterfaces) { var bucketKey = GetQueryHash(domain, query, mapreduce, merge, paging, Endpoint.ToString()); //see if the cache contains a matching result and return it if it's not outdated BermudaResult cachedDatapoints2; if (!noCache && CachedData.TryGetValue(bucketKey, out cachedDatapoints2) && (DateTime.Now.Ticks - cachedDatapoints2.CreatedOn) < CacheLifetime) { if (CacheTraceMessageLevel < 1) Trace.WriteLine("returned CACHED BLOB DATAPOINT results [REMDEPTH:" + remdepth + "]"); if (useCursor) { if (cachedDatapoints2.OriginalData == null) throw new Exception("Cursor " + cursor + " contains null data"); var dataType = LinqRuntimeTypeBuilder.GetTypeFromTypeKey(cachedDatapoints2.DataType); results[bucketInterface.Name] = GetCursorData(paging2, cachedDatapoints2, dataType); } else { results[bucketInterface.Name] = new BermudaResult { DataType = cachedDatapoints2.DataType, Data = cachedDatapoints2.Data, Metadata = new BermudaNodeStatistic { Notes = "Cache_Hit_4" } }; json = cachedDatapoints2.Data; } } else { //get mentions var collections = GetCollections(query, mapreduce); if (collections.Count() > 1) throw new BermudaException("More than one collection specified: " + string.Join(",", collections)); var table = collections.FirstOrDefault(); var tableName = table == null ? 
null : table.Source; var raw = bucketInterface.GetData(tableName); //var rawType = raw.GetType(); //itemType = ReduceExpressionGeneration.GetTypeOfEnumerable(rawType); itemType = bucketInterface.GetDataType(tableName); var mapreduceFunc = GetMapReduceFunc(mapreduce, itemType, out resultType); var queryFunc = GetFilterFunc(query, itemType); var pagingFunc = GetPagingFunc(paging, resultType); var minDateTicks = minDate.Ticks; var maxDateTicks = maxDate.Ticks; object subresult = raw; //queryFunc == null ? // raw.AsParallel() : //minDate == DateTime.MinValue && maxDate == DateTime.MaxValue ? // raw.AsParallel().Where(x => queryFunc) : // raw.AsParallel().Where(x => x.OccurredOnTicks >= minDateTicks && x.OccurredOnTicks <= maxDateTicks && queryFunc(x, parameters)); if (json == null) { if (queryFunc != null) { var queryFuncInvoke = queryFunc.GetType().GetMethod("Invoke"); subresult = queryFuncInvoke.Invoke(queryFunc, new object[] { subresult }); } //reduce them using the passed expression if (mapreduceFunc != null) { var mapReduceFuncInvoke = mapreduceFunc.GetType().GetMethod("Invoke"); subresult = mapReduceFuncInvoke.Invoke(mapreduceFunc, new object[] { subresult }); } if (pagingFunc != null) { var pagingInvokeMethod = pagingFunc.GetType().GetMethod("Invoke"); subresult = pagingInvokeMethod.Invoke(pagingFunc, new object[] { subresult }); } datapoints = subresult; } //format a metada string if (!args.Contains("-nocount")) { stats.TotalItems = bucketInterface.GetCount(tableName); //stats.FilteredItems = filtered.Count(); //stats.ReducedItems = subresult.Count(); } //cache the result //results[blobInterface.Name] = new DatapointResult { Datapoints = subresult, CreatedOn = DateTime.UtcNow.Ticks, Metadata = stats.Serialize() }; //CachedDatapoints[blobKey] = new DatapointResult { Datapoints = subresult.ToList(), CreatedOn = DateTime.UtcNow.Ticks, Metadata = stats.Serialize() }; } } //figure out the metadata //var finalMetadata = " [@" + 
AzureInterface.Instance.CurrentInstanceId + "] Calculated Datapoints:\r\n" + string.Join("\r\n", results.Values.Select(x => x.Metadata)); stats.NodeId = HostEnvironment.Instance.CurrentInstanceId; stats.Notes = "Computed Datapoints"; //Trace.WriteLine("total mentions processed: " + mentionCount); //var datapoints = results.Values.SelectMany(x => x.Datapoints); if (datapoints == null) return new BermudaResult() { Metadata = new BermudaNodeStatistic { Notes = "No Results" } }; //foreach (var p in datapoints) if (p.IsCount) p.Value = p.Count; var mergeFunc = GetMergeFunc(merge, mapreduce, itemType, resultType); if (mergeFunc != null) { var mergeFuncInvoke = mergeFunc.GetType().GetMethod("Invoke"); datapoints = mergeFuncInvoke.Invoke(mergeFunc, new object[] { datapoints }); } stats.LinqExecutionTime = sw.Elapsed; var arraylol = ToArrayCollection(datapoints, resultType); if (json == null && datapoints != null) { json = JsonConvert.SerializeObject(arraylol); } //var json = JsonConvert.SerializeObject(datapoints); var originalData = makeCursor ? arraylol : null; var result = CachedData[queryHash] = new BermudaResult { DataType = LinqRuntimeTypeBuilder.GetTypeKey(resultType), OriginalData = originalData, Data = json, CreatedOn = DateTime.Now.Ticks, Metadata = stats }; sw.Stop(); return result; } } }
/// <summary>
/// Serves one page out of a cached cursor. The paging function built from
/// <paramref name="paging2"/> is run over <c>cachedDatapoints.OriginalData</c> and its output is
/// returned as JSON.
/// </summary>
/// <param name="paging2">Paging expression handed to <c>GetPagingFunc</c>.</param>
/// <param name="cachedDatapoints">Cached result supplying <c>OriginalData</c>, <c>DataType</c> and <c>CacheKey</c>.</param>
/// <param name="dataType">Element type passed to <c>GetPagingFunc</c> and <c>ToArrayCollection</c>.</param>
/// <returns>A new result noted as "Cursor_Hit_1" with the cached entry's type key and cache key.</returns>
/// <exception cref="BermudaException"><c>GetPagingFunc</c> returned null.</exception>
private BermudaResult GetCursorData(string paging2, BermudaResult cachedDatapoints, Type dataType)
{
    var pager = GetPagingFunc(paging2, dataType);

    // Guard first: a cursor cannot be read without a paging function.
    if (pager == null)
    {
        throw new BermudaException("Cursor Paging Function required");
    }

    // Call the pager's Invoke method through reflection, passing the cached items.
    object[] pagerArguments = { cachedDatapoints.OriginalData };
    var page = pager.GetType().GetMethod("Invoke").Invoke(pager, pagerArguments);

    var serializedPage = JsonConvert.SerializeObject(ToArrayCollection(page, dataType));

    var cursorResult = new BermudaResult
    {
        DataType = cachedDatapoints.DataType,
        Data = serializedPage,
        Metadata = new BermudaNodeStatistic { Notes = "Cursor_Hit_1" },
        CacheKey = cachedDatapoints.CacheKey
    };
    return cursorResult;
}
/// <summary>
/// Blob-based query execution. When <paramref name="remdepth"/> is greater than zero the blobs are
/// partitioned across peers and the partial results merged here; otherwise the query is evaluated
/// against the named blobs on this node.
/// </summary>
/// <remarks>
/// Fan-out path (remdepth &gt; 0):
///   1. Resolves the blob interfaces (AzureInterface.Instance.ListBlobs for the date range when
///      <paramref name="blobs"/> is null, otherwise GetBlobInterfacesByNames) and builds a cache key
///      from their names plus the query arguments. A fresh cache entry is returned as "Cache_Hit_1".
///   2. On a miss, PartitionBlobs produces the assignments and one long-running Task is started per
///      assignment. Each task checks a per-assignment cache key, then either recurses into this method
///      with the assignment's blob names (peer equals Endpoint) or calls the peer through a service
///      client, both with remdepth - 1. Sub-results are stamped with timing metadata and cached; any
///      exception is captured as a "[Failed Node]" error result.
///   3. After Task.WaitAll, throws only if every node failed; otherwise concatenates the per-node
///      JSON arrays, deserializes them, applies the merge function when non-null, caches the result
///      under the blob-set key and returns it.
/// Local path (remdepth &lt;= 0):
///   1. Resolves the blob interfaces by name and checks the cache ("Cache_Hit_3").
///   2. For each blob: uses a per-blob cache entry if fresh, otherwise reads the blob, wraps it with
///      AsParallel() and applies the filter and map-reduce functions (each only when non-null).
///   3. Applies the merge function when a result type is known, caches and returns the result.
///
/// NOTE(review): the remote client.GetData call does not pass the assignment's blob names, unlike the
///   local recursive call -- confirm the peer derives its own blob set.
/// NOTE(review): only "all nodes failed" throws. A partially failed run continues, and the error
///   results are created with only Error set, so the later DataType grouping and x.Data.Trim(...)
///   presumably see null DataType/Data for them -- verify.
/// NOTE(review): in the local loop `datapoints` is overwritten on every iteration, so only the last
///   blob's result reaches the merge step -- confirm intended.
/// NOTE(review): if every blob is served from the per-blob cache, resultType stays null and is passed
///   to LinqRuntimeTypeBuilder.GetTypeKey -- verify that call accepts null.
/// NOTE(review): the "-nocount" block is empty (its statements are commented out), and
///   minDateTicks, maxDateTicks and sw3 are assigned but never read.
/// </remarks>
/// <param name="domain">Data set name; used to resolve blobs, in cache keys and in error messages.</param>
/// <param name="blobs">Blob names to query, or null to list blobs for the date range (fan-out path only).</param>
/// <param name="query">Filter expression source passed to GetFilterFunc.</param>
/// <param name="mapreduce">Map-reduce expression source passed to GetMapReduceFunc and GetMergeFunc.</param>
/// <param name="merge">Merge expression source passed to GetMergeFunc.</param>
/// <param name="minDate">Lower date bound; used for blob listing, partitioning and cache keys.</param>
/// <param name="maxDate">Upper date bound; used for blob listing, partitioning and cache keys.</param>
/// <param name="remdepth">Remaining fan-out depth; values above zero select the fan-out path.</param>
/// <param name="parameters">Included in cache keys and forwarded to recursive and remote calls.</param>
/// <param name="command">Switch string parsed by ParseCommand.</param>
/// <returns>
/// The computed or cached result. A metadata-only "No Results" result is returned when nothing was
/// produced, and a result with only Error set when no node supplied type information.
/// </returns>
/// <exception cref="Exception">
/// No assignments were produced for the domain, every node failed, or nodes returned differing data types.
/// </exception>
public BermudaResult GetData(string domain, IEnumerable<string> blobs, string query, string mapreduce, string merge, DateTime minDate, DateTime maxDate, int remdepth, object[] parameters, string command) { var args = ParseCommand(command); if (remdepth > 0) { //map var blobInterfaces = blobs == null ? AzureInterface.Instance.ListBlobs(domain, minDate.Ticks, maxDate.Ticks) : AzureInterface.Instance.GetBlobInterfacesByNames(domain, blobs); var blobSetKey = GetQueryChecksum(domain, string.Join(",", blobInterfaces.Select(x => x.Name)), query, mapreduce, minDate, maxDate, parameters, null); //reduce BermudaResult cachedDatapoints; if (CachedData.TryGetValue(blobSetKey, out cachedDatapoints) && (DateTime.Now.Ticks - cachedDatapoints.CreatedOn) < CacheLifetime) { if (CacheTraceMessageLevel < 3) Trace.WriteLine("returned CACHED BLOBS DATAPOINTS results FOR ENTIRE BLOB SET [REMDEPTH:" + remdepth + "]"); return new BermudaResult { DataType = cachedDatapoints.DataType, Data = cachedDatapoints.Data, MetadataObject = new BermudaNodeStatistic { Notes = "Cache_Hit_1" } }; } else { var assignments = PartitionBlobs(domain, blobInterfaces, minDate, maxDate, false, true); if (!assignments.Any()) throw new Exception("Specified dataset not loaded: " + domain); ConcurrentDictionary<IPEndPoint, BermudaResult> results = new ConcurrentDictionary<IPEndPoint, BermudaResult>(); Stopwatch sw = new Stopwatch(); sw.Start(); List<Task> tasks = new List<Task>(); foreach (var ass in assignments) { Task t = new Task((assObj) => { ZipMetadata assignment = assObj as ZipMetadata; var initiated = DateTime.Now; var blobSubsetKey = GetQueryChecksum(domain, string.Join(",", assignment.Blobs.Select(x => x.Name)), query, mapreduce, minDate, maxDate, parameters, assignment.PeerEndpoint.ToString()); Stopwatch sw3 = new Stopwatch(); sw3.Start(); //see if the cache contains a matching result and return it if it's not outdated BermudaResult cachedDatapoints2; if (CachedData.TryGetValue(blobSubsetKey, out 
cachedDatapoints2) && (DateTime.Now.Ticks - cachedDatapoints2.CreatedOn) < CacheLifetime) { if (CacheTraceMessageLevel < 2) Trace.WriteLine("returned CACHED BLOB DATAPOINT results FOR BLOB SUBSET [REMDEPTH:" + remdepth + "]"); results[assignment.PeerEndpoint] = new BermudaResult { DataType = cachedDatapoints2.DataType, Data = cachedDatapoints2.Data, MetadataObject = new BermudaNodeStatistic { Notes = "Cache_Hit_2" } }; } else { try { Stopwatch sw2 = new Stopwatch(); sw2.Start(); BermudaResult subresult = null; if (assignment.PeerEndpoint.Equals(Endpoint)) { subresult = GetData(domain, assignment.Blobs.Select(x => x.Name), query, mapreduce, merge, minDate, maxDate, remdepth - 1, parameters, command); } else { using (var client = AzureInterface.Instance.GetServiceClient(assignment.PeerEndpoint)) { subresult = client.GetData(domain, query, mapreduce, merge, minDate, maxDate, remdepth - 1, parameters, command); } } sw2.Stop(); subresult.CreatedOn = DateTime.Now.Ticks; subresult.MetadataObject.Initiated = initiated; subresult.MetadataObject.Completed = DateTime.Now; subresult.MetadataObject.OperationTime = sw2.Elapsed; results[assignment.PeerEndpoint] = CachedData[blobSubsetKey] = subresult; } catch (Exception ex) { results[assignment.PeerEndpoint] = new BermudaResult { Error = "[Failed Node] " + ex }; } } }, ass, TaskCreationOptions.LongRunning); tasks.Add(t); t.Start(); } Task.WaitAll(tasks.ToArray()); sw.Stop(); Trace.WriteLine("Join Time:" + sw.Elapsed); if (results.All(x => x.Value.Error != null)) throw new Exception("All nodes failed:\r\n" + string.Join("\r\n", results.Select(x => x.Value.Error))); //if all results are not the same time throw an error if (results.GroupBy(x => x.Value.DataType).Count() > 1) throw new Exception("Subresults must all return the same type"); var dataTypeDescriptor = results.Select(x => x.Value.DataType).FirstOrDefault(x => x != null); if (dataTypeDescriptor == null) return new BermudaResult { Error = "Could not determine the merge 
type, none of the nodes provided type info" }; //use the passed combine espression to make multiple datapoint sets into one var dataType = LinqRuntimeTypeBuilder.GetTypeFromTypeKey(dataTypeDescriptor); //allItems = results.Values.SelectMany(x => x.DataObject) var totalJson = "[" + string.Join(",", results.Values.Select(x => x.Data.Trim('[', ']'))) + "]"; var allItems = LinqRuntimeTypeBuilder.DeserializeJson(totalJson, dataTypeDescriptor, true); //var aaa = new JavaScriptSerializer().Deserialize<Datapoint[]>(totalJson); //var ggc = aaa.GroupBy(x => new { x.Id, x.Id2 }).Count(); //InvokeSelectManyViaReflectionTheKilla(results.Values.Select(x => x.DataObject), dataType); var mergeFunc = GetMergeFunc(merge, mapreduce, dataType); if (mergeFunc != null) { //var dataType = "kdsajkdsa"; var mergeInvokeMethod = mergeFunc.GetType().GetMethod("Invoke"); allItems = mergeInvokeMethod.Invoke(mergeFunc, new object[] { allItems }); // MergeDatapoints(results.Values.Where(x => x.Data != null).SelectMany(x => x.Data), mergeFunc); } //figure out the metadata var finalMetadata = new BermudaNodeStatistic { Notes = "Merged Datapoints in " + sw.Elapsed, NodeId = AzureInterface.Instance.CurrentInstanceId, ChildNodes = results.Values.Select(x => x.MetadataObject ).ToArray() }; var finalResult = new BermudaResult { DataType = dataTypeDescriptor, DataObject = allItems, CreatedOn = DateTime.Now.Ticks, MetadataObject = finalMetadata }; CachedData[blobSetKey] = finalResult; return finalResult; } } else { ConcurrentDictionary<string, BermudaResult> results = new ConcurrentDictionary<string, BermudaResult>(); BermudaNodeStatistic stats = new BermudaNodeStatistic(); var blobInterfaces = AzureInterface.Instance.GetBlobInterfacesByNames(domain, blobs); var blobSetKey = GetQueryChecksum(domain, string.Join(",", blobInterfaces.Select(x => x.Name)), query, mapreduce, minDate, maxDate, parameters, Endpoint.ToString()); BermudaResult cachedDatapoints; if (CachedData.TryGetValue(blobSetKey, out 
cachedDatapoints) && (DateTime.Now.Ticks - cachedDatapoints.CreatedOn) < CacheLifetime) { if (CacheTraceMessageLevel < 2) Trace.WriteLine("returned CACHED BLOB SET DATAPOINT results [REMDEPTH:" + remdepth + "]"); return new BermudaResult { DataType = cachedDatapoints.DataType, Data = cachedDatapoints.Data, MetadataObject = new BermudaNodeStatistic { Notes = "Cache_Hit_3" } }; } else { //Chad: short circuiting to test WCF response time in Azure //return new DatapointResult() { Datapoints = new List<Datapoint>(), CreatedOn = DateTime.Now.Ticks, Metadata = new BermudaNodeStatistic() }; //IEnumerable<Datapoint> datapoints = null; object datapoints = null; Stopwatch sw = new Stopwatch(); sw.Start(); Type itemType = null; Type resultType = null; foreach (var blobInterface in blobInterfaces) { var blobKey = GetQueryChecksum(domain, blobInterface.Name, query, mapreduce, minDate, maxDate, parameters, Endpoint.ToString()); //see if the cache contains a matching result and return it if it's not outdated BermudaResult cachedDatapoints2; if (CachedData.TryGetValue(blobKey, out cachedDatapoints2) && (DateTime.Now.Ticks - cachedDatapoints2.CreatedOn) < CacheLifetime) { if (CacheTraceMessageLevel < 1) Trace.WriteLine("returned CACHED BLOB DATAPOINT results [REMDEPTH:" + remdepth + "]"); results[blobInterface.Name] = new BermudaResult { DataType = cachedDatapoints2.DataType, Data = cachedDatapoints2.Data, MetadataObject = new BermudaNodeStatistic { Notes = "Cache_Hit_4" } }; datapoints = cachedDatapoints2.DataObject; } else { //get mentions var raw = blobInterface.GetData(); var rawType = raw.GetType(); itemType = ReduceExpressionGeneration.GetTypeOfEnumerable(rawType); var mapreduceFunc = GetMapReduceFunc(mapreduce, itemType, out resultType); var queryFunc = GetFilterFunc(query, itemType); var minDateTicks = minDate.Ticks; var maxDateTicks = maxDate.Ticks; object subresult = raw.AsParallel(); //queryFunc == null ? 
// raw.AsParallel() : //minDate == DateTime.MinValue && maxDate == DateTime.MaxValue ? // raw.AsParallel().Where(x => queryFunc) : // raw.AsParallel().Where(x => x.OccurredOnTicks >= minDateTicks && x.OccurredOnTicks <= maxDateTicks && queryFunc(x, parameters)); if (queryFunc != null) { var queryFuncInvoke = queryFunc.GetType().GetMethod("Invoke"); subresult = queryFuncInvoke.Invoke(queryFunc, new object[] { subresult }); } //reduce them using the passed expression if (mapreduceFunc != null) { var mapReduceFuncInvoke = mapreduceFunc.GetType().GetMethod("Invoke"); subresult = mapReduceFuncInvoke.Invoke(mapreduceFunc, new object[] { subresult }); } datapoints = subresult; //format a metada string if (!args.Contains("-nocount")) { //stats.TotalItems = raw.Count(); //stats.FilteredItems = filtered.Count(); //stats.ReducedItems = subresult.Count(); } //cache the result //results[blobInterface.Name] = new DatapointResult { Datapoints = subresult, CreatedOn = DateTime.UtcNow.Ticks, Metadata = stats.Serialize() }; //CachedDatapoints[blobKey] = new DatapointResult { Datapoints = subresult.ToList(), CreatedOn = DateTime.UtcNow.Ticks, Metadata = stats.Serialize() }; } } //figure out the metadata //var finalMetadata = " [@" + AzureInterface.Instance.CurrentInstanceId + "] Calculated Datapoints:\r\n" + string.Join("\r\n", results.Values.Select(x => x.Metadata)); stats.NodeId = AzureInterface.Instance.CurrentInstanceId; stats.Notes = "Computed Datapoints"; //Trace.WriteLine("total mentions processed: " + mentionCount); //var datapoints = results.Values.SelectMany(x => x.Datapoints); if (datapoints == null) return new BermudaResult() { MetadataObject = new BermudaNodeStatistic { Notes = "No Results" } }; //foreach (var p in datapoints) if (p.IsCount) p.Value = p.Count; var mergeFunc = resultType == null ? 
null : GetMergeFunc(merge, mapreduce, resultType); if (mergeFunc != null) { var mergeFuncInvoke = mergeFunc.GetType().GetMethod("Invoke"); datapoints = mergeFuncInvoke.Invoke(mergeFunc, new object[] { datapoints }); } sw.Stop(); stats.LinqExecutionTime = sw.Elapsed; var result = CachedData[blobSetKey] = new BermudaResult { DataType = LinqRuntimeTypeBuilder.GetTypeKey(resultType), DataObject = datapoints, CreatedOn = DateTime.Now.Ticks, MetadataObject = stats }; return result; } } }
/// <summary>
/// Executes a query for <paramref name="domain"/> and returns the (possibly cached) result.
/// When <paramref name="remdepth"/> is greater than zero the query is fanned out to every peer returned by
/// <c>HostEnvironment.Instance.GetAvailablePeerConnections()</c> with <c>remdepth - 1</c>, and the peers'
/// JSON payloads are concatenated, merged, paged and cached. Otherwise the query is evaluated against the
/// single bucket interface registered for the domain on this node.
/// </summary>
/// <param name="domain">Domain whose peers / bucket interfaces are queried.</param>
/// <param name="query">Filter expression, passed to <c>GetFilterFunc</c> and <c>GetCollections</c>.</param>
/// <param name="mapreduce">Map/reduce expression, passed to <c>GetMapReduceFunc</c>.</param>
/// <param name="merge">Merge expression, passed to <c>GetMergeFunc</c>.</param>
/// <param name="paging">Paging expression, passed to <c>GetPagingFunc</c>.</param>
/// <param name="remdepth">Remaining fan-out depth; values above zero delegate to peers, zero or less computes locally.</param>
/// <param name="command">Command string parsed by <c>ParseCommand</c>; the flags <c>-nocache</c> and <c>-nocount</c> are read here.</param>
/// <param name="cursor">
/// Either <c>MakeCursorToken</c> (keep the original data on the cached result), a cache key of a
/// previous result to page through, or null/blank for a plain query.
/// </param>
/// <param name="paging2">Paging argument forwarded to <c>GetCursorData</c> when a cursor is used.</param>
/// <returns>The query result; results with no data carry only metadata notes ("No Data" / "No Results").</returns>
/// <exception cref="BermudaException">
/// A peer failed, peers returned differing data types, no data is loaded for the domain,
/// more than one bucket exists for the domain, or more than one collection is referenced.
/// </exception>
/// <exception cref="Exception">The cursor is not in the cache (or holds no original data), or no peers are available.</exception>
public BermudaResult GetData(string domain, string query, string mapreduce, string merge, string paging, int remdepth, string command, string cursor, string paging2)
{
    var args = ParseCommand(command);
    bool noCache = args.Contains("-nocache");
    bool makeCursor = cursor == MakeCursorToken;
    bool useCursor = !makeCursor && !string.IsNullOrWhiteSpace(cursor);

    if (remdepth > 0)
    {
        // Only a real cursor may stand in for the cache key. The make-cursor sentinel and blank
        // strings must fall back to the query hash, otherwise unrelated queries share one cache slot.
        var queryHash = useCursor
            ? cursor
            : GetQueryHash(domain, query, mapreduce, merge, paging, null);

        BermudaResult cachedDatapoints;
        if (!noCache && CachedData.TryGetValue(queryHash, out cachedDatapoints) && (DateTime.Now.Ticks - cachedDatapoints.CreatedOn) < CacheLifetime)
        {
#if DEBUG
            if (CacheTraceMessageLevel < 3)
            {
                Trace.WriteLine("returned CACHED BLOBS DATAPOINTS results FOR ENTIRE BLOB SET [REMDEPTH:" + remdepth + "]");
            }
#endif

            if (useCursor)
            {
                var dataType = LinqRuntimeTypeBuilder.GetTypeFromTypeKey(cachedDatapoints.DataType);
                return GetCursorData(paging2, cachedDatapoints, dataType);
            }
            else
            {
                return new BermudaResult
                {
                    DataType = cachedDatapoints.DataType,
                    Data = cachedDatapoints.Data,
                    Metadata = new BermudaNodeStatistic { Notes = "Cache_Hit_1" },
                    CacheKey = cachedDatapoints.CacheKey
                };
            }
        }
        else
        {
            // A cursor can only be served from the cache; a miss is an error.
            if (useCursor)
            {
                throw new Exception("Cursor " + cursor + " not found");
            }

            var reducers = HostEnvironment.Instance.GetAvailablePeerConnections();
            if (!reducers.Any())
            {
                throw new Exception("Specified dataset not loaded: " + domain);
            }

            var results = new ConcurrentDictionary<PeerInfo, BermudaResult>();
            Stopwatch sw = Stopwatch.StartNew();

            // One long-running task per peer; each task records exactly one entry in 'results'.
            var tasks = new List<Task>();
            foreach (var reducer in reducers)
            {
                Task t = new Task((peerObj) =>
                {
                    var peerInfo = peerObj as PeerInfo;
                    var initiated = DateTime.Now;
                    var subqueryHash = GetQueryHash(domain, query, mapreduce, merge, paging, peerInfo.ToString());

                    // Reuse this peer's cached sub-result if it is still fresh.
                    BermudaResult cachedDatapoints2;
                    if (!noCache && CachedData.TryGetValue(subqueryHash, out cachedDatapoints2) && (DateTime.Now.Ticks - cachedDatapoints2.CreatedOn) < CacheLifetime)
                    {
                        if (CacheTraceMessageLevel < 2)
                        {
                            Trace.WriteLine("returned CACHED BLOB DATAPOINT results FOR BLOB SUBSET [REMDEPTH:" + remdepth + "]");
                        }

                        // Cursor requests never reach this point (they return or throw above),
                        // so the cached payload is always handed back as-is.
                        results[peerInfo] = new BermudaResult
                        {
                            DataType = cachedDatapoints2.DataType,
                            Data = cachedDatapoints2.Data,
                            Metadata = new BermudaNodeStatistic { Notes = "Cache_Hit_2" }
                        };
                    }
                    else
                    {
                        try
                        {
                            Stopwatch sw2 = Stopwatch.StartNew();
                            BermudaResult subresult = null;

                            if (peerInfo.Equals(Endpoint))
                            {
                                // This node is itself one of the peers: recurse in-process.
                                subresult = GetData(domain, query, mapreduce, merge, paging, remdepth - 1, command, cursor, paging2);
                            }
                            else
                            {
                                using (var client = HostEnvironment.GetServiceClient(peerInfo))
                                {
                                    subresult = client.GetData(domain, query, mapreduce, merge, paging, remdepth - 1, command, cursor, paging2);
                                }
                            }

                            sw2.Stop();
                            subresult.CreatedOn = DateTime.Now.Ticks;
                            subresult.Metadata.Initiated = initiated;
                            subresult.Metadata.Completed = DateTime.Now;
                            subresult.Metadata.OperationTime = sw2.Elapsed;

                            results[peerInfo] = CachedData[subqueryHash] = subresult;
                        }
                        catch (Exception ex)
                        {
                            // Record the failure instead of faulting the task so every node is reported together.
                            results[peerInfo] = new BermudaResult { Error = "[Failed Node] " + ex };
                        }
                    }
                }, reducer, TaskCreationOptions.LongRunning);

                tasks.Add(t);
                t.Start();
            }

            Task.WaitAll(tasks.ToArray());
            sw.Stop();

#if DEBUG
            Trace.WriteLine("Join Time:" + sw.Elapsed);
#endif

            // Report only the nodes that actually failed; successful nodes have a null Error.
            var nodeErrors = results.Values.Select(x => x.Error).Where(x => x != null).ToList();
            if (nodeErrors.Count > 0)
            {
                throw new BermudaException("Some nodes failed:\r\n" + string.Join("\r\n", nodeErrors));
            }

            if (results.All(x => x.Value.Data == null))
            {
                return new BermudaResult { Metadata = new BermudaNodeStatistic { Notes = "No Data" } };
            }

            // Peers that produced no data carry no type key; only the peers that did must agree.
            var typeKeys = results.Values.Select(x => x.DataType).Where(x => x != null).Distinct().ToList();
            if (typeKeys.Count > 1)
            {
                throw new BermudaException("Subresults must all return the same type");
            }

            var dataTypeDescriptor = typeKeys.FirstOrDefault();
            if (dataTypeDescriptor == null)
            {
                return new BermudaResult { Error = "Could not determine the merge type, none of the nodes provided type info" };
            }

            var dataType = LinqRuntimeTypeBuilder.GetTypeFromTypeKey(dataTypeDescriptor);

            // Strip the outer brackets from each peer's JSON array and splice them into one array.
            var totalJson = "[" + string.Join(",", results.Values.Where(x => !string.IsNullOrWhiteSpace(x.Data)).Select(x => x.Data.Trim('[', ']')).Where(x => !string.IsNullOrWhiteSpace(x))) + "]";
            var allItems = LinqRuntimeTypeBuilder.DeserializeJson(totalJson, dataTypeDescriptor, true);

            // Combine the per-peer item sets with the merge expression, then page the merged set.
            allItems = InvokeIfPresent(GetMergeFunc(merge, mapreduce, dataType, dataType), allItems);
            allItems = InvokeIfPresent(GetPagingFunc(paging, dataType), allItems);

            var finalMetadata = new BermudaNodeStatistic
            {
                Notes = "Merged Datapoints in " + sw.Elapsed,
                NodeId = HostEnvironment.Instance.CurrentInstanceId,
                ChildNodes = results.Values.Select(x => x.Metadata).ToArray()
            };

            var arraylol = ToArrayCollection(allItems, dataType);
            var json = JsonConvert.SerializeObject(arraylol);

            // The materialized items are kept on the result only when a cursor was requested.
            var originalData = makeCursor ? arraylol : null;

            var finalResult = new BermudaResult
            {
                DataType = dataTypeDescriptor,
                OriginalData = originalData,
                Data = json,
                CreatedOn = DateTime.Now.Ticks,
                Metadata = finalMetadata,
                CacheKey = queryHash
            };

            CachedData[queryHash] = finalResult;
            return finalResult;
        }
    }
    else
    {
        var results = new ConcurrentDictionary<string, BermudaResult>();
        BermudaNodeStatistic stats = new BermudaNodeStatistic();

        var bucketInterfaces = HostEnvironment.Instance.GetBucketInterfacesForDomain(domain);
        if (!bucketInterfaces.Any())
        {
            throw new BermudaException("Data not loaded for: " + domain);
        }

        if (bucketInterfaces.Count() > 1)
        {
            throw new BermudaException("Multiple buckets not supported by BermudaMapReduce");
        }

        var queryHash = GetQueryHash(domain, query, mapreduce, merge, paging, Endpoint.ToString());

        BermudaResult cachedDatapoints;
        if (!noCache && CachedData.TryGetValue(queryHash, out cachedDatapoints) && (DateTime.Now.Ticks - cachedDatapoints.CreatedOn) < CacheLifetime)
        {
            if (CacheTraceMessageLevel < 2)
            {
                Trace.WriteLine("returned CACHED BLOB SET DATAPOINT results [REMDEPTH:" + remdepth + "]");
            }

            if (useCursor)
            {
                var dataType = LinqRuntimeTypeBuilder.GetTypeFromTypeKey(cachedDatapoints.DataType);
                return GetCursorData(paging2, cachedDatapoints, dataType);
            }
            else
            {
                return new BermudaResult
                {
                    DataType = cachedDatapoints.DataType,
                    Data = cachedDatapoints.Data,
                    Metadata = new BermudaNodeStatistic { Notes = "Cache_Hit_3" },
                    CacheKey = queryHash
                };
            }
        }
        else
        {
            object datapoints = null;
            Stopwatch sw = Stopwatch.StartNew();

            Type itemType = null;
            Type resultType = null;
            string json = null;

            foreach (var bucketInterface in bucketInterfaces)
            {
                var bucketKey = GetQueryHash(domain, query, mapreduce, merge, paging, Endpoint.ToString());

                // See if the cache contains a matching result and reuse it if it is not outdated.
                BermudaResult cachedDatapoints2;
                if (!noCache && CachedData.TryGetValue(bucketKey, out cachedDatapoints2) && (DateTime.Now.Ticks - cachedDatapoints2.CreatedOn) < CacheLifetime)
                {
                    if (CacheTraceMessageLevel < 1)
                    {
                        Trace.WriteLine("returned CACHED BLOB DATAPOINT results [REMDEPTH:" + remdepth + "]");
                    }

                    if (useCursor)
                    {
                        if (cachedDatapoints2.OriginalData == null)
                        {
                            throw new Exception("Cursor " + cursor + " contains null data");
                        }

                        var dataType = LinqRuntimeTypeBuilder.GetTypeFromTypeKey(cachedDatapoints2.DataType);
                        results[bucketInterface.Name] = GetCursorData(paging2, cachedDatapoints2, dataType);
                    }
                    else
                    {
                        results[bucketInterface.Name] = new BermudaResult
                        {
                            DataType = cachedDatapoints2.DataType,
                            Data = cachedDatapoints2.Data,
                            Metadata = new BermudaNodeStatistic { Notes = "Cache_Hit_4" }
                        };
                        json = cachedDatapoints2.Data;
                    }
                }
                else
                {
                    // Resolve the single source collection referenced by the query.
                    var collections = GetCollections(query, mapreduce);
                    if (collections.Count() > 1)
                    {
                        throw new BermudaException("More than one collection specified: " + string.Join(",", collections));
                    }

                    var table = collections.FirstOrDefault();
                    var tableName = table == null ? null : table.Source;

                    var raw = bucketInterface.GetData(tableName);
                    itemType = bucketInterface.GetDataType(tableName);

                    var mapreduceFunc = GetMapReduceFunc(mapreduce, itemType, out resultType);
                    var queryFunc = GetFilterFunc(query, itemType);
                    var pagingFunc = GetPagingFunc(paging, resultType);

                    if (json == null)
                    {
                        // Pipeline: filter -> map/reduce -> paging; each stage is skipped when its function is null.
                        object subresult = raw;
                        subresult = InvokeIfPresent(queryFunc, subresult);
                        subresult = InvokeIfPresent(mapreduceFunc, subresult);
                        subresult = InvokeIfPresent(pagingFunc, subresult);
                        datapoints = subresult;
                    }

                    if (!args.Contains("-nocount"))
                    {
                        stats.TotalItems = bucketInterface.GetCount(tableName);
                    }
                }
            }

            stats.NodeId = HostEnvironment.Instance.CurrentInstanceId;
            stats.Notes = "Computed Datapoints";

            if (datapoints == null)
            {
                return new BermudaResult() { Metadata = new BermudaNodeStatistic { Notes = "No Results" } };
            }

            datapoints = InvokeIfPresent(GetMergeFunc(merge, mapreduce, itemType, resultType), datapoints);

            stats.LinqExecutionTime = sw.Elapsed;

            var arraylol = ToArrayCollection(datapoints, resultType);
            if (json == null && datapoints != null)
            {
                json = JsonConvert.SerializeObject(arraylol);
            }

            // The materialized items are kept on the result only when a cursor was requested.
            var originalData = makeCursor ? arraylol : null;

            var result = CachedData[queryHash] = new BermudaResult
            {
                DataType = LinqRuntimeTypeBuilder.GetTypeKey(resultType),
                OriginalData = originalData,
                Data = json,
                CreatedOn = DateTime.Now.Ticks,
                Metadata = stats
            };

            sw.Stop();
            return result;
        }
    }
}

/// <summary>
/// Calls the <c>Invoke</c> method of <paramref name="func"/> via reflection with <paramref name="input"/>
/// as its single argument; returns <paramref name="input"/> unchanged when <paramref name="func"/> is null.
/// </summary>
private static object InvokeIfPresent(object func, object input)
{
    if (func == null)
    {
        return input;
    }

    var invokeMethod = func.GetType().GetMethod("Invoke");
    return invokeMethod.Invoke(func, new object[] { input });
}