/// <summary>
/// Joins one batch of input rows against the row cache.
/// </summary>
/// <param name="batch">The rows to join together with the lookup strategy to apply to them.</param>
/// <returns>
/// <c>batch.Data</c> itself when the strategy is <c>NoLookup</c>; otherwise only the rows whose
/// join key was found in the cache. Rows that miss are either posted to the dim inserter
/// (<c>RemoteLookup</c>) or handed to the garbage recorder (any other strategy).
/// </returns>
protected virtual async Task<IEnumerable<TIn>> JoinBatch(JoinBatch<TIn> batch)
{
    // An empty cache is (re)populated before anything else happens.
    if (m_rowCache.Count == 0)
    {
        InitializeCache(m_rowCache);
    }

    // Batches flagged NoLookup pass straight through untouched.
    if (batch.Strategy == CacheLookupStrategy.NoLookup)
    {
        return batch.Data;
    }

    Func<TIn, object> readJoinKey = m_typeAccessor.GetPropertyAccessor(this.m_joinOnMapping.DestColumnOffset);

    int halfBatch = batch.Data.Length / 2;
    var hits = new List<TIn>(halfBatch);
    var pendingRemote = new List<TIn>(halfBatch);
    int misses = 0;

    // May race with the dim table inserter, which updates the global cache.
    lock (m_rowCache)
    {
        foreach (var candidate in batch.Data)
        {
            TLookupKey key = (TLookupKey)readJoinKey(candidate);
            IDimRow<TLookupKey> cachedRow;

            if (!m_rowCache.TryGetValue(key, out cachedRow))
            {
                misses++;

                if (batch.Strategy == CacheLookupStrategy.RemoteLookup)
                {
                    pendingRemote.Add(candidate);
                }
                else
                {
                    // Local lookup failed and the row is dropped, so log it with the garbage recorder.
                    this.GarbageRecorder.Record(candidate);
                }

                continue;
            }

            this.OnSuccessfulLookup(candidate, cachedRow);
            hits.Add(candidate);
        }
    }

    // Hand every remote-lookup miss to the dim inserter, one at a time
    // (it inserts into a tmp table and runs a MERGE).
    foreach (var unresolved in pendingRemote)
    {
        await m_dimInserter.SendAsync(unresolved).ConfigureAwait(false);
    }

    m_logger.DebugFormat("{0} {1} cache miss among {2} lookup in this round of JoinBatch()", FullName, misses, batch.Data.Length);
    return hits;
}
/// <summary>
/// Resolves a batch of rows through a server-side lookup: the de-duplicated rows are
/// written to a temporary table, merged into the dimension table, and the keys reported by
/// the merge are used to complete the lookup for every row in <paramref name="data"/>.
/// The finished rows are then posted to the output buffer as a <c>NoLookup</c> batch.
/// </summary>
/// <param name="data">Rows to look up; duplicates are removed (using this instance as the comparer) before upload.</param>
/// <param name="targetTable">Supplies the connection string used for the create-tmp-table and merge commands.</param>
/// <returns>A task that completes once the resolved batch has been sent to the output buffer.</returns>
/// <remarks>
/// <c>IsBusy</c> is true for the duration of the call and is reset even when the call fails.
/// </remarks>
protected override async Task DumpToDBAsync(TIn[] data, TargetTable targetTable)
{
    IsBusy = true;
    try
    {
        int oldCount = data.Length;
        var deduplicatedData = data.Distinct(this).ToArray();
        int newCount = deduplicatedData.Length;
        m_host.m_logger.DebugFormat("{0} batch distincted from {1} down to {2}", this.FullName, oldCount, newCount);

        // Create the temporary table that receives the de-duplicated rows.
        using (var connection = new SqlConnection(targetTable.ConnectionString))
        using (var command = new SqlCommand(m_createTmpTable, connection))
        {
            await connection.OpenAsync().ConfigureAwait(false);
            await command.ExecuteNonQueryAsync().ConfigureAwait(false);
        }

        await base.DumpToDBAsync(deduplicatedData, m_tmpTargetTable).ConfigureAwait(false);

        m_host.m_logger.DebugFormat("{0} Executing merge as server-side lookup: {1}", this.FullName, m_mergeTmpToDimTable);

        // Cache private to this batch: join key -> row reported by the merge.
        var subCache = new Dictionary<TLookupKey, PartialDimRow<TLookupKey>>(deduplicatedData.Length, m_keyComparer);

        using (var connection = new SqlConnection(targetTable.ConnectionString))
        using (var command = new SqlCommand(this.m_mergeTmpToDimTable, connection))
        {
            await connection.OpenAsync().ConfigureAwait(false);

            var autoKeyColumn = Utils.GetAutoIncrementColumn(m_typeAccessor.SchemaTable.Columns);
            bool is64BitAutoKey = autoKeyColumn.DataType == typeof(long);

            using (SqlDataReader reader = await command.ExecuteReaderAsync().ConfigureAwait(false))
            {
                while (await reader.ReadAsync().ConfigureAwait(false))
                {
                    // Column 0: $inserted.AutoKey (read as 32- or 64-bit depending on the schema column type).
                    long dimKey = is64BitAutoKey ? reader.GetInt64(0) : reader.GetInt32(0);

                    // Column 1: $inserted.JoinOnColumn
                    TLookupKey joinColumnValue = (TLookupKey)reader[1];

                    // Add to the sub cache no matter whether the merge did an "UPDATE" or an "INSERT".
                    subCache.Add(
                        joinColumnValue,
                        new PartialDimRow<TLookupKey> { AutoIncrementKey = dimKey, JoinOn = joinColumnValue });
                }
            }
        }

        // Publish the freshly resolved rows to the host's global cache.
        // The lock object is the cache itself, matching the lock taken by the host's lookup.
        var globalCache = m_host.m_rowCache;
        lock (globalCache)
        {
            foreach (var dimRow in subCache)
            {
                globalCache.TryAdd(dimRow.Key, dimRow.Value);
            }
        }

        m_host.m_logger.DebugFormat("{0} Global cache now has {1} items after merging sub cache", FullName, globalCache.Count);

        // Complete the lookup from the sub cache only (no global cache lookup).
        // Iterates the original data, so duplicate rows are resolved as well.
        var host = m_host;
        var keyGetter = m_keyGetter;
        foreach (var item in data)
        {
            host.OnSuccessfulLookup(item, subCache[keyGetter(item)]);
        }

        // Output as an already-looked-up batch so it is not looked up again downstream.
        var doneBatch = new JoinBatch<TIn>(data, CacheLookupStrategy.NoLookup);
        await this.m_outputBuffer.SendAsync(doneBatch).ConfigureAwait(false);
    }
    finally
    {
        // Always clear the busy flag; previously any exception left it stuck at true.
        IsBusy = false;
    }
}