Example #1
0
        /// <summary>
        /// Joins the rows of <paramref name="batch"/> against the row cache.
        /// </summary>
        /// <remarks>
        /// The cache is initialized first when it reports zero entries. A batch whose strategy is
        /// <c>CacheLookupStrategy.NoLookup</c> is returned as-is. For every other batch, each row whose
        /// join key is present in the cache is passed to <c>OnSuccessfulLookup</c> and kept in the result;
        /// each miss is either forwarded to the dim inserter (<c>CacheLookupStrategy.RemoteLookup</c>)
        /// or handed to the garbage recorder and dropped.
        /// </remarks>
        /// <param name="batch">Batch carrying the input rows and the lookup strategy to apply.</param>
        /// <returns>
        /// The batch's own data for a NoLookup batch; otherwise the rows that were found in the cache.
        /// </returns>
        protected virtual async Task <IEnumerable <TIn> > JoinBatch(JoinBatch <TIn> batch)
        {
            if (m_rowCache.Count == 0)
            {
                InitializeCache(m_rowCache);
            }

            if (batch.Strategy == CacheLookupStrategy.NoLookup)
            {
                return batch.Data;
            }

            Func<TIn, object> readJoinKey = m_typeAccessor.GetPropertyAccessor(this.m_joinOnMapping.DestColumnOffset);

            var hits          = new List<TIn>(batch.Data.Length / 2);
            var pendingRemote = new List<TIn>(batch.Data.Length / 2);
            int misses        = 0;

            // The dim table inserter updates this same cache, so the whole scan runs under its lock.
            lock (m_rowCache)
            {
                foreach (var item in batch.Data)
                {
                    IDimRow<TLookupKey> cached;
                    var joinKey = (TLookupKey)readJoinKey(item);

                    if (!m_rowCache.TryGetValue(joinKey, out cached))
                    {
                        misses++;

                        if (batch.Strategy == CacheLookupStrategy.RemoteLookup)
                        {
                            pendingRemote.Add(item);
                        }
                        else
                        {
                            // Local-only lookup failed: the row is discarded, so record it as garbage.
                            this.GarbageRecorder.Record(item);
                        }

                        continue;
                    }

                    this.OnSuccessfulLookup(item, cached);
                    hits.Add(item);
                }
            }

            // Hand every miss to the dim inserter, which performs the remote lookup
            // (insert to tmp table and do a MERGE).
            foreach (var unresolved in pendingRemote)
            {
                await m_dimInserter.SendAsync(unresolved).ConfigureAwait(false);
            }

            m_logger.DebugFormat("{0} {1} cache miss among {2} lookup in this round of JoinBatch()", FullName, misses, batch.Data.Length);

            return hits;
        }
Example #2
0
            /// <summary>
            /// Performs the server-side lookup for a batch of rows: de-duplicates the batch, loads it into
            /// the temporary target table, runs the merge statement, caches the returned keys, and posts
            /// the looked-up batch to the output buffer.
            /// </summary>
            /// <remarks>
            /// <c>IsBusy</c> is set for the duration of the call and is cleared even when a step throws.
            /// Database calls use the asynchronous ADO.NET APIs so the calling thread is not blocked.
            /// </remarks>
            /// <param name="data">Rows to look up; every row is passed to the host's <c>OnSuccessfulLookup</c>.</param>
            /// <param name="targetTable">Supplies the connection string used for both SQL commands.</param>
            /// <returns>A task that completes once the looked-up batch has been sent to the output buffer.</returns>
            protected override async Task DumpToDBAsync(TIn[] data, TargetTable targetTable)
            {
                IsBusy = true;

                try
                {
                    int oldCount         = data.Length;
                    var deduplicatedData = data.Distinct(this).ToArray();
                    int newCount         = deduplicatedData.Length;

                    m_host.m_logger.DebugFormat("{0} batch distincted from {1} down to {2}", this.FullName, oldCount, newCount);

                    // Run the m_createTmpTable statement on its own short-lived connection.
                    using (var connection = new SqlConnection(targetTable.ConnectionString))
                    using (var command = new SqlCommand(m_createTmpTable, connection))
                    {
                        await connection.OpenAsync().ConfigureAwait(false);
                        await command.ExecuteNonQueryAsync().ConfigureAwait(false);
                    }

                    await base.DumpToDBAsync(deduplicatedData, m_tmpTargetTable).ConfigureAwait(false);

                    m_host.m_logger.DebugFormat("{0} Executing merge as server-side lookup: {1}", this.FullName, m_mergeTmpToDimTable);

                    var subCache = new Dictionary <TLookupKey, PartialDimRow <TLookupKey> >(deduplicatedData.Length, m_keyComparer);

                    using (var connection = new SqlConnection(targetTable.ConnectionString))
                    using (var command = new SqlCommand(this.m_mergeTmpToDimTable, connection))
                    {
                        await connection.OpenAsync().ConfigureAwait(false);

                        var  autoKeyColumn  = Utils.GetAutoIncrementColumn(m_typeAccessor.SchemaTable.Columns);
                        bool is64BitAutoKey = autoKeyColumn.DataType == typeof(long);

                        using (SqlDataReader reader = await command.ExecuteReaderAsync().ConfigureAwait(false))
                        {
                            while (await reader.ReadAsync().ConfigureAwait(false))
                            {
                                long       dimKey          = is64BitAutoKey ? reader.GetInt64(0) : reader.GetInt32(0); //$inserted.AutoKey
                                TLookupKey joinColumnValue = (TLookupKey)reader[1];                                    //$inserted.JoinOnColumn

                                //add to the subcache no matter it is an "UPDATE" or "INSERT"
                                subCache.Add(
                                    joinColumnValue,
                                    new PartialDimRow <TLookupKey> {
                                    AutoIncrementKey = dimKey, JoinOn = joinColumnValue
                                });
                            }
                        }
                    }

                    var globalCache = m_host.m_rowCache;
                    int globalCount;

                    //update global cache using the sub cache
                    lock (globalCache)
                    {
                        foreach (var dimRow in subCache)
                        {
                            globalCache.TryAdd(dimRow.Key, dimRow.Value);
                        }

                        // Read the size while still holding the lock so the logged value matches this merge.
                        globalCount = globalCache.Count;
                    }

                    m_host.m_logger.DebugFormat("{0} Global cache now has {1} items after merging sub cache", FullName, globalCount);

                    //lookup from the sub cache (no global cache lookup)
                    var host      = m_host;
                    var keyGetter = m_keyGetter;

                    // Iterates the original (non-deduplicated) data so duplicate rows are resolved too.
                    foreach (var item in data)
                    {
                        host.OnSuccessfulLookup(item, subCache[keyGetter(item)]);
                    }

                    //and output as a already-looked-up batch
                    var doneBatch = new JoinBatch <TIn>(data, CacheLookupStrategy.NoLookup);

                    await this.m_outputBuffer.SendAsync(doneBatch).ConfigureAwait(false);
                }
                finally
                {
                    // Always clear the flag; otherwise a failed dump leaves this instance marked busy forever.
                    IsBusy = false;
                }
            }