/// <summary>
/// Build the AclImdb result DB dictionary from the English word-to-vector DB.
/// </summary>
public void SubProcAclImdbResultDictEn()
{
    // Delegate to the shared parameterized helper instead of duplicating the
    // whole dictionary-build/transaction logic inline — keeps both paths in sync.
    // Note argument order: (result DB filename, word-to-vector DB filename).
    SubProcBuildResultDict(DBF_AclImdb, DBF_W2V_EN);
}
/// <summary>
/// Build result DB dictionary: copies dictionary entries from the
/// word-to-vector DB into the result DB inside a single transaction,
/// asserting that index numbering stays contiguous.
/// </summary>
/// <param name="proc_db_fn">Result Db filename</param>
/// <param name="dbf_w2v_fn">DB word to vector filename</param>
protected void SubProcBuildResultDict(string proc_db_fn, string dbf_w2v_fn)
{
    using (var src = new FastTextProcessDB(dbf_w2v_fn))
    using (var dst = new FastTextResultDB(proc_db_fn))
    {
        var transaction = dst.BeginTransaction();
        try
        {
            // Resume numbering right after the highest stored index,
            // or start from zero when the dictionary is still empty.
            long expected = (dst.GetDictInxMax() ?? -1L) + 1;
            src.ProcessEmbedJoins(
                (item) =>
                {
                    Assert.Equal(expected, item.Inx);
                    dst.StoreDictItem(item);
                    expected++;
                },
                from_inx: expected);
            transaction.Commit();
        }
        catch
        {
            transaction.Rollback();
            throw;
        }
    }
}
/// <summary>
/// Wires up a three-stage processing pipeline:
/// preprocess text -> map words to dictionary indexes -> store results.
/// Stages are connected by bounded blocking queues (back-pressure); any
/// stage failure cancels the shared token so the other stages stop draining.
/// </summary>
/// <param name="dbf_w2v">Word-to-vector DB filename (dictionary source).</param>
/// <param name="dbf_res">Result DB filename (created if missing).</param>
/// <param name="preprocessor">Text preprocessor applied to each item's source text.</param>
/// <param name="boundedCapacity">Max items buffered between stages.</param>
public TextProcessor(string dbf_w2v, string dbf_res
    , Preprocessor.ITextPreprocess preprocessor
    , int boundedCapacity = 10000)
{
    QueueProcess = new BlockingCollection<ProcessItem>(boundedCapacity);
    QueueWordToDict = new BlockingCollection<ProcessItem>(boundedCapacity);
    QueueStoreResult = new BlockingCollection<ProcessItem>(boundedCapacity);
    CancelTokenSrc = new CancellationTokenSource();
    var cancel_token = CancelTokenSrc.Token;
    // Shared options so every parallel stage observes cancellation promptly.
    var parallel_opt = new ParallelOptions { CancellationToken = cancel_token };

    // Stage 1: preprocess raw text in parallel, feed the word-to-dict queue.
    taskPreprocess = Task.Run(() =>
    {
        try
        {
            Parallel.ForEach(
                QueueProcess.GetConsumingEnumerable(cancel_token)
                , parallel_opt // consistency fix: stage 2 already passed options; stage 1 did not
                , (itm) =>
                {
                    itm.Preprocessed = preprocessor.Process(itm.Src);
                    QueueWordToDict.Add(itm, cancel_token);
                }
            );
            QueueWordToDict.CompleteAdding();
        }
        catch
        {
            CancelTokenSrc.Cancel();
            throw;
        }
    }, cancel_token);

    // Stage 2: map preprocessed words to dictionary indexes in parallel.
    taskWordToDict = Task.Run(() =>
    {
        try
        {
            using (var wordToDict = new WordToDictProcessor(dbf_w2v))
            {
                Parallel.ForEach(
                    QueueWordToDict.GetConsumingEnumerable(cancel_token)
                    , parallel_opt
                    , (itm) =>
                    {
                        itm.Embedded = wordToDict.WordsToInxsForParallel(itm.Preprocessed);
                        // BUGFIX: pass the token here too — without it, a full
                        // QueueStoreResult would block this Add forever after a
                        // downstream failure has already cancelled the pipeline.
                        QueueStoreResult.Add(itm, cancel_token);
                    }
                );
                wordToDict.StoreEmbed();
                QueueStoreResult.CompleteAdding();
            }
        }
        catch
        {
            CancelTokenSrc.Cancel();
            throw;
        }
    }, cancel_token);

    // Stage 3: persist processed items into the result DB in one transaction.
    taskStoreResult = Task.Run(() =>
    {
        FastTextResultDB.CreateIfNotExistsDB(dbf_res);
        using (var res_dbx = new FastTextResultDB(dbf_res))
        {
            var tran = res_dbx.BeginTransaction();
            try
            {
                foreach (var itm in QueueStoreResult.GetConsumingEnumerable(cancel_token))
                {
                    res_dbx.StoreProcessItem(itm);
                }
                tran.Commit();
            }
            catch
            {
                tran.Rollback();
                CancelTokenSrc.Cancel();
                throw;
            }
        }
    }, cancel_token);
}