Exemplo n.º 1
0
 /// <summary>
 /// Passes every item that <c>ProcessEmbedJoins</c> yields from the
 /// <c>DBF_W2V_EN</c> process DB to <c>StoreDictItem</c> on the
 /// <c>DBF_AclImdb</c> result DB, inside a single transaction.
 /// Processing starts one past the maximum dictionary index already
 /// reported by the result DB, or at 0 when no maximum is reported.
 /// </summary>
 public void SubProcAclImdbResultDictEn()
 {
     using (var source = new FastTextProcessDB(DBF_W2V_EN))
     using (var target = new FastTextResultDB(DBF_AclImdb))
     {
         var transaction = target.BeginTransaction();
         try
         {
             // Work out the first index that is still missing from the target.
             var lastStored = target.GetDictInxMax();
             long expectedInx = 0;
             if (lastStored.HasValue)
             {
                 expectedInx = lastStored.Value + 1;
             }

             source.ProcessEmbedJoins(entry =>
             {
                 // Items must arrive with consecutive indexes, without gaps.
                 Assert.Equal(expectedInx, entry.Inx);
                 target.StoreDictItem(entry);
                 expectedInx += 1;
             }, from_inx: expectedInx);

             transaction.Commit();
         }
         catch
         {
             // Undo everything stored so far, then let the caller see the failure.
             transaction.Rollback();
             throw;
         }
     }
 }
Exemplo n.º 2
0
 /// <summary>
 /// Fills the dictionary of a result DB: every item that
 /// <c>ProcessEmbedJoins</c> yields from the process DB is handed to
 /// <c>StoreDictItem</c> on the result DB, inside a single transaction.
 /// Processing starts one past the maximum dictionary index already
 /// reported by the result DB, or at 0 when no maximum is reported.
 /// </summary>
 /// <param name="proc_db_fn">File name used to open the result DB (destination)</param>
 /// <param name="dbf_w2v_fn">File name used to open the word-to-vector process DB (source)</param>
 protected void SubProcBuildResultDict(string proc_db_fn, string dbf_w2v_fn)
 {
     using (var source = new FastTextProcessDB(dbf_w2v_fn))
     using (var target = new FastTextResultDB(proc_db_fn))
     {
         var transaction = target.BeginTransaction();
         try
         {
             // Work out the first index that is still missing from the target.
             var lastStored = target.GetDictInxMax();
             long expectedInx = 0;
             if (lastStored.HasValue)
             {
                 expectedInx = lastStored.Value + 1;
             }

             source.ProcessEmbedJoins(entry =>
             {
                 // Items must arrive with consecutive indexes, without gaps.
                 Assert.Equal(expectedInx, entry.Inx);
                 target.StoreDictItem(entry);
                 expectedInx += 1;
             }, from_inx: expectedInx);

             transaction.Commit();
         }
         catch
         {
             // Undo everything stored so far, then let the caller see the failure.
             transaction.Rollback();
             throw;
         }
     }
 }
Exemplo n.º 3
0
        /// <summary>
        /// Creates the processor and immediately starts its three background stages,
        /// connected by bounded queues:
        /// <list type="number">
        /// <item>preprocess: <c>QueueProcess</c> -> <c>preprocessor.Process</c> -> <c>QueueWordToDict</c></item>
        /// <item>word-to-dict: <c>QueueWordToDict</c> -> <c>WordsToInxsForParallel</c> -> <c>QueueStoreResult</c></item>
        /// <item>store: <c>QueueStoreResult</c> -> <c>StoreProcessItem</c>, in one transaction</item>
        /// </list>
        /// A failure in any stage cancels <c>CancelTokenSrc</c>, so the other stages
        /// stop instead of blocking on a full or empty queue; the original exception
        /// is rethrown and stays observable on the failing stage's task.
        /// </summary>
        /// <param name="dbf_w2v">File name passed to <c>WordToDictProcessor</c></param>
        /// <param name="dbf_res">File name of the result DB; passed to <c>CreateIfNotExistsDB</c> before the DB is opened</param>
        /// <param name="preprocessor">Preprocessor applied to the <c>Src</c> of every queued item</param>
        /// <param name="boundedCapacity">Bounded capacity of each of the three queues</param>
        public TextProcessor(string dbf_w2v, string dbf_res
                             , Preprocessor.ITextPreprocess preprocessor
                             , int boundedCapacity = 10000)
        {
            QueueProcess     = new BlockingCollection <ProcessItem>(boundedCapacity);
            QueueWordToDict  = new BlockingCollection <ProcessItem>(boundedCapacity);
            QueueStoreResult = new BlockingCollection <ProcessItem>(boundedCapacity);
            CancelTokenSrc   = new CancellationTokenSource();
            var cancel_token = CancelTokenSrc.Token;
            // Shared by both parallel stages so each stops scheduling iterations once cancelled.
            var parallel_opt = new ParallelOptions {
                CancellationToken = cancel_token
            };

            taskPreprocess = Task.Run(() =>
            {
                try
                {
                    Parallel.ForEach(
                        QueueProcess.GetConsumingEnumerable(cancel_token)
                        , parallel_opt
                        , (itm) =>
                    {
                        itm.Preprocessed = preprocessor.Process(itm.Src);
                        QueueWordToDict.Add(itm, cancel_token);
                    }
                        );
                    // Input drained: let the next stage finish once its queue is empty.
                    QueueWordToDict.CompleteAdding();
                }
                catch
                {
                    CancelTokenSrc.Cancel();
                    throw;
                }
            }, cancel_token
                                      );
            taskWordToDict = Task.Run(() =>
            {
                try
                {
                    using (var wordToDict = new WordToDictProcessor(dbf_w2v))
                    {
                        Parallel.ForEach(
                            QueueWordToDict.GetConsumingEnumerable(cancel_token)
                            , parallel_opt
                            , (itm) =>
                        {
                            itm.Embedded = wordToDict.WordsToInxsForParallel(itm.Preprocessed);
                            // Pass the token: without it a full queue would block this worker
                            // forever after the storage stage has failed and stopped consuming.
                            QueueStoreResult.Add(itm, cancel_token);
                        }
                            );
                        wordToDict.StoreEmbed();
                        QueueStoreResult.CompleteAdding();
                    }
                }
                catch
                {
                    CancelTokenSrc.Cancel();
                    throw;
                }
            }, cancel_token
                                      );
            taskStoreResult = Task.Run(() =>
            {
                // The outer try also covers DB creation/opening and a failing rollback,
                // so any storage failure cancels the upstream stages.
                try
                {
                    FastTextResultDB.CreateIfNotExistsDB(dbf_res);
                    using (var res_dbx = new FastTextResultDB(dbf_res))
                    {
                        var tran = res_dbx.BeginTransaction();
                        try
                        {
                            foreach (var itm in QueueStoreResult.GetConsumingEnumerable(cancel_token))
                            {
                                res_dbx.StoreProcessItem(itm);
                            }
                            tran.Commit();
                        }
                        catch
                        {
                            tran.Rollback();
                            throw;
                        }
                    }
                }
                catch
                {
                    CancelTokenSrc.Cancel();
                    throw;
                }
            }, cancel_token
                                       );
        }