예제 #1
0
        /// <summary>
        /// Compiles <paramref name="dataInput"/> as the body of a generated scoring method
        /// <c>public static double FooMethod(string S, int N, Func&lt;string,int&gt; C)</c>
        /// and uses that method to split the current input text with <c>ss_CutByCode</c>.
        /// </summary>
        /// <param name="dataInput">
        /// C# statements inserted verbatim into the generated method body.
        /// NOTE(review): this text is compiled and executed in-process, so it must only come
        /// from a trusted source.
        /// </param>
        /// <returns>
        /// <c>null</c> when a newer call was started while this one waited for the semaphore;
        /// the text of the exception when compilation or splitting throws; otherwise the
        /// space-joined split results, one line per input token, truncated once the output
        /// would exceed 10000 characters.
        /// </returns>
        async Task <string> CutByCode(string dataInput)
        {
            var counter = System.Threading.Interlocked.Increment(ref counter_CutByCode);

            // Acquire before entering the try block so the finally below can only
            // release a semaphore that this call really holds.
            await SemaphoreSlim_CutByCode.WaitAsync();
            try
            {
                // A later call bumped the counter while we were waiting: this request is stale.
                if (counter != System.Threading.Interlocked.Read(ref counter_CutByCode))
                {
                    return(null);
                }
                const string namespaceName = "WikiDataAnalysis", className = "FooClass", methodName = "FooMethod";
                string       code =
                    "using System;" +
                    $"namespace {namespaceName}" +
                    "{" +
                    $"   class {className}" +
                    "   {" +
                    $"       public static double {methodName}(string S,int N,Func<string,int> C)" +
                    "       {" +
                    $"           {dataInput}" +
                    "       }" +
                    "   }" +
                    "}";
                try
                {
                    Trace.Indent();
                    Trace.WriteLine($"Compiling... code length = {code.Length}");
                    System.Reflection.MethodInfo methodInfo = Utils.DynamicCompile.GetMethod(code, namespaceName, className, methodName, "System");
                    // Strongly-typed wrapper around the reflected static method.
                    Func <string, int, Func <string, int>, double> method =
                        (s, n, c) => (double)methodInfo.Invoke(null, new object[] { s, n, c });
                    Trace.WriteLine("Splitting...");
                    var maxWordLength = int.Parse(IFdata.GetField("maxWordLength"));
                    // Not used below; still parsed so malformed settings keep producing an error result.
                    var probRatio = double.Parse(IFdata.GetField("probRatio"));
                    var bemsRatio = double.Parse(IFdata.GetField("bemsRatio"));

                    // Read the text box here, before offloading, rather than from the worker thread.
                    // NOTE(review): assumes TXBdata is a UI control and this method is awaited from
                    // the UI thread - confirm against callers.
                    var typedText  = TXBdata.Text;
                    var mainInputs = string.IsNullOrWhiteSpace(typedText) ? (txbDataFileContent ?? data) : typedText;

                    StringBuilder sb_ret = new StringBuilder();
                    long          cnt    = 0;
                    await Task.Run(() =>
                    {
                        var inputs = mainInputs.Split(' ', '\r', '\n', '\t');
                        if (ss_CutByCode == null)
                        {
                            ss_CutByCode = new SentenceSplitter(sa);
                        }
                        const int maxoutputLength = 10000;
                        bool appending            = true;
                        int progress       = 0, total_progress = inputs.Length;
                        var lastUpdateTime = DateTime.MinValue;
                        foreach (var input in inputs)
                        {
                            ++progress;
                            // Throttle progress traces to roughly two per second.
                            var now = DateTime.UtcNow;
                            if ((now - lastUpdateTime).TotalSeconds > 0.5)
                            {
                                Trace.WriteLine($"Splitting... {progress}/{total_progress}");
                                lastUpdateTime = now;
                            }
                            var cutResult = ss_CutByCode.Split(input, maxWordLength, method, false);
                            cnt          += cutResult.Count;
                            // Words are still counted after the cap is reached; only the
                            // returned text stops growing, and it never resumes.
                            if (appending)
                            {
                                if (sb_ret.Length + cutResult.Sum(s => (long)s.Length) > maxoutputLength)
                                {
                                    appending = false;
                                }
                                else
                                {
                                    sb_ret.AppendLine(string.Join(" ", cutResult));
                                }
                            }
                        }
                    });

                    Trace.WriteLine($"{cnt} words identified.");
                    return(sb_ret.ToString());
                }
                catch (Exception error) { return(error.ToString()); }
                finally { Trace.Unindent(); }
            }
            // SemaphoreSlim.Release is thread-safe on its own; no extra lock is required.
            finally { SemaphoreSlim_CutByCode.Release(); }
        }
예제 #2
0
 /// <summary>
 /// Splits the current input text into words. Every identified word is written to
 /// "output.txt" (UTF-8, overwritten) and echoed into TXBout until that box holds
 /// 10000 characters, after which a "(Cut)" marker is appended once.
 /// The input is TXBdata.Text unless it is blank, in which case txbDataFileContent
 /// is used, or data when txbDataFileContent is null.
 /// Any exception is shown in TXBout instead of being propagated.
 /// </summary>
 private async Task PerformSplit()
 {
     const int displayLimit = 10000;
     try
     {
         Trace.Indent();
         var maxWordLength = int.Parse(IFdata.GetField("maxWordLength"));
         // The two ratios are parsed (and therefore validated) but not used further here.
         var probRatio     = double.Parse(IFdata.GetField("probRatio"));
         var bemsRatio     = double.Parse(IFdata.GetField("bemsRatio"));
         CHBsplit.Enabled = false;
         using (var writer = new StreamWriter("output.txt", false, Encoding.UTF8))
         {
             TXBout.Clear();
             var onWordIdentified = new SentenceSplitter.WordIdentifiedEventHandler((word) =>
             {
                 // The file always receives the word; the text box only until it is full.
                 writer.WriteLine(word);
                 TXBout.Invoke(new Action(() =>
                 {
                     if (TXBout.TextLength >= displayLimit)
                     {
                         return;
                     }
                     TXBout.AppendText($"{word}\r\n");
                     if (TXBout.TextLength >= displayLimit)
                     {
                         TXBout.AppendText("......(Cut)\r\n");
                     }
                 }));
             });
             var splitter = new SentenceSplitter(trie, baseDataLength);
             try
             {
                 splitter.WordIdentified += onWordIdentified;
                 Trace.WriteLine("Splitting...");

                 // Choose the text to split: typed text first, then loaded file content, then data.
                 string sourceText;
                 if (!string.IsNullOrWhiteSpace(TXBdata.Text))
                 {
                     sourceText = TXBdata.Text;
                 }
                 else if (txbDataFileContent != null)
                 {
                     sourceText = txbDataFileContent;
                 }
                 else
                 {
                     sourceText = data;
                 }

                 long totalWords = 0;
                 foreach (var token in sourceText.Split(' ', '\r', '\n', '\t'))
                 {
                     var pieces = await splitter.SplitAsync(token, maxWordLength, probType, true);
                     totalWords += pieces.Count;
                 }
                 Trace.WriteLine($"{totalWords} words identified.");
             }
             catch (Exception error)
             {
                 TXBout.Text = error.ToString();
             }
             finally
             {
                 splitter.WordIdentified -= onWordIdentified;
             }
             writer.Close();
         }
     }
     catch (Exception error)
     {
         TXBout.Text = error.ToString();
     }
     finally
     {
         Trace.Unindent();
         CHBsplit.CheckState = CheckState.Indeterminate;
         CHBsplit.Enabled    = true;
     }
 }