/// <summary>
/// Compiles <paramref name="dataInput"/> as the body of a scoring method and uses that
/// method to split the current input text into words.
/// </summary>
/// <remarks>
/// The generated wrapper is
/// <c>public static double FooMethod(string S, int N, Func&lt;string,int&gt; C)</c>;
/// how <c>S</c>, <c>N</c> and <c>C</c> are supplied is decided by
/// <c>SentenceSplitter.Split</c>, which is not visible here.
/// Calls are serialized through <c>SemaphoreSlim_CutByCode</c>. A call that has been
/// superseded by a newer one while waiting returns <c>null</c> without doing any work.
/// SECURITY: <paramref name="dataInput"/> is compiled and executed verbatim, so it must
/// only ever come from a trusted source.
/// </remarks>
/// <param name="dataInput">C# statements placed verbatim inside the scoring method body.</param>
/// <returns>
/// The split words, one input token per line with words separated by spaces, capped at
/// about 10000 characters; the text of the exception if compilation or splitting fails;
/// or <c>null</c> if a newer call superseded this one.
/// </returns>
async Task<string> CutByCode(string dataInput)
{
    // Take a ticket; only the holder of the most recent ticket does real work.
    var counter = System.Threading.Interlocked.Increment(ref counter_CutByCode);

    // Acquire outside the try block so the finally below only releases a permit
    // that was actually obtained.
    await SemaphoreSlim_CutByCode.WaitAsync();
    try
    {
        if (counter != System.Threading.Interlocked.Read(ref counter_CutByCode))
        {
            // A newer request arrived while this one was queued.
            return null;
        }

        const string namespaceName = "WikiDataAnalysis", className = "FooClass", methodName = "FooMethod";
        string code =
            "using System;" +
            $"namespace {namespaceName}" +
            "{" +
            $" class {className}" +
            " {" +
            $" public static double {methodName}(string S,int N,Func<string,int> C)" +
            " {" +
            $" {dataInput}" +
            " }" +
            " }" +
            "}";

        Trace.Indent();
        try
        {
            Trace.WriteLine($"Compiling... code length = {code.Length}");
            System.Reflection.MethodInfo methodInfo =
                Utils.DynamicCompile.GetMethod(code, namespaceName, className, methodName, "System");
            // Typed delegate around the reflected static method.
            var method = new Func<string, int, Func<string, int>, double>(
                (s, n, c) => (double)methodInfo.Invoke(null, new object[] { s, n, c }));

            Trace.WriteLine("Splitting...");
            var maxWordLength = int.Parse(IFdata.GetField("maxWordLength"));

            // Capture the UI text here, before hopping to the thread pool, so the
            // control is not touched from a worker thread.
            // NOTE(review): assumes this method is entered on the UI thread - confirm.
            var typedText = TXBdata.Text;
            var mainInputs = string.IsNullOrWhiteSpace(typedText)
                ? (txbDataFileContent ?? data)
                : typedText;

            var sb_ret = new StringBuilder();
            long cnt = 0;
            await Task.Run(() =>
            {
                var inputs = mainInputs.Split(' ', '\r', '\n', '\t');
                if (ss_CutByCode == null)
                {
                    ss_CutByCode = new SentenceSplitter(sa);
                }

                const int maxoutputLength = 10000;
                bool appending = true;
                int progress = 0, total_progress = inputs.Length;
                var lastUpdateTime = DateTime.MinValue;
                foreach (var input in inputs)
                {
                    ++progress;
                    // Report progress at most about twice per second.
                    if ((DateTime.UtcNow - lastUpdateTime).TotalSeconds > 0.5)
                    {
                        Trace.WriteLine($"Splitting... {progress}/{total_progress}");
                        lastUpdateTime = DateTime.UtcNow;
                    }

                    var cutResult = ss_CutByCode.Split(input, maxWordLength, method, false);
                    // Every word is counted, even after the output has been capped.
                    cnt += cutResult.Count;

                    if (appending)
                    {
                        if (sb_ret.Length + cutResult.Sum(s => (long)s.Length) > maxoutputLength)
                        {
                            // Once the cap is hit, nothing further is appended.
                            appending = false;
                        }
                        else
                        {
                            sb_ret.AppendLine(string.Join(" ", cutResult));
                        }
                    }
                }
            });

            Trace.WriteLine($"{cnt} words identified.");
            return sb_ret.ToString();
        }
        catch (Exception error)
        {
            // Failures are reported to the caller as text rather than thrown.
            return error.ToString();
        }
        finally
        {
            Trace.Unindent();
        }
    }
    finally
    {
        SemaphoreSlim_CutByCode.Release();
    }
}
/// <summary>
/// Splits the current input text into words, writing every identified word to
/// "output.txt" (UTF-8, overwritten) and echoing words into <c>TXBout</c> until it
/// holds 10000 characters.
/// </summary>
/// <remarks>
/// The input is <c>TXBdata.Text</c> when it is not blank; otherwise
/// <c>txbDataFileContent</c>, falling back to <c>data</c>. Any exception is shown in
/// <c>TXBout</c> instead of being propagated. <c>CHBsplit</c> is disabled while the
/// split runs and is always re-enabled, with an indeterminate check state, at the end.
/// </remarks>
private async Task PerformSplit()
{
    try
    {
        Trace.Indent();

        var maxWordLength = int.Parse(IFdata.GetField("maxWordLength"));
        // These two values are not used below; they are still parsed so that a
        // malformed field fails here exactly as before.
        double.Parse(IFdata.GetField("probRatio"));
        double.Parse(IFdata.GetField("bemsRatio"));

        CHBsplit.Enabled = false;

        const string outputPath = "output.txt";
        const int displayLimit = 10000;

        using (var output = new StreamWriter(outputPath, false, Encoding.UTF8))
        {
            TXBout.Clear();

            var onWord = new SentenceSplitter.WordIdentifiedEventHandler(word =>
            {
                output.WriteLine(word);
                TXBout.Invoke(new Action(() =>
                {
                    if (TXBout.TextLength >= displayLimit)
                    {
                        // Display already full; the file still receives every word.
                        return;
                    }

                    TXBout.AppendText($"{word}\r\n");
                    if (TXBout.TextLength >= displayLimit)
                    {
                        TXBout.AppendText("......(Cut)\r\n");
                    }
                }));
            });

            var splitter = new SentenceSplitter(trie, baseDataLength);
            try
            {
                splitter.WordIdentified += onWord;
                Trace.WriteLine("Splitting...");

                string source = TXBdata.Text;
                if (string.IsNullOrWhiteSpace(source))
                {
                    source = txbDataFileContent ?? data;
                }

                var separators = new[] { ' ', '\r', '\n', '\t' };
                long total = 0;
                foreach (var token in source.Split(separators))
                {
                    var words = await ss_SplitOne(splitter, token, maxWordLength);
                    total += words.Count;
                }

                Trace.WriteLine($"{total} words identified.");
            }
            catch (Exception splitError)
            {
                TXBout.Text = splitError.ToString();
            }
            finally
            {
                // Detach so the handler no longer references the writer.
                splitter.WordIdentified -= onWord;
            }

            output.Close();
        }
    }
    catch (Exception setupError)
    {
        TXBout.Text = setupError.ToString();
    }
    finally
    {
        Trace.Unindent();
        CHBsplit.CheckState = CheckState.Indeterminate;
        CHBsplit.Enabled = true;
    }
}

// Runs one asynchronous split with the same arguments PerformSplit has always used.
private async Task<System.Collections.Generic.List<string>> ss_SplitOne(SentenceSplitter splitter, string token, int maxWordLength)
{
    return await splitter.SplitAsync(token, maxWordLength, probType, true);
}