/// <summary>
/// Splits the current input into words using a newly created <c>SentenceSplitter</c>,
/// writing each identified word to "output.txt" and echoing the words to <c>TXBout</c>.
/// </summary>
/// <remarks>
/// The input is <c>TXBdata.Text</c> when it is not blank; otherwise <c>txbDataFileContent</c>,
/// falling back to <c>data</c> when that is null. The input is tokenized on space, CR, LF and
/// tab, and each non-empty token is split separately. Exceptions are not propagated: their
/// text is shown in <c>TXBout</c>. <c>CHBsplit</c> is disabled while the method runs and is
/// re-enabled with an indeterminate check state when it finishes.
/// </remarks>
/// <returns>A task that completes when all tokens have been processed.</returns>
private async Task PerformSplit()
{
    try
    {
        Trace.Indent();

        var maxWordLength = int.Parse(IFdata.GetField("maxWordLength"));
        // NOTE(review): probRatio and bemsRatio are parsed but never used below. The parses are
        // kept so a malformed field still fails here exactly as before — confirm whether these
        // settings are still needed.
        var probRatio = double.Parse(IFdata.GetField("probRatio"));
        var bemsRatio = double.Parse(IFdata.GetField("bemsRatio"));

        CHBsplit.Enabled = false;

        string fileName = "output.txt";
        var encoding = Encoding.UTF8;
        using (var writer = new StreamWriter(fileName, false, encoding))
        {
            TXBout.Clear();

            var d = new SentenceSplitter.WordIdentifiedEventHandler((word) =>
            {
                writer.WriteLine(word);

                // The text box update goes through Invoke; presumably the event can be raised
                // from a non-UI thread — verify against SentenceSplitter.
                TXBout.Invoke(new Action(() =>
                {
                    // Only mirror output while the box holds fewer than 10000 characters,
                    // then add a single "(Cut)" marker once that limit is reached.
                    if (TXBout.TextLength < 10000)
                    {
                        TXBout.AppendText($"{word}\r\n");
                        if (TXBout.TextLength >= 10000)
                        {
                            TXBout.AppendText("......(Cut)\r\n");
                        }
                    }
                }));
            });

            var ss = new SentenceSplitter(trie, baseDataLength);
            try
            {
                ss.WordIdentified += d;
                Trace.WriteLine("Splitting...");

                var mainInputs = string.IsNullOrWhiteSpace(TXBdata.Text)
                    ? (txbDataFileContent != null ? txbDataFileContent : data)
                    : TXBdata.Text;

                // Drop empty tokens: without this every "\r\n" pair and every run of
                // consecutive separators would hand an empty string to SplitAsync.
                char[] separators = { ' ', '\r', '\n', '\t' };
                var inputs = mainInputs.Split(separators, StringSplitOptions.RemoveEmptyEntries);

                long cnt = 0;
                foreach (var input in inputs)
                {
                    cnt += (await ss.SplitAsync(input, maxWordLength, probType, true)).Count;
                }

                Trace.WriteLine($"{cnt} words identified.");
            }
            catch (Exception error)
            {
                TXBout.Text = error.ToString();
            }
            finally
            {
                // Detach the handler before the writer it captures is disposed.
                ss.WordIdentified -= d;
            }
            // No explicit writer.Close(): leaving the using block disposes the writer.
        }
    }
    catch (Exception error)
    {
        TXBout.Text = error.ToString();
    }
    finally
    {
        Trace.Unindent();
        CHBsplit.CheckState = CheckState.Indeterminate;
        CHBsplit.Enabled = true;
    }
}
/// <summary>
/// Click handler that prompts for a threshold, then writes every word reported by
/// <c>sa.ListFrequentWords</c> to "output.txt" as a "word,count" line and mirrors the
/// entries to <c>TXBout</c>.
/// </summary>
/// <remarks>
/// If the prompt yields an empty string (which is what <c>InputBox</c> returns when the
/// dialog is cancelled) the handler returns without doing anything. Any exception,
/// including a non-numeric threshold, is shown in <c>TXBout</c> instead of being propagated.
/// </remarks>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private async void BTNexportList_Click(object sender, EventArgs e)
{
    try
    {
        Trace.Indent();

        string answer = Microsoft.VisualBasic.Interaction.InputBox("Threshold?", "", "25");
        if (string.IsNullOrEmpty(answer))
        {
            // Cancelled or left empty: nothing to export. The outer finally still unindents.
            return;
        }

        int threshold = int.Parse(answer);

        Trace.WriteLine("Searching...");
        try
        {
            Trace.Indent();
            using (StreamWriter writer = new StreamWriter("output.txt", false, Encoding.UTF8))
            {
                TXBout.Clear();
                long progress = 0;
                var lastUpdateTime = DateTime.Now;

                await sa.ListFrequentWords(threshold, new Func<string, Task>(async (str) =>
                {
                    ++progress;

                    // Difference of the two bounds reported by sa for this word
                    // (presumably its occurrence count — confirm against sa's implementation).
                    var count = sa.UpperBound(str) - sa.LowerBound(str);
                    await writer.WriteLineAsync($"{str},{count}");

                    // NOTE(review): TXBout is accessed directly here; this assumes the callback
                    // runs on the UI thread — confirm how ListFrequentWords invokes it.
                    // Mirror entries only while the box holds fewer than 100000 characters,
                    // then add a single "(Cut)" marker once that limit is reached.
                    if (TXBout.TextLength < 100000)
                    {
                        TXBout.AppendText($"{str}\t{count}\r\n");
                        if (TXBout.TextLength >= 100000)
                        {
                            TXBout.AppendText("......(Cut)");
                        }
                    }

                    // Emit a progress trace only when more than 0.5 s passed since the last one.
                    if ((DateTime.Now - lastUpdateTime).TotalSeconds > 0.5)
                    {
                        Trace.WriteLine($"{progress} words listed, Ex:{str}\t{count}");
                        lastUpdateTime = DateTime.Now;
                    }
                }));
            }
        }
        finally
        {
            Trace.Unindent();
        }

        Trace.Write("Done");
    }
    catch (Exception error)
    {
        TXBout.Text = error.ToString();
    }
    finally
    {
        Trace.Unindent();
    }
}
/// <summary>
/// Runs the shared <c>ss</c> splitter over the current input in a single call, writing each
/// identified word to "output.txt" and echoing the words to <c>TXBout</c>.
/// </summary>
/// <remarks>
/// The input is <c>TXBdata.Text</c> when it is not blank; otherwise <c>txbDataFileContent</c>,
/// falling back to <c>sa.S</c> when that is null. Exceptions are shown in <c>TXBout</c>
/// rather than propagated. <c>CHBsplit</c> is disabled for the duration and re-enabled with
/// an indeterminate check state afterwards.
/// </remarks>
/// <returns>A task that completes when the split has finished.</returns>
private async Task PerformSplit()
{
    try
    {
        Trace.Indent();
        CHBsplit.Enabled = false;

        using (var output = new StreamWriter("output.txt", false, Encoding.UTF8))
        {
            TXBout.Clear();

            SentenceSplitter.WordIdentifiedEventHandler onWordIdentified = word =>
            {
                output.WriteLine(word);

                // Mirror the word into the text box via Invoke, stopping once the box
                // already holds 10000 characters or more.
                TXBout.Invoke(new Action(() =>
                {
                    if (TXBout.TextLength >= 10000)
                    {
                        return;
                    }

                    TXBout.AppendText($"{word}\r\n");
                    if (TXBout.TextLength >= 10000)
                    {
                        TXBout.AppendText("......(Cut)\r\n");
                    }
                }));
            };

            try
            {
                ss.WordIdentified += onWordIdentified;
                Trace.WriteLine("Splitting...");

                // Typed text wins; otherwise use the loaded file content, then sa.S.
                var source = !string.IsNullOrWhiteSpace(TXBdata.Text)
                    ? TXBdata.Text
                    : (txbDataFileContent ?? sa.S);

                var identified = await ss.SplitAsync(
                    source,
                    maxWordLength,
                    probType,
                    CHBverbose.Checked);

                Trace.WriteLine($"{identified.Count} words identified.");
            }
            catch (Exception failure)
            {
                TXBout.Text = failure.ToString();
            }
            finally
            {
                // Always detach so the shared splitter does not keep a handler that
                // captures this method's writer.
                ss.WordIdentified -= onWordIdentified;
            }

            output.Close();
        }
    }
    catch (Exception failure)
    {
        TXBout.Text = failure.ToString();
    }
    finally
    {
        Trace.Unindent();
        CHBsplit.CheckState = CheckState.Indeterminate;
        CHBsplit.Enabled = true;
    }
}