/// <summary>
/// Splits the current input text with a freshly created <c>SentenceSplitter</c>, writing every
/// word reported through <c>WordIdentified</c> to "output.txt" and echoing it to the output box.
/// </summary>
/// <remarks>
/// The input is <c>TXBdata.Text</c> unless it is blank, in which case <c>txbDataFileContent</c> is
/// used when non-null and <c>data</c> otherwise. The text is cut on space, CR, LF and tab, and each
/// piece is split separately. Exceptions are rendered into the output box instead of being
/// rethrown, and the split check box is always re-enabled and set to indeterminate at the end.
/// </remarks>
private async Task PerformSplit()
{
    try
    {
        Trace.Indent();

        var maxWordLength = int.Parse(IFdata.GetField("maxWordLength"));
        // The next two values are parsed but not consumed below; an invalid field still throws here.
        var probRatio = double.Parse(IFdata.GetField("probRatio"));
        var bemsRatio = double.Parse(IFdata.GetField("bemsRatio"));

        CHBsplit.Enabled = false;

        using (var writer = new StreamWriter("output.txt", false, Encoding.UTF8))
        {
            TXBout.Clear();

            SentenceSplitter.WordIdentifiedEventHandler onWordIdentified = word =>
            {
                writer.WriteLine(word);

                // The text box is updated through Invoke; the echo stops once it holds 10000 characters.
                TXBout.Invoke((Action)(() =>
                {
                    if (TXBout.TextLength >= 10000)
                    {
                        return;
                    }

                    TXBout.AppendText($"{word}\r\n");
                    if (TXBout.TextLength >= 10000)
                    {
                        TXBout.AppendText("......(Cut)\r\n");
                    }
                }));
            };

            var splitter = new SentenceSplitter(trie, baseDataLength);
            try
            {
                splitter.WordIdentified += onWordIdentified;
                Trace.WriteLine("Splitting...");

                var sourceText = !string.IsNullOrWhiteSpace(TXBdata.Text)
                    ? TXBdata.Text
                    : (txbDataFileContent ?? data);
                var pieces = sourceText.Split(' ', '\r', '\n', '\t');

                long identifiedTotal = 0;
                foreach (var piece in pieces)
                {
                    var found = await splitter.SplitAsync(piece, maxWordLength, probType, true);
                    identifiedTotal += found.Count;
                }

                Trace.WriteLine($"{identifiedTotal} words identified.");
            }
            catch (Exception splitError)
            {
                TXBout.Text = splitError.ToString();
            }
            finally
            {
                // Detach before the writer captured by the handler goes away.
                splitter.WordIdentified -= onWordIdentified;
            }

            writer.Close();
        }
    }
    catch (Exception outerError)
    {
        TXBout.Text = outerError.ToString();
    }
    finally
    {
        Trace.Unindent();
        CHBsplit.CheckState = CheckState.Indeterminate;
        CHBsplit.Enabled = true;
    }
}
/// <summary>
/// Click handler for the export-list button. Asks for a threshold, then passes it to
/// <c>sa.ListFrequentWords</c> and writes each reported string as "string,count" to "output.txt",
/// mirroring the entries (tab separated) into the output box until it holds 100000 characters.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
/// <remarks>
/// The count of an entry is <c>sa.UpperBound(str) - sa.LowerBound(str)</c>. Exceptions are shown in
/// the output box rather than rethrown. Cancelling the prompt or entering a non-integer ends the
/// handler without touching "output.txt".
/// </remarks>
private async void BTNexportList_Click(object sender, EventArgs e)
{
    try
    {
        Trace.Indent();

        // InputBox returns a zero-length string when the dialog is cancelled.
        string answer = Microsoft.VisualBasic.Interaction.InputBox("Threshold?", "", "25");
        if (string.IsNullOrEmpty(answer))
        {
            Trace.WriteLine("Export cancelled.");
            return;
        }

        int threshold;
        if (!int.TryParse(answer, out threshold))
        {
            TXBout.Text = $"Invalid threshold: {answer}";
            return;
        }

        Trace.WriteLine("Searching...");
        try
        {
            Trace.Indent();
            using (StreamWriter writer = new StreamWriter("output.txt", false, Encoding.UTF8))
            {
                TXBout.Clear();
                long progress = 0;

                // Monotonic timer for throttling progress traces; unaffected by wall-clock changes.
                var sinceLastReport = Stopwatch.StartNew();

                await sa.ListFrequentWords(threshold, new Func<string, Task>(async (str) =>
                {
                    ++progress;
                    var count = sa.UpperBound(str) - sa.LowerBound(str);
                    await writer.WriteLineAsync($"{str},{count}");

                    // Mirror into the text box only while it is below the display cap.
                    if (TXBout.TextLength < 100000)
                    {
                        TXBout.AppendText($"{str}\t{count}\r\n");
                        if (TXBout.TextLength >= 100000)
                        {
                            TXBout.AppendText("......(Cut)");
                        }
                    }

                    // Emit a progress trace at most about twice per second.
                    if (sinceLastReport.Elapsed.TotalSeconds > 0.5)
                    {
                        Trace.WriteLine($"{progress} words listed, Ex:{str}\t{count}");
                        sinceLastReport.Restart();
                    }
                }));
            }
        }
        finally
        {
            Trace.Unindent();
        }

        Trace.Write("Done");
    }
    catch (Exception error)
    {
        TXBout.Text = error.ToString();
    }
    finally
    {
        Trace.Unindent();
    }
}
/// <summary>
/// Runs the shared splitter <c>ss</c> over the current input text in a single call, writing every
/// word reported through <c>WordIdentified</c> to "output.txt" and echoing it to the output box.
/// </summary>
/// <remarks>
/// The input is <c>TXBdata.Text</c> unless it is blank, in which case <c>txbDataFileContent</c> is
/// used when non-null and <c>sa.S</c> otherwise. <c>CHBverbose.Checked</c> is forwarded as the last
/// argument of <c>SplitAsync</c>. Exceptions are rendered into the output box instead of being
/// rethrown, and the split check box is always re-enabled and set to indeterminate at the end.
/// </remarks>
private async Task PerformSplit()
{
    try
    {
        Trace.Indent();
        CHBsplit.Enabled = false;

        using (var writer = new StreamWriter("output.txt", false, Encoding.UTF8))
        {
            TXBout.Clear();

            SentenceSplitter.WordIdentifiedEventHandler onWordIdentified = word =>
            {
                writer.WriteLine(word);

                // The text box is updated through Invoke; the echo stops once it holds 10000 characters.
                TXBout.Invoke((Action)(() =>
                {
                    if (TXBout.TextLength >= 10000)
                    {
                        return;
                    }

                    TXBout.AppendText($"{word}\r\n");
                    if (TXBout.TextLength >= 10000)
                    {
                        TXBout.AppendText("......(Cut)\r\n");
                    }
                }));
            };

            try
            {
                ss.WordIdentified += onWordIdentified;
                Trace.WriteLine("Splitting...");

                var sourceText = !string.IsNullOrWhiteSpace(TXBdata.Text)
                    ? TXBdata.Text
                    : (txbDataFileContent ?? sa.S);

                var identified = await ss.SplitAsync(sourceText, maxWordLength, probType, CHBverbose.Checked);
                Trace.WriteLine($"{identified.Count} words identified.");
            }
            catch (Exception splitError)
            {
                TXBout.Text = splitError.ToString();
            }
            finally
            {
                // Detach before the writer captured by the handler goes away.
                ss.WordIdentified -= onWordIdentified;
            }

            writer.Close();
        }
    }
    catch (Exception outerError)
    {
        TXBout.Text = outerError.ToString();
    }
    finally
    {
        Trace.Unindent();
        CHBsplit.CheckState = CheckState.Indeterminate;
        CHBsplit.Enabled = true;
    }
}
/// <summary>
/// Key handler for the input box. On Ctrl+Enter it runs the action selected in <c>CBmethod</c>:
/// "Send Socket" sends a message over a local TCP socket, "Cut by Code" shows the result of
/// <c>CutByCode</c> in the output box when that result is non-null.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Key event data; only Enter with Control held is acted upon.</param>
private async void TXBin_KeyDown(object sender, KeyEventArgs e)
{
    if (e.KeyCode != Keys.Enter || !e.Control)
    {
        return;
    }

    switch (CBmethod.SelectedItem)
    {
        case "Send Socket":
            SendSocketMessage();
            break;

        case "Cut by Code":
            {
                string s = await CutByCode(TXBin.Text);
                if (s != null)
                {
                    TXBout.Text = s;
                }
            }
            break;
    }
}

/// <summary>
/// Connects to 127.0.0.1 on the port given on the first input line, sends the second input line as
/// UTF-8 followed by a NUL byte, then starts a background receiver that is shut down after 60 seconds.
/// </summary>
/// <remarks>
/// Any exception (bad port, missing second line, connection failure) is shown in the output box.
/// </remarks>
private void SendSocketMessage()
{
    Socket client_sock = null;
    try
    {
        Trace.Indent();
        Trace.WriteLine("Initializing Socket...");

        var input = TXBin.Text.Split('\n').Select(s => s.TrimEnd('\r')).ToArray();
        var port = int.Parse(input[0]);
        TXBout.Text = $"port: {port}\r\nmsg: {input[1]}\r\n";

        client_sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
        client_sock.Connect(new IPEndPoint(IPAddress.Loopback, port));
        Trace.WriteLine($"Connected: {client_sock.Connected}");

        client_sock.Send(Encoding.UTF8.GetBytes(input[1]));
        client_sock.Send(new[] { (byte)'\0' });

        // Background threads so a lingering connection cannot keep the process alive on exit.
        var receiver = new Thread(() => ReceiveSocketMessages(client_sock)) { IsBackground = true };
        receiver.Start();

        // Timeout: closing the socket unblocks the receiver's pending read, so no Thread.Abort is needed.
        var watchdog = new Thread(() =>
        {
            Thread.Sleep(1000 * 60);
            client_sock.Close();
        })
        { IsBackground = true };
        watchdog.Start();
    }
    catch (Exception error)
    {
        // Do not leak the socket when setup fails part-way.
        if (client_sock != null)
        {
            client_sock.Close();
        }

        TXBout.Text = error.ToString();
    }
    finally
    {
        Trace.Unindent();
    }
}

/// <summary>
/// Reads NUL-terminated UTF-8 messages from <paramref name="socket"/> and appends each one to the
/// output box, until the peer closes the connection or the socket is closed locally.
/// </summary>
/// <param name="socket">Connected socket; it is closed when this method returns.</param>
private void ReceiveSocketMessages(Socket socket)
{
    const byte target = (byte)'\0';
    try
    {
        using (var stream = new NetworkStream(socket))
        {
            var data = new List<byte>();
            while (true)
            {
                var next = stream.ReadByte();
                if (next == -1)
                {
                    // End of stream: stop instead of spinning on -1 forever.
                    Trace.WriteLine("Connection closed.");
                    break;
                }

                var b = (byte)next;
                if (b != target)
                {
                    data.Add(b);
                    continue;
                }

                Trace.WriteLine($"receive_length={data.Count}");
                var s = Encoding.UTF8.GetString(data.ToArray());

                // Start the next message from an empty buffer so earlier messages are not repeated.
                data.Clear();
                TXBout.Invoke(new Action(() => TXBout.AppendText(s + "\r\n")));
            }
        }
    }
    catch (System.IO.IOException error)
    {
        // Raised when the read is interrupted, e.g. by the timeout closing the socket.
        Trace.WriteLine($"Receive stopped: {error.Message}");
    }
    catch (ObjectDisposedException error)
    {
        // The socket or the output control was disposed while receiving.
        Trace.WriteLine($"Receive stopped: {error.Message}");
    }
    finally
    {
        socket.Close();
    }
}
/// <summary>
/// Click handler for the iteration button. For the requested number of iterations it splits the
/// input text with the current trie, decays the trie, inserts the words just produced, and saves
/// the trie to a time-stamped ".sav" file.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
/// <remarks>
/// The input is <c>TXBdata.Text</c> unless it is blank, in which case <c>txbDataFileContent</c> is
/// used when non-null and <c>this.data</c> otherwise. An exception inside one iteration is shown in
/// the output box and the loop moves on to the next iteration. Cancelling the prompt or entering a
/// non-integer ends the handler before any work is done.
/// </remarks>
private async void BTNiteration_Click(object sender, EventArgs e)
{
    try
    {
        Trace.Indent();

        // InputBox returns a zero-length string when the dialog is cancelled.
        string answer = Microsoft.VisualBasic.Interaction.InputBox("Iteration count?", "", "1");
        if (string.IsNullOrEmpty(answer))
        {
            Trace.WriteLine("Iteration cancelled.");
            return;
        }

        int iterCount;
        if (!int.TryParse(answer, out iterCount))
        {
            TXBout.Text = $"Invalid iteration count: {answer}";
            return;
        }

        for (int iterIdx = 0; iterIdx < iterCount; iterIdx++)
        {
            // NOTE(review): this pair leaves the indent level unchanged; kept as in the original.
            Trace.Unindent();
            Trace.Indent();

            var iterationStatus = $"Iteration: {iterIdx + 1}/{iterCount}";
            TXBout.AppendText(iterationStatus + "\r\n");
            Trace.WriteLine(iterationStatus);

            try
            {
                Trace.Indent();

                var maxWordLength = int.Parse(IFdata.GetField("maxWordLength"));
                // NOTE(review): the next two values are parsed but never used below; the parses are
                // kept so an invalid field still fails the iteration as before.
                var probRatio = double.Parse(IFdata.GetField("probRatio"));
                var bemsRatio = double.Parse(IFdata.GetField("bemsRatio"));

                var words = new List<string>();
                var ss = new SentenceSplitter(trie, baseDataLength);

                Trace.WriteLine("Getting FPL...");
                List<FPLtype> fpl = await Task.Run(() => SentenceSplitter.MethodsForTrie.FrequencyPerLength(trie));

                Trace.WriteLine("Preprocessing data...");
                var text = string.IsNullOrWhiteSpace(TXBdata.Text)
                    ? (txbDataFileContent != null ? txbDataFileContent : this.data)
                    : TXBdata.Text;
                string[] ddd = await Task.Run(() => text.Split(' '));

                {
                    int progress = 0, total_progress = ddd.Length, percent = -1;
                    object syncRoot = new object();
                    Trace.WriteLine("Splitting...");

                    // Pieces are processed in chunks of 10; each chunk gathers its words locally and
                    // merges them under the lock, so the order of words across chunks is not fixed.
                    await Task.Run(() => Parallel.For(0, (ddd.Length + 9) / 10, chunk =>
                    {
                        List<string> ans = new List<string>();
                        for (int i = chunk * 10; i < (chunk + 1) * 10 && i < ddd.Length; i++)
                        {
                            // Progress in tenths of a percent; reported once per new value.
                            long permille = System.Threading.Interlocked.Increment(ref progress) * 1000L / total_progress;
                            if (permille > percent)
                            {
                                // Re-check under the lock so concurrent chunks cannot report the
                                // same step twice or move the value backwards.
                                lock (syncRoot)
                                {
                                    if (permille > percent)
                                    {
                                        percent = (int)permille;
                                        Trace.WriteLine($"Splitting... {0.1 * percent}%");
                                    }
                                }
                            }

                            ans.AddRange(ss.Split(ddd[i], maxWordLength, fpl, probType, false));
                        }

                        lock (syncRoot)
                        {
                            words.AddRange(ans);
                        }
                    }));

                    Trace.Assert(progress == total_progress);
                }

                Trace.WriteLine($"{words.Count} words / {text.Length} chars identified.");

                // Preview at most the first 1000 words in the output box.
                TXBout.Text = iterationStatus + "\r\n";
                for (int i = 0; i < 1000 && i < words.Count; i++)
                {
                    TXBout.AppendText(words[i] + " ");
                }

                var decayRatio = double.Parse(IFdata.GetField("decayRatio"));
                await Task.Run(() =>
                {
                    Trace.WriteLine($"Decaying... ratio = {decayRatio}");

                    // Trace the sum gathered by Traverse's third callback before and after the decay.
                    long cnt = 0;
                    trie.Traverse(c => { }, () => { }, c => cnt += c);
                    Trace.Write($"\t{cnt}→");
                    trie.Decay(decayRatio);
                    cnt = 0;
                    trie.Traverse(c => { }, () => { }, c => cnt += c);
                    Trace.Write($"{cnt} OK");

                    try
                    {
                        Trace.Indent();
                        int inserted = 0, total = words.Count, lastPercent = -1;
                        foreach (var word in words)
                        {
                            trie.Insert(word);

                            // Report the real running count and the computed percentage, so the
                            // last line reads 100% even when there are fewer than 100 words.
                            long currentPercent = ++inserted * 100L / total;
                            if (currentPercent > lastPercent)
                            {
                                lastPercent = (int)currentPercent;
                                Trace.WriteLine($"{inserted}/{total} words inserted. {lastPercent}%");
                            }
                        }
                    }
                    finally
                    {
                        Trace.Unindent();
                    }
                });

                Trace.WriteLine("Saving Trie...");
                var fileName = $"Trie {DateTime.Now.ToString("yyyy-MM-dd HH-mm-ss.fffffff")}.sav";
                using (var stream = new FileStream(fileName, FileMode.Create))
                {
                    await Task.Run(() => trie.Save(stream));
                }

                Trace.Unindent();
                Trace.Indent();
                Trace.WriteLine("OK: " + fileName);
            }
            catch (Exception error)
            {
                TXBout.Text = error.ToString();
            }
            finally
            {
                Trace.Unindent();
            }
        }

        TXBout.AppendText("\r\nOK");
    }
    catch (Exception error)
    {
        TXBout.Text = error.ToString();
    }
    finally
    {
        Trace.Unindent();
    }
}