예제 #1
0
 /// <summary>
 /// Splits the current input text into words with a fresh <c>SentenceSplitter</c>,
 /// writing every identified word to "output.txt" (UTF-8, overwritten) and echoing
 /// the words to <c>TXBout</c> until the box holds 10000 characters.
 /// </summary>
 /// <remarks>
 /// The input is <c>TXBdata.Text</c> when it is not blank, otherwise
 /// <c>txbDataFileContent</c>, otherwise <c>data</c>. It is cut at spaces, CR, LF and
 /// tabs, and each non-empty token is passed to <c>SplitAsync</c> separately.
 /// Any exception is shown in <c>TXBout</c> instead of being propagated.
 /// </remarks>
 /// <returns>A task that completes when all tokens were processed and the file is closed.</returns>
 private async Task PerformSplit()
 {
     try
     {
         Trace.Indent();
         // Only maxWordLength is consumed below; the other ratio fields are not needed by this method.
         var maxWordLength = int.Parse(IFdata.GetField("maxWordLength"));
         CHBsplit.Enabled = false;
         string fileName = "output.txt";
         var    encoding = Encoding.UTF8;
         using (var writer = new StreamWriter(fileName, false, encoding))
         {
             TXBout.Clear();
             var d = new SentenceSplitter.WordIdentifiedEventHandler((word) =>
             {
                 writer.WriteLine(word);
                 // The text box is updated through Invoke so the handler may be raised off the UI thread.
                 TXBout.Invoke(new Action(() =>
                 {
                     if (TXBout.TextLength < 10000)
                     {
                         TXBout.AppendText($"{word}\r\n");
                         if (TXBout.TextLength >= 10000)
                         {
                             TXBout.AppendText("......(Cut)\r\n");
                         }
                     }
                 }));
             });
             var ss = new SentenceSplitter(trie, baseDataLength);
             try
             {
                 ss.WordIdentified += d;
                 Trace.WriteLine("Splitting...");
                 var  mainInputs = string.IsNullOrWhiteSpace(TXBdata.Text) ? (txbDataFileContent != null ? txbDataFileContent : data) : TXBdata.Text;
                 // Consecutive separators (e.g. "\r\n" or double spaces) would otherwise produce
                 // empty tokens that are sent to the splitter for nothing.
                 var  inputs     = mainInputs.Split(new[] { ' ', '\r', '\n', '\t' }, StringSplitOptions.RemoveEmptyEntries);
                 long cnt        = 0;
                 foreach (var input in inputs)
                 {
                     cnt += (await ss.SplitAsync(
                                 input,
                                 maxWordLength,
                                 probType,
                                 true)).Count;
                 }
                 Trace.WriteLine($"{cnt} words identified.");
             }
             catch (Exception error) { TXBout.Text = error.ToString(); }
             finally { ss.WordIdentified -= d; }
             // The using block flushes and closes the writer; no explicit Close() is required.
         }
     }
     catch (Exception error) { TXBout.Text = error.ToString(); }
     finally { Trace.Unindent(); CHBsplit.CheckState = CheckState.Indeterminate; CHBsplit.Enabled = true; }
 }
예제 #2
0
        /// <summary>
        /// Asks for a threshold, then writes every word reported by <c>sa.ListFrequentWords</c>
        /// to "output.txt" as "word,count" lines and previews the list in <c>TXBout</c>.
        /// </summary>
        /// <remarks>
        /// The count of a word is <c>sa.UpperBound(word) - sa.LowerBound(word)</c>.
        /// The preview stops growing once it holds 100000 characters; the file receives every word.
        /// Cancelling the prompt or entering a non-numeric value ends the export before
        /// "output.txt" is opened. Other failures are shown in <c>TXBout</c>.
        /// </remarks>
        private async void BTNexportList_Click(object sender, EventArgs e)
        {
            try
            {
                Trace.Indent();
                var answer = Microsoft.VisualBasic.Interaction.InputBox("Threshold?", "", "25");
                // InputBox returns an empty string when the dialog is cancelled; that is not an error.
                if (string.IsNullOrEmpty(answer))
                {
                    Trace.WriteLine("Export cancelled.");
                    return;
                }
                int threshold;
                if (!int.TryParse(answer, out threshold))
                {
                    TXBout.Text = $"Invalid threshold: {answer}";
                    return;
                }

                Trace.WriteLine("Searching...");
                try
                {
                    Trace.Indent();
                    using (StreamWriter writer = new StreamWriter("output.txt", false, Encoding.UTF8))
                    {
                        TXBout.Clear();
                        long progress       = 0;
                        // UTC is used because only the elapsed time matters and it is immune to clock-zone shifts.
                        var  lastUpdateTime = DateTime.UtcNow;
                        await sa.ListFrequentWords(threshold, new Func <string, Task>(async(str) =>
                        {
                            ++progress;
                            var count = sa.UpperBound(str) - sa.LowerBound(str);
                            await writer.WriteLineAsync($"{str},{count}");
                            if (TXBout.TextLength < 100000)
                            {
                                TXBout.AppendText($"{str}\t{count}\r\n");
                                if (TXBout.TextLength >= 100000)
                                {
                                    TXBout.AppendText("......(Cut)");
                                }
                            }
                            // Progress is traced at most about twice per second.
                            if ((DateTime.UtcNow - lastUpdateTime).TotalSeconds > 0.5)
                            {
                                Trace.WriteLine($"{progress} words listed, Ex:{str}\t{count}");
                                lastUpdateTime = DateTime.UtcNow;
                            }
                        }));
                    }
                }
                finally { Trace.Unindent(); }
                Trace.Write("Done");
            }
            catch (Exception error)
            {
                TXBout.Text = error.ToString();
            }
            finally { Trace.Unindent(); }
        }
예제 #3
0
        /// <summary>
        /// Runs the shared splitter <c>ss</c> over the current input text, writing each identified
        /// word to "output.txt" (UTF-8, overwritten) and mirroring the words into <c>TXBout</c>
        /// until the box holds 10000 characters.
        /// </summary>
        /// <remarks>
        /// The input is <c>TXBdata.Text</c> when it is not blank, otherwise
        /// <c>txbDataFileContent</c>, otherwise <c>sa.S</c>. Exceptions are displayed in
        /// <c>TXBout</c> rather than propagated; the split check box is re-enabled in every case.
        /// </remarks>
        /// <returns>A task that completes when the split has finished and the file is closed.</returns>
        private async Task PerformSplit()
        {
            const int displayLimit = 10000;
            try
            {
                Trace.Indent();
                CHBsplit.Enabled = false;
                using (var output = new StreamWriter("output.txt", false, Encoding.UTF8))
                {
                    TXBout.Clear();

                    // Every identified word goes to the file; the text box only shows the first part.
                    SentenceSplitter.WordIdentifiedEventHandler onWordIdentified = word =>
                    {
                        output.WriteLine(word);
                        TXBout.Invoke(new Action(() =>
                        {
                            if (TXBout.TextLength >= displayLimit)
                            {
                                return;
                            }
                            TXBout.AppendText($"{word}\r\n");
                            if (TXBout.TextLength >= displayLimit)
                            {
                                TXBout.AppendText("......(Cut)\r\n");
                            }
                        }));
                    };

                    try
                    {
                        ss.WordIdentified += onWordIdentified;
                        Trace.WriteLine("Splitting...");

                        // Typed text wins; otherwise fall back to the loaded file content, then to sa.S.
                        var source = string.IsNullOrWhiteSpace(TXBdata.Text)
                            ? (txbDataFileContent ?? sa.S)
                            : TXBdata.Text;
                        var identified = await ss.SplitAsync(source, maxWordLength, probType, CHBverbose.Checked);

                        Trace.WriteLine($"{identified.Count} words identified.");
                    }
                    catch (Exception ex)
                    {
                        TXBout.Text = ex.ToString();
                    }
                    finally
                    {
                        ss.WordIdentified -= onWordIdentified;
                    }
                    output.Close();
                }
            }
            catch (Exception ex)
            {
                TXBout.Text = ex.ToString();
            }
            finally
            {
                Trace.Unindent();
                CHBsplit.CheckState = CheckState.Indeterminate;
                CHBsplit.Enabled    = true;
            }
        }
예제 #4
0
        /// <summary>
        /// Handles Ctrl+Enter in <c>TXBin</c> and runs the action selected in <c>CBmethod</c>.
        /// </summary>
        /// <remarks>
        /// "Send Socket": the first input line is a TCP port on 127.0.0.1 and the second line is the
        /// message. The message is sent UTF-8 encoded followed by a '\0' terminator, then a background
        /// thread appends every '\0'-terminated reply to <c>TXBout</c>. Reading stops when the peer
        /// closes the connection or after 60 seconds, and the socket is closed in both cases.
        /// "Cut by Code": passes the input text to <c>CutByCode</c> and shows a non-null result.
        /// Failures in either action are shown in <c>TXBout</c>.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Key data; only Enter combined with Control triggers an action.</param>
        private async void TXBin_KeyDown(object sender, KeyEventArgs e)
        {
            if (e.KeyCode == Keys.Enter && e.Control)
            {
                switch (CBmethod.SelectedItem)
                {
                case "Send Socket":
                {
                    Socket client_sock      = null;
                    bool   readerOwnsSocket = false;
                    try
                    {
                        Trace.Indent();
                        Trace.WriteLine($"Initializing Socket...");
                        var input = TXBin.Text.Split('\n').Select(s => s.TrimEnd('\r')).ToArray();
                        var port  = int.Parse(input[0]);
                        TXBout.Text = $"port: {port}\r\nmsg: {input[1]}\r\n";
                        client_sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
                        client_sock.Connect(new IPEndPoint(IPAddress.Parse("127.0.0.1"), port));
                        Trace.WriteLine($"Connected: {client_sock.Connected}");
                        client_sock.Send(Encoding.UTF8.GetBytes(input[1]));
                        client_sock.Send(new[] { (byte)'\0' });
                        var thread = new Thread(() =>
                            {
                                try
                                {
                                    using (var stream = new NetworkStream(client_sock))
                                    {
                                        List <byte> data  = new List <byte>();
                                        const byte target = (byte)'\0';
                                        while (true)
                                        {
                                            var _b = stream.ReadByte();
                                            if (_b == -1)
                                            {
                                                // End of stream: leave the loop instead of spinning on -1 forever.
                                                Trace.WriteLine("Connection closed.");
                                                break;
                                            }
                                            var b = (byte)_b;
                                            if (b == target)
                                            {
                                                Trace.WriteLine($"receive_length={data.Count}");
                                                var s = Encoding.UTF8.GetString(data.ToArray());
                                                TXBout.Invoke(new Action(() => TXBout.AppendText(s + "\r\n")));
                                                // Start the next message from an empty buffer.
                                                data.Clear();
                                            }
                                            else
                                            {
                                                data.Add(b);
                                            }
                                        }
                                    }
                                }
                                catch (Exception readError)
                                {
                                    // Reached when the watchdog closes the socket or the text box is gone;
                                    // an unhandled exception on this thread would terminate the process.
                                    Trace.WriteLine($"Socket reader stopped: {readError.Message}");
                                }
                                finally { client_sock.Close(); }
                            });
                        thread.IsBackground = true;
                        thread.Start();
                        readerOwnsSocket = true;

                        // Watchdog: give the reader 60 seconds, then close the socket so the blocked
                        // read fails and the reader thread ends (Thread.Abort is not supported on
                        // current .NET runtimes).
                        var watchdog = new Thread(() =>
                            {
                                if (!thread.Join(1000 * 60))
                                {
                                    client_sock.Close();
                                }
                            });
                        watchdog.IsBackground = true;
                        watchdog.Start();
                    }
                    catch (Exception error)
                    {
                        TXBout.Text = error.ToString();
                    }
                    finally
                    {
                        // Until the reader thread has started, this method is responsible for the socket.
                        if (!readerOwnsSocket && client_sock != null)
                        {
                            client_sock.Close();
                        }
                        Trace.Unindent();
                    }
                }
                break;

                case "Cut by Code":
                {
                    try
                    {
                        string s = await CutByCode(TXBin.Text);
                        if (s != null)
                        {
                            TXBout.Text = s;
                        }
                    }
                    catch (Exception error)
                    {
                        // async void: an escaping exception would be raised on the UI context unhandled.
                        TXBout.Text = error.ToString();
                    }
                }
                break;
                }
            }
        }
예제 #5
0
        /// <summary>
        /// Runs a user-chosen number of iterations; each one splits the input data into words with
        /// the current trie, decays the trie, inserts the words back into it and saves the trie to a
        /// timestamped ".sav" file.
        /// </summary>
        /// <remarks>
        /// The input is <c>TXBdata.Text</c> when it is not blank, otherwise
        /// <c>txbDataFileContent</c>, otherwise <c>this.data</c>. An exception inside one iteration
        /// is shown in <c>TXBout</c> and the loop continues with the next iteration.
        /// </remarks>
        private async void BTNiteration_Click(object sender, EventArgs e)
        {
            try
            {
                Trace.Indent();
                int iterCount = int.Parse(Microsoft.VisualBasic.Interaction.InputBox("Iteration count?", "", "1"));
                for (int iterIdx = 0; iterIdx < iterCount; iterIdx++)
                {
                    // NOTE(review): this Unindent/Indent pair leaves the indent level unchanged; its purpose
                    // is not visible here (possibly to force the trace listener to refresh) — confirm.
                    Trace.Unindent();
                    Trace.Indent();
                    var iterationStatus = $"Iteration: {iterIdx + 1}/{iterCount}";
                    TXBout.AppendText(iterationStatus + "\r\n");
                    Trace.WriteLine(iterationStatus);
                    try
                    {
                        Trace.Indent();
                        var            maxWordLength = int.Parse(IFdata.GetField("maxWordLength"));
                        // probRatio and bemsRatio are parsed but not used anywhere else in this method.
                        var            probRatio     = double.Parse(IFdata.GetField("probRatio"));
                        var            bemsRatio     = double.Parse(IFdata.GetField("bemsRatio"));
                        var            words         = new List <string>();
                        var            ss            = new SentenceSplitter(trie, baseDataLength);
                        List <FPLtype> fpl           = null;
                        Trace.WriteLine("Getting FPL...");
                        // Computed off the calling thread via Task.Run.
                        await Task.Run(() => fpl = SentenceSplitter.MethodsForTrie.FrequencyPerLength(trie));

                        string[] ddd = null;
                        Trace.WriteLine("Preprocessing data...");
                        var data = string.IsNullOrWhiteSpace(TXBdata.Text) ? (txbDataFileContent != null ? txbDataFileContent : this.data) : TXBdata.Text;
                        // Only the space character is used as a separator here.
                        await Task.Run(() => ddd = data.Split(' '));

                        {
                            int    progress = 0, total_progress = ddd.Length, percent = -1;
                            object syncRoot = new object();
                            Trace.WriteLine("Splitting...");
                            // Each parallel work item handles a chunk of up to 10 consecutive entries of ddd.
                            await Task.Run(() => Parallel.For(0, (ddd.Length + 9) / 10, _ =>
                            {
                                List <string> ans = new List <string>();
                                for (int i = _ * 10; i < (_ + 1) * 10 && i < ddd.Length; i++)
                                {
                                    {
                                        // p is the progress in tenths of a percent (0..1000).
                                        // NOTE(review): percent is read and written by parallel work items without
                                        // synchronization, so progress lines may repeat or appear out of order — confirm
                                        // this is acceptable.
                                        var p = System.Threading.Interlocked.Increment(ref progress) * 1000L / total_progress;
                                        if (p > percent)
                                        {
                                            percent = (int)p;
                                            Trace.WriteLine($"Splitting... {0.1 * percent}%");
                                        }
                                    }
                                    ans.AddRange(ss.Split(
                                                     ddd[i],
                                                     maxWordLength,
                                                     fpl,
                                                     probType,
                                                     false));
                                }
                                // Chunk results are merged under a lock; the merge order depends on which chunk finishes first.
                                lock (syncRoot) words.AddRange(ans);
                            }));

                            Trace.Assert(progress == total_progress);
                        }
                        Trace.WriteLine($"{words.Count} words / {data.Length} chars identified.");
                        // Replace the output box content with the status line followed by at most the first 1000 words.
                        TXBout.Text = iterationStatus + "\r\n";
                        for (int i = 0; i < 1000 && i < words.Count; i++)
                        {
                            TXBout.AppendText(words[i] + " ");
                        }
                        var decayRatio = double.Parse(IFdata.GetField("decayRatio"));
                        await Task.Run(() =>
                        {
                            Trace.WriteLine($"Decaying... ratio = {decayRatio}");
                            // Sum the values passed to the third Traverse callback before and after Decay, for tracing only.
                            long cnt = 0;
                            trie.Traverse(c => { }, () => { }, c => cnt += c);
                            Trace.Write($"\t{cnt}→");
                            trie.Decay(decayRatio);
                            cnt = 0;
                            trie.Traverse(c => { }, () => { }, c => cnt += c);
                            Trace.Write($"{cnt} OK");
                            try
                            {
                                Trace.Indent();
                                int progress = 0, total_progress = words.Count, percent = -1;
                                foreach (var word in words)
                                {
                                    // NOTE(review): the message prints the total words.Count, not the running progress,
                                    // and percent advances by one per message — confirm this is intended.
                                    if (++progress * 100L / total_progress > percent)
                                    {
                                        Trace.WriteLine($"{words.Count} words / {data.Length} chars inserted. {++percent}%");
                                    }
                                    trie.Insert(word);
                                }
                            }
                            finally { Trace.Unindent(); }
                        });

                        Trace.WriteLine("Saving Trie...");
                        // The file name carries a timestamp with 7 fractional-second digits, so each iteration writes a new file.
                        var fileName = $"Trie {DateTime.Now.ToString("yyyy-MM-dd HH-mm-ss.fffffff")}.sav";
                        using (var stream = new FileStream(fileName, FileMode.Create))
                        {
                            await Task.Run(() => trie.Save(stream));
                        }
                        Trace.Unindent();
                        Trace.Indent();
                        Trace.WriteLine("OK: " + fileName);
                    }
                    // A failing iteration is reported in the output box; the loop then moves on.
                    catch (Exception error) { TXBout.Text = error.ToString(); }
                    finally { Trace.Unindent(); }
                }
                TXBout.AppendText("\r\nOK");
            }
            catch (Exception error) { TXBout.Text = error.ToString(); }
            finally { Trace.Unindent(); }
        }