/// <summary>
        /// Compiles <paramref name="dataInput"/> as the body of a generated static method
        /// <c>double FooMethod(string S, int N, Func&lt;string,int&gt; C)</c> and passes that method to
        /// <c>SentenceSplitter.Split</c> for every whitespace-separated piece of the current input text.
        /// </summary>
        /// <remarks>
        /// Calls are serialized through <c>SemaphoreSlim_CutByCode</c>. Each call takes a ticket from
        /// <c>counter_CutByCode</c>; if a newer call was issued while this one was waiting, this one gives up.
        /// SECURITY: <paramref name="dataInput"/> is compiled and executed verbatim, so it must only ever come
        /// from a trusted user.
        /// NOTE(review): the meaning of the S / N / C arguments is decided by SentenceSplitter, which is not
        /// visible here; an older comment described the snippet as "double(double C, double E) // count,
        /// entropy, return score", which does not match the signature generated below.
        /// </remarks>
        /// <param name="dataInput">C# statements forming the body of the generated scoring method.</param>
        /// <returns>
        /// <c>null</c> when the call was superseded by a newer one; otherwise the split result (one line per
        /// input piece, appended only while the output stays within about 10000 characters), or the text of
        /// the exception if compiling or splitting failed.
        /// </returns>
        async Task <string> CutByCode(string dataInput)
        {
            // Ticket for this request; any later call increments the shared counter and makes this one stale.
            var counter = System.Threading.Interlocked.Increment(ref counter_CutByCode);

            // Acquire BEFORE entering the try: if the wait itself fails, the finally below must not
            // release a slot that was never taken.
            await SemaphoreSlim_CutByCode.WaitAsync();

            try
            {
                // A newer request arrived while we were queued; skip the expensive work.
                if (counter != System.Threading.Interlocked.Read(ref counter_CutByCode))
                {
                    return(null);
                }
                const string namespaceName = "WikiDataAnalysis", className = "FooClass", methodName = "FooMethod";
                // Wrap the user snippet in a minimal compilable class.
                string       code =
                    "using System;" +
                    $"namespace {namespaceName}" +
                    "{" +
                    $"   class {className}" +
                    "   {" +
                    $"       public static double {methodName}(string S,int N,Func<string,int> C)" +
                    "       {" +
                    $"           {dataInput}" +
                    "       }" +
                    "   }" +
                    "}";
                System.Reflection.MethodInfo methodInfo;
                try
                {
                    Trace.Indent();
                    Trace.WriteLine($"Compiling... code length = {code.Length}");
                    methodInfo = Utils.DynamicCompile.GetMethod(code, namespaceName, className, methodName, "System");
                    // Adapt the reflected static method to a strongly typed delegate.
                    var method = new Func <string, int, Func <string, int>, double>((s, n, c) => (double)methodInfo.Invoke(null, new object[] { s, n, c }));
                    Trace.WriteLine("Splitting...");
                    var           maxWordLength = int.Parse(IFdata.GetField("maxWordLength"));
                    // probRatio / bemsRatio are not used below; parsing is kept so that a malformed
                    // field is still reported exactly as before.
                    var           probRatio     = double.Parse(IFdata.GetField("probRatio"));
                    var           bemsRatio     = double.Parse(IFdata.GetField("bemsRatio"));
                    StringBuilder sb_ret        = new StringBuilder();
                    long          cnt           = 0;

                    // Read the text box here, on the calling context, rather than inside Task.Run:
                    // the worker below runs on a pool thread and should not touch the control.
                    var typedText  = TXBdata.Text;
                    var mainInputs = string.IsNullOrWhiteSpace(typedText) ? (txbDataFileContent != null ? txbDataFileContent : data) : typedText;

                    await Task.Run(() =>
                    {
                        var inputs = mainInputs.Split(' ', '\r', '\n', '\t');
                        if (ss_CutByCode == null)
                        {
                            ss_CutByCode = new SentenceSplitter(sa);
                        }
                        const int maxoutputLength = 10000;
                        // Once the output would exceed the cap, stop appending but keep counting words.
                        bool appending            = true;
                        int progress       = 0, total_progress = inputs.Length;
                        var lastUpdateTime = DateTime.MinValue;
                        foreach (var input in inputs)
                        {
                            ++progress;
                            // Throttle progress messages to roughly two per second.
                            if ((DateTime.Now - lastUpdateTime).TotalSeconds > 0.5)
                            {
                                Trace.WriteLine($"Splitting... {progress}/{total_progress}");
                                lastUpdateTime = DateTime.Now;
                            }
                            var cutResult = ss_CutByCode.Split(input, maxWordLength, method, false);
                            cnt          += cutResult.Count;
                            if (sb_ret.Length + cutResult.Sum(s => (long)s.Length) > maxoutputLength)
                            {
                                appending = false;
                            }
                            if (appending)
                            {
                                sb_ret.AppendLine(string.Join(" ", cutResult));
                            }
                        }
                    });

                    Trace.WriteLine($"{cnt} words identified.");
                    return(sb_ret.ToString());
                }
                // Compile/split failures are returned as text so the caller can display them.
                catch (Exception error) { return(error.ToString()); }
                finally { Trace.Unindent(); }
            }
            finally { lock (SemaphoreSlim_CutByCode) SemaphoreSlim_CutByCode.Release(); }
        }
Exemple #2
0
        /// <summary>
        /// Click handler that runs a user-chosen number of split / decay / re-insert iterations on the trie.
        /// </summary>
        /// <remarks>
        /// Each iteration:
        /// 1. reads the settings from <c>IFdata</c> and computes the frequency-per-length table of the trie;
        /// 2. splits the input text on spaces and runs <c>SentenceSplitter.Split</c> over it in parallel,
        ///    in chunks of 10 pieces;
        /// 3. shows up to the first 1000 resulting words in <c>TXBout</c>;
        /// 4. decays the trie by <c>decayRatio</c> and inserts every resulting word into it;
        /// 5. saves the trie to a time-stamped <c>.sav</c> file.
        /// An exception inside one iteration is shown in <c>TXBout</c> and the loop moves on to the next
        /// iteration; an exception outside the loop (e.g. a non-numeric iteration count) is shown the same way.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private async void BTNiteration_Click(object sender, EventArgs e)
        {
            try
            {
                Trace.Indent();
                int iterCount = int.Parse(Microsoft.VisualBasic.Interaction.InputBox("Iteration count?", "", "1"));
                for (int iterIdx = 0; iterIdx < iterCount; iterIdx++)
                {
                    // Net effect on the indent level is zero.
                    // NOTE(review): presumably a trace listener reacts to the level change - confirm.
                    Trace.Unindent();
                    Trace.Indent();
                    var iterationStatus = $"Iteration: {iterIdx + 1}/{iterCount}";
                    TXBout.AppendText(iterationStatus + "\r\n");
                    Trace.WriteLine(iterationStatus);
                    try
                    {
                        Trace.Indent();
                        var            maxWordLength = int.Parse(IFdata.GetField("maxWordLength"));
                        // probRatio / bemsRatio are not used below; parsing is kept so that a malformed
                        // field is still reported exactly as before.
                        var            probRatio     = double.Parse(IFdata.GetField("probRatio"));
                        var            bemsRatio     = double.Parse(IFdata.GetField("bemsRatio"));
                        var            words         = new List <string>();
                        var            ss            = new SentenceSplitter(trie, baseDataLength);
                        List <FPLtype> fpl           = null;
                        Trace.WriteLine("Getting FPL...");
                        await Task.Run(() => fpl = SentenceSplitter.MethodsForTrie.FrequencyPerLength(trie));

                        string[] ddd = null;
                        Trace.WriteLine("Preprocessing data...");
                        // Text box content wins; otherwise the loaded file content, then the data field.
                        var data = string.IsNullOrWhiteSpace(TXBdata.Text) ? (txbDataFileContent != null ? txbDataFileContent : this.data) : TXBdata.Text;
                        await Task.Run(() => ddd = data.Split(' '));

                        {
                            int    progress = 0, total_progress = ddd.Length, percent = -1;
                            object syncRoot = new object();
                            Trace.WriteLine("Splitting...");
                            // One parallel work item per chunk of 10 pieces.
                            await Task.Run(() => Parallel.For(0, (ddd.Length + 9) / 10, chunkIndex =>
                            {
                                List <string> ans = new List <string>();
                                for (int i = chunkIndex * 10; i < (chunkIndex + 1) * 10 && i < ddd.Length; i++)
                                {
                                    {
                                        // Progress in tenths of a percent. 'percent' is shared between workers
                                        // without synchronization, so a progress line may occasionally repeat.
                                        var p = System.Threading.Interlocked.Increment(ref progress) * 1000L / total_progress;
                                        if (p > percent)
                                        {
                                            percent = (int)p;
                                            Trace.WriteLine($"Splitting... {0.1 * percent}%");
                                        }
                                    }
                                    ans.AddRange(ss.Split(
                                                     ddd[i],
                                                     maxWordLength,
                                                     fpl,
                                                     probType,
                                                     false));
                                }
                                // Merge this chunk's words into the shared list; chunk order is not preserved.
                                lock (syncRoot) words.AddRange(ans);
                            }));

                            Trace.Assert(progress == total_progress);
                        }
                        Trace.WriteLine($"{words.Count} words / {data.Length} chars identified.");
                        TXBout.Text = iterationStatus + "\r\n";
                        // Preview at most the first 1000 words.
                        for (int i = 0; i < 1000 && i < words.Count; i++)
                        {
                            TXBout.AppendText(words[i] + " ");
                        }
                        var decayRatio = double.Parse(IFdata.GetField("decayRatio"));
                        await Task.Run(() =>
                        {
                            Trace.WriteLine($"Decaying... ratio = {decayRatio}");
                            // Log the accumulated trie total before and after the decay.
                            long cnt = 0;
                            trie.Traverse(c => { }, () => { }, c => cnt += c);
                            Trace.Write($"\t{cnt}→");
                            trie.Decay(decayRatio);
                            cnt = 0;
                            trie.Traverse(c => { }, () => { }, c => cnt += c);
                            Trace.Write($"{cnt} OK");
                            try
                            {
                                Trace.Indent();
                                int progress = 0, total_progress = words.Count, percent = -1;
                                foreach (var word in words)
                                {
                                    // Report the real percentage. Bumping 'percent' by one per message made
                                    // the figure lag behind (and never reach 100%) with fewer than 100 words.
                                    var insertedPercent = ++progress * 100L / total_progress;
                                    if (insertedPercent > percent)
                                    {
                                        percent = (int)insertedPercent;
                                        Trace.WriteLine($"{words.Count} words / {data.Length} chars inserted. {percent}%");
                                    }
                                    trie.Insert(word);
                                }
                            }
                            finally { Trace.Unindent(); }
                        });

                        Trace.WriteLine("Saving Trie...");
                        var fileName = $"Trie {DateTime.Now.ToString("yyyy-MM-dd HH-mm-ss.fffffff")}.sav";
                        using (var stream = new FileStream(fileName, FileMode.Create))
                        {
                            await Task.Run(() => trie.Save(stream));
                        }
                        Trace.Unindent();
                        Trace.Indent();
                        Trace.WriteLine("OK: " + fileName);
                    }
                    // A failed iteration is shown to the user; the loop continues with the next one.
                    catch (Exception error) { TXBout.Text = error.ToString(); }
                    finally { Trace.Unindent(); }
                }
                TXBout.AppendText("\r\nOK");
            }
            catch (Exception error) { TXBout.Text = error.ToString(); }
            finally { Trace.Unindent(); }
        }