/// <summary>
/// Click handler: breaks the text currently in <c>textBox1</c> with a newly
/// created <c>CustomBreaker</c> and lists every reported span in <c>listBox1</c>
/// as "startIndex text".
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void cmdManaged_Click(object sender, EventArgs e)
{
    // Some languages (e.g. Thai, Lao) need dictionary-based breaking;
    // the dictionary data used here comes from the icu-project.
    // TODO: the dictionary should be read only once.
    var dictionaryProvider = new IcuSimpleTextFileDictionaryProvider()
    {
        DataDir = "../../../icu62/brkitr"
    };
    CustomBreakerBuilder.Setup(dictionaryProvider);

    CustomBreaker wordBreaker = CustomBreakerBuilder.NewCustomBreaker();
    wordBreaker.BreakNumberAfterText = true;

    char[] inputChars = this.textBox1.Text.ToCharArray();
    this.listBox1.Items.Clear();

    // Each span reported by the breaker becomes one list entry.
    wordBreaker.SetNewBreakHandler(visitor =>
    {
        BreakSpan current = visitor.GetBreakSpan();
        string piece = new string(inputChars, current.startAt, current.len);
        string entry = current.startAt + " " + piece;
        this.listBox1.Items.Add(entry);
    });

    // Break the whole buffer; results arrive through the handler above.
    wordBreaker.BreakWords(inputChars, 0, inputChars.Length);
}
/// <summary>
/// Creates a new <c>CustomBreaker</c>, configures it from the form's option
/// controls, breaks <paramref name="inputBuffer"/> and lists every reported
/// span in <c>listBox1</c> as "startIndex text".
/// </summary>
/// <remarks>
/// A new breaker does not have to be created every time; it can be reused.
/// It is recreated here only for demonstration.
/// </remarks>
/// <param name="inputBuffer">Characters to break; the whole buffer is processed.</param>
void InitNewCustomTextBreakerAndBreakWords(char[] inputBuffer)
{
    // Some languages (e.g. Thai, Lao) need dictionary-based breaking;
    // the dictionary data used here comes from the icu-project.
    // TODO: the dictionary should be read only once.
    var dicProvider = new IcuSimpleTextFileDictionaryProvider()
    {
        DataDir = "../../../icu62/brkitr"
    };
    CustomBreakerBuilder.Setup(dicProvider);
    CustomBreaker breaker1 = CustomBreakerBuilder.NewCustomBreaker();

    // SelectedItem is null when nothing is selected in the combo box, and
    // unboxing null to an enum throws NullReferenceException. In that case
    // keep whatever option the breaker already has.
    object selectedOption = cmbSurrogatePairBreakOptions.SelectedItem;
    if (selectedOption != null)
    {
        breaker1.EngBreakingEngine.SurrogatePairBreakingOption = (SurrogatePairBreakingOption)selectedOption;
    }

    // Used when we want to break into groups of consecutive unicode ranges
    // (this does not use the dictionary breaker).
    breaker1.UseUnicodeRangeBreaker = chkUseUnicodeRangeBreaker.Checked;
    breaker1.BreakNumberAfterText = true;

    this.listBox1.Items.Clear();

    // Each span reported by the breaker becomes one list entry.
    breaker1.SetNewBreakHandler(vis =>
    {
        BreakSpan span = vis.GetBreakSpan();
        string s = new string(inputBuffer, span.startAt, span.len);
        this.listBox1.Items.Add(span.startAt + " " + s);
    });

    // Break the whole buffer; results arrive through the handler above.
    breaker1.BreakWords(inputBuffer, 0, inputBuffer.Length);
}
/// <summary>
/// Click handler: breaks a fixed sample string with a newly created
/// <c>CustomBreaker</c> (dictionary data from the icu58 directory) and lists
/// every reported span in <c>listBox1</c> as "startIndex text".
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button1_Click(object sender, EventArgs e)
{
    this.listBox1.Items.Clear();

    // Fixed sample input; the text box content is not used here.
    string sampleText = "یہ ایک (car) ہے۔";
    char[] sampleChars = sampleText.ToCharArray();

    var dictionaryProvider = new IcuSimpleTextFileDictionaryProvider()
    {
        DataDir = "../../../icu58/brkitr_src"
    };
    CustomBreakerBuilder.Setup(dictionaryProvider);

    CustomBreaker wordBreaker = CustomBreakerBuilder.NewCustomBreaker();

    // Each span reported by the breaker becomes one list entry.
    wordBreaker.SetNewBreakHandler(visitor =>
    {
        BreakSpan current = visitor.GetBreakSpan();
        string piece = new string(sampleChars, current.startAt, current.len);
        string entry = current.startAt + " " + piece;
        this.listBox1.Items.Add(entry);
    });

    // Just break; spans are reported only through the handler above.
    wordBreaker.BreakWords(sampleChars);
}