/// <summary>
/// Parses one phrase item: word-breaks its plain text, records the span of every
/// "(...)" group and extracts the content of a "{...}" sound-text section.
/// </summary>
/// <param name="runner">Word breaker used to split plain-text spans into words.</param>
/// <param name="_text">Raw item text; may contain "(...)" groups and a "{...}" sound text.</param>
/// <remarks>
/// Results are stored in <c>text</c> (the raw text cut off at the first '{'),
/// <c>soundText</c> (trimmed content between '{' and '}') and <c>wordIdxs</c>
/// (word and bracket spans; positions are offsets into <paramref name="_text"/>).
/// </remarks>
internal PhraseTextItem(StemmerBreaker.Runner runner, string _text) {
    text = _text;

    // Scanner state: 0 = in full-text (plain) text, 1 = in bracket,
    // 2 = in sound text, 3 = after sound text.
    var st = 0;
    short idx = 0;        // index of the character being examined
    short brStart = -1;   // index of the most recent '(' or '{'
    short textStart = 0;  // start of the plain text that has not been word-broken yet
    var posLens = new List<TPosLen>();

    // Word-breaks the pending plain text [textStart, idx) and marks it as consumed,
    // so a later flush (nested or unclosed bracket, end of input) can never emit
    // the same words a second time.
    Action addBeforeBracket = () => {
        if (textStart == idx) {
            return;
        }
        var start = textStart;
        textStart = idx;
        var str = _text.Substring(start, idx - start);
        runner.wordBreak(str, (type, pos, len) => {
            if (type == StemmerBreaker.PutTypes.put) {
                // pos is relative to str; shift it back to an offset into _text.
                posLens.Add(new TPosLen() { pos = (short)(pos + start), len = len });
            }
        });
    };

    while (idx < _text.Length) {
        var ch = _text[idx];
        switch (ch) {
            case '(':
                // Once the sound text has started, '(' is ordinary content: reacting to it
                // would reset the state and make the closing '}' be ignored.
                if (st >= 2) {
                    break;
                }
                addBeforeBracket();
                brStart = idx;
                st = 1;
                break;
            case ')':
                if (st != 1) {
                    break;
                }
                st = 0;
                // The recorded span starts at '(' and ends just before ')'.
                posLens.Add(new TPosLen { pos = brStart, len = (short)(idx - brStart), type = ItemType.bracket });
                textStart = (short)(idx + 1);
                break;
            case '{':
                // Only the first '{' opens the sound text. text is already truncated after it,
                // so truncating again at a later index would throw ArgumentOutOfRangeException.
                if (st >= 2) {
                    break;
                }
                addBeforeBracket();
                text = text.Substring(0, idx);
                brStart = idx;
                st = 2;
                break;
            case '}':
                if (st != 2) {
                    break;
                }
                soundText = _text.Substring(brStart + 1, idx - brStart - 1).Trim();
                st = 3;
                break;
        }
        idx++;
    }

    // Flush the trailing plain text unless a complete sound text was seen.
    // NOTE(review): with an unclosed '{' (st == 2) the flushed words lie past the end of the
    // truncated text; this mirrors the previous behavior - confirm what consumers expect.
    if (st != 3) {
        addBeforeBracket();
    }
    wordIdxs = posLens.ToArray();
}
/// <summary>
/// Imports a book from a JSON file into the database, replacing any stored book
/// that has the same name.
/// </summary>
/// <param name="jsonFile">Path of the JSON file holding the serialized <c>Book</c>.</param>
/// <param name="errorLog">Receives one line for every phrase whose lesson id is not listed in the book metadata.</param>
/// <remarks>
/// A phrase with an unknown lesson is logged but still imported.
/// NOTE(review): the previously stored book is removed and committed before the new
/// one is processed, so a failure later in the import leaves no book with that name.
/// </remarks>
public static void importBook(string jsonFile, StringBuilder errorLog) {
    // Dispose the context deterministically instead of leaving it to the finalizer.
    using (var ctx = new RewiseContext()) {
        Book book = JsonConvert.DeserializeObject<Book>(File.ReadAllText(jsonFile));

        var oldBook = ctx.Books.FirstOrDefault(b => b.Name == book.Name);
        if (oldBook != null) {
            ctx.Books.Remove(oldBook);
            ctx.SaveChanges();
        }

        // Get all used breakers: one per language, created only once even when a
        // locale uses the same language as the book itself.
        var breakers = new Dictionary<byte, StemmerBreaker.Runner>();
        breakers[book.Lang] = new StemmerBreaker.Runner((Langs)book.Lang);
        foreach (var lang in book.Phrases.SelectMany(p => p.Locales).Select(l => l.Lang).Distinct()) {
            if (!breakers.ContainsKey(lang)) {
                breakers[lang] = new StemmerBreaker.Runner((Langs)lang);
            }
        }

        var lessonIds = new HashSet<int>(book.Meta.Lessons.Select(l => l.Id));
        book.MetaData = JsonConvert.SerializeObject(book.Meta);
        book.Imported = DateTime.UtcNow;

        foreach (var phr in book.Phrases) {
            if (!lessonIds.Contains(phr.LessonId)) {
                // Log the source text: TextJSON is only computed further down.
                errorLog.AppendLine(string.Format("Cannot find lesson in book={0}, lessonId={1}, phrase={2}", book.Name, phr.LessonId, phr.Text));
            }

            // Phrases are always in the book's language.
            phr.Lang = book.Lang;
            var phrase = new PhraseLib.PhraseText(breakers[phr.Lang], phr.Text);
            phr.TextJSON = phrase.encode();
            phr.Words = phrase.getFtxWords().Select(posLen => new PhraseWord { Lang = book.Lang, Word = posLen, Book = book }).ToArray();

            foreach (var loc in phr.Locales) {
                var locale = new PhraseLib.PhraseText(breakers[loc.Lang], loc.Text);
                loc.Book = book;
                loc.TextJSON = locale.encode();
                loc.Words = locale.getFtxWords().Select(posLen => new LocaleWord { Lang = loc.Lang, Word = posLen, Book = book, BookSrcLang = book.Lang }).ToArray();
            }
        }

        ctx.Books.Add(book);
        ctx.SaveChanges();
    }
}
/// <summary>
/// Builds a phrase from text whose items are separated by '|'.
/// </summary>
/// <param name="runner">Word breaker handed to every created item.</param>
/// <param name="text">Raw phrase text; each '|'-separated part is trimmed and parsed into one item.</param>
public PhraseText(StemmerBreaker.Runner runner, string text) {
    var parts = text.Split('|');
    var parsed = new PhraseTextItem[parts.Length];
    for (var i = 0; i < parts.Length; i++) {
        // Surrounding whitespace is not part of the item.
        var trimmed = parts[i].Trim();
        parsed[i] = new PhraseTextItem(runner, trimmed);
    }
    items = parsed;
}