示例#1
0
        /// <summary>
        /// Parses a single phrase item in one pass over <paramref name="_text"/>.
        /// Runs of normal text are passed to the word breaker, a closed "(...)" group is
        /// recorded as one span of type <c>ItemType.bracket</c>, and the trimmed content of a
        /// "{...}" group is stored in <c>soundText</c> while <c>text</c> is cut at the '{'.
        /// The collected spans end up in <c>wordIdxs</c>.
        /// </summary>
        /// <param name="runner">Word breaker whose <c>wordBreak</c> is called for every run of normal text.</param>
        /// <param name="_text">Raw item text; character positions are tracked as <c>short</c> values.</param>
        internal PhraseTextItem(StemmerBreaker.Runner runner, string _text)
        {
            text = _text;
            var   st        = 0;   // 0..in ftxText, 1..in bracket, 2..in sound text, 3..after sound text
            short idx       = 0;   // index of the current character
            short brStart   = -1;  // index of the last seen '(' or '{'
            short textStart = 0;   // start of the current run of normal text
            var   posLens   = new List<TPosLen>();

            // Word-breaks the normal text in [textStart, idx) and stores every span that the
            // breaker reports with PutTypes.put, with its position shifted by textStart.
            Action addBeforeBracket = () =>
            {
                if (textStart == idx)
                {
                    return; // empty run, nothing to break
                }
                var str = _text.Substring(textStart, idx - textStart);
                runner.wordBreak(str, (type, pos, len) =>
                {
                    if (type == StemmerBreaker.PutTypes.put)
                    {
                        posLens.Add(new TPosLen() { pos = (short)(pos + textStart), len = len });
                    }
                });
            };

            while (idx < _text.Length)
            {
                var ch = _text[idx];
                switch (ch)
                {
                case '(':
                    addBeforeBracket();
                    brStart = idx;
                    st      = 1;
                    break;

                case ')':
                    if (st != 1)
                    {
                        break; // not inside a bracket: this ')' does not close anything
                    }
                    st = 0;
                    posLens.Add(new TPosLen { pos = brStart, len = (short)(idx - brStart), type = ItemType.bracket });
                    textStart = (short)(idx + 1); // normal text resumes right after the ')'
                    break;

                case '{':
                    addBeforeBracket();
                    // text is cut at the first '{' only. A later '{' lies beyond the end of the
                    // already shortened text, where Substring(0, idx) would throw
                    // ArgumentOutOfRangeException, so the earlier cut is kept in that case.
                    if (idx < text.Length)
                    {
                        text = text.Substring(0, idx);
                    }
                    brStart = idx;
                    st      = 2;
                    break;

                case '}':
                    if (st != 2)
                    {
                        break; // not inside sound text: this '}' does not close anything
                    }
                    soundText = _text.Substring(brStart + 1, idx - brStart - 1).Trim();
                    st        = 3;
                    break;
                }
                idx++;
            }

            // Flush the trailing run of normal text unless the item ended with a closed sound text.
            if (st != 3)
            {
                addBeforeBracket();
            }
            wordIdxs = posLens.ToArray();
        }
示例#2
0
        /// <summary>
        /// Reads a <c>Book</c> from a JSON file and stores it through a new <c>RewiseContext</c>.
        /// A stored book with the same name is removed (and that removal saved) first. For every
        /// phrase and every phrase locale the text is parsed with the word breaker of its language,
        /// and the encoded text and full-text words are written back before the book is added and saved.
        /// </summary>
        /// <param name="jsonFile">Path of the JSON file holding the serialized book.</param>
        /// <param name="errorLog">Receives one line for every phrase whose lesson id is not listed in the book's meta data.</param>
        public static void importBook(string jsonFile, StringBuilder errorLog)
        {
            // NOTE(review): ctx is never disposed. If RewiseContext is IDisposable (it looks like a
            // database context), it should be wrapped in a using block - confirm.
            RewiseContext ctx     = new RewiseContext();
            Book          book    = JsonConvert.DeserializeObject <Book>(File.ReadAllText(jsonFile));
            var           oldBook = ctx.Books.FirstOrDefault(b => b.Name == book.Name);

            if (oldBook != null)
            {
                ctx.Books.Remove(oldBook);
                ctx.SaveChanges();
            }

            // Get all used breakers: one runner per language, for the book itself and for every locale.
            var breakers = new Dictionary <byte, StemmerBreaker.Runner>();

            breakers[book.Lang] = new StemmerBreaker.Runner((Langs)book.Lang);
            foreach (var lang in book.Phrases.SelectMany(p => p.Locales).Select(l => l.Lang).Distinct())
            {
                // A locale may use the book's own language; reuse that runner instead of
                // constructing a second one and throwing the first away.
                if (!breakers.ContainsKey(lang))
                {
                    breakers[lang] = new StemmerBreaker.Runner((Langs)lang);
                }
            }

            var lessonIds = new HashSet <int>(book.Meta.Lessons.Select(l => l.Id));

            book.MetaData = JsonConvert.SerializeObject(book.Meta);
            book.Imported = DateTime.UtcNow;
            foreach (var phr in book.Phrases)
            {
                // An unknown lesson is only reported; the phrase is still imported.
                if (!lessonIds.Contains(phr.LessonId))
                {
                    // NOTE(review): phr.TextJSON is only assigned further down in this loop, so it
                    // may still be unset here; phr.Text might be the intended value - confirm.
                    errorLog.AppendLine(string.Format("Cannot find lesson in book={0}, lessonId={1}, phrase={2}", book.Name, phr.LessonId, phr.TextJSON));
                }

                // The source phrase is always in the book's language.
                phr.Lang = book.Lang;
                var phrase = new PhraseLib.PhraseText(breakers[phr.Lang], phr.Text);
                phr.TextJSON = phrase.encode();
                phr.Words    = phrase.getFtxWords()
                                     .Select(posLen => new PhraseWord { Lang = book.Lang, Word = posLen, Book = book })
                                     .ToArray();

                foreach (var loc in phr.Locales)
                {
                    var locale = new PhraseLib.PhraseText(breakers[loc.Lang], loc.Text);
                    loc.Book     = book;
                    loc.TextJSON = locale.encode();
                    loc.Words    = locale.getFtxWords()
                                         .Select(posLen => new LocaleWord { Lang = loc.Lang, Word = posLen, Book = book, BookSrcLang = book.Lang })
                                         .ToArray();
                }
            }

            ctx.Books.Add(book);
            ctx.SaveChanges();
        }
示例#3
0
 /// <summary>
 /// Splits <paramref name="text"/> on '|' and builds one <c>PhraseTextItem</c> per part,
 /// in order, from the trimmed part text.
 /// </summary>
 /// <param name="runner">Word breaker handed to every created item.</param>
 /// <param name="text">Phrase text whose items are separated by '|'.</param>
 public PhraseText(StemmerBreaker.Runner runner, string text)
 {
     var parts  = text.Split('|');
     var parsed = new PhraseTextItem[parts.Length];

     for (var i = 0; i < parts.Length; i++)
     {
         parsed[i] = new PhraseTextItem(runner, parts[i].Trim());
     }
     items = parsed;
 }