/// <summary>
/// Prepares this consumer for a new run: logs the start, lets the base class
/// prepare itself, clears the element/segment/word/syllable cursors and
/// (re)loads the stress heuristics from <c>stressHeuristicsFile</c>.
/// </summary>
protected override void BeforeConsumption()
{
    Log(LogLevel.MajorInfo, "Started");
    base.BeforeConsumption();

    // Nothing has been consumed yet, so there is no current element,
    // segment, word or syllable.
    curElementIndex = -1;
    curWord = null;
    curSegment = null;
    curSyl = null;

    LoadStressHeuristics(stressHeuristicsFile);
}
/// <summary>
/// Prepares this consumer for a new run: announces the start on the console,
/// lets the base class prepare itself, clears the element/segment/word/syllable
/// cursors and (re)loads the stress heuristics from <c>stressHeuristicsFile</c>.
/// </summary>
protected override void BeforeConsumption()
{
    Console.WriteLine("segmenter started");
    base.BeforeConsumption();

    // Nothing has been consumed yet, so there is no current element,
    // segment, word or syllable.
    curElementIndex = -1;
    curWord = null;
    curSegment = null;
    curSyl = null;

    LoadStressHeuristics(stressHeuristicsFile);
}
/// <summary>
/// Builds a word from a list of speech elements and divides it into syllables.
/// </summary>
/// <remarks>
/// The supplied list is stored as <c>Phonemes</c> without being copied. Cantillation
/// elements found in it are removed from that list once their marks have been recorded
/// on both the word and the syllable they belong to. If the word contains no vowel that
/// can act as a syllable nucleus, no syllables are created and cantillation elements
/// are left untouched.
/// </remarks>
/// <param name="Elements">The speech elements making up the word.</param>
public Word(List<SpeechElement> Elements)
{
    Phonemes = Elements;

    // Syllabification works on a copy with trailing separators stripped; indices into
    // this copy are also valid indices into Phonemes because only the tail differs.
    List<SpeechElement> elems = new List<SpeechElement>(Phonemes);
    while ((elems.Count > 0) && (elems[elems.Count - 1] is Separator))
    {
        elems.RemoveAt(elems.Count - 1);
    }

    // A nucleus is any vowel that is not very short, inaudible or a patah gnuva.
    List<SpeechElement> nuclei = elems.FindAll(
        delegate(SpeechElement e)
        {
            return (e is Vowel) && (!((Vowel)e).IsVowelIn(Vowels.VeryShort | Vowels.Inaudible | Vowels.PatahGnuva));
        });

    if (nuclei.Count == 0)
    {
        return;
    }

    Syllable syl;

    // Pass 1: create one syllable per nucleus and extend its end over the coda.
    foreach (SpeechElement nucleus in nuclei)
    {
        int i = elems.IndexOf(nucleus);
        syl = new Syllable(this, (Vowel)nucleus, i, i);

        for (int j = i + 1; j < elems.Count; j++)
        {
            if (elems[j] is Consonant)
            {
                bool strongDagesh = (((Consonant)elems[j]).Flags & ConsonantFlags.StrongDagesh) != 0;

                // A consonant closes the syllable when it carries a strong dagesh, when it is
                // the last element, or when it is followed by an inaudible vowel / patah gnuva.
                if (strongDagesh
                    || (j + 1 == elems.Count)
                    || ((elems[j + 1] is Vowel) && ((Vowel)elems[j + 1]).IsVowelIn(Vowels.Inaudible | Vowels.PatahGnuva)))
                {
                    syl.HintStrongDagesh = strongDagesh;
                    syl.End = j;
                }
                else
                {
                    break;
                }
            }
            else if ((elems[j] is Vowel) && (((Vowel)elems[j]).IsVowelIn(Vowels.Inaudible | Vowels.PatahGnuva)))
            {
                syl.End = j;
            }
            else
            {
                break;
            }
        }

        Syllables.Add(syl);
    }

    // Pass 2: each syllable starts right after the previous one ends. After a strong
    // dagesh the closing consonant is shared, so the next syllable starts ON it.
    syl = null;
    for (int i = 0; i < Syllables.Count; i++)
    {
        syl = Syllables[i];
        if (i == 0)
        {
            syl.Start = 0;
        }
        else if (Syllables[i - 1].HintStrongDagesh)
        {
            syl.Start = Syllables[i - 1].End;
        }
        else
        {
            syl.Start = Syllables[i - 1].End + 1;
        }
    }

    // The last syllable absorbs whatever elements remain at the end of the word.
    if ((syl != null) && (syl.End < elems.Count - 1))
    {
        syl.End = elems.Count - 1;
    }

    // Pass 3: move cantillation marks out of the phoneme list and onto the word and
    // the syllable they fall in, shifting syllable boundaries to account for the removal.
    List<SpeechElement> cant = elems.FindAll(
        delegate(SpeechElement e)
        {
            return e is Cantillation;
        });

    foreach (Cantillation c in cant)
    {
        int ci = Phonemes.IndexOf(c);
        int si = Syllables.FindLastIndex(
            delegate(Syllable s)
            {
                return s.Start < ci;
            });

        // FindLastIndex yields -1 when no syllable starts before the mark, which happens
        // when the mark is the very first element (the first syllable always starts at 0).
        // Attach such a mark to the first syllable instead of indexing with -1.
        if (si < 0)
        {
            si = 0;
        }

        Syllables[si].CantillationMarks.Add(c.Mark);
        this.CantillationMarks.Add(c.Mark);
        Phonemes.RemoveAt(ci);

        Syllables.ForEach(
            delegate(Syllable s)
            {
                if (s.Start > ci)
                {
                    s.Start--;
                }
                if (s.End >= ci)
                {
                    s.End--;
                }
            });
    }
}
/// <summary>
/// Tests the given syllable against this instance by delegating to the
/// syllable's own <c>Match</c> method.
/// </summary>
/// <param name="syl">The syllable to test.</param>
/// <returns>Whatever <c>syl.Match(this)</c> returns.</returns>
public bool Match(Syllable syl)
{
    bool isMatch = syl.Match(this);
    return isMatch;
}
/// <summary>
/// Takes the next segment off the queue and emits the phones it gives rise to.
/// </summary>
/// <remarks>
/// A separator segment emits at most one phone (only when its first separator is
/// punctuation) and marks the start of a new clause. A word emits one phone for every
/// syllable phoneme for which <c>Phone.Create</c> returns a phone, each annotated with
/// syllable context and a duration, and then, when <c>Options.SingCantillation</c> is
/// set, a pause phone per cantillation mark whose disjunctive rank is below 5.
/// </remarks>
/// <param name="InQueue">The queue of segments awaiting conversion.</param>
protected override void Consume(Queue<Segment> InQueue)
{
    if (InQueue.Count == 0)
    {
        return;
    }

    Segment seg = InQueue.Dequeue();
    _ItemConsumed(seg);

    // Peek (without consuming) at what follows, so phones can be told about it.
    Segment nextSeg = (InQueue.Count > 0) ? InQueue.Peek() : null;

    if (seg is SeparatorSegment)
    {
        Separator leadingSeparator = seg[0] as Separator;
        if (HebrewChar.IsPunctuation(leadingSeparator.Latin[0]))
        {
            // Fall back to a minimal pause when no phone can be created for the separator.
            Phone separatorPhone = Phone.Create(seg[0]) ?? new Phone("_", 1);
            Emit(separatorPhone);
        }

        firstStressInClause = true;
        return;
    }

    Word word = seg as Word;
    if (word == null)
    {
        return;
    }

    SeparatorSegment followingSeparator = nextSeg as SeparatorSegment;
    bool stressNotYetReached = true;

    // The most recent strong-dagesh consonant that was emitted; when the same element
    // shows up again (it can be shared by two adjacent syllables) it is skipped.
    SpeechElement lastDageshConsonant = null;

    for (int sylIndex = 0; sylIndex < word.Syllables.Count; sylIndex++)
    {
        Syllable syl = word.Syllables[sylIndex];
        bool stressed = syl.IsStressed;
        if (stressed)
        {
            stressNotYetReached = false;
        }

        bool nucleusNotYetReached = true;

        foreach (SpeechElement e in syl.Phonemes)
        {
            Consonant consonant = e as Consonant;
            Vowel vowel = e as Vowel;
            bool hasStrongDagesh = (consonant != null)
                && ((consonant.Flags & ConsonantFlags.StrongDagesh) != 0);

            if (hasStrongDagesh && (e == lastDageshConsonant))
            {
                continue;
            }

            Phone phone = Phone.Create(e);
            if (phone == null)
            {
                continue;
            }

            // --- context ---
            phone.Context.SylIndex = sylIndex;
            phone.Context.SylReverseIndex = word.Syllables.Count - sylIndex - 1;
            phone.Context.IsNucleus = (syl.Nucleus == e);
            phone.Context.IsAccented = stressed;
            if (followingSeparator != null)
            {
                phone.Context.NextSeparator = followingSeparator[0].Latin;
            }

            if (stressed)
            {
                phone.Context.AccentStrength = 1;
            }
            else
            {
                // Optional vowel reduction in unstressed syllables.
                if (Options.Akanye && (phone.Symbol == "o"))
                {
                    phone.Symbol = "a";
                }
                if (Options.Ikanye && (phone.Symbol == "e"))
                {
                    phone.Symbol = "i";
                }
            }

            if (phone.PitchCurve.Count > 0)
            {
                phone.PitchCurve.Clear();
            }

            // --- base duration by phoneme class ---
            phone.Duration = 80;
            if (vowel != null)
            {
                if (vowel.IsVowelIn(Vowels.VeryShort))
                {
                    phone.Duration = (vowel.vowel == Vowels.AudibleSchwa) ? 28 : 40;
                }
                else if (vowel.IsVowelIn(Vowels.Short))
                {
                    phone.Duration = 90;
                }
                else if (vowel.IsVowelIn(Vowels.Long))
                {
                    phone.Duration = 94;
                }
                else if (vowel.IsVowelIn(Vowels.VeryLong))
                {
                    phone.Duration = 97;
                }

                int heightBonus = vowel.IsVowelIn(Vowels.HighVowels) ? 25 : 30;
                phone.Duration += heightBonus;
            }
            else if (consonant != null)
            {
                phone.Duration = consonant.Sonority * 1.6 + 60;
            }
            else
            {
                phone.Duration = 100;
            }

            // --- stress-dependent scaling ---
            if (stressed)
            {
                firstStressInClause = false;

                if (e == syl.Nucleus)
                {
                    nucleusNotYetReached = false;
                }

                // Stressed phones are currently scaled by a neutral factor.
                phone.Duration *= 1;

                if (nucleusNotYetReached && (consonant != null) && consonant.IsLiquid)
                {
                    phone.Duration *= 1.2;
                }
            }
            else if (vowel != null)
            {
                if (stressNotYetReached)
                {
                    phone.Duration *= 0.85;
                }
                else
                {
                    phone.Duration *= 0.9;
                }
            }

            // --- strong dagesh lengthening ---
            if (hasStrongDagesh)
            {
                lastDageshConsonant = e;
                if (Options.DistinguishStrongDagesh)
                {
                    phone.Duration *= 1.4;
                }
                else
                {
                    phone.Duration *= 1.1F;
                }
            }

            Emit(phone);
        }
    }

    if (!Options.SingCantillation)
    {
        return;
    }

    // One pause phone per cantillation mark of rank below 5; ranks other than 1-4
    // in that range produce a zero-length pause.
    foreach (char mark in word.CantillationMarks)
    {
        int rank = HebrewChar.DisjunctiveRank(mark);
        if (rank >= 5)
        {
            continue;
        }

        int pauseLength = 0;
        switch (rank)
        {
            case 1:
                pauseLength = 230;
                break;
            case 2:
                pauseLength = 165;
                break;
            case 3:
                pauseLength = 60;
                break;
            case 4:
                pauseLength = 30;
                break;
        }

        Emit(new Phone("_", pauseLength));
    }
}
/// <summary>
/// Takes the next speech element off the queue and folds it into the segment under
/// construction: phonemes grow the current word and its syllables, word tags and
/// cantillation marks annotate the word, and separators close the current segment
/// and emit a separator segment.
/// </summary>
/// <param name="InQueue">The queue of speech elements awaiting segmentation.</param>
protected override void Consume(Queue<SpeechElement> InQueue)
{
    if (InQueue.Count == 0)
    {
        return;
    }

    SpeechElement curElement = InQueue.Dequeue();
    Log("Consuming {0} ({1})", curElement.GetType().Name, curElement.Latin);
    _ItemConsumed(curElement);

    // One element of look-ahead; left in the queue unless it gets assimilated below.
    SpeechElement nextElement = (InQueue.Count > 0) ? InQueue.Peek() : null;

    // No segment is open: open one whose kind is decided by the current element.
    if (curSegment == null)
    {
        bool opensWord = (curElement is Phoneme) || (curElement is WordTag);
        if (opensWord)
        {
            curSegment = new Word();
        }
        else
        {
            curSegment = new SeparatorSegment();
        }
        curElementIndex = -1;
    }

    Word openWord = curSegment as Word;
    if (openWord != null)
    {
        curWord = openWord;

        if (curElement is Phoneme)
        {
            curWord.Add(curElement);
            curElementIndex++;

            if (curSyl == null)
            {
                // First phoneme of the word opens the first syllable.
                curSyl = new Syllable(curWord);
                curSyl.Start = curElementIndex;
                curSyl.End = curElementIndex;
            }
            else if (curSyl.Nucleus == null)
            {
                // Still in the onset: extend, and adopt the element as nucleus if it qualifies.
                curSyl.End++;
                Vowel candidate = curElement as Vowel;
                if ((candidate != null) && candidate.IsVowelIn(Vowels.LegalNuclei))
                {
                    curSyl.Nucleus = curElement as Vowel;
                }
            }
            else if (curElement is Consonant)
            {
                Vowel upcomingVowel = nextElement as Vowel;
                if ((upcomingVowel != null) && upcomingVowel.IsVowelIn(Vowels.Inaudible))
                {
                    // The consonant closes this syllable; the inaudible vowel after it is
                    // taken off the queue here and not added to the word.
                    curSyl.End++;
                    nextElement = InQueue.Dequeue();
                    Log("Assimilating a {1}", nextElement.GetType().Name, (nextElement as Vowel).vowel);
                }
                else if ((nextElement == null) || (nextElement is Separator))
                {
                    // Word-final consonant stays in the current syllable.
                    curSyl.End++;
                }
                else
                {
                    // The consonant opens a new syllable. With a strong dagesh it also
                    // closes the previous one, so both syllables share it.
                    bool strongDagesh = ((curElement as Consonant).Flags & ConsonantFlags.StrongDagesh) != 0;
                    if (strongDagesh)
                    {
                        curSyl.End++;
                        curSyl.HintStrongDagesh = true;
                    }

                    curWord.Syllables.Add(curSyl);
                    curSyl = new Syllable(curWord);
                    curSyl.Start = curElementIndex;
                    curSyl.End = curElementIndex;
                }
            }
            else
            {
                // Non-nucleic vowel after the nucleus: just extend the syllable.
                curSyl.End++;
            }
        }
        else if (curElement is WordTag)
        {
            curWord.Tag = (curElement as WordTag).Tag;
        }
        else if (curElement is Qaryan.Core.Cantillation)
        {
            curWord.CantillationMarks.Add((curElement as Cantillation).Mark);
        }
        else if (curElement is Separator)
        {
            // A separator ends the word; the separator itself is handled below.
            AddAndProcessWord(curSegment as Word);
            curSegment = null;
        }
    }

    if (!(curElement is Separator))
    {
        return;
    }

    if (curSegment is SeparatorSegment)
    {
        // An (empty) separator segment is already open: put the separator in it.
        curSegment.Add(curElement);
    }
    else
    {
        // Flush whatever non-separator segment is still open, then start a fresh
        // separator segment holding this separator.
        if (curSegment is Word)
        {
            AddAndProcessWord(curSegment as Word);
        }
        else if (curSegment != null)
        {
            Emit(curSegment);
        }
        curSegment = new SeparatorSegment(curElement as Separator);
    }

    Log("Adding separator segment");
    Emit(curSegment);
    curSegment = null;
}
/// <summary>
/// Finishes a segmented word and adds it to the queue of produced segments: attaches a
/// still-pending syllable, places stress, resolves indeterminate kamatz vowels and
/// audible schwas, logs the resulting transliteration and emits the word.
/// </summary>
/// <param name="w">The word obtained from the segmentation step.</param>
void AddAndProcessWord(Word w)
{
    // The syllable being built when the word ended has not been attached yet.
    bool hasPendingSyllable = (curSyl != null) && !w.Syllables.Contains(curSyl);
    if (hasPendingSyllable)
    {
        w.Syllables.Add(curSyl);
        curSyl = null;
    }

    SyllablePattern? sp = w.PlaceStress(StressHeuristics, DefaultStress);

    bool beforeStress = true;
    foreach (Syllable syl in w.Syllables)
    {
        bool stressed = syl.IsStressed;
        if (stressed)
        {
            beforeStress = false;
        }

        foreach (SpeechElement e in syl.Phonemes)
        {
            Vowel v = e as Vowel;
            if (v == null)
            {
                continue;
            }

            if (v.vowel == Vowels.KamatzIndeterminate)
            {
                // Kamatz katan only in an unstressed, consonant-closed syllable that
                // precedes the stress; kamatz gadol everywhere else.
                bool isKatan = beforeStress
                    && (syl.Coda == SyllableCoda.Closed)
                    && (syl.Phonemes[syl.Phonemes.Count - 1] is Consonant)
                    && !stressed;
                v.vowel = isKatan ? Vowels.KamatzKatan : Vowels.KamatzGadol;
                continue;
            }

            if (v.vowel != Vowels.AudibleSchwa)
            {
                continue;
            }

            if ((w.Tag & TagTypes.Origin) == TagTypes.Foreign)
            {
                v.vowel = Vowels.SilentSchwa;
                continue;
            }

            // Only a schwa flanked by two consonants with different Latin forms
            // is a candidate for silencing.
            int j = w.Phonemes.IndexOf(e);
            if (j + 1 >= w.Phonemes.Count)
            {
                continue;
            }
            Consonant following = w.Phonemes[j + 1] as Consonant;
            if ((following == null) || (j - 1 < 0))
            {
                continue;
            }
            Consonant preceding = w.Phonemes[j - 1] as Consonant;
            if (preceding == null)
            {
                continue;
            }
            if (w.Phonemes[j + 1].Latin == w.Phonemes[j - 1].Latin)
            {
                continue;
            }

            switch (w.Phonemes[j - 1].Latin)
            {
                case Consonants.Vav:
                case Consonants.Lamed:
                    // After these consonants the schwa is always left as it is.
                    break;

                default:
                    if (relaxAudibleSchwa)
                    {
                        // Silence the schwa when sonority does not fall across it.
                        int sonorityRise = following.Sonority - preceding.Sonority;
                        if (sonorityRise >= 0)
                        {
                            v.Silent = true;
                        }
                    }
                    break;
            }
        }
    }

    if (sp != null)
    {
        Log(LogLevel.Info, "/{0}/ from pattern {1}", w.TranslitSyllablesStress, sp.ToString());
    }
    else
    {
        Log(LogLevel.Info, "/{0}/", w.TranslitSyllablesStress);
    }

    Emit(w);
}
/// <summary>
/// Finishes a segmented word and emits it: attaches a still-pending syllable, prints the
/// syllabified transliteration to the console, places stress, and resolves indeterminate
/// kamatz vowels and audible schwas.
/// </summary>
/// <param name="w">The word whose segmentation has just been completed.</param>
void AddAndProcessWord(Word w)
{
    // The syllable being built when the word ended has not been attached yet.
    if (curSyl != null)
    {
        if (!w.Syllables.Contains(curSyl))
        {
            w.Syllables.Add(curSyl);
            curSyl = null;
        }
    }

    Console.WriteLine("segmenter: /{0}/", w.TranslitSyllables);
    w.PlaceStress(StressHeuristics, DefaultStress);

    foreach (Syllable syl in w.Syllables)
    {
        bool stressed = syl.IsStressed;

        foreach (SpeechElement e in syl.Phonemes)
        {
            Vowel v = e as Vowel;
            if (v == null)
            {
                continue;
            }

            if (v.vowel == Vowels.KamatzIndeterminate)
            {
                // Kamatz katan in an unstressed, consonant-closed syllable; gadol otherwise.
                bool closedByConsonant = (syl.Coda == SyllableCoda.Closed)
                    && (syl.Phonemes[syl.Phonemes.Count - 1] is Consonant);
                if (closedByConsonant && !stressed)
                {
                    v.vowel = Vowels.KamatzKatan;
                }
                else
                {
                    v.vowel = Vowels.KamatzGadol;
                }
            }
            else if (v.vowel == Vowels.AudibleSchwa)
            {
                if ((w.Tag & TagTypes.Origin) == TagTypes.Foreign)
                {
                    v.vowel = Vowels.SilentSchwa;
                    continue;
                }

                int j = w.Phonemes.IndexOf(e);

                // Only a schwa flanked by two consonants with different Latin forms
                // is a candidate for silencing.
                bool flankedByDistinctConsonants =
                    (j + 1 < w.Phonemes.Count)
                    && (w.Phonemes[j + 1] is Consonant)
                    && (j - 1 >= 0)
                    && (w.Phonemes[j - 1] is Consonant)
                    && (w.Phonemes[j + 1].Latin != w.Phonemes[j - 1].Latin);
                if (!flankedByDistinctConsonants)
                {
                    continue;
                }

                switch (w.Phonemes[j - 1].Latin)
                {
                    case Consonants.Vav:
                    case Consonants.Lamed:
                        // After these consonants the schwa is always left as it is.
                        break;

                    default:
                        if (relaxAudibleSchwa)
                        {
                            // Silence the schwa when sonority does not fall across it.
                            int sonorityRise = ((Consonant)w.Phonemes[j + 1]).Sonority
                                - ((Consonant)w.Phonemes[j - 1]).Sonority;
                            if (sonorityRise >= 0)
                            {
                                v.Silent = true;
                            }
                        }
                        break;
                }
            }
        }
    }

    Emit(w);
}