/// <summary>
/// Checks whether this instance satisfies every constraint carried by <paramref name="pattern"/>.
/// </summary>
/// <param name="pattern">
/// Mask providing an optional required prefix and suffix for <c>Translit</c>, plus the sets of
/// allowed onsets, codas and vowels.
/// </param>
/// <returns>
/// <c>true</c> when the prefix/suffix constraints hold, <c>Onset</c> and <c>Coda</c> are both contained
/// in the corresponding allowed sets, and <c>Nucleus</c> is non-null and accepted by
/// <c>IsVowelIn(pattern.AllowedVowels)</c>; otherwise <c>false</c>.
/// </returns>
public bool Match(SyllableMask pattern) {
    // An empty StartsWith / EndsWith imposes no constraint on the transliteration.
    bool affixesOk = (pattern.StartsWith.Length == 0) || Translit.StartsWith(pattern.StartsWith);
    if (pattern.EndsWith.Length > 0) {
        affixesOk = affixesOk && Translit.EndsWith(pattern.EndsWith);
    }
    if (!affixesOk) {
        return false;
    }

    // Masking with the allowed set must leave the value unchanged, i.e. no bit outside the set.
    if ((Onset & pattern.AllowedOnsets) != Onset) {
        return false;
    }
    if ((Coda & pattern.AllowedCodas) != Coda) {
        return false;
    }

    return (Nucleus != null) && Nucleus.IsVowelIn(pattern.AllowedVowels);
}
/// <summary>
/// Consumes the token at the head of <paramref name="InQueue"/> and passes the speech elements
/// it yields (word tags, cantillation marks, separators, consonants and vowels) to
/// <c>AddElement</c>.
/// </summary>
/// <remarks>
/// A snapshot of the whole queue is taken so the next two letter tokens can be inspected as
/// lookahead. When the following letter is classified as an unvoiced extender and a vowel was
/// produced for the current letter, the tokens up to and including that letter are dequeued
/// here as well. The method returns without doing anything when the queue is empty.
/// </remarks>
/// <param name="InQueue">Pending tokens; the queue object itself is used as the lock while it is read or dequeued.</param>
protected override void Consume(Queue<Token> InQueue) {
    Token[] tokens;
    lock (InQueue) {
        // Dequeue() throws on an empty queue and tokens[0] would not exist; bail out instead,
        // as the Segment-based Consume overloads do.
        if (InQueue.Count == 0) {
            return;
        }
        tokens = InQueue.ToArray();
        InQueue.Dequeue();
    }

    // tokensToConsume starts at 1, so the first branch is only taken when windowSize is below 1.
    int tokensToConsume = 1;
    if (tokensToConsume > windowSize) {
        tokensToConsume -= windowSize;
    } else {
        isLastWindow = !this.IsRunning;
    }

    // i is advanced inside the body because the letter branch may skip several tokens at once.
    for (int i = 0; i < tokensToConsume;) {
        newElement = null;
        t = tokens[i];
        _ItemConsumed(t);
        if (!(t is LetterToken)) {
            if (t is TagToken) {
                newElement = new WordTag(((TagToken)t).Type);
                prev = null;
                i++;
            } else if (t is CantillationToken) {
                newElement = new Cantillation((t as CantillationToken).Value[0]);
                i++;
            } else {
                newElement = new Separator(t.Value);
                prev = null;
                i++;
            }
        } else {
            LetterToken next = null;
            LetterToken further = null;
            int nextIndex = -1;
            int j;

            // Lookahead 1: step over cantillation tokens; any other non-letter token stops the
            // scan, in which case the 'as' cast below leaves next == null.
            j = i + 1;
            while ((j < tokens.Length - 1) && !(tokens[j] is LetterToken)) {
                if (!(tokens[j] is CantillationToken)) {
                    break;
                }
                j++;
            }
            if (j < tokens.Length) {
                next = tokens[j] as LetterToken;
                nextIndex = j;
            }

            // Lookahead 2: same scan, starting after the position found above.
            j++;
            while ((j < tokens.Length - 1) && !(tokens[j] is LetterToken)) {
                if (!(tokens[j] is CantillationToken)) {
                    break;
                }
                j++;
            }
            if (j < tokens.Length) {
                further = tokens[j] as LetterToken;
            }

            bool curIsWordEnd = (isLastWindow && (i == tokens.Length - 1)) || (next == null);
            bool curIsWordStart = (isFirstWindow && (i == 0)) || (prev == null);

            // Map the letter to a consonant (or, for some forms of vav, to a vowel).
            LetterToken l = (LetterToken)t;
            switch (l.Letter) {
                case 'א':
                    newElement = new Consonant(Consonants.Aleph);
                    break;
                case 'ב':
                    if (l.HasDagesh) {
                        newElement = new Consonant(Consonants.Bet);
                    } else {
                        newElement = new Consonant(Consonants.Vet);
                    }
                    break;
                case 'ג':
                    if (l.HasApostrophe) {
                        newElement = new Consonant(Consonants.Jimmel);
                    } else {
                        newElement = new Consonant(Consonants.Gimmel);
                    }
                    break;
                case 'ד':
                    newElement = new Consonant(Consonants.Dalet);
                    break;
                case 'ה':
                    newElement = new Consonant(Consonants.He);
                    break;
                case 'ו':
                    if ((l.HasDagesh && l.HasAnyVowels) || l.HasAnyVowelsExcept('\u05B9')) {
                        newElement = new Consonant(Consonants.Vav);
                    } else if ((next != null) && (next.Letter == 'ו') && (next.HasAnyModifier('\u05B9', HebrewChar.Shuruk))) {
                        newElement = new Consonant(Consonants.Vav);
                    } else {
                        v = l.FirstVowel;
                        switch (v) {
                            case '\u05B9':
                                newElement = new Vowel(Vowels.HolamMale);
                                break;
                            case '\0':
                                if (l.HasShuruk) {
                                    if (curIsWordStart) {
                                        // A word-initial shuruk gets an Aleph consonant added in front of it.
                                        AddElement(newElement = new Consonant(Consonants.Aleph));
                                    }
                                    newElement = new Vowel(Vowels.Shuruk);
                                } else {
                                    newElement = new Consonant(Consonants.Vav);
                                }
                                break;
                        }
                    }
                    // A consonantal vav in a word tagged as foreign becomes W.
                    if ((newElement is Consonant) && (lastTag != null)) {
                        if ((lastTag.Tag & TagTypes.Origin) == TagTypes.Foreign) {
                            newElement = new Consonant(Consonants.W);
                        }
                    }
                    break;
                case 'ז':
                    if (l.HasApostrophe) {
                        newElement = new Consonant(Consonants.Zhayin);
                    } else {
                        newElement = new Consonant(Consonants.Zayin);
                    }
                    break;
                case 'ח':
                    if (l.HasApostrophe) {
                        newElement = new Consonant(Consonants.Khaf);
                    } else {
                        newElement = new Consonant(Consonants.Het);
                    }
                    break;
                case 'י':
                    newElement = new Consonant(Consonants.Yud);
                    break;
                case 'ט':
                    newElement = new Consonant(Consonants.Tet);
                    break;
                case 'כ':
                case 'ך':
                    if (l.HasDagesh) {
                        newElement = new Consonant(Consonants.Kaf);
                    } else {
                        newElement = new Consonant(Consonants.Khaf);
                    }
                    break;
                case 'ל':
                    newElement = new Consonant(Consonants.Lamed);
                    break;
                case 'מ':
                case 'ם':
                    newElement = new Consonant(Consonants.Mem);
                    break;
                case 'נ':
                case 'ן':
                    newElement = new Consonant(Consonants.Nun);
                    break;
                case 'ס':
                    newElement = new Consonant(Consonants.Samekh);
                    break;
                case 'ע':
                    newElement = new Consonant(Consonants.Ayin);
                    break;
                case 'פ':
                case 'ף':
                    if (l.HasDagesh) {
                        newElement = new Consonant(Consonants.Pe);
                    } else {
                        newElement = new Consonant(Consonants.Fe);
                    }
                    break;
                case 'צ':
                case 'ץ':
                    if (l.HasApostrophe) {
                        newElement = new Consonant(Consonants.Tchaddik);
                    } else {
                        newElement = new Consonant(Consonants.Tsaddik);
                    }
                    break;
                case 'ק':
                    newElement = new Consonant(Consonants.Quf);
                    break;
                case 'ר':
                    newElement = new Consonant(Consonants.Resh);
                    break;
                case 'ש':
                    if (l.HasModifier(HebrewChar.SinDot)) {
                        newElement = new Consonant(Consonants.Sin);
                    } else {
                        newElement = new Consonant(Consonants.Shin);
                    }
                    break;
                case 'ת':
                    newElement = new Consonant(Consonants.Tav);
                    break;
            }

            if (newElement is Consonant) {
                TagTypes wordOrigin = TagTypes.Unrecognized;
                if (lastTag != null) {
                    wordOrigin = lastTag.Tag & TagTypes.Origin;
                }
                Consonant curConsonant = (Consonant)newElement;

                // Classify the dagesh as light or strong.
                if (l.HasDagesh) {
                    if (wordOrigin == TagTypes.Foreign) {
                        if (!HebrewChar.IsBegedKefet(l.Letter)) {
                            curConsonant.Flags |= ConsonantFlags.LightDagesh;
                        }
                    } else if (!HebrewChar.IsGuttural(l.Letter) && l.Letter != 'י') {
                        if (!HebrewChar.IsBegedKefet(l.Letter)) {
                            curConsonant.Flags |= ConsonantFlags.StrongDagesh;
                        } else {
                            if ((prev == null) || (prevVowel == null) || (prevVowel.vowel == Vowels.SilentSchwa)) {
                                curConsonant.Flags |= ConsonantFlags.LightDagesh;
                            } else {
                                curConsonant.Flags |= ConsonantFlags.StrongDagesh;
                            }
                        }
                    }
                }

                v = l.FirstVowel;

                // Patah gnuva: for a word-final Het or Ayin after an E/I/U/O vowel, the vowel is
                // added BEFORE the consonant.
                bool patahGnuva = false;
                if (curIsWordEnd && ((curConsonant.Latin == Consonants.Het) || (curConsonant.Latin == Consonants.Ayin))) {
                    if ((prevVowel != null) && prevVowel.IsVowelIn(Vowels.E | Vowels.I | Vowels.U | Vowels.O)) {
                        if ((v == '\u05B7') || (v == '\0')) {
                            AddElement(new Vowel(Vowels.PatahGnuva));
                            patahGnuva = true;
                        }
                    }
                }

                if (Options.EverydayRegister) {
                    if ((newElement.Latin == Consonants.Ayin) || (newElement.Latin == Consonants.Aleph) || (newElement.Latin == Consonants.He)) {
                        newElement.Silent = true;
                    }
                }
                AddElement(newElement);
                newElement = null;

                // Is the next letter an extender (per IsEhevi) with no vowels and no mappiq?
                bool nextIsUnvoicedEhevi = (next != null) && HebrewChar.IsEhevi(next.Letter) && !next.HasAnyVowels && !next.HasMappiq;
                if (nextIsUnvoicedEhevi) {
                    if (next.Letter == 'ו') {
                        nextIsUnvoicedEhevi &= !l.HasAnyModifier('\u05B7', '\u05B8');
                    }
                    // NOTE(review): the 'else if' below pairs with the 'further != null' test, so the
                    // yud rule is only evaluated when there is no further letter - confirm this is intended.
                    if (further != null) {
                        if (further.Letter == 'ו') {
                            nextIsUnvoicedEhevi &= !further.HasAnyModifier('\u05B9' /* holam */, HebrewChar.Shuruk);
                            nextIsUnvoicedEhevi &= !further.HasAnyVowelsExcept(HebrewChar.Shuruk);
                        }
                    } else if (next.Letter == 'י') {
                        nextIsUnvoicedEhevi &= (l.HasModifier('\u05B4'));
                    }
                }

                bool nextHasHatafKamatz = (next != null) && next.HasModifier('\u05B3');
                bool nextHasSchwa = (next != null) && next.HasModifier('\u05B0');
                bool nextHasHataf = (next != null) && next.HasAnyModifier('\u05B1', '\u05B2', '\u05B3');
                bool nextIsBegedKefet = (next != null) && HebrewChar.IsBegedKefet(next.Letter);

                i++;

                // Translate the letter's first vowel mark into a Vowel element.
                switch (v) {
                    case '\u05B0':
                        // Schwa: decide between silent and audible.
                        if (wordOrigin == TagTypes.Foreign) {
                            newElement = new Vowel(Vowels.SilentSchwa);
                        } else if (prev == null) {
                            newElement = new Vowel(Vowels.AudibleSchwa);
                        } else if (next == null) {
                            newElement = new Vowel(Vowels.SilentSchwa);
                        } else if (nextHasSchwa | nextHasHataf) {
                            newElement = new Vowel(Vowels.SilentSchwa);
                        } else if (nextIsBegedKefet) {
                            if (next.HasDagesh) {
                                newElement = new Vowel(Vowels.SilentSchwa);
                            } else {
                                newElement = new Vowel(Vowels.AudibleSchwa);
                            }
                        } else if (prevVowel != null) {
                            switch (prevVowel.vowel) {
                                case Vowels.SilentSchwa:
                                    newElement = new Vowel(Vowels.AudibleSchwa);
                                    break;
                                default:
                                    if (prevVowel.IsVowelIn(Vowels.Short)) {
                                        newElement = new Vowel(Vowels.SilentSchwa);
                                    } else if ((curConsonant.Flags & ConsonantFlags.StrongDagesh) != 0) {
                                        newElement = new Vowel(Vowels.AudibleSchwa);
                                    } else {
                                        newElement = new Vowel(Vowels.SilentSchwa);
                                    }
                                    break;
                            }
                        } else if ((curConsonant.Flags & ConsonantFlags.StrongDagesh) != 0) {
                            newElement = new Vowel(Vowels.AudibleSchwa);
                        } else {
                            newElement = new Vowel(Vowels.SilentSchwa);
                        }
                        break;
                    case '\u05B1':
                        newElement = new Vowel(Vowels.HatafSegol);
                        break;
                    case '\u05B2':
                        newElement = new Vowel(Vowels.HatafPatah);
                        break;
                    case '\u05B3':
                        newElement = new Vowel(Vowels.HatafKamatz);
                        break;
                    case '\u05B4':
                        if (nextIsUnvoicedEhevi) {
                            newElement = new Vowel(Vowels.HirikMale);
                        } else {
                            newElement = new Vowel(Vowels.HirikHaser);
                        }
                        break;
                    case '\u05B5':
                        if (nextIsUnvoicedEhevi) {
                            newElement = new Vowel(Vowels.TzereMale);
                        } else {
                            newElement = new Vowel(Vowels.Tzere);
                        }
                        break;
                    case '\u05B6':
                        if (nextIsUnvoicedEhevi) {
                            newElement = new Vowel(Vowels.SegolMale);
                        } else {
                            newElement = new Vowel(Vowels.Segol);
                        }
                        break;
                    case '\u05B7':
                        // Already emitted ahead of the consonant when it was a patah gnuva.
                        if (!patahGnuva) {
                            if (nextIsUnvoicedEhevi) {
                                newElement = new Vowel(Vowels.PatahMale);
                            } else {
                                newElement = new Vowel(Vowels.Patah);
                            }
                        }
                        break;
                    case '\u05B8':
                        if (nextIsUnvoicedEhevi) {
                            newElement = new Vowel(Vowels.KamatzMale);
                        } else if (nextHasHatafKamatz) {
                            newElement = new Vowel(Vowels.KamatzKatan);
                        } else {
                            newElement = new Vowel(Vowels.KamatzIndeterminate);
                        }
                        break;
                    case '\u05B9':
                        if (nextIsUnvoicedEhevi) {
                            newElement = new Vowel(Vowels.HolamMale);
                        } else {
                            newElement = new Vowel(Vowels.HolamHaser);
                        }
                        break;
                    case '\u05BB':
                        newElement = new Vowel(Vowels.Kubutz);
                        break;
                    default:
                        break;
                }

                prev = l;
                if (newElement != null) {
                    AddElement(newElement);
                    newElement = null;
                    if (nextIsUnvoicedEhevi) {
                        Log("UNVOICED EHEVI FOR CRYING OUT LOUD>>>>>>>>>>>>>");
                        // Keep cantillation marks found between this letter and the skipped extender.
                        for (int k = i; k < nextIndex; k++) {
                            Token tk = tokens[k];
                            if (tk is CantillationToken) {
                                newElement = new Cantillation((tk as CantillationToken).Value[0]);
                                AddElement(newElement);
                                Log("Added element " + newElement.Latin + " (" + newElement.GetType().Name + ") while skipping unvoiced ehevi");
                                newElement = null;
                            }
                        }
                        // Dequeue the tokens up to and including the extender, beyond the one
                        // already dequeued at the top of the method.
                        lock (InQueue) {
                            for (int z = 0; z < nextIndex + 1 - tokensToConsume; z++) {
                                _ItemConsumed(InQueue.Dequeue());
                            }
                        }
                        i = nextIndex + 1;
                    }
                }
            } else {
                // The letter produced a vowel or nothing; it is added by the common tail below.
                i++;
            }
        }

        if (newElement != null) {
            AddElement(newElement);
            if (newElement is WordTag) {
                lastTag = (WordTag)newElement;
            } else {
                // A separator clears the remembered tag.
                if (newElement is Separator) {
                    lastTag = null;
                }
            }
            newElement = null;
        }
    }

    if (isFirstWindow) {
        isFirstWindow = false;
    }
}
/// <summary>
/// Dequeues one segment from <paramref name="InQueue"/> and emits the phones derived from it.
/// </summary>
/// <remarks>
/// A separator segment emits at most one phone (only when its first character is punctuation)
/// and sets <c>firstStressInClause</c>. A word emits one phone per phoneme of each syllable,
/// with context and duration filled in, followed by optional "_" phones for cantillation marks
/// when <c>Options.SingCantillation</c> is set. Any other segment type emits nothing.
/// </remarks>
/// <param name="InQueue">Queue of pending segments; the segment after the dequeued one is peeked, not removed.</param>
protected override void Consume(Queue<Segment> InQueue) {
    if (InQueue.Count == 0) {
        return;
    }

    Segment current = InQueue.Dequeue();
    _ItemConsumed(current);
    Segment upcoming = (InQueue.Count > 0) ? InQueue.Peek() : null;

    if (current is SeparatorSegment) {
        if (HebrewChar.IsPunctuation((current[0] as Separator).Latin[0])) {
            Phone pausePhone = Phone.Create(current[0]);
            if (pausePhone == null) {
                // Fall back to a "_" phone of length 1 when no phone can be created.
                pausePhone = new Phone("_", 1);
            }
            Emit(pausePhone);
        }
        firstStressInClause = true;
        return;
    }

    Word word = current as Word;
    if (word == null) {
        return;
    }

    // Non-null only when the segment following this word is a separator.
    SeparatorSegment upcomingSeparator = upcoming as SeparatorSegment;

    bool stressNotYetSeen = true;
    SpeechElement lastStrongDagesh = null;

    for (int s = 0; s < word.Syllables.Count; s++) {
        Syllable syllable = word.Syllables[s];
        bool accented = syllable.IsStressed;
        stressNotYetSeen = stressNotYetSeen && !accented;
        bool nucleusNotYetSeen = true;

        for (int p = 0; p < syllable.Phonemes.Count; p++) {
            SpeechElement element = syllable.Phonemes[p];
            Consonant consonant = element as Consonant;
            Vowel vowel = element as Vowel;

            // Skip a strong-dagesh consonant when it is the very element recorded last time.
            if ((consonant != null) && ((consonant.Flags & ConsonantFlags.StrongDagesh) != 0) && (element == lastStrongDagesh)) {
                continue;
            }

            Phone phone = Phone.Create(element);
            if (phone == null) {
                continue;
            }

            // Context.
            phone.Context.SylIndex = s;
            phone.Context.SylReverseIndex = word.Syllables.Count - s - 1;
            phone.Context.IsNucleus = (syllable.Nucleus == element);
            phone.Context.IsAccented = accented;
            if (upcomingSeparator != null) {
                phone.Context.NextSeparator = upcomingSeparator[0].Latin;
            }

            if (accented) {
                phone.Context.AccentStrength = 1;
            } else {
                // Optional symbol substitutions, applied to unstressed syllables only.
                if (Options.Akanye && (phone.Symbol == "o")) {
                    phone.Symbol = "a";
                }
                if (Options.Ikanye && (phone.Symbol == "e")) {
                    phone.Symbol = "i";
                }
            }

            if (phone.PitchCurve.Count > 0) {
                phone.PitchCurve.Clear();
            }

            // Base duration by element kind.
            phone.Duration = 80;
            if (vowel != null) {
                if (vowel.IsVowelIn(Vowels.VeryShort)) {
                    phone.Duration = (vowel.vowel == Vowels.AudibleSchwa) ? 28 : 40;
                } else if (vowel.IsVowelIn(Vowels.Short)) {
                    phone.Duration = 90;
                } else if (vowel.IsVowelIn(Vowels.Long)) {
                    phone.Duration = 94;
                } else if (vowel.IsVowelIn(Vowels.VeryLong)) {
                    phone.Duration = 97;
                }
                phone.Duration += vowel.IsVowelIn(Vowels.HighVowels) ? 25 : 30;
            } else if (consonant != null) {
                phone.Duration = (consonant.Sonority) * 1.6 + 60;
            } else {
                phone.Duration = 100;
            }

            // Stress-dependent scaling.
            if (accented) {
                firstStressInClause = false;
                if (element == syllable.Nucleus) {
                    nucleusNotYetSeen = false;
                }
                if (nucleusNotYetSeen && (consonant != null) && consonant.IsLiquid) {
                    phone.Duration *= 1.2;
                }
            } else if (vowel != null) {
                phone.Duration *= stressNotYetSeen ? 0.85 : 0.9;
            }

            // Strong dagesh: remember the element and lengthen it.
            if ((consonant != null) && ((consonant.Flags & ConsonantFlags.StrongDagesh) != 0)) {
                lastStrongDagesh = element;
                if (Options.DistinguishStrongDagesh) {
                    phone.Duration *= 1.4;
                } else {
                    phone.Duration *= 1.1F;
                }
            }

            Emit(phone);
        }
    }

    // Cantillation: one "_" phone per mark whose disjunctive rank is below 5.
    if (Options.SingCantillation) {
        foreach (char mark in word.CantillationMarks) {
            int rank = HebrewChar.DisjunctiveRank(mark);
            if (rank >= 5) {
                continue;
            }
            int pauseLength;
            switch (rank) {
                case 1: pauseLength = 230; break;
                case 2: pauseLength = 165; break;
                case 3: pauseLength = 60; break;
                case 4: pauseLength = 30; break;
                default: pauseLength = 0; break;
            }
            Emit(new Phone("_", pauseLength));
        }
    }
}
/// <summary>
/// Dequeues one segment from <paramref name="InQueue"/>, emits the phones derived from it and
/// writes each emitted phone to the console with the "phonetizer: " prefix.
/// </summary>
/// <remarks>
/// A separator segment emits at most one phone (only when its first character is punctuation)
/// and sets <c>firstStressInClause</c>. A word emits one phone per element in each syllable's
/// <c>Start</c>..<c>End</c> range of <c>w.Phonemes</c>, followed by optional "_" phones for
/// cantillation marks when <c>Options.SingCantillation</c> is set.
/// </remarks>
/// <param name="InQueue">Queue of pending segments; the segment after the dequeued one is peeked, not removed.</param>
protected override void Consume(Queue<Segment> InQueue) {
    if (InQueue.Count == 0) {
        return;
    }
    Segment seg = InQueue.Dequeue();
    _ItemConsumed(seg);
    Segment nextSeg = null;
    if (InQueue.Count > 0) {
        nextSeg = InQueue.Peek();
    }

    if (seg is SeparatorSegment) {
        if (HebrewChar.IsPunctuation((seg[0] as Separator).Latin[0])) {
            Phone phn = Phone.Create(seg[0]);
            if (phn == null) {
                // Fall back to a "_" phone of length 1 when no phone can be created.
                phn = new Phone("_", 1);
            }
            Emit(phn);
            Console.WriteLine("phonetizer: {0}", phn);
        }
        firstStressInClause = true;
    } else if (seg is Word) {
        Word w = (Word)seg;
        bool beforeStress = true;
        SpeechElement HintStrongDagesh = null;
        foreach (Syllable syl in w.Syllables) {
            bool stressed = syl.IsStressed;
            if (beforeStress && stressed) {
                beforeStress = false;
            }
            bool beforeNucleus = true;
            foreach (SpeechElement e in w.Phonemes.GetRange(syl.Start, syl.End - syl.Start + 1)) {
                // The former 'sylEnd' local was dropped: it ran a linear IndexOf for every
                // phoneme and its value was never read.

                // Skip a strong-dagesh consonant when it is the very element recorded last time.
                if ((e is Consonant) && ((((Consonant)e).Flags & ConsonantFlags.StrongDagesh) != 0) && (e == HintStrongDagesh)) {
                    continue;
                }
                Phone phone = Phone.Create(e);
                if (phone == null) {
                    continue;
                }

                phone.Context.IsNucleus = (syl.Nucleus == e);
                phone.Context.IsAccented = stressed;
                if (nextSeg is SeparatorSegment) {
                    phone.Context.NextSeparator = (nextSeg as SeparatorSegment)[0].Latin;
                }
                if (!stressed) {
                    // Optional symbol substitutions, applied to unstressed syllables only.
                    if (Options.Akanye) {
                        if (phone.Symbol == "o") {
                            phone.Symbol = "a";
                        }
                    }
                    if (Options.Ikanye) {
                        if (phone.Symbol == "e") {
                            phone.Symbol = "i";
                        }
                    }
                } else {
                    phone.Context.AccentStrength = 1;
                }
                if (phone.PitchCurve.Count > 0) {
                    phone.PitchCurve.Clear();
                }

                // Base duration by element kind.
                phone.Duration = 80;
                if (e is Vowel) {
                    Vowel v = (Vowel)e;
                    if (v.IsVowelIn(Vowels.VeryShort)) {
                        phone.Duration = 28;
                    } else if (v.IsVowelIn(Vowels.Short)) {
                        phone.Duration = 90;
                    } else if (v.IsVowelIn(Vowels.Long)) {
                        phone.Duration = 94;
                    } else if (v.IsVowelIn(Vowels.VeryLong)) {
                        phone.Duration = 97;
                    }
                    if (v.IsVowelIn(Vowels.HighVowels)) {
                        phone.Duration += 25;
                    } else {
                        phone.Duration += 30;
                    }
                } else if (e is Consonant) {
                    phone.Duration = (((Consonant)e).Sonority) * 2.6 + 60;
                } else {
                    phone.Duration = 100;
                }

                // Stress-dependent scaling.
                if (stressed) {
                    if (firstStressInClause) {
                        firstStressInClause = false;
                    }
                    if (e == syl.Nucleus) {
                        beforeNucleus = false;
                    }
                    if (beforeNucleus && (e is Consonant) && ((Consonant)e).IsLiquid) {
                        phone.Duration *= 1.2;
                    }
                } else {
                    if (e is Vowel) {
                        if (beforeStress) {
                            phone.Duration *= 0.5;
                        } else {
                            phone.Duration *= 0.6;
                        }
                    }
                }

                // Strong dagesh: remember the element and lengthen it.
                if ((e is Consonant) && ((((Consonant)e).Flags & ConsonantFlags.StrongDagesh) != 0)) {
                    HintStrongDagesh = e;
                    if (Options.DistinguishStrongDagesh) {
                        phone.Duration *= 1.4;
                    } else {
                        phone.Duration *= 1.1F;
                    }
                }
                Emit(phone);
                Console.WriteLine("phonetizer: {0}", phone);
            }
        }

        #region cantillation stuff
        // One "_" phone per mark whose disjunctive rank is below 5; ranks outside 1..4 keep len == 0.
        if (Options.SingCantillation) {
            foreach (char ch in w.CantillationMarks) {
                int i = HebrewChar.DisjunctiveRank(ch), len = 0;
                if (i < 5) {
                    switch (i) {
                        case 1:
                            len = 230;
                            break;
                        case 2:
                            len = 165;
                            break;
                        case 3:
                            len = 60;
                            break;
                        case 4:
                            len = 30;
                            break;
                    }
                    Phone phone;
                    Emit(phone = new Phone("_", len));
                    Console.WriteLine("phonetizer: {0}", phone);
                }
            }
        }
        #endregion
    }
}