// Appends the characters held by a shaped CharStruct to the output builder.
//   str   - builder that receives the characters.
//   s     - the shaped character; nothing is written when its BaseChar is 0.
//   level - bit flags (not an index): when the ar_novowel bit is set, Mark1 and
//           Vowel are left out of the output.
// s.LigNum is decremented once for the base character and once more for each of
// Mark1 / Vowel that is present, whether or not that mark was actually written.
private static void CopyCharStructToString(StringBuilder str, CharStruct s, int level)
{
    if (s.BaseChar == 0)
    {
        return;
    }

    // Marks and vowels are emitted only while the "no vowel" flag is clear.
    bool emitMarks = (level & ar_novowel) == 0;

    str.Append(s.BaseChar);
    s.LigNum--;

    if (s.Mark1 != 0)
    {
        if (emitMarks)
        {
            str.Append(s.Mark1);
        }

        s.LigNum--;
    }

    if (s.Vowel != 0)
    {
        if (emitMarks)
        {
            str.Append(s.Vowel);
        }

        s.LigNum--;
    }
}
// Looking up '%' must yield a CharStruct whose Code is the empty string.
// (Presumably '%' has no T9 mapping - confirm against ParseT9.)
public void BadChar()
{
    CharStruct result = new ParseT9().GetCharStruct('%');

    Assert.That(result.Code, Is.EqualTo(""));
}
// Appends the base character of a shaped CharStruct to the output builder.
//   str   - builder that receives the character.
//   s     - the shaped character; nothing is written when its BaseChar is 0.
//   level - not used by this variant.
private static void CopyCharStructToString(StringBuilder str, CharStruct s, int level)
{
    if (s.BaseChar != 0)
    {
        str.Append(s.BaseChar);
    }
}
// Performs the basic Arabic reshaping of text and appends the result to str.
//   text  - the original characters.
//   str   - receives the shaped output (the original notes assume it starts empty).
//   level - flags forwarded unchanged to CopyCharStructToString.
//
// Two CharStructs are tracked: "previous" (already shaped, waiting to be written)
// and "current" (still accumulating ligatures). Shape index convention:
//   0 == isolated, 1 == final, 2 == initial, 3 == medial.
private static void Shape(char[] text, StringBuilder str, int level)
{
    CharStruct previous = new CharStruct();
    CharStruct current = new CharStruct();

    foreach (char letter in text)
    {
        if (Ligature(letter, current) != 0)
        {
            // The letter was merged into "current"; nothing to emit yet.
            continue;
        }

        // The incoming letter's shape count decides the form of "current":
        // a single shape gives final/isolated, otherwise medial/initial.
        int shapeCount = ShapeCount(letter); // from 1 to 4
        int form = (shapeCount == 1) ? 0 : 2;
        if (ConnectsToLeft(previous))
        {
            form++;
        }

        form %= current.NumShapes;
        current.BaseChar = GetCharShape(current.BaseChar, form);

        // Flush the previous character, then shift current -> previous.
        CopyCharStructToString(str, previous, level);
        previous = current;

        // Start a fresh "current" for the incoming letter.
        current = new CharStruct();
        current.BaseChar = letter;
        current.NumShapes = shapeCount;
        current.LigNum++;
    }

    // The last character can only be isolated (0) or final (1).
    int lastForm = ConnectsToLeft(previous) ? 1 : 0;
    lastForm %= current.NumShapes;
    current.BaseChar = GetCharShape(current.BaseChar, lastForm);

    CopyCharStructToString(str, previous, level);
    CopyCharStructToString(str, current, level);
}
// Returns true when the character has more than two shapes.
// (Presumably this means it also has initial/medial forms and can therefore
// join the following letter - confirm against the shape tables.)
private static bool ConnectsToLeft(CharStruct a)
{
    return a.NumShapes >= 3;
}
// Tries to merge newChar into the character accumulated in oldChar.
//
//   newChar - the incoming character.
//   oldChar - the character built so far; its BaseChar, Mark1, Vowel, NumShapes
//             and LigNum members are updated in place when a merge happens.
//
// Returns:
//   0 == no ligature possible (oldChar has no base character yet, the mark slot
//        is already taken, or the pair does not combine)
//   1 == newChar was absorbed as a vowel/mark (LigNum is incremented)
//   2 == two characters were combined (also used when an earlier vowel is replaced)
//   3 == Lam + Alef ligature (NumShapes is set to 2)
//
// The former "case (char)0" arm of the final switch was removed: it could never
// run because a zero BaseChar already returns 0 at the top of the method.
private static int Ligature(char newChar, CharStruct oldChar)
{
    if (oldChar.BaseChar == 0)
    {
        // No base character defined yet, so there is nothing to join with.
        return 0;
    }

    if (IsVowel(newChar))
    {
        // A second vowel (anything but a shadda) replaces the old one: report 2.
        int vowelResult = (oldChar.Vowel != 0 && newChar != SHADDA) ? 2 : 1;

        switch (newChar)
        {
            case SHADDA:
                if (oldChar.Mark1 != 0)
                {
                    return 0; // mark slot already occupied: no ligature possible
                }

                oldChar.Mark1 = SHADDA;
                break;

            case HAMZA_BELOW:
                if (oldChar.BaseChar == ALEF)
                {
                    oldChar.BaseChar = ALEF_HAMZA_BELOW;
                    vowelResult = 2;
                }
                else if (oldChar.BaseChar == LAM_ALEF)
                {
                    oldChar.BaseChar = LAM_ALEF_HAMZA_BELOW;
                    vowelResult = 2;
                }
                else
                {
                    oldChar.Mark1 = HAMZA_BELOW;
                }

                break;

            case HAMZA_ABOVE:
                switch (oldChar.BaseChar)
                {
                    case ALEF:
                        oldChar.BaseChar = ALEF_HAMZA_ABOVE;
                        vowelResult = 2;
                        break;

                    case LAM_ALEF:
                        oldChar.BaseChar = LAM_ALEF_HAMZA_ABOVE;
                        vowelResult = 2;
                        break;

                    case WAW:
                        oldChar.BaseChar = WAW_HAMZA_ABOVE;
                        vowelResult = 2;
                        break;

                    case YEH:
                    case ALEF_MAKSURA:
                    case FARSI_YEH:
                        oldChar.BaseChar = YEH_HAMZA_ABOVE;
                        vowelResult = 2;
                        break;

                    default:
                        // Whatever sense this may make: keep the hamza as a mark.
                        oldChar.Mark1 = HAMZA_ABOVE;
                        break;
                }

                break;

            case MADDA_ABOVE:
                // Only Alef combines with a madda; any other base is left untouched.
                if (oldChar.BaseChar == ALEF)
                {
                    oldChar.BaseChar = ALEF_MADDA_ABOVE;
                    vowelResult = 2;
                }

                break;

            default:
                oldChar.Vowel = newChar;
                break;
        }

        if (vowelResult == 1)
        {
            oldChar.LigNum++;
        }

        return vowelResult;
    }

    if (oldChar.Vowel != 0)
    {
        // A vowel has already been joined, so no further letter can be merged.
        return 0;
    }

    if (oldChar.BaseChar != LAM)
    {
        // Only Lam forms a letter ligature here.
        return 0;
    }

    switch (newChar)
    {
        case ALEF:
            oldChar.BaseChar = LAM_ALEF;
            break;

        case ALEF_HAMZA_ABOVE:
            oldChar.BaseChar = LAM_ALEF_HAMZA_ABOVE;
            break;

        case ALEF_HAMZA_BELOW:
            oldChar.BaseChar = LAM_ALEF_HAMZA_BELOW;
            break;

        case ALEF_MADDA_ABOVE:
            oldChar.BaseChar = LAM_ALEF_MADDA_ABOVE;
            break;

        default:
            return 0;
    }

    oldChar.NumShapes = 2;
    return 3;
}
// Tries to combine newChar with the base character held in oldChar.
// On success oldChar.BaseChar is replaced by the combined character.
//
// Returns: 0 == no ligature possible; 2 == two chars; 3 == three chars.
private static int Ligature(char newChar, CharStruct oldChar)
{
    if (oldChar.BaseChar == 0)
    {
        // No base character defined yet.
        return 0;
    }

    switch (newChar)
    {
        case HIRIQ:
            if (oldChar.BaseChar != YOD)
            {
                return 0;
            }

            oldChar.BaseChar = YOD_HIRIQ;
            return 2;

        case PATAH:
            if (oldChar.BaseChar != ALEF)
            {
                return 0;
            }

            oldChar.BaseChar = ALEF_PATAH;
            return 2;

        case QAMATS:
            if (oldChar.BaseChar != ALEF)
            {
                return 0;
            }

            oldChar.BaseChar = ALEF_QAMATS;
            return 2;

        case HOLAM:
            if (oldChar.BaseChar != VAV)
            {
                return 0;
            }

            oldChar.BaseChar = VAV_HOLAM;
            return 2;

        case DAGESH: // == MAPIQ
            switch (oldChar.BaseChar)
            {
                case ALEF: oldChar.BaseChar = ALEF_MAPIQ; return 2;
                case HE: oldChar.BaseChar = HE_MAPIQ; return 2;
                case BET: oldChar.BaseChar = BET_DAGESH; return 2;
                case GIMEL: oldChar.BaseChar = GIMEL_DAGESH; return 2;
                case DALET: oldChar.BaseChar = DALET_DAGESH; return 2;
                case VAV: oldChar.BaseChar = VAV_DAGESH; return 2;
                case ZAYIN: oldChar.BaseChar = ZAYIN_DAGESH; return 2;
                case TET: oldChar.BaseChar = TET_DAGESH; return 2;
                case YOD: oldChar.BaseChar = YOD_DAGESH; return 2;
                case FINAL_KAF: oldChar.BaseChar = FINAL_KAF_DAGESH; return 2;
                case KAF: oldChar.BaseChar = KAF_DAGESH; return 2;
                case LAMED: oldChar.BaseChar = LAMED_DAGESH; return 2;
                case MEM: oldChar.BaseChar = MEM_DAGESH; return 2;
                case NUN: oldChar.BaseChar = NUN_DAGESH; return 2;
                case SAMEKH: oldChar.BaseChar = SAMEKH_DAGESH; return 2;
                case FINAL_PE: oldChar.BaseChar = FINAL_PE_DAGESH; return 2;
                case PE: oldChar.BaseChar = PE_DAGESH; return 2;
                case TSADI: oldChar.BaseChar = TSADI_DAGESH; return 2;
                case QOF: oldChar.BaseChar = QOF_DAGESH; return 2;
                case RESH: oldChar.BaseChar = RESH_DAGESH; return 2;
                case SHIN: oldChar.BaseChar = SHIN_DAGESH; return 2;
                case TAV: oldChar.BaseChar = TAV_DAGESH; return 2;
                default: return 0;
            }

        case SHIN_DOT:
            switch (oldChar.BaseChar)
            {
                case SHIN: oldChar.BaseChar = SHIN_SHIN_DOT; return 2;
                case SHIN_DAGESH: oldChar.BaseChar = SHIN_DAGESH_SHIN_DOT; return 3; // 3 chars
                default: return 0;
            }

        case SIN_DOT:
            switch (oldChar.BaseChar)
            {
                case SHIN: oldChar.BaseChar = SHIN_SIN_DOT; return 2;
                case SHIN_DAGESH: oldChar.BaseChar = SHIN_DAGESH_SIN_DOT; return 3; // 3 chars
                default: return 0;
            }

        case RAFE:
            switch (oldChar.BaseChar)
            {
                case BET: oldChar.BaseChar = BET_RAFE; return 2;
                case KAF: oldChar.BaseChar = KAF_RAFE; return 2;
                case PE: oldChar.BaseChar = PE_RAFE; return 2;
                default: return 0;
            }

        case LAMED:
            if (oldChar.BaseChar != ALEF)
            {
                return 0;
            }

            oldChar.BaseChar = ALEF_LAMED;
            return 2;

        default:
            return 0;
    }
}