/// <summary>
        /// Runs the base-class transformation on <paramref name="t"/> and then, when the node is a
        /// preterminal and a tag specification is configured, replaces its category with the tag
        /// derived from the morphological analysis stored on the terminal.
        /// </summary>
        /// <param name="t">The node to transform.</param>
        /// <param name="root">The root of the tree containing <paramref name="t"/>; forwarded to the base class.</param>
        /// <returns>The node returned by the base transformation, with its value (and tag, if it has one) updated.</returns>
        /// <exception cref="InvalidOperationException">
        /// The node is a preterminal, <c>tagSpec</c> is set, and the terminal's label is not a
        /// <c>CoreLabel</c> or carries no original text.
        /// </exception>
        public override Tree TransformTree(Tree t, Tree root)
        {
            // Perform tregex-powered annotations
            t = base.TransformTree(t, root);
            string cat = t.Value();

            // Add morphosyntactic features if this is a POS tag
            if (t.IsPreTerminal() && tagSpec != null)
            {
                // The morphological analysis is read from the terminal's OriginalText().
                CoreLabel terminalLabel = t.FirstChild().Label() as CoreLabel;
                string    morphoStr     = (terminalLabel == null) ? null : terminalLabel.OriginalText();
                if (morphoStr == null)
                {
                    // .NET composite formatting uses {n} placeholders; Java-style %s would be
                    // emitted literally and the arguments silently dropped.
                    throw new InvalidOperationException(string.Format("{0}: Term lacks morpho analysis: {1}", this.GetType().FullName, t.ToString()));
                }
                Pair <string, string> lemmaMorph = MorphoFeatureSpecification.SplitMorphString(string.Empty, morphoStr);
                MorphoFeatures        feats      = tagSpec.StrToFeatures(lemmaMorph.Second());
                cat = feats.GetTag(cat);
            }
            // Update the label(s)
            t.SetValue(cat);
            if (t.IsPreTerminal() && t.Label() is IHasTag)
            {
                ((IHasTag)t.Label()).SetTag(cat);
            }
            return(t);
        }
        /// <summary>
        /// Walks the terminals of <paramref name="tree"/> in parallel with their preterminals and
        /// overwrites each preterminal's value and tag with the alternate tag computed by a
        /// <c>FrenchMorphoFeatureSpecification</c>, whenever that alternate tag is non-empty.
        /// </summary>
        /// <remarks>
        /// When a terminal has no original text, a specification string is synthesised from the
        /// preterminal value and the terminal's category (if any).
        /// </remarks>
        /// <param name="tree">The tree whose preterminal labels are updated in place.</param>
        private static void ReplacePOSTags(Tree tree)
        {
            IList <ILabel> leaves = tree.Yield();
            IList <ILabel> tags   = tree.PreTerminalYield();

            System.Diagnostics.Debug.Assert(leaves.Count == tags.Count);
            MorphoFeatureSpecification frenchSpec = new FrenchMorphoFeatureSpecification();

            for (int pos = 0; pos < leaves.Count; ++pos)
            {
                CoreLabel leaf = (CoreLabel)leaves[pos];

                // Morphological analysis, falling back to "<tag>-<subcat>--" or "<tag>---"
                string analysis = leaf.OriginalText();
                if (string.IsNullOrEmpty(analysis))
                {
                    string baseTag     = tags[pos].Value();
                    string subCategory = leaf.Category();
                    string tail        = string.IsNullOrEmpty(subCategory) ? "---" : "-" + subCategory + "--";
                    analysis = baseTag + tail;
                }

                string altTag = frenchSpec.StrToFeatures(analysis).GetAltTag();
                if (string.IsNullOrEmpty(altTag))
                {
                    continue;
                }
                CoreLabel tagLabel = (CoreLabel)tags[pos];
                tagLabel.SetValue(altTag);
                tagLabel.SetTag(altTag);
            }
        }
 /// <summary>
 /// For debugging. Reads the file named by the single argument line by line, converts each line
 /// with a <c>FrenchMorphoFeatureSpecification</c> (Gen, Num and Per activated) and prints the
 /// trimmed line followed by the resulting features.
 /// </summary>
 /// <param name="args">Exactly one element: the path of the file to read.</param>
 public static void Main(string[] args)
 {
     if (args.Length != 1)
     {
         System.Console.Error.Printf("Usage: java %s file%n", typeof(FrenchMorphoFeatureSpecification).FullName);
         System.Environment.Exit(-1);
     }
     try
     {
         BufferedReader br = new BufferedReader(new FileReader(args[0]));
         try
         {
             MorphoFeatureSpecification mfs = new FrenchMorphoFeatureSpecification();
             //Activate all features for debugging
             mfs.Activate(MorphoFeatureSpecification.MorphoFeatureType.Gen);
             mfs.Activate(MorphoFeatureSpecification.MorphoFeatureType.Num);
             mfs.Activate(MorphoFeatureSpecification.MorphoFeatureType.Per);
             for (string line; (line = br.ReadLine()) != null;)
             {
                 MorphoFeatures feats = mfs.StrToFeatures(line);
                 System.Console.Out.Printf("%s\t%s%n", line.Trim(), feats.ToString());
             }
         }
         finally
         {
             // Close the reader even when reading or conversion throws, so the file is not leaked.
             br.Close();
         }
     }
     catch (FileNotFoundException e)
     {
         Sharpen.Runtime.PrintStackTrace(e);
     }
     catch (IOException e)
     {
         Sharpen.Runtime.PrintStackTrace(e);
     }
 }
 /// <summary>
 /// Adds Gen, Num and Per features to <paramref name="feats"/> based on marker characters found
 /// in <paramref name="spec"/>. Each feature type is only considered when it is active.
 /// </summary>
 /// <remarks>
 /// Gen: contains "M" selects genVals[0], otherwise contains "F" selects genVals[1].
 /// Num: ends with "S", "D" or "P" selects numVals[0], numVals[1] or numVals[2] respectively.
 /// Per: contains "1", "2" or "3" selects perVals[0], perVals[1] or perVals[2] respectively.
 /// The first matching marker wins; no feature is added when none matches.
 /// </remarks>
 /// <param name="feats">The feature set to add to.</param>
 /// <param name="spec">The feature tuple to inspect.</param>
 private void ProcessInflectionalFeaturesHelper(MorphoFeatures feats, string spec)
 {
     if (IsActive(MorphoFeatureSpecification.MorphoFeatureType.Gen))
     {
         int genSlot = spec.Contains("M") ? 0 : (spec.Contains("F") ? 1 : -1);
         if (genSlot >= 0)
         {
             feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Gen, genVals[genSlot]);
         }
     }

     if (IsActive(MorphoFeatureSpecification.MorphoFeatureType.Num))
     {
         int numSlot = spec.EndsWith("S") ? 0 : (spec.EndsWith("D") ? 1 : (spec.EndsWith("P") ? 2 : -1));
         if (numSlot >= 0)
         {
             feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Num, numVals[numSlot]);
         }
     }

     if (IsActive(MorphoFeatureSpecification.MorphoFeatureType.Per))
     {
         int perSlot = spec.Contains("1") ? 0 : (spec.Contains("2") ? 1 : (spec.Contains("3") ? 2 : -1));
         if (perSlot >= 0)
         {
             feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Per, perVals[perSlot]);
         }
     }
 }
        /// <summary>
        /// Locates the feature tuple inside <paramref name="spec"/> using <c>pFeatureTuple</c> and,
        /// if one is found, hands its first capture group to
        /// <c>ProcessInflectionalFeaturesHelper</c>.
        /// </summary>
        /// <param name="feats">The feature set that receives any extracted features.</param>
        /// <param name="spec">The specification string to search.</param>
        private void ProcessInflectionalFeatures(MorphoFeatures feats, string spec)
        {
            Matcher tupleMatcher = pFeatureTuple.Matcher(spec);

            // No tuple in the spec: nothing to extract
            if (!tupleMatcher.Find())
            {
                return;
            }
            string tuple = tupleMatcher.Group(1);
            ProcessInflectionalFeaturesHelper(feats, tuple);
        }
        /// <summary>
        /// Overwrites the value and tag of a single preterminal with the alternate tag that
        /// <paramref name="morpho"/> derives from the terminal's morphological analysis, when that
        /// alternate tag is non-empty.
        /// </summary>
        /// <remarks>
        /// When the terminal has no original text, a specification string is synthesised from the
        /// preterminal value and the terminal's category (if any).
        /// </remarks>
        /// <param name="t">A preterminal whose label and first child's label are both <c>CoreLabel</c>s.</param>
        /// <param name="morpho">The specification used to convert the analysis into features.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="t"/> is not a preterminal, or one of the two labels is not a <c>CoreLabel</c>.
        /// </exception>
        private static void ReplacePOSTag(Tree t, MorphoFeatureSpecification morpho)
        {
            if (!t.IsPreTerminal())
            {
                throw new ArgumentException("Can only operate on preterminals");
            }

            CoreLabel tagLabel = t.Label() as CoreLabel;
            if (tagLabel == null)
            {
                throw new ArgumentException("Only operates on CoreLabels");
            }

            Tree      leaf      = t.Children()[0];
            CoreLabel leafLabel = leaf.Label() as CoreLabel;
            if (leafLabel == null)
            {
                throw new ArgumentException("Only operates on CoreLabels");
            }

            // Morphological analysis, falling back to "<tag>-<subcat>--" or "<tag>---"
            string analysis = leafLabel.OriginalText();
            if (string.IsNullOrEmpty(analysis))
            {
                string baseTag     = tagLabel.Value();
                string subCategory = leafLabel.Category();
                string tail        = string.IsNullOrEmpty(subCategory) ? "---" : "-" + subCategory + "--";
                analysis = baseTag + tail;
            }

            string altTag = morpho.StrToFeatures(analysis).GetAltTag();
            if (!string.IsNullOrEmpty(altTag))
            {
                tagLabel.SetValue(altTag);
                tagLabel.SetTag(altTag);
            }
        }
Ejemplo n.º 7
0
        /// <summary>First map to the LDC short tags.</summary>
        /// <remarks>
        /// First map to the LDC short tags. Then map to the Universal POS. Then add
        /// morphological annotations.
        /// </remarks>
        /// <param name="posTag">The raw POS tag; surrounding whitespace is trimmed before lookup.</param>
        /// <param name="terminal">Not used by this implementation.</param>
        /// <returns>
        /// The trimmed raw tag when no short tag is known for it; otherwise the tag produced by
        /// applying the morphological features of the raw tag to the universal tag (or to the short
        /// tag when no universal mapping exists).
        /// </returns>
        public override string Map(string posTag, string terminal)
        {
            string rawTag   = posTag.Trim();
            // NOTE(review): if tagMap's indexer throws on a missing key (as the standard .NET
            // dictionary does), the null check below is never reached for unknown tags — confirm
            // the map type and switch to a presence check if so.
            string shortTag = tagsToEscape.Contains(rawTag) ? rawTag : tagMap[rawTag];

            if (shortTag == null)
            {
                System.Console.Error.Printf("%s: No LDC shortened tag for %s%n", this.GetType().FullName, rawTag);
                return(rawTag);
            }

            // Test for the key before indexing: reading a missing key first could throw before
            // the fallback below ever gets a chance to run.
            string universalTag;
            if (universalMap.Contains(shortTag))
            {
                universalTag = universalMap[shortTag];
            }
            else
            {
                System.Console.Error.Printf("%s: No universal tag for LDC tag %s%n", this.GetType().FullName, shortTag);
                universalTag = shortTag;
            }
            MorphoFeatures feats         = new MorphoFeatures(morphoSpec.StrToFeatures(rawTag));
            string         functionalTag = feats.GetTag(universalTag);

            return(functionalTag);
        }
        /// <summary>For debugging.</summary>
        /// <remarks>
        /// For debugging. Converts a set of long tags (BAMA analyses as in the ATB) to their morpho
        /// feature specification. The input file should have one long tag per line.
        /// Each trimmed line is printed together with its features, followed by a line count.
        /// </remarks>
        /// <param name="args">
        /// Exactly two elements: the input file name, and a comma-separated list of
        /// <c>MorphoFeatureType</c> names to activate.
        /// </param>
        public static void Main(string[] args)
        {
            if (args.Length != 2)
            {
                System.Console.Error.Printf("Usage: java %s filename feats%n", typeof(ArabicMorphoFeatureSpecification).FullName);
                System.Environment.Exit(-1);
            }
            MorphoFeatureSpecification fSpec = new ArabicMorphoFeatureSpecification();

            string[] feats = args[1].Split(",");
            foreach (string feat in feats)
            {
                MorphoFeatureSpecification.MorphoFeatureType fType = MorphoFeatureSpecification.MorphoFeatureType.ValueOf(feat);
                fSpec.Activate(fType);
            }
            File fName = new File(args[0]);

            try
            {
                BufferedReader br    = new BufferedReader(new InputStreamReader(new FileInputStream(fName)));
                int            nLine = 0;
                try
                {
                    for (string line; (line = br.ReadLine()) != null; nLine++)
                    {
                        string         longTag = line.Trim();
                        MorphoFeatures mFeats  = fSpec.StrToFeatures(longTag);
                        System.Console.Out.Printf("%s\t%s%n", longTag, mFeats.ToString());
                    }
                }
                finally
                {
                    // Close the reader even when reading or conversion throws, so the file is not leaked.
                    br.Close();
                }
                System.Console.Out.Printf("%nRead %d lines%n", nLine);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
        /// <summary>
        /// Converts a morphological specification string into a <c>MorphoFeatures</c> object,
        /// choosing an alternate tag from the spec's prefix and sub-markers.
        /// </summary>
        /// <remarks>
        /// The branches are tested in order: "ADV", "A", exact "CC"/"C-C", exact "CS"/"C-S", "CL",
        /// "D", "N", "PRO", "V", and finally exact "P" or "I". An Other-type feature is recorded
        /// for sub-markers only when that feature type is active. Phi features are added via
        /// <c>AddPhiFeatures</c> for the A, CL, D, N, PRO and V branches.
        /// </remarks>
        /// <param name="spec">The specification string; null or empty yields an empty feature set.</param>
        /// <returns>The populated feature set (empty when no branch matches).</returns>
        public override MorphoFeatures StrToFeatures(string spec)
        {
            MorphoFeatures feats = new MorphoFeatures();

            // Usually this is the boundary symbol
            if (string.IsNullOrEmpty(spec))
            {
                return(feats);
            }
            bool otherOn = IsActive(MorphoFeatureSpecification.MorphoFeatureType.Other);

            if (spec.StartsWith("ADV"))
            {
                feats.SetAltTag("ADV");
                if (spec.Contains("int"))
                {
                    if (otherOn)
                    {
                        feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Other, "advint");
                    }
                    feats.SetAltTag("ADVWH");
                }
            }
            else if (spec.StartsWith("A"))
            {
                feats.SetAltTag("ADJ");
                if (spec.Contains("int"))
                {
                    if (otherOn)
                    {
                        feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Other, "adjint");
                    }
                    feats.SetAltTag("ADJWH");
                }
                AddPhiFeatures(feats, spec);
            }
            else if (spec.Equals("CC") || spec.Equals("C-C"))
            {
                if (otherOn)
                {
                    feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Other, "Cc");
                }
                feats.SetAltTag("CC");
            }
            else if (spec.Equals("CS") || spec.Equals("C-S"))
            {
                if (otherOn)
                {
                    feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Other, "Cs");
                }
                feats.SetAltTag("CS");
            }
            else if (spec.StartsWith("CL"))
            {
                feats.SetAltTag("CL");
                if (spec.Contains("suj") || spec.Equals("CL-S-3fp"))
                {
                    //"CL-S-3fp" is equivalent to suj
                    if (otherOn)
                    {
                        feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Other, "Sbj");
                    }
                    feats.SetAltTag("CLS");
                }
                else if (spec.Contains("obj"))
                {
                    if (otherOn)
                    {
                        feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Other, "Obj");
                    }
                    feats.SetAltTag("CLO");
                }
                else if (spec.Contains("refl"))
                {
                    if (otherOn)
                    {
                        feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Other, "Rfl");
                    }
                    feats.SetAltTag("CLR");
                }
                AddPhiFeatures(feats, spec);
            }
            else if (spec.StartsWith("D"))
            {
                feats.SetAltTag("DET");
                if (spec.Contains("int"))
                {
                    if (otherOn)
                    {
                        feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Other, "dint");
                    }
                    feats.SetAltTag("DETWH");
                }
                AddPhiFeatures(feats, spec);
            }
            else if (spec.StartsWith("N"))
            {
                feats.SetAltTag("N");
                //TODO These are usually N-card...make these CD?
                if (spec.Contains("P"))
                {
                    if (otherOn)
                    {
                        feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Other, "Np");
                    }
                    feats.SetAltTag("NPP");
                }
                else if (spec.Contains("C"))
                {
                    if (otherOn)
                    {
                        feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Other, "Nc");
                    }
                    feats.SetAltTag("NC");
                }
                AddPhiFeatures(feats, spec);
            }
            else if (spec.StartsWith("PRO"))
            {
                feats.SetAltTag("PRO");
                if (spec.Contains("int"))
                {
                    if (otherOn)
                    {
                        feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Other, "Ni");
                    }
                    feats.SetAltTag("PROWH");
                }
                else if (spec.Contains("rel"))
                {
                    if (otherOn)
                    {
                        feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Other, "Nr");
                    }
                    feats.SetAltTag("PROREL");
                }
                AddPhiFeatures(feats, spec);
            }
            else if (spec.StartsWith("V"))
            {
                feats.SetAltTag("V");
                if (spec.Contains("Y"))
                {
                    if (otherOn)
                    {
                        feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Other, "Vp");
                    }
                    feats.SetAltTag("VIMP");
                }
                else if (spec.Contains("W"))
                {
                    if (otherOn)
                    {
                        feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Other, "Vf");
                    }
                    feats.SetAltTag("VINF");
                }
                else if (spec.Contains("S") || spec.Contains("T"))
                {
                    if (otherOn)
                    {
                        feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Other, "Vs");
                    }
                    feats.SetAltTag("VS");
                }
                else if (spec.Contains("K"))
                {
                    if (otherOn)
                    {
                        feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Other, "Vp");
                    }
                    feats.SetAltTag("VPP");
                }
                else if (spec.Contains("G"))
                {
                    if (otherOn)
                    {
                        feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Other, "Vr");
                    }
                    feats.SetAltTag("VPR");
                }
                AddPhiFeatures(feats, spec);
            }
            else if (spec.Equals("P") || spec.Equals("I"))
            {
                feats.SetAltTag(spec);
            }
            // A spec that matches none of the branches above is returned without an alt tag.
            return(feats);
        }
        /// <summary>
        /// Adds Gen, Per and Num features to <paramref name="feats"/> from the inflection field of
        /// a hyphen-separated specification string.
        /// </summary>
        /// <remarks>
        /// The inflection field is the last non-empty hyphen-separated token, except for
        /// three-token specs of the form "PRO-x-neg", where the middle token is used. The field is
        /// lower-cased and scanned for "m"/"f" (Gen), "1"/"2"/"3" (Per) and "s"/"p" (Num); each
        /// feature type is only considered when it is active. A spec with no non-empty tokens adds
        /// nothing.
        /// </remarks>
        /// <param name="feats">The feature set to add to.</param>
        /// <param name="spec">The full specification string.</param>
        private void AddPhiFeatures(MorphoFeatures feats, string spec)
        {
            // The original pattern "\\-+" is a Java regular expression. .NET's string.Split(string)
            // treats its argument as a literal separator, so the spec was never split and letters
            // from the category name were scanned as inflection markers. Splitting on '-' and
            // dropping empty entries reproduces "one or more hyphens" as the delimiter.
            string[] toks = spec.Split(new char[] { '-' }, System.StringSplitOptions.RemoveEmptyEntries);
            if (toks.Length == 0)
            {
                // Nothing but hyphens (or empty): no inflection field to read.
                return;
            }

            string morphStr;
            if (toks.Length == 3 && toks[0].Equals("PRO") && toks[2].Equals("neg"))
            {
                morphStr = toks[1];
            }
            else
            {
                morphStr = toks[toks.Length - 1];
            }
            //wsg2011: The analyses have mixed casing....
            // Invariant lower-casing keeps the marker checks independent of the current culture.
            morphStr = morphStr.ToLowerInvariant();

            if (IsActive(MorphoFeatureSpecification.MorphoFeatureType.Gen))
            {
                if (morphStr.Contains("m"))
                {
                    feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Gen, genVals[0]);
                }
                else if (morphStr.Contains("f"))
                {
                    feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Gen, genVals[1]);
                }
            }
            if (IsActive(MorphoFeatureSpecification.MorphoFeatureType.Per))
            {
                if (morphStr.Contains("1"))
                {
                    feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Per, perVals[0]);
                }
                else if (morphStr.Contains("2"))
                {
                    feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Per, perVals[1]);
                }
                else if (morphStr.Contains("3"))
                {
                    feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Per, perVals[2]);
                }
            }
            if (IsActive(MorphoFeatureSpecification.MorphoFeatureType.Num))
            {
                if (morphStr.Contains("s"))
                {
                    feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Num, numVals[0]);
                }
                else if (morphStr.Contains("p"))
                {
                    feats.AddFeature(MorphoFeatureSpecification.MorphoFeatureType.Num, numVals[1]);
                }
            }
        }
Ejemplo n.º 11
0
        /// <summary>Convert token to a sequence of datums and add to iobList.</summary>
        /// <remarks>
        /// One datum is created per character of the (possibly rewritten) token. The first datum
        /// gets the begin label, the last gets the end-of-token label, and the rest get the
        /// continuation label, with the rewrite label substituted where a rewrite is detected.
        /// An empty token produces no datums; if the rewrite rules reduce the token to nothing,
        /// that is logged and no datums are produced.
        /// </remarks>
        /// <param name="iobList">The list that receives the created datums.</param>
        /// <param name="cl">
        /// The label supplying the rewritten-Arabic annotation and the begin/end positions; it is
        /// also passed to <c>CreateDatum</c>.
        /// </param>
        /// <param name="token">The segment text to convert.</param>
        /// <param name="tokType">Passed to <c>StripSegmentationMarkers</c> when a rewritten annotation exists.</param>
        /// <param name="tokenLabel">The label supplying the raw word and tag consulted by the rewrite rules.</param>
        /// <param name="lastToken">Compared against "ل" by rule #2 — presumably the preceding segment; confirm against caller.</param>
        /// <param name="applyRewriteRules">Whether to apply the Arabic-specific rewrite rules.</param>
        /// <param name="stripRewrites">
        /// When true, most rewrite rules leave the boundary labels as they are instead of switching
        /// them to the rewrite label (the "ة-" rule sets the rewrite label regardless).
        /// </param>
        /// <param name="tf">a TokenizerFactory returning ArabicTokenizers (for determining original segment boundaries)</param>
        /// <param name="origText">the original string before tokenization (for determining original segment boundaries)</param>
        private static void TokenToDatums(IList <CoreLabel> iobList, CoreLabel cl, string token, IOBUtils.TokenType tokType, CoreLabel tokenLabel, string lastToken, bool applyRewriteRules, bool stripRewrites, ITokenizerFactory <CoreLabel> tf, string origText
                                          )
        {
            if (token.IsEmpty())
            {
                return;
            }
            string lastLabel        = ContinuationSymbol;
            string firstLabel       = BeginSymbol;
            string rewritten        = cl.Get(typeof(ArabicDocumentReaderAndWriter.RewrittenArabicAnnotation));
            bool   crossRefRewrites = true;

            if (rewritten == null)
            {
                // No rewritten annotation: nothing to cross-reference against
                rewritten        = token;
                crossRefRewrites = false;
            }
            else
            {
                rewritten = StripSegmentationMarkers(rewritten, tokType);
            }
            if (applyRewriteRules)
            {
                // Apply Arabic-specific re-write rules
                string rawToken = tokenLabel.Word();
                string tag      = tokenLabel.Tag();
                MorphoFeatureSpecification featureSpec = new ArabicMorphoFeatureSpecification();
                featureSpec.Activate(MorphoFeatureSpecification.MorphoFeatureType.Ngen);
                featureSpec.Activate(MorphoFeatureSpecification.MorphoFeatureType.Nnum);
                featureSpec.Activate(MorphoFeatureSpecification.MorphoFeatureType.Def);
                featureSpec.Activate(MorphoFeatureSpecification.MorphoFeatureType.Tense);
                MorphoFeatures features = featureSpec.StrToFeatures(tag);
                // Rule #1 : ت --> ة
                if (features.GetValue(MorphoFeatureSpecification.MorphoFeatureType.Ngen).Equals("F") && features.GetValue(MorphoFeatureSpecification.MorphoFeatureType.Nnum).Equals("SG") && rawToken.EndsWith("ت-") && !stripRewrites)
                {
                    lastLabel = RewriteSymbol;
                }
                else
                {
                    if (rawToken.EndsWith("ة-"))
                    {
                        System.Diagnostics.Debug.Assert(token.EndsWith("ة"));
                        token     = Sharpen.Runtime.Substring(token, 0, token.Length - 1) + "ت";
                        lastLabel = RewriteSymbol;
                    }
                }
                // Rule #2 : لل --> ل ال
                if (lastToken.Equals("ل") && features.GetValue(MorphoFeatureSpecification.MorphoFeatureType.Def).Equals("D"))
                {
                    if (rawToken.StartsWith("-ال"))
                    {
                        if (!token.StartsWith("ا"))
                        {
                            log.Info("Bad REWAL: " + rawToken + " / " + token);
                        }
                        // Drop the leading character from both the token and its rewritten form
                        token     = Sharpen.Runtime.Substring(token, 1);
                        rewritten = Sharpen.Runtime.Substring(rewritten, 1);
                        if (!stripRewrites)
                        {
                            firstLabel = RewriteSymbol;
                        }
                    }
                    else
                    {
                        if (rawToken.StartsWith("-ل"))
                        {
                            if (!token.StartsWith("ل"))
                            {
                                log.Info("Bad REWAL: " + rawToken + " / " + token);
                            }
                            if (!stripRewrites)
                            {
                                firstLabel = RewriteSymbol;
                            }
                        }
                        else
                        {
                            log.Info("Ignoring REWAL: " + rawToken + " / " + token);
                        }
                    }
                }
                // Rule #3 : ي --> ى
                // Rule #4 : ا --> ى
                if (rawToken.EndsWith("ى-"))
                {
                    if (features.GetValue(MorphoFeatureSpecification.MorphoFeatureType.Tense) != null)
                    {
                        // verb: ى becomes ا
                        token = Sharpen.Runtime.Substring(token, 0, token.Length - 1) + "ا";
                    }
                    else
                    {
                        // assume preposition:
                        token = Sharpen.Runtime.Substring(token, 0, token.Length - 1) + "ي";
                    }
                    if (!stripRewrites)
                    {
                        lastLabel = RewriteSymbol;
                    }
                }
                else
                {
                    if (rawToken.Equals("علي-") || rawToken.Equals("-علي-"))
                    {
                        if (!stripRewrites)
                        {
                            lastLabel = RewriteSymbol;
                        }
                    }
                }
            }
            string origWord;

            if (origText == null)
            {
                origWord = tokenLabel.Word();
            }
            else
            {
                origWord = Sharpen.Runtime.Substring(origText, cl.BeginPosition(), cl.EndPosition());
            }
            int origIndex = 0;

            // Skip characters of the original word that IsDeletedCharacter reports as deleted
            while (origIndex < origWord.Length && IsDeletedCharacter(origWord[origIndex], tf))
            {
                ++origIndex;
            }
            // Create datums and add to iobList
            if (token.IsEmpty())
            {
                log.Info("Rewriting resulted in empty token: " + tokenLabel.Word());
                // There is no character left to emit; indexing token[0] below would throw, and the
                // end-position fix-up at the bottom must not touch a datum added by an earlier call.
                return;
            }
            string firstChar = token[0].ToString();

            // Start at 0 to make sure we include the whole token according to the tokenizer
            iobList.Add(CreateDatum(cl, firstChar, firstLabel, 0, origIndex + 1));
            int numChars = token.Length;

            if (crossRefRewrites && rewritten.Length != numChars)
            {
                // Lengths disagree, so per-character comparison against the annotation is unsafe
                System.Console.Error.Printf("Rewritten annotation doesn't have correct length: %s>>>%s%n", token, rewritten);
                crossRefRewrites = false;
            }
            ++origIndex;
            for (int j = 1; j < numChars; ++j, ++origIndex)
            {
                while (origIndex < origWord.Length && IsDeletedCharacter(origWord[origIndex], tf))
                {
                    ++origIndex;
                }
                if (origIndex >= origWord.Length)
                {
                    // Clamp to the last character of the original word
                    origIndex = origWord.Length - 1;
                }
                string charLabel = (j == numChars - 1) ? lastLabel : ContinuationSymbol;
                string thisChar  = token[j].ToString();
                if (crossRefRewrites && !rewritten[j].ToString().Equals(thisChar))
                {
                    charLabel = RewriteSymbol;
                }
                // Assume all mid-word alef maqsura are supposed to be yah
                if (charLabel == ContinuationSymbol && thisChar.Equals("ى") && j != numChars - 1)
                {
                    charLabel = RewriteSymbol;
                }
                iobList.Add(CreateDatum(cl, thisChar, charLabel, origIndex, origIndex + 1));
            }
            // End at endPosition to make sure we include the whole token according to the tokenizer
            if (!iobList.IsEmpty())
            {
                iobList[iobList.Count - 1].SetEndPosition(cl.EndPosition());
            }
        }