コード例 #1
0
        /// <summary>Converts a tree to the Morfette training format.</summary>
        /// <remarks>
        /// Emits one line per token of the form <c>word lemma morph</c>, where:
        /// <list type="bullet">
        /// <item><description><c>word</c> is the leaf label's value;</description></item>
        /// <item><description><c>lemma</c> is the leaf label's lemma, or its value when the lemma is null or empty;</description></item>
        /// <item><description><c>morph</c> is the leaf label's original text, or the value of the
        /// corresponding preterminal label when the original text is null or empty.</description></item>
        /// </list>
        /// </remarks>
        /// <param name="tree">Tree whose leaf and preterminal labels are <c>CoreLabel</c> instances.</param>
        /// <returns>The concatenated token lines, each terminated by the platform line separator.</returns>
        private static string TreeToMorfette(Tree tree)
        {
            StringBuilder  sb       = new StringBuilder();
            IList <ILabel> yield    = tree.Yield();
            IList <ILabel> tagYield = tree.PreTerminalYield();

            // Leaves and preterminals are read in lockstep below, so the two lists must line up.
            System.Diagnostics.Debug.Assert(yield.Count == tagYield.Count);
            int listLen = yield.Count;

            for (int i = 0; i < listLen; ++i)
            {
                CoreLabel token = (CoreLabel)yield[i];
                CoreLabel tag   = (CoreLabel)tagYield[i];

                // Fall back to the preterminal's value when the leaf carries no original text.
                string morphStr = token.OriginalText();
                if (string.IsNullOrEmpty(morphStr))
                {
                    morphStr = tag.Value();
                }

                // Fall back to the surface form when no lemma is available.
                string lemma = token.Lemma();
                if (string.IsNullOrEmpty(lemma))
                {
                    lemma = token.Value();
                }

                // The Java-style "%s %s %s%n" specifiers are not understood by string.Format,
                // which would emit them verbatim; build the line explicitly instead.
                sb.Append(token.Value()).Append(' ').Append(lemma).Append(' ').Append(morphStr).AppendLine();
            }
            return sb.ToString();
        }
コード例 #2
0
        /// <summary>
        /// Reads trees from a file and prints one <c>word lemma morph</c> line per token to
        /// standard output, followed by a blank line after each tree.
        /// </summary>
        /// <remarks>
        /// The file is decoded as UTF-8 and parsed with a <c>FrenchTreeReaderFactory</c> reader.
        /// For each leaf, the lemma and morphological analysis are obtained by splitting the
        /// leaf's original text with <c>MorphoFeatureSpecification.SplitMorphString</c>. When the
        /// analysis is null, empty, or <c>"XXX"</c>, the value of the corresponding preterminal
        /// label is printed instead. Encoding, file-not-found and I/O errors are reported by
        /// printing the stack trace.
        /// </remarks>
        /// <param name="args">Exactly one element: the path of the tree file to read.</param>
        public static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                // Composite format placeholders replace the Java-style "%s"/"%n" specifiers.
                System.Console.Error.WriteLine("Usage: java {0} tree_file", typeof(TreeToMorfette).FullName);
                System.Environment.Exit(-1);
            }
            string             treeFile = args[0];
            ITreeReaderFactory trf      = new FrenchTreeReaderFactory();

            try
            {
                ITreeReader tr = trf.NewTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8")));
                try
                {
                    Tree tree1;
                    while ((tree1 = tr.ReadTree()) != null)
                    {
                        IList <ILabel> pretermYield = tree1.PreTerminalYield();
                        IList <ILabel> yield        = tree1.Yield();
                        int            yieldLen     = yield.Count;
                        for (int i = 0; i < yieldLen; ++i)
                        {
                            CoreLabel             rawToken   = (CoreLabel)yield[i];
                            string                word       = rawToken.Value();
                            string                morphStr   = rawToken.OriginalText();
                            Pair <string, string> lemmaMorph = MorphoFeatureSpecification.SplitMorphString(word, morphStr);
                            string                lemma      = lemmaMorph.First();
                            string                morph      = lemmaMorph.Second();

                            // No usable analysis: fall back to the preterminal label's value.
                            if (string.IsNullOrEmpty(morph) || morph.Equals("XXX"))
                            {
                                morph = ((CoreLabel)pretermYield[i]).Value();
                            }
                            System.Console.Out.WriteLine("{0} {1} {2}", word, lemma, morph);
                        }
                        // Blank line separates consecutive trees.
                        System.Console.Out.WriteLine();
                    }
                }
                finally
                {
                    // Release the reader even when reading or label processing throws.
                    tr.Close();
                }
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
コード例 #3
0
        /// <summary>
        /// Replaces the value and tag of a preterminal's label with the alternate tag derived
        /// from the morphological analysis of its child.
        /// </summary>
        /// <remarks>
        /// The analysis string is the child label's original text. When that is null or empty,
        /// one is built from the preterminal label's value followed by <c>-{category}--</c>
        /// (or <c>---</c> when the child has no category). The label is left unchanged when the
        /// resulting features yield a null or empty alternate tag.
        /// </remarks>
        /// <param name="t">A preterminal node whose label and child label are <c>CoreLabel</c> instances.</param>
        /// <param name="morpho">Specification used to convert the analysis string into features.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="t"/> is not a preterminal, or its label or its child's label is not a <c>CoreLabel</c>.
        /// </exception>
        private static void ReplacePOSTag(Tree t, MorphoFeatureSpecification morpho)
        {
            if (!t.IsPreTerminal())
            {
                throw new ArgumentException("Can only operate on preterminals");
            }

            var parentRaw = t.Label();
            if (!(parentRaw is CoreLabel))
            {
                throw new ArgumentException("Only operates on CoreLabels");
            }
            CoreLabel posLabel = (CoreLabel)parentRaw;

            Tree leaf    = t.Children()[0];
            var  leafRaw = leaf.Label();
            if (!(leafRaw is CoreLabel))
            {
                throw new ArgumentException("Only operates on CoreLabels");
            }
            CoreLabel leafLabel = (CoreLabel)leafRaw;

            // Morphological analysis: prefer the one stored on the leaf.
            string analysis = leafLabel.OriginalText();
            if (string.IsNullOrEmpty(analysis))
            {
                // None stored; synthesize one from the POS tag and the POS subcategory.
                string posValue    = posLabel.Value();
                string subCategory = leafLabel.Category();
                string suffix      = string.IsNullOrEmpty(subCategory) ? "---" : "-" + subCategory + "--";
                analysis = posValue + suffix;
            }

            MorphoFeatures features = morpho.StrToFeatures(analysis);
            string         altTag   = features.GetAltTag();
            if (string.IsNullOrEmpty(altTag))
            {
                return;
            }

            posLabel.SetValue(altTag);
            posLabel.SetTag(altTag);
        }
コード例 #4
0
        /// <summary>
        /// Rewrites the leaf labels of a tree in place, optionally replacing each word with its
        /// lemma and/or appending lemma and morphological analysis to the leaf value.
        /// </summary>
        /// <remarks>
        /// For every leaf the <c>nTokens</c> counter (declared outside this method) is incremented.
        /// The lemma defaults to the label's word when no lemma is set; a lemma of <c>(</c> or
        /// <c>)</c> is replaced by <c>-LRB-</c> / <c>-RRB-</c>.
        /// When <paramref name="addMorphoToLeaves"/> is set, the new leaf is
        /// <c>value + MorphoMark + lemma + LemmaMark + analysis</c>, where the analysis is the
        /// label's original text (counted in <c>nMorphAnalyses</c>), or
        /// <c>MorphoFeatureSpecification.NoAnalysis</c> when that is null or empty. Analyses
        /// starting with <c>PONCT</c> are normalized to <c>PUNC</c>.
        /// </remarks>
        /// <param name="tree">Tree whose leaf labels are <c>CoreLabel</c> instances.</param>
        /// <param name="lemmasAsLeaves">If true, set each leaf's word, value and lemma to the (escaped) lemma.</param>
        /// <param name="addMorphoToLeaves">If true, append the lemma and morphological analysis to each leaf.</param>
        /// <exception cref="ArgumentException">A leaf label is not a <c>CoreLabel</c>.</exception>
        public static void MungeLeaves(Tree tree, bool lemmasAsLeaves, bool addMorphoToLeaves)
        {
            IList <ILabel> labels = tree.Yield();

            foreach (ILabel label in labels)
            {
                ++nTokens;
                if (!(label is CoreLabel))
                {
                    throw new ArgumentException("Only works with CoreLabels trees");
                }
                CoreLabel coreLabel = (CoreLabel)label;
                string    lemma     = coreLabel.Lemma();

                // PTB escaping since we're going to put this in the leaf
                if (lemma == null)
                {
                    // No lemma, so just add the surface form
                    lemma = coreLabel.Word();
                }
                else if (lemma.Equals("("))
                {
                    lemma = "-LRB-";
                }
                else if (lemma.Equals(")"))
                {
                    lemma = "-RRB-";
                }

                if (lemmasAsLeaves)
                {
                    coreLabel.SetWord(lemma);
                    coreLabel.SetValue(lemma);
                    coreLabel.SetLemma(lemma);
                }

                if (addMorphoToLeaves)
                {
                    string morphStr = coreLabel.OriginalText();
                    if (string.IsNullOrEmpty(morphStr))
                    {
                        morphStr = MorphoFeatureSpecification.NoAnalysis;
                    }
                    else
                    {
                        ++nMorphAnalyses;
                    }

                    // Normalize punctuation analyses. The tag is a fixed identifier, so compare
                    // ordinally rather than with the current culture.
                    if (morphStr.StartsWith("PONCT", System.StringComparison.Ordinal))
                    {
                        morphStr = "PUNC";
                    }

                    // string.Format does not understand Java "%s" specifiers and would return the
                    // pattern verbatim, so concatenate the parts directly.
                    string newLeaf = string.Concat(coreLabel.Value(), MorphoFeatureSpecification.MorphoMark, lemma, MorphoFeatureSpecification.LemmaMark, morphStr);
                    coreLabel.SetValue(newLeaf);
                    coreLabel.SetWord(newLeaf);
                }
            }
        }