C# (CSharp) PTBEscapingProcessor 예제들

프로그래밍 언어: C# (CSharp)

클래스/타입: PTBEscapingProcessor

hotexamples.com에서의 예제들: 2

C# (CSharp) PTBEscapingProcessor - 2개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 C# (CSharp)의 PTBEscapingProcessor에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

EscapeString(1)

예제 #1

파일 보기

파일: ReadSentimentDataset.cs 프로젝트: zerouid/Stanford.CoreNLP.NET

        // static class
        /// <summary>
        /// Builds a sentiment-labelled tree for one sentence from its parent-pointer encoding.
        /// </summary>
        /// <remarks>
        /// Every node (word nodes first, then phrase nodes) gets a label holding its sentiment
        /// class, computed as <c>floor(score * numClasses)</c> and capped at <c>numClasses - 1</c>.
        /// Leaf words are then escaped with <paramref name="escaper"/> and the tree is passed
        /// through the tregex/tsurgeon pattern pairs declared elsewhere in this class.
        /// </remarks>
        /// <param name="parentPointers">For each node index, the index of its parent; <c>-1</c> marks the root.</param>
        /// <param name="sentence">The words of the sentence; word <c>i</c> becomes the leaf under node <c>i</c>.</param>
        /// <param name="phraseIds">Maps a phrase (list of words) to its phrase id.</param>
        /// <param name="sentimentScores">Maps a phrase id to its sentiment score.</param>
        /// <param name="escaper">Used to escape the text of each leaf word.</param>
        /// <param name="numClasses">Number of sentiment classes the score is bucketed into.</param>
        /// <returns>The root of the converted tree after the tsurgeon patterns have been applied.</returns>
        /// <exception cref="InvalidOperationException">
        /// The input has more than one root, or a phrase id or sentiment score cannot be found.
        /// </exception>
        public static Tree ConvertTree(IList <int> parentPointers, IList <string> sentence, IDictionary <IList <string>, int> phraseIds, IDictionary <int, double> sentimentScores, PTBEscapingProcessor escaper, int numClasses)
        {
            // The node array is sized from the largest index that appears as a parent.
            int maxNode = 0;
            foreach (int parent in parentPointers)
            {
                maxNode = Math.Max(maxNode, parent);
            }

            Tree[] subtrees = new Tree[maxNode + 1];

            // Nodes [0, sentence.Count) are preterminals, each wrapping a single word leaf.
            for (int i = 0; i < sentence.Count; ++i)
            {
                CoreLabel word = new CoreLabel();
                word.SetValue(sentence[i]);
                Tree leaf = new LabeledScoredTreeNode(word);
                subtrees[i] = new LabeledScoredTreeNode(new CoreLabel());
                subtrees[i].AddChild(leaf);
            }

            // The remaining nodes are internal phrase nodes, initially childless.
            for (int i = sentence.Count; i <= maxNode; ++i)
            {
                subtrees[i] = new LabeledScoredTreeNode(new CoreLabel());
            }

            bool[] connected = new bool[maxNode + 1];
            Tree   root      = null;

            for (int index = 0; index < parentPointers.Count; ++index)
            {
                if (parentPointers[index] == -1)
                {
                    if (root != null)
                    {
                        // Join the words explicitly: concatenating the list itself would
                        // only print its type name.
                        throw new InvalidOperationException("Found two roots for sentence " + string.Join(" ", sentence));
                    }
                    root = subtrees[index];
                }
                else
                {
                    // Walk up the tree structure to make sure that leftmost
                    // phrases are added first.  Otherwise, if the numbers are
                    // inverted, we might get the right phrase added to a parent
                    // first, resulting in "case zero in this", for example,
                    // instead of "in this case zero"
                    // Note that because we keep track of which ones are already
                    // connected, we process this at most once per parent, so the
                    // overall construction time is still efficient.
                    Connect(parentPointers, subtrees, connected, index);
                }
            }

            for (int i = 0; i <= maxNode; ++i)
            {
                IList <Tree>   leaves = subtrees[i].GetLeaves();
                IList <string> words  = CollectionUtils.TransformAsList(leaves, TransformTreeToWord);

                // First we look for a copy of the phrase with -LRB- -RRB-
                // instead of ().  The sentiment trees sometimes have both, and
                // the escaped versions seem to have more reasonable scores.
                // If a particular phrase doesn't have -LRB- -RRB- we fall back
                // to the unescaped versions.
                // TryGetValue is required here: an int can never be null, and the
                // dictionary indexer throws on a missing key instead of returning a
                // sentinel, so a null check would never reach the fallback.
                // NOTE(review): these lookups only succeed if phraseIds compares its
                // list keys by content rather than by reference - confirm at the call site.
                IList <string> escapedWords = CollectionUtils.TransformAsList(words, TransformParens);
                int phraseId;
                if (!phraseIds.TryGetValue(escapedWords, out phraseId) && !phraseIds.TryGetValue(words, out phraseId))
                {
                    throw new InvalidOperationException("Could not find phrase id for phrase " + string.Join(" ", sentence));
                }

                // TODO: should we make this an option?  Perhaps we want cases
                // where the trees have the phrase id and not their class
                double score;
                if (!sentimentScores.TryGetValue(phraseId, out score))
                {
                    throw new InvalidOperationException("Could not find sentiment score for phrase id " + phraseId);
                }

                // Bucket the score into a class index.  A score at the very top of the
                // range would land on numClasses itself, so cap it at the last class.
                int classLabel = (int)Math.Floor(score * numClasses);
                if (classLabel > numClasses - 1)
                {
                    classLabel = numClasses - 1;
                }
                subtrees[i].Label().SetValue(classLabel.ToString(System.Globalization.CultureInfo.InvariantCulture));
            }

            // Escape the leaf words only after the phrase lookups above, which use the raw words.
            for (int i = 0; i < sentence.Count; ++i)
            {
                Tree leaf = subtrees[i].Children()[0];
                leaf.Label().SetValue(escaper.EscapeString(leaf.Label().Value()));
            }

            // Apply each tregex/tsurgeon pair in order; every step may replace the root.
            for (int i = 0; i < tregexPatterns.Length; ++i)
            {
                root = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ProcessPattern(tregexPatterns[i], tsurgeonPatterns[i], root);
            }
            return root;
        }

예제 #2

파일 보기

파일: ReadSentimentDataset.cs 프로젝트: zerouid/Stanford.CoreNLP.NET

        /// <summary>
        /// Converts the Sentiment data set prepared by Richard, Jean, etc. into trees
        /// readable with the normal TreeReaders.
        /// </summary>
        /// <remarks>
        /// An example command line (from the original Java tool) is
        /// <br />
        /// <code>java edu.stanford.nlp.sentiment.ReadSentimentDataset -dictionary stanfordSentimentTreebank/dictionary.txt -sentiment stanfordSentimentTreebank/sentiment_labels.txt -tokens stanfordSentimentTreebank/SOStr.txt -parse stanfordSentimentTreebank/STree.txt  -split stanfordSentimentTreebank/datasetSplit.txt  -train train.txt -dev dev.txt -test test.txt</code>
        /// <br />
        /// The arguments are as follows: <br />
        /// <code>-dictionary</code>, <code>-sentiment</code>,
        /// <code>-tokens</code>, <code>-parse</code>, <code>-split</code>
        /// Path to the corresponding files from the dataset <br />
        /// <code>-train</code>, <code>-dev</code>, <code>-test</code>
        /// Paths for saving the corresponding output files <br />
        /// Each of these arguments is required.
        /// <br />
        /// <code>-numClasses</code> sets the number of sentiment classes (default 5). <br />
        /// Macro arguments exist in -inputDir and -outputDir, so you can for example run <br />
        /// <code>java edu.stanford.nlp.sentiment.ReadSentimentDataset -inputDir ../data/sentiment/stanfordSentimentTreebank  -outputDir .</code>
        /// <br />
        /// An unknown argument, or an argument without a value, is logged and the
        /// process exits with code 2.
        /// </remarks>
        /// <param name="args">Command-line arguments as flag/value pairs.</param>
        public static void Main(string[] args)
        {
            string dictionaryFilename = null;
            string sentimentFilename  = null;
            string tokensFilename     = null;
            string parseFilename      = null;
            string splitFilename      = null;
            string trainFilename      = null;
            string devFilename        = null;
            string testFilename       = null;
            int    numClasses         = 5;
            int    argIndex           = 0;

            while (argIndex < args.Length)
            {
                string flag  = args[argIndex];
                // Every option takes exactly one value; null means the flag was the last argument.
                string value = argIndex + 1 < args.Length ? args[argIndex + 1] : null;
                bool   known = true;

                if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-dictionary"))
                {
                    dictionaryFilename = value;
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-sentiment"))
                {
                    sentimentFilename = value;
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-tokens"))
                {
                    tokensFilename = value;
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-parse"))
                {
                    parseFilename = value;
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-split"))
                {
                    splitFilename = value;
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-inputDir") || Sharpen.Runtime.EqualsIgnoreCase(flag, "-inputDirectory"))
                {
                    dictionaryFilename = value + "/dictionary.txt";
                    sentimentFilename  = value + "/sentiment_labels.txt";
                    tokensFilename     = value + "/SOStr.txt";
                    parseFilename      = value + "/STree.txt";
                    splitFilename      = value + "/datasetSplit.txt";
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-train"))
                {
                    trainFilename = value;
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-dev"))
                {
                    devFilename = value;
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-test"))
                {
                    testFilename = value;
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-outputDir") || Sharpen.Runtime.EqualsIgnoreCase(flag, "-outputDirectory"))
                {
                    trainFilename = value + "/train.txt";
                    devFilename   = value + "/dev.txt";
                    testFilename  = value + "/test.txt";
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-numClasses"))
                {
                    if (value != null)
                    {
                        numClasses = System.Convert.ToInt32(value);
                    }
                }
                else
                {
                    known = false;
                }

                if (!known)
                {
                    log.Info("Unknown argument " + flag);
                    System.Environment.Exit(2);
                }
                if (value == null)
                {
                    // Previously this case read past the end of args and crashed.
                    log.Info("Missing value for argument " + flag);
                    System.Environment.Exit(2);
                }
                argIndex += 2;
            }

            // The data files are machine-written, so parse numbers independently of the user's locale.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Sentence file is formatted
            //   w1|w2|w3...
            // Split on the literal '|' character: the "\\|" form is a Java regex and
            // would be treated as a two-character literal separator in C#.
            IList <IList <string> > sentences = new List <IList <string> >();

            foreach (string line in IOUtils.ReadLines(tokensFilename, "utf-8"))
            {
                string[] words = line.Split('|');
                sentences.Add(Arrays.AsList(words));
            }

            // Split and read the phrase ids file.  This file is in the format
            //   w1 w2 w3 ... | id
            // The keys are word lists, so the dictionary must compare them by content;
            // the default comparer would compare list references and never match a
            // freshly built phrase.
            IDictionary <IList <string>, int> phraseIds = new Dictionary <IList <string>, int>(new PhraseKeyComparer());

            foreach (string line in IOUtils.ReadLines(dictionaryFilename, "utf-8"))
            {
                string[] pieces = line.Split('|');
                string[] phrase = pieces[0].Split(' ');
                int      id     = int.Parse(pieces[1], invariant);
                phraseIds[Arrays.AsList(phrase)] = id;
            }

            // Split and read the sentiment scores file.  Each line of this
            // file is of the format:
            //   phrasenum | score
            IDictionary <int, double> sentimentScores = new Dictionary <int, double>();

            foreach (string line in IOUtils.ReadLines(sentimentFilename, "utf-8"))
            {
                // Skip the header row.
                if (line.StartsWith("phrase", StringComparison.Ordinal))
                {
                    continue;
                }
                string[] pieces = line.Split('|');
                int      id     = int.Parse(pieces[0], invariant);
                double   score  = double.Parse(pieces[1], invariant);
                sentimentScores[id] = score;
            }

            // Read lines from the tree structure file.  This is a file of parent pointers for each tree.
            int index = 0;
            PTBEscapingProcessor escaper = new PTBEscapingProcessor();
            IList <Tree>         trees   = new List <Tree>();

            foreach (string line in IOUtils.ReadLines(parseFilename, "utf-8"))
            {
                string[]    pieces         = line.Split('|');
                List <int>  parentPointers = new List <int>(pieces.Length);

                // The automatic conversion dropped the transform here (it passed null).
                // ConvertTree treats -1 as the root marker, so each stored pointer is
                // shifted down by one, mapping a stored 0 to -1.
                // NOTE(review): presumably the file uses 1-based parents with 0 for the
                // root, as in the upstream Java tool - confirm against STree.txt.
                foreach (string piece in pieces)
                {
                    parentPointers.Add(int.Parse(piece, invariant) - 1);
                }

                Tree tree = ConvertTree(parentPointers, sentences[index], phraseIds, sentimentScores, escaper, numClasses);
                ++index;
                trees.Add(tree);
            }

            // Split file ids: 1, 2 and 3 are written to the train, test and dev files respectively.
            IDictionary <int, IList <int> > splits = new Dictionary <int, IList <int> >();

            splits[1] = new List <int>();
            splits[2] = new List <int>();
            splits[3] = new List <int>();
            foreach (string line in IOUtils.ReadLines(splitFilename, "utf-8"))
            {
                // Skip the header row.
                if (line.StartsWith("sentence_index", StringComparison.Ordinal))
                {
                    continue;
                }
                string[] pieces = line.Split(',');
                // Sentence indices in the file are 1-based; the trees list is 0-based.
                int      treeId = int.Parse(pieces[0], invariant) - 1;
                int      fileId = int.Parse(pieces[1], invariant);
                splits[fileId].Add(treeId);
            }
            WriteTrees(trainFilename, trees, splits[1]);
            WriteTrees(testFilename, trees, splits[2]);
            WriteTrees(devFilename, trees, splits[3]);
        }

        /// <summary>
        /// Compares phrase keys (lists of words) element by element, so that two
        /// separately built lists holding the same words address the same dictionary entry.
        /// </summary>
        private sealed class PhraseKeyComparer : IEqualityComparer <IList <string> >
        {
            /// <summary>Returns true when both lists hold the same words in the same order.</summary>
            public bool Equals(IList <string> x, IList <string> y)
            {
                if (ReferenceEquals(x, y))
                {
                    return true;
                }
                if (x == null || y == null || x.Count != y.Count)
                {
                    return false;
                }
                for (int i = 0; i < x.Count; ++i)
                {
                    if (!string.Equals(x[i], y[i], StringComparison.Ordinal))
                    {
                        return false;
                    }
                }
                return true;
            }

            /// <summary>Combines the hash codes of the words, in order.</summary>
            public int GetHashCode(IList <string> words)
            {
                if (words == null)
                {
                    return 0;
                }
                int hash = 17;
                unchecked
                {
                    foreach (string word in words)
                    {
                        hash = hash * 31 + (word == null ? 0 : word.GetHashCode());
                    }
                }
                return hash;
            }
        }