Inherits: java.io.Reader
Example #1
        static void Main()
        {
            // Path to models extracted from `stanford-parser-3.6.0-models.jar`.
            var jarRoot = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-parser-full-2016-10-31\models\";
            // FIX: jarRoot already ends with '\'; the original prepended another '\',
            // producing a "...models\\edu\..." path with a doubled separator.
            var modelsDirectory = jarRoot + @"edu\stanford\nlp\models";

            // Loading english PCFG parser from file
            var lp = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

            // This sample shows parsing a list of correctly tokenized words
            var sent = new[] { "This", "is", "an", "easy", "sentence", "." };
            var rawWords = SentenceUtils.toCoreLabelList(sent);
            var tree = lp.apply(rawWords);
            tree.pennPrint();

            // This option shows loading and using an explicit tokenizer
            var sent2 = "This is another sentence.";
            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var sent2Reader = new StringReader(sent2);
            var rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
            sent2Reader.close();
            var tree2 = lp.apply(rawWords2);

            // Extract dependencies from lexical tree
            var tlp = new PennTreebankLanguagePack();
            var gsf = tlp.grammaticalStructureFactory();
            var gs = gsf.newGrammaticalStructure(tree2);
            var tdl = gs.typedDependenciesCCprocessed();
            Console.WriteLine("\n{0}\n", tdl);

            // Extract collapsed dependencies from parsed tree
            var tp = new TreePrint("penn,typedDependenciesCollapsed");
            tp.printTree(tree2);
        }
        /// <summary>
        /// Tokenizes <paramref name="sentence"/> with an explicit PTB tokenizer,
        /// parses it with the dependency model, and returns the CC-processed typed
        /// dependencies, one per line (each line preceded by '\n').
        /// </summary>
        /// <param name="sentence">Raw sentence text.</param>
        /// <returns>Newline-separated typed dependencies (empty string if none).</returns>
        public string parse(string sentence)
        {
            // This option shows loading and using an explicit tokenizer
            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var sent2Reader = new StringReader(sentence);
            var rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
            sent2Reader.close(); // FIX: the reader was previously never closed (resource leak)

            var parse = _sdpModel.apply(rawWords2);

            // Extract CC-processed typed dependencies from the lexical tree.
            var tlp = new PennTreebankLanguagePack();
            var gsf = tlp.grammaticalStructureFactory();
            var gs = gsf.newGrammaticalStructure(parse);
            var tdl = gs.typedDependenciesCCprocessed();

            System.Console.WriteLine();

            // FIX: accumulate with StringBuilder instead of repeated string
            // concatenation (the original was O(n^2) in the dependency count).
            var sb = new System.Text.StringBuilder();
            for (var it = tdl.iterator(); it.hasNext(); )
            {
                sb.Append('\n').Append(it.next());
            }

            return sb.ToString();
        }
Example #3
        /// <summary>
        /// Convert a Molfile into a CDK AtomContainer (e.g. Molecule).
        /// </summary>
        /// <param name="molfile">Molfile text in V2000 or V3000 format.</param>
        /// <returns>The parsed and configured atom container.</returns>
        public static IAtomContainer MolfileToAtomContainer(string molfile)
        {
            // V2000 is read directly here; V3000 is delegated to its own converter.
            if (Lex.Contains(molfile, "V2000"))
            {
                var reader = new MDLV2000Reader(new java.io.StringReader(molfile));
                reader.setReaderMode(IChemObjectReader.Mode.RELAXED);

                var container = (IAtomContainer)reader.read(new AtomContainer());
                reader.close();

                ConfigureAtomContainer(container);
                return container;
            }

            if (Lex.Contains(molfile, "V3000"))
            {
                return MolfileV3000ToAtomContainer(molfile);
            }

            throw new Exception("Unrecognized molfile format");
        }
Example #4
        // Use Stanford.NLP.Net to parse the sentence.
        static Tree Parse(string sent)
        {
            // Load the English PCFG parser model from disk.
            var parser = LexicalizedParser.loadModel(modelsDirectory + "\\lexparser\\englishPCFG.ser.gz");

            // Tokenize the raw text with an explicit PTB tokenizer.
            var factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var reader  = new java.io.StringReader(sent);
            var tokens  = factory.getTokenizer(reader).tokenize();
            reader.close();

            var tree = parser.apply(tokens);

            // Derive typed dependencies from the lexical tree (the result is
            // computed but not otherwise used here).
            var languagePack     = new PennTreebankLanguagePack();
            var structureFactory = languagePack.grammaticalStructureFactory();
            var structure        = structureFactory.newGrammaticalStructure(tree);
            var dependencies     = structure.typedDependenciesCCprocessed();

            // Print the tree in Penn Treebank format.
            //var tp = new TreePrint("penn,typedDependenciesCollapsed");
            var printer = new TreePrint("penn");
            printer.printTree(tree);

            return tree;
        }
Example #5
        /// <summary>
        /// Converts a V3000 Molfile into a CDK atom container, preserving any
        /// custom MASS= attributes through the conversion.
        /// </summary>
        /// <param name="molfile">V3000 molfile text.</param>
        /// <returns>Parsed atom container with mass numbers restored.</returns>
        public static IAtomContainer MolfileV3000ToAtomContainer(string molfile)
        {
            // Extract any mass info before conversion to avoid losing our custom info in conversion.
            var massByAtomNumber = new Dictionary<int, int>();
            if (molfile.Contains(" MASS="))
            {
                massByAtomNumber = ExtractMassAttributes(ref molfile);
            }

            var reader = new MDLV3000Reader(new java.io.StringReader(molfile));
            reader.setReaderMode(IChemObjectReader.Mode.RELAXED);

            IAtomContainer mol = (IAtomContainer)reader.read(new AtomContainer());
            reader.close();

            // Re-apply the extracted mass numbers; the map keys are 1-based atom numbers.
            for (int atomIndex = 0; atomIndex < mol.getAtomCount(); atomIndex++)
            {
                IAtom atom = mol.getAtom(atomIndex);
                int mass;
                if (massByAtomNumber.TryGetValue(atomIndex + 1, out mass))
                {
                    atom.setMassNumber(new java.lang.Integer(mass));
                }
                else
                {
                    atom.setMassNumber(null);
                }
            }

            ConfigureAtomContainer(mol);
            return mol;
        }
Example #6
        /// <summary>Tokenizes a sentence and parses it into a tree.</summary>
        Tree parseSentence(string sentence)
        {
            var reader = new StringReader(sentence);
            var tokens = _tokenizerFactory.getTokenizer(reader).tokenize();
            reader.close();

            return _parser.parse(tokens);
        }
    // Test the classification result of each map that a user played,
    // with the data available as if they were playing through it.
    // Trains on a growing prefix of the data (first 3 instances seed the loop),
    // cross-validates each prefix, and writes the accuracy series to a file.
    public static void classifyTest(String dataString, String playerID)
    {
        String results = "";
        try {
            java.io.StringReader stringReader = new java.io.StringReader(dataString);
            java.io.BufferedReader buffReader = new java.io.BufferedReader(stringReader);

            /* NOTE THAT FOR NAIVE BAYES ALL WEIGHTS CAN BE = 1*/
            //weka.core.converters.ConverterUtils.DataSource source = new weka.core.converters.ConverterUtils.DataSource("iris.arff");
            weka.core.Instances data = new weka.core.Instances(buffReader); //source.getDataSet();
            buffReader.close(); // FIX: release the reader once the instances are loaded

            // setting class attribute if the data format does not provide this information
            // For example, the XRFF format saves the class attribute information as well
            if (data.classIndex() == -1)
                data.setClassIndex(data.numAttributes() - 1);

            weka.classifiers.Classifier cl;
            for (int i = 3; i < data.numInstances(); i++) {
                cl = new weka.classifiers.bayes.NaiveBayes();
                //cl = new weka.classifiers.trees.J48();
                //cl = new weka.classifiers.lazy.IB1();
                //cl = new weka.classifiers.functions.MultilayerPerceptron();

                // FIX: the original unconditionally cast `cl` (a NaiveBayes) to
                // MultilayerPerceptron, which throws InvalidCastException at runtime
                // (and is NOT caught by the java.lang.Exception handler below).
                // Configure hidden layers only when the classifier really is an MLP.
                var mlp = cl as weka.classifiers.functions.MultilayerPerceptron;
                if (mlp != null)
                    mlp.setHiddenLayers("12");

                weka.core.Instances subset = new weka.core.Instances(data,0,i);
                cl.buildClassifier(subset);

                weka.classifiers.Evaluation eval = new weka.classifiers.Evaluation(subset);
                eval.crossValidateModel(cl, subset, 3, new java.util.Random(1));
                results = results + eval.pctCorrect(); // For accuracy measurement
                /* For Mathews Correlation Coefficient */
                //double TP = eval.numTruePositives(1);
                //double FP = eval.numFalsePositives(1);
                //double TN = eval.numTrueNegatives(1);
                //double FN = eval.numFalseNegatives(1);
                //double correlationCoeff = ((TP*TN)-(FP*FN))/Math.Sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN));
                //results = results + correlationCoeff;
                if (i != data.numInstances()-1)
                    results = results + ", ";
                if(i == data.numInstances()-1)
                    Debug.Log("Player: " + playerID + ", Num Maps: " + data.numInstances() + ", AUC: " + eval.areaUnderROC(1));
            }
        } catch (java.lang.Exception ex)
        {
            Debug.LogError(ex.getMessage());
        }
        // Write values to file for a matlab read
        // For accuracy
        // FIX: dispose the writer deterministically even if WriteLine throws.
        using (StreamWriter writer = new StreamWriter("DataForMatlab/"+playerID+"_CrossFoldValidations_NeuralNet.txt"))
        {
            //StreamWriter writer = new StreamWriter("DataForMatlab/"+playerID+"_CrossFoldCorrCoeff.txt"); // For mathews cc
            writer.WriteLine(results);
        }
        Debug.Log(playerID + " has been written to file");
    }
Example #8
        /// <summary>
        /// Performs word segmentation on the given text and returns the segments.
        /// </summary>
        /// <param name="txt">Text to segment.</param>
        /// <returns>Pairs of (segment string, start offset), in input order.</returns>
        public List<Pair<String, Int32>> SegWords(String txt)
        {
            MMSeg mmSeg = new MMSeg(new StringReader(txt), GetSeg());

            var segments = new List<Pair<String, Int32>>();

            // Two offset accessors are available on Word:
            //word.getWordOffset();
            //word.getStartOffset();
            Word word;
            while ((word = mmSeg.next()) != null)
            {
                segments.Add(new Pair<String, Int32>(word.getString(), word.getStartOffset()));
            }

            return segments;
        }
Example #9
        /// <summary>
        /// Tokenizes a string with the PTB tokenizer and returns the tokens upper-cased.
        /// </summary>
        /// <param name="s">Raw input text.</param>
        /// <returns>Upper-cased token strings in input order.</returns>
        public static List <string> Tokenize(string s)
        {
            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");

            // Keep (rather than delete) characters the tokenizer cannot handle.
            tokenizerFactory.setOptions("untokenizable=noneDelete");
            var reader = new jio.StringReader(s);
            var words  = tokenizerFactory.getTokenizer(reader).tokenize();
            reader.close(); // FIX: the reader was previously never closed

            var tokens = new List <string>(words.size()); // presize to the token count

            for (int i = 0; i < words.size(); i++)
            {
                ling.CoreLabel word = (ling.CoreLabel)words.get(i);
                // FIX: use the invariant culture so normalization does not depend on
                // the current thread culture (e.g. the Turkish dotless-i problem).
                string         w    = word.toString().ToUpperInvariant();
                tokens.Add(w);
            }
            return(tokens);
        }
Example #10
File: Program.cs — Project: lrank/QA_task
        /// <summary>
        /// Tokenizes the two sentence columns of the MSR paraphrase training file
        /// (tab-separated; sentences in columns 3 and 4) into two .token files.
        /// </summary>
        static void Main(string[] args)
        {
            // FIX: writers are now in `using` blocks so they are flushed/closed
            // even when an exception interrupts the loop.
            using (TextReader reader = System.IO.File.OpenText("C:\\Data\\msr_paraphrase_train.txt"))
            using (TextWriter writer1 = System.IO.File.CreateText("C:\\Data\\msr_paraphrase_train_s1.token"))
            using (TextWriter writer2 = System.IO.File.CreateText("C:\\Data\\msr_paraphrase_train_s2.token"))
            {
                string[] inputdata = reader.ReadToEnd().Split('\n');

                // FIX: create the tokenizer factory once instead of once per line.
                var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");

                foreach (string line in inputdata)
                {
                    string[] sp = line.Split('\t');

                    // FIX: skip blank/short lines (e.g. the trailing empty element left
                    // by Split('\n')), which previously threw IndexOutOfRangeException
                    // on sp[3]/sp[4].
                    if (sp.Length < 5)
                    {
                        continue;
                    }

                    //writer.Write(sp[0] + '\t');

                    var sent2Reader1 = new java.io.StringReader(sp[3]);
                    java.util.List rawWords1 = tokenizerFactory.getTokenizer(sent2Reader1).tokenize();
                    sent2Reader1.close();
                    var sent2Reader2 = new java.io.StringReader(sp[4]);
                    java.util.List rawWords2 = tokenizerFactory.getTokenizer(sent2Reader2).tokenize();
                    sent2Reader2.close();

                    for (int i = 0; i < rawWords1.size(); ++i)
                    {
                        writer1.Write(rawWords1.get(i) + " ");
                    }
                    writer1.Write('\n');

                    for (int i = 0; i < rawWords2.size(); ++i)
                    {
                        writer2.Write(rawWords2.get(i) + " ");
                    }
                    writer2.Write('\n');
                }
            }
            System.Console.ReadKey();
        }
Example #11
        /// <summary>
        /// Tokenizes the two sentence columns of the MSR paraphrase training file
        /// (tab-separated; sentences in columns 3 and 4) into two .token files.
        /// </summary>
        static void Main(string[] args)
        {
            // FIX: writers are now in `using` blocks so they are flushed/closed
            // even when an exception interrupts the loop.
            using (TextReader reader = System.IO.File.OpenText("C:\\Data\\msr_paraphrase_train.txt"))
            using (TextWriter writer1 = System.IO.File.CreateText("C:\\Data\\msr_paraphrase_train_s1.token"))
            using (TextWriter writer2 = System.IO.File.CreateText("C:\\Data\\msr_paraphrase_train_s2.token"))
            {
                string[] inputdata = reader.ReadToEnd().Split('\n');

                // FIX: create the tokenizer factory once instead of once per line.
                var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");

                foreach (string line in inputdata)
                {
                    string[] sp = line.Split('\t');

                    // FIX: skip blank/short lines (e.g. the trailing empty element left
                    // by Split('\n')), which previously threw IndexOutOfRangeException
                    // on sp[3]/sp[4].
                    if (sp.Length < 5)
                    {
                        continue;
                    }

                    //writer.Write(sp[0] + '\t');

                    var            sent2Reader1 = new java.io.StringReader(sp[3]);
                    java.util.List rawWords1    = tokenizerFactory.getTokenizer(sent2Reader1).tokenize();
                    sent2Reader1.close();
                    var            sent2Reader2 = new java.io.StringReader(sp[4]);
                    java.util.List rawWords2    = tokenizerFactory.getTokenizer(sent2Reader2).tokenize();
                    sent2Reader2.close();

                    for (int i = 0; i < rawWords1.size(); ++i)
                    {
                        writer1.Write(rawWords1.get(i) + " ");
                    }
                    writer1.Write('\n');

                    for (int i = 0; i < rawWords2.size(); ++i)
                    {
                        writer2.Write(rawWords2.get(i) + " ");
                    }
                    writer2.Write('\n');
                }
            }
            System.Console.ReadKey();
        }
    /* Use when the player logs in to initially create the classifier with data from server */
    /// <summary>
    /// Builds the player's dataset from the server payload and trains the classifier.
    /// Errors from the Java side are logged rather than propagated.
    /// </summary>
    public void InitializeClassifier(String dataString)
    {
        try {
            java.io.StringReader   stringReader = new java.io.StringReader(dataString);
            java.io.BufferedReader buffReader   = new java.io.BufferedReader(stringReader);

            playerData = new weka.core.Instances(buffReader);
            buffReader.close(); // FIX: release the reader once the instances are loaded

            /* State where in each Instance the class attribute is, if its not already specified by the file */
            if (playerData.classIndex() == -1)
            {
                playerData.setClassIndex(playerData.numAttributes() - 1);
            }

            /* NAIVE BAYES */
            //classifier = new weka.classifiers.bayes.NaiveBayes();

            /* NEURAL NET */
            //classifier = new weka.classifiers.functions.MultilayerPerceptron();
            //((weka.classifiers.functions.MultilayerPerceptron)classifier).setHiddenLayers("12");

            /* J48 TREE */
            //classifier = new weka.classifiers.trees.J48();

            /* IB1 NEAREST NEIGHBOUR */
            //classifier = new weka.classifiers.lazy.IB1();

            /* RANDOM FOREST */
            classifier = new weka.classifiers.trees.RandomForest();


            classifier.buildClassifier(playerData);
            Debug.Log("Initialized Classifier");
        }
        catch (java.lang.Exception ex)
        {
            Debug.LogError(ex.getMessage());
        }
    }
Example #13
        // Use Stanford.NLP.Net to parse the sentence.
        Tree Parse(string sent)
        {
            // Tokenize the raw text with an explicit PTB tokenizer.
            var factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var reader  = new java.io.StringReader(sent);
            var tokens  = factory.getTokenizer(reader).tokenize();
            reader.close();

            var tree = lp.apply(tokens);

            // Derive typed dependencies from the lexical tree (the result is
            // computed but not otherwise used here).
            var languagePack     = new PennTreebankLanguagePack();
            var structureFactory = languagePack.grammaticalStructureFactory();
            var structure        = structureFactory.newGrammaticalStructure(tree);
            var dependencies     = structure.typedDependenciesCCprocessed();

            // Print the tree in Penn Treebank format.
            //var tp = new TreePrint("penn,typedDependenciesCollapsed");
            var printer = new TreePrint("penn");
            printer.printTree(tree);

            return tree;
        }
Example #14
        /// <summary>
        /// Demonstrates both parser entry points: a pre-tokenized word list, and raw
        /// text run through an explicit tokenizer; prints trees and typed dependencies.
        /// </summary>
        /// <param name="lp">A loaded lexicalized parser model.</param>
        public static void DemoAPI(LexicalizedParser lp)
        {
            // This option shows parsing a list of correctly tokenized words
            var sent = new[] { "This", "is", "an", "easy", "sentence", "." };
            var rawWords = Sentence.toCoreLabelList(sent);
            var parse = lp.apply(rawWords);
            parse.pennPrint();

            // This option shows loading and using an explicit tokenizer
            const string Sent2 = "This is another sentence.";
            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var sent2Reader = new StringReader(Sent2);
            var rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
            sent2Reader.close(); // FIX: the reader was previously never closed
            parse = lp.apply(rawWords2);

            var tlp = new PennTreebankLanguagePack();
            var gsf = tlp.grammaticalStructureFactory();
            var gs = gsf.newGrammaticalStructure(parse);
            var tdl = gs.typedDependenciesCCprocessed();
            System.Console.WriteLine("\n{0}\n", tdl);

            var tp = new TreePrint("penn,typedDependenciesCollapsed");
            tp.printTree(parse);
        }
    /* Use when the player logs in to initially create the classifier with data from server */
    /// <summary>
    /// Builds the player's dataset from the server payload and trains the classifier.
    /// Errors from the Java side are logged rather than propagated.
    /// </summary>
    public void InitializeClassifier(String dataString)
    {
        try {
            java.io.StringReader stringReader = new java.io.StringReader(dataString);
            java.io.BufferedReader buffReader = new java.io.BufferedReader(stringReader);

            playerData = new weka.core.Instances(buffReader);
            buffReader.close(); // FIX: release the reader once the instances are loaded

            /* State where in each Instance the class attribute is, if its not already specified by the file */
            if (playerData.classIndex() == -1)
                playerData.setClassIndex(playerData.numAttributes() - 1);

            /* NAIVE BAYES */
            //classifier = new weka.classifiers.bayes.NaiveBayes();

            /* NEURAL NET */
            //classifier = new weka.classifiers.functions.MultilayerPerceptron();
            //((weka.classifiers.functions.MultilayerPerceptron)classifier).setHiddenLayers("12");

            /* J48 TREE */
            //classifier = new weka.classifiers.trees.J48();

            /* IB1 NEAREST NEIGHBOUR */
            //classifier = new weka.classifiers.lazy.IB1();

            /* RANDOM FOREST */
            classifier = new weka.classifiers.trees.RandomForest();

            classifier.buildClassifier(playerData);
            Debug.Log("Initialized Classifier");
        }
        catch (java.lang.Exception ex)
        {
            Debug.LogError(ex.getMessage());
        }
    }
Example #16
    /// <summary>
    /// Parses the input sentence and extracts action/subject/target fields from its
    /// CC-processed typed dependencies (nsubj, dobj, nmod relations).
    /// </summary>
    /// <param name="input">Raw sentence text.</param>
    /// <returns>String form of the typed-dependency list.</returns>
    public string Tags(string input)
    {
        // Path to models extracted from `stanford-parser-3.6.0-models.jar`
        var jarRoot         = @"";
        var modelsDirectory = jarRoot;

        // NOTE(review): the model is re-loaded on every call — consider caching it
        // in a field if this is called repeatedly.
        var lp = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");


        // This option shows loading and using an explicit tokenizer
        var sent2            = input;
        var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
        var sent2Reader      = new java.io.StringReader(sent2);
        var rawWords2        = tokenizerFactory.getTokenizer(sent2Reader).tokenize();

        sent2Reader.close();
        var tree2 = lp.apply(rawWords2);

        // Extract dependencies from lexical tree
        var tlp = new PennTreebankLanguagePack();
        var gsf = tlp.grammaticalStructureFactory();
        var gs  = gsf.newGrammaticalStructure(tree2);
        var tdl = gs.typedDependenciesCCprocessed();

        // FIX: removed the unused TreePrint local — it was constructed but never
        // used (the printTree call below it was already commented out).
        UnityEngine.Debug.Log(tdl);
        //tp.printTree(tree2);

        for (int i = 0; i < tdl.size(); i++)
        {
            TypedDependency node = (TypedDependency)tdl.get(i);

            string relation = node.reln().getShortName();

            // Subject relation: governor is the action, dependent is the subject.
            if (relation.Contains("nsubj"))
            {
                IndexedWord act = node.gov();
                //node.dep().getword()
                action = act.value();

                UnityEngine.Debug.Log("This is the action " + action);

                IndexedWord subject = node.dep();
                subj = subject.value();

                UnityEngine.Debug.Log("This is the subject " + subj);
            }

            // Direct object: governor is the action, dependent is the target.
            if (relation.Contains("dobj"))
            {
                IndexedWord act = node.gov();
                //node.dep().getword()
                action = act.value();
                UnityEngine.Debug.Log("This is the action " + action);

                IndexedWord tar = node.dep();
                target = tar.value();
                UnityEngine.Debug.Log("This is the target " + target);
            }

            // Nominal modifier: dependent is a secondary target.
            if (relation.Contains("nmod"))
            {
                IndexedWord tar_two = node.dep();
                second_target = tar_two.value();
                UnityEngine.Debug.Log("This is the target second " + second_target);
            }
        }

        return(tdl.ToString());
    }
    // Test the classification result of each map that a user played,
    // with the data available as if they were playing through it.
    // Trains on a growing prefix of the data (first 3 instances seed the loop),
    // cross-validates each prefix, and writes the accuracy series to a file.
    public static void classifyTest(String dataString, String playerID)
    {
        String results = "";

        try {
            java.io.StringReader   stringReader = new java.io.StringReader(dataString);
            java.io.BufferedReader buffReader   = new java.io.BufferedReader(stringReader);

            /* NOTE THAT FOR NAIVE BAYES ALL WEIGHTS CAN BE = 1*/
            //weka.core.converters.ConverterUtils.DataSource source = new weka.core.converters.ConverterUtils.DataSource("iris.arff");
            weka.core.Instances data = new weka.core.Instances(buffReader);             //source.getDataSet();
            buffReader.close();             // FIX: release the reader once the instances are loaded

            // setting class attribute if the data format does not provide this information
            // For example, the XRFF format saves the class attribute information as well
            if (data.classIndex() == -1)
            {
                data.setClassIndex(data.numAttributes() - 1);
            }

            weka.classifiers.Classifier cl;
            for (int i = 3; i < data.numInstances(); i++)
            {
                cl = new weka.classifiers.bayes.NaiveBayes();
                //cl = new weka.classifiers.trees.J48();
                //cl = new weka.classifiers.lazy.IB1();
                //cl = new weka.classifiers.functions.MultilayerPerceptron();

                // FIX: the original unconditionally cast `cl` (a NaiveBayes) to
                // MultilayerPerceptron, which throws InvalidCastException at runtime
                // (and is NOT caught by the java.lang.Exception handler below).
                // Configure hidden layers only when the classifier really is an MLP.
                var mlp = cl as weka.classifiers.functions.MultilayerPerceptron;
                if (mlp != null)
                {
                    mlp.setHiddenLayers("12");
                }

                weka.core.Instances subset = new weka.core.Instances(data, 0, i);
                cl.buildClassifier(subset);

                weka.classifiers.Evaluation eval = new weka.classifiers.Evaluation(subset);
                eval.crossValidateModel(cl, subset, 3, new java.util.Random(1));
                results = results + eval.pctCorrect();                 // For accuracy measurement
                /* For Mathews Correlation Coefficient */
                //double TP = eval.numTruePositives(1);
                //double FP = eval.numFalsePositives(1);
                //double TN = eval.numTrueNegatives(1);
                //double FN = eval.numFalseNegatives(1);
                //double correlationCoeff = ((TP*TN)-(FP*FN))/Math.Sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN));
                //results = results + correlationCoeff;
                if (i != data.numInstances() - 1)
                {
                    results = results + ", ";
                }
                if (i == data.numInstances() - 1)
                {
                    Debug.Log("Player: " + playerID + ", Num Maps: " + data.numInstances() + ", AUC: " + eval.areaUnderROC(1));
                }
            }
        } catch (java.lang.Exception ex)
        {
            Debug.LogError(ex.getMessage());
        }
        // Write values to file for a matlab read
        // For accuracy
        // FIX: dispose the writer deterministically even if WriteLine throws.
        using (StreamWriter writer = new StreamWriter("DataForMatlab/" + playerID + "_CrossFoldValidations_NeuralNet.txt"))
        {
            //StreamWriter writer = new StreamWriter("DataForMatlab/"+playerID+"_CrossFoldCorrCoeff.txt"); // For mathews cc
            writer.WriteLine(results);
        }
        Debug.Log(playerID + " has been written to file");
    }
    // Test the classification result of each map that a user played,
    // with the data available as if they were playing through it.
    // Here it deduplicates the player's instances against both a per-player set
    // and the global `allUniqueData` accumulator, counting duplicates in the
    // `dupInstances` / `dupInstancesSamePlayer` fields.
    public static void classifyTest(String dataString, String playerID)
    {
        try {
            java.io.StringReader stringReader = new java.io.StringReader(dataString);
            java.io.BufferedReader buffReader = new java.io.BufferedReader(stringReader);

            /* NOTE THAT FOR NAIVE BAYES ALL WEIGHTS CAN BE = 1*/
            //weka.core.converters.ConverterUtils.DataSource source = new weka.core.converters.ConverterUtils.DataSource("iris.arff");
            weka.core.Instances thisData = new weka.core.Instances(buffReader); //source.getDataSet();
            buffReader.close(); // FIX: release the reader once the instances are loaded
            if (thisData.classIndex() == -1)
                thisData.setClassIndex(thisData.numAttributes() - 1);

            // Per-player empty dataset sharing this data's header/attributes.
            weka.core.Instances thisUniqueData = new weka.core.Instances(thisData);
            if (thisUniqueData.classIndex() == -1)
                thisUniqueData.setClassIndex(thisUniqueData.numAttributes() - 1);
            thisUniqueData.delete();

            // Lazily create the global accumulator with the same header.
            if (allUniqueData == null) {
                allUniqueData = new weka.core.Instances(thisData);
                if (allUniqueData.classIndex() == -1)
                    allUniqueData.setClassIndex(allUniqueData.numAttributes() - 1);
                allUniqueData.delete();
            }

            weka.core.InstanceComparator com = new weka.core.InstanceComparator(false);

            // FIX: the two identical dedup loops are factored into one helper.
            dupInstances += AddNonDuplicates(thisData, allUniqueData, com);
            dupInstancesSamePlayer += AddNonDuplicates(thisData, thisUniqueData, com);

            //Debug.Log("All Data Instance Count = " + thisData.numInstances());
            //Debug.Log("Unique Data Instance Count = " + thisUniqueData.numInstances());
            //Debug.Log("Done!");

        } catch (java.lang.Exception ex)
        {
            Debug.LogError(ex.getMessage());
        }
    }

    // Adds each instance of `source` to `sink` unless an equal instance (per `com`)
    // is already present; returns the number of duplicates that were skipped.
    private static int AddNonDuplicates(weka.core.Instances source, weka.core.Instances sink, weka.core.InstanceComparator com)
    {
        int duplicates = 0;
        for (int i = 0; i < source.numInstances(); i++)
        {
            bool dup = false;
            for (int j = 0; j < sink.numInstances(); j++)
            {
                if (com.compare(source.instance(i), sink.instance(j)) == 0)
                {
                    Debug.Log("Duplicate found!");
                    dup = true;
                    break;
                }
            }
            if (!dup)
                sink.add(source.instance(i));
            else
                duplicates++;
        }
        return duplicates;
    }
Example #19
        public void SentenceParser(string sent2)
        {
            var modelsDirectory = jarRoot + @"edu\stanford\nlp\models";

            // Loading english PCFG parser from file
            var lp = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

            // This option shows loading and using an explicit tokenizer
            sent2.ToLower();
            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var sent2Reader      = new java.io.StringReader(sent2);
            var rawWords2        = tokenizerFactory.getTokenizer(sent2Reader).tokenize();

            sent2Reader.close();
            var tree2 = lp.apply(rawWords2);

            // Extract dependencies from lexical tree

            var tlp = new PennTreebankLanguagePack();
            var gsf = tlp.grammaticalStructureFactory();
            var gs  = gsf.newGrammaticalStructure(tree2);
            var tdl = gs.typedDependenciesCCprocessed();
            //Console.WriteLine("\n{0}\n", tdl);


            // Extract collapsed dependencies from parsed tree

            var tp = new TreePrint("penn,typedDependenciesCollapsed");

            tp.printTree(tree2);



            ArrayList dep = gs.typedDependenciesCollapsed() as ArrayList;

            foreach (TypedDependency td in dep)
            {
                for (int i = 0; i < keyword.Length; i++)
                {
                    if (td.dep().originalText().Equals(keyword[i]))
                    {
                        keyFlag = true;
                        key     = keyword[i];
                        break;
                    }
                }
                if (keyFlag)
                {
                    break;
                }
            }

            keyFlag = false;


            switch (key)
            {
            case "circle":

                Circle circle = new Circle();
                shape     = circle.GetProps();
                propsUsed = Associator(shape, dep);

                break;

            case "rectangle":

                Rectangle rect = new Rectangle();
                shape     = rect.GetProps();
                propsUsed = Associator(shape, dep);

                break;

            case "triangle":

                Triangle tri = new Triangle();
                shape     = tri.GetProps();
                propsUsed = Associator(shape, dep);

                break;

            case "square":

                Square square = new Square();
                shape     = square.GetProps();
                propsUsed = Associator(shape, dep);

                break;

            default:

                break;
            } //End of Switch

            dependency = tdl.ToString();
        } //End of SentenceParser
Example #20
    // Test the classification result of each map that a user played,
    // with the data available as if they were playing through it.
    // Deduplicates the player's instances against a per-player set and the global
    // `allUniqueData` accumulator, counting duplicates in `dupInstances` /
    // `dupInstancesSamePlayer`.
    public static void classifyTest(String dataString, String playerID)
    {
        try {
            java.io.StringReader   stringReader = new java.io.StringReader(dataString);
            java.io.BufferedReader buffReader   = new java.io.BufferedReader(stringReader);

            /* NOTE THAT FOR NAIVE BAYES ALL WEIGHTS CAN BE = 1*/
            //weka.core.converters.ConverterUtils.DataSource source = new weka.core.converters.ConverterUtils.DataSource("iris.arff");
            weka.core.Instances thisData = new weka.core.Instances(buffReader);             //source.getDataSet();
            buffReader.close();             // FIX: release the reader once the instances are loaded
            if (thisData.classIndex() == -1)
            {
                thisData.setClassIndex(thisData.numAttributes() - 1);
            }

            // Per-player empty dataset sharing this data's header/attributes.
            weka.core.Instances thisUniqueData = new weka.core.Instances(thisData);
            if (thisUniqueData.classIndex() == -1)
            {
                thisUniqueData.setClassIndex(thisUniqueData.numAttributes() - 1);
            }
            thisUniqueData.delete();

            // Lazily create the global accumulator with the same header.
            if (allUniqueData == null)
            {
                allUniqueData = new weka.core.Instances(thisData);
                if (allUniqueData.classIndex() == -1)
                {
                    allUniqueData.setClassIndex(allUniqueData.numAttributes() - 1);
                }
                allUniqueData.delete();
            }

            weka.core.InstanceComparator com = new weka.core.InstanceComparator(false);

            // Merge into the global set; count cross-player duplicates.
            for (int i = 0; i < thisData.numInstances(); i++)
            {
                bool dup = false;
                for (int j = 0; j < allUniqueData.numInstances(); j++)
                {
                    if (com.compare(thisData.instance(i), allUniqueData.instance(j)) == 0)
                    {
                        Debug.Log("Duplicate found!");
                        dup = true;
                        break;
                    }
                }
                if (!dup)
                {
                    allUniqueData.add(thisData.instance(i));
                }
                else
                {
                    dupInstances++;
                }
            }

            // Merge into the per-player set; count same-player duplicates.
            for (int i = 0; i < thisData.numInstances(); i++)
            {
                bool dup = false;
                for (int j = 0; j < thisUniqueData.numInstances(); j++)
                {
                    if (com.compare(thisData.instance(i), thisUniqueData.instance(j)) == 0)
                    {
                        Debug.Log("Duplicate found!");
                        dup = true;
                        break;
                    }
                }
                if (!dup)
                {
                    thisUniqueData.add(thisData.instance(i));
                }
                else
                {
                    dupInstancesSamePlayer++;
                }
            }


            //Debug.Log("All Data Instance Count = " + thisData.numInstances());
            //Debug.Log("Unique Data Instance Count = " + thisUniqueData.numInstances());
            //Debug.Log("Done!");
        } catch (java.lang.Exception ex)
        {
            Debug.LogError(ex.getMessage());
        }
    }