/// <summary>Prunes features whose count falls below a threshold.</summary>
        /// <remarks>
        /// Every feature whose entry in <c>GetFeatureCounts()</c> is not at least
        /// <paramref name="k"/> is removed from the feature index. The remaining
        /// features are renumbered in iteration order, and the per-datum feature
        /// id and value arrays are rewritten to use the new ids.
        /// </remarks>
        /// <param name="k">Minimum count a feature must reach to be kept.</param>
        public override void ApplyFeatureCountThreshold(int k)
        {
            float[] featureCounts = GetFeatureCounts();
            HashIndex<F> survivors = new HashIndex<F>();

            // remap[oldId] is the feature's new id, or -1 when the feature is dropped.
            int[] remap = new int[featureIndex.Size()];
            for (int oldId = 0; oldId < remap.Length; oldId++)
            {
                F feature = featureIndex.Get(oldId);
                if (!(featureCounts[oldId] >= k))
                {
                    remap[oldId] = -1;
                    continue;
                }
                remap[oldId] = survivors.Count;
                survivors.Add(feature);
            }
            featureIndex = survivors;

            // Rewrite each datum, keeping only entries whose feature survived.
            for (int row = 0; row < size; row++)
            {
                List<int> keptIds = new List<int>(data[row].Length);
                List<double> keptValues = new List<double>(values[row].Length);
                for (int col = 0; col < data[row].Length; col++)
                {
                    int newId = remap[data[row][col]];
                    if (newId < 0)
                    {
                        continue;
                    }
                    keptIds.Add(newId);
                    keptValues.Add(values[row][col]);
                }
                data[row] = keptIds.ToArray();
                values[row] = keptValues.ToArray();
            }
        }
예제 #2
0
        /// <summary>
        /// Reads a NUL-separated list of strings from a region of <paramref name="file"/>
        /// and passes them, plus three fixed names, to <c>HashIndex.Load</c>.
        /// </summary>
        /// <param name="file">Path of the file to open for reading.</param>
        /// <param name="be">
        /// Entry giving the start offset (<c>Address</c>) and byte count (<c>Length</c>);
        /// a length of -1 means "read to the end of the file".
        /// </param>
        public static void ReadHashlistAndLoad(string file, PackageFileEntry be)
        {
            using FileStream stream = new FileStream(file, FileMode.Open, FileAccess.Read);
            using BinaryReader reader = new BinaryReader(stream);

            stream.Position = be.Address;

            byte[] raw = be.Length == -1
                ? reader.ReadBytes((int)(stream.Length - stream.Position))
                : reader.ReadBytes((int)be.Length);

            // Each byte is widened to a char one-to-one (no multi-byte decoding).
            StringBuilder text = new StringBuilder();
            for (int n = 0; n < raw.Length; n++)
            {
                text.Append((char)raw[n]);
            }

            string[] entries = text.ToString().Split('\0');
            text.Clear();

            HashSet<string> paths = new HashSet<string>();
            for (int n = 0; n < entries.Length; n++)
            {
                paths.Add(entries[n]);
            }

            paths.Add("idstring_lookup");
            paths.Add("existing_banks");
            paths.Add("engine-package");

            HashIndex.Load(ref paths);

            paths.Clear();
            reader.Close();
        }
예제 #3
0
        /// <summary>Parses a "GFLXPACK" archive header and its three tables.</summary>
        /// <remarks>
        /// After checking the 8-character signature, reads the version, a padding
        /// word, the file count, the folder count and three table offsets. It then
        /// seeks to each table in turn and reads one hash, one <c>HashIndex</c> and
        /// one <c>FileEntry</c> per file. Each entry's text is set to its hash and
        /// the entry is added to both <c>Nodes</c> and <c>files</c>.
        /// </remarks>
        /// <param name="reader">Reader positioned at the start of the archive header.</param>
        /// <exception cref="Exception">The signature is not "GFLXPACK".</exception>
        public void Read(FileReader reader)
        {
            string magic = reader.ReadString(8, Encoding.ASCII);
            if (magic != "GFLXPACK")
            {
                throw new Exception($"Invalid signature {magic}! Expected GFLXPACK.");
            }

            version = reader.ReadInt32();
            reader.ReadUInt32(); // padding word: consumed to advance the reader, value unused
            uint entryCount = reader.ReadUInt32();
            FolderCount = reader.ReadInt32();

            ulong infoTableOffset      = reader.ReadUInt64();
            ulong hashTableOffset      = reader.ReadUInt64();
            ulong hashIndexTableOffset = reader.ReadUInt64();

            reader.Seek((long)hashTableOffset, SeekOrigin.Begin);
            for (int n = 0; n < entryCount; n++)
            {
                hashes.Add(reader.ReadUInt64());
            }

            reader.Seek((long)hashIndexTableOffset, SeekOrigin.Begin);
            for (int n = 0; n < entryCount; n++)
            {
                HashIndex slot = new HashIndex();
                slot.Read(reader);
                hashIndices.Add(slot);
            }

            reader.Seek((long)infoTableOffset, SeekOrigin.Begin);
            for (int n = 0; n < entryCount; n++)
            {
                FileEntry entry = new FileEntry();
                entry.Read(reader);
                entry.Text = hashes[n].ToString();
                Nodes.Add(entry);
                files.Add(entry);
            }
        }
예제 #4
0
    /// <summary>
    /// Extracts the configured heist world and the files it references, then writes
    /// an <c>add.xml</c> manifest listing every extracted path.
    /// </summary>
    /// <remarks>
    /// Progress and errors are written to <c>./heist_extractor.log</c>. The log
    /// writer is closed even when extraction throws, so the file handle is never
    /// leaked.
    /// </remarks>
    /// <param name="browser">Package browser whose <c>RawFiles</c> table is searched.</param>
    public void execute(PackageBrowser browser)
    {
        this.ExtractedPaths = new HashSet <Idstring>();

        this.error_output = new StreamWriter("./heist_extractor.log");

        try
        {
            this._browser = browser;
            System.Diagnostics.Stopwatch clock = new System.Diagnostics.Stopwatch();
            clock.Start();
            this.error_output.Write("Heist Extractor executed" + "\n");
            this.error_output.Flush();

            // Files are keyed by (path hash, language hash, extension hash).
            Idstring ids     = HashIndex.Get(this.heist_world);
            Idstring ids_ext = HashIndex.Get("world");
            var      tids    = new Tuple <Idstring, Idstring, Idstring>(ids, new Idstring(0), ids_ext);

            if (browser.RawFiles.ContainsKey(tids))
            {
                this.ProcessWorld(browser.RawFiles[tids]);
            }
            else
            {
                Console.WriteLine("World File does not exist");
            }

            using (StreamWriter str = new StreamWriter(new FileStream(Path.Combine(this.OutputPath, "add.xml"), FileMode.Create, FileAccess.Write)))
            {
                str.Write("<table>\n");
                foreach (Idstring path in this.ExtractedPaths)
                {
                    // NOTE(review): assumes ToString() yields "path.extension"; a value
                    // without a '.' would make split[1] throw — confirm against Idstring.
                    string[] split = path.ToString().Split('.');
                    str.Write(String.Format("\t<{0} path=\"{1}\" force=\"true\"/>\n", split[1], split[0]));
                }

                str.Write("</table>\n");
            }

            clock.Stop();
            this.error_output.Write("Scrape operation took {0} seconds" + "\n", clock.Elapsed.TotalSeconds.ToString());
        }
        finally
        {
            // Close (and flush) the log on both the success and the failure path.
            this.error_output.Close();
        }
    }
예제 #5
0
        /// <summary>
        /// Sets up the application: optional console, local hashlist, converters.
        /// </summary>
        /// <remarks>
        /// In non-DEBUG builds an unhandled-exception handler is registered and
        /// <c>AllocConsole()</c> is only called when a file named "debug" exists.
        /// In DEBUG builds the guarding <c>if</c> is compiled out, so
        /// <c>AllocConsole()</c> is always called.
        /// </remarks>
        public App()
        {
#if !DEBUG
            Dispatcher.UnhandledException += OnException;
            if (File.Exists("debug"))
#endif
            AllocConsole();

            Console.WriteLine("Loading local hashlist");
            if (!File.Exists("Data/hashlist"))
            {
                Console.WriteLine("Local hashlist is missing!");
            }
            else
            {
                HashIndex.LoadParallel("Data/hashlist");
            }

            LoadConverters();
        }
예제 #6
0
        /// <summary>
        /// Builds <c>tagBin</c>, mapping each tag id to the id of its bin, and
        /// records the number of distinct bins in <c>numTagBins</c>.
        /// </summary>
        /// <remarks>
        /// A tag's bin is the tag itself when <c>tagProjection</c> is null,
        /// otherwise the result of projecting the tag.
        /// </remarks>
        protected internal virtual void InitTagBins()
        {
            IIndex<string> binIndex = new HashIndex<string>();

            tagBin = new int[tagIndex.Size()];
            for (int tagId = 0; tagId < tagBin.Length; tagId++)
            {
                string tag = tagIndex.Get(tagId);
                string bin = (tagProjection == null) ? tag : tagProjection.Project(tag);
                tagBin[tagId] = binIndex.AddToIndex(bin);
            }
            numTagBins = binIndex.Size();
        }
예제 #7
0
        /// <summary>
        /// Builds a <c>LexicalizedParser</c> from a training treebank.
        /// </summary>
        /// <remarks>
        /// Runs in order: binarize the trees, extract the unary and binary
        /// grammars, train the lexicon, and (only when <c>op.doDep</c> is set)
        /// extract the dependency grammar. Progress is reported through
        /// <c>log</c> and <c>Timing.Tick</c>.
        /// </remarks>
        /// <param name="trainTreebank">Treebank to train from.</param>
        /// <returns>
        /// A parser holding the lexicon, both grammars, the dependency grammar
        /// (null when <c>op.doDep</c> is false) and the three indices.
        /// </returns>
        public virtual LexicalizedParser GetParserDataFromTreebank(Treebank trainTreebank)
        {
            // Stage 1: binarization.
            log.Info("Binarizing training trees...");
            IList<Tree> trainingTrees = GetAnnotatedBinaryTreebankFromTreebank(trainTreebank);
            Timing.Tick("done.");

            // Stage 2: unary/binary grammar extraction.
            IIndex<string> states = new HashIndex<string>();
            log.Info("Extracting PCFG...");
            IExtractor<Pair<UnaryGrammar, BinaryGrammar> > grammarExtractor = new BinaryGrammarExtractor(op, states);
            Pair<UnaryGrammar, BinaryGrammar> grammars = grammarExtractor.Extract(trainingTrees);

            BinaryGrammar binaryGrammar = grammars.second;
            binaryGrammar.SplitRules();

            UnaryGrammar unaryGrammar = grammars.first;
            unaryGrammar.PurgeRules();
            Timing.Tick("done.");

            // Stage 3: lexicon training.
            log.Info("Extracting Lexicon...");
            IIndex<string> words   = new HashIndex<string>();
            IIndex<string> tags    = new HashIndex<string>();
            ILexicon       lexicon = op.tlpParams.Lex(op, words, tags);
            lexicon.InitializeTraining(binaryTrainTreesCount(trainingTrees));
            lexicon.Train(trainingTrees);
            lexicon.FinishTraining();
            Timing.Tick("done.");

            // Stage 4: optional dependency grammar. The extractor is created
            // regardless of op.doDep; it is only run when op.doDep is set.
            IExtractor<IDependencyGrammar> dependencyExtractor = op.tlpParams.DependencyGrammarExtractor(op, words, tags);
            IDependencyGrammar dependencyGrammar = null;
            if (op.doDep)
            {
                log.Info("Extracting Dependencies...");
                dependencyGrammar = dependencyExtractor.Extract(trainingTrees);
                dependencyGrammar.SetLexicon(lexicon);
                Timing.Tick("done.");
            }

            log.Info("Done extracting grammars and lexicon.");
            return(new LexicalizedParser(lexicon, binaryGrammar, unaryGrammar, dependencyGrammar, states, words, tags, op));

            // Local helper: number of trees handed to InitializeTraining.
            int binaryTrainTreesCount(IList<Tree> trees)
            {
                return trees.Count;
            }
        }
예제 #8
0
        /// <summary>Keeps only the listed features in the dataset.</summary>
        /// <remarks>
        /// Every indexed feature not contained in <paramref name="features"/> is
        /// removed. Kept features are renumbered in iteration order and each
        /// datum's feature id array is rewritten to use the new ids.
        /// </remarks>
        /// <param name="features">Features to keep.</param>
        public virtual void RetainFeatures(ICollection<F> features)
        {
            IIndex<F> retained = new HashIndex<F>();

            // remap[oldId] is the feature's new id, or -1 when the feature is dropped.
            int[] remap = new int[featureIndex.Size()];
            for (int oldId = 0; oldId < remap.Length; oldId++)
            {
                F feature = featureIndex.Get(oldId);
                if (!features.Contains(feature))
                {
                    remap[oldId] = -1;
                    continue;
                }
                remap[oldId] = retained.Size();
                retained.Add(feature);
            }
            featureIndex = retained;

            // Rewrite each datum, keeping only ids of retained features.
            for (int row = 0; row < size; row++)
            {
                List<int> keptIds = new List<int>(data[row].Length);
                for (int col = 0; col < data[row].Length; col++)
                {
                    int newId = remap[data[row][col]];
                    if (newId >= 0)
                    {
                        keptIds.Add(newId);
                    }
                }
                data[row] = keptIds.ToArray();
            }
        }
        /// <summary>
        /// Load handler: fills the title and text fields from the mod, and binds
        /// the grid to the mod's replacement items.
        /// </summary>
        /// <remarks>
        /// An item is listed only when both its bundle path and bundle extension
        /// resolve to a non-empty string via <c>HashIndex.GetUnhashed</c>.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void ModDetails_Load(object sender, EventArgs e)
        {
            this.Text = "Mod Details - " + this._mod.Name;

            this.ModNameText.Text     = this._mod.Name;
            this.AuthorText.Text      = this._mod.Author;
            this.DescriptionText.Text = this._mod.Description;

            foreach (BundleRewriteItem queued in this._mod.ItemQueue)
            {
                BundleRewriteItem row = new BundleRewriteItem();
                row.ReplacementFile = Path.GetFileName(this._mod.file) + "/" + queued.ReplacementFile;

                String bundlePath      = HashIndex.GetUnhashed(queued.BundlePath);
                String bundleExtension = HashIndex.GetUnhashed(queued.BundleExtension);

                // Skip items whose path or extension could not be resolved.
                if (string.IsNullOrEmpty(bundlePath) || string.IsNullOrEmpty(bundleExtension))
                {
                    continue;
                }

                row.SourceFile = queued.IsLanguageSpecific
                    ? bundlePath + "." + queued.BundleLanguage + "." + bundleExtension
                    : bundlePath + "." + bundleExtension;

                _items.Add(row);
            }

            this.ReplacementFilesGridView.DataSource = _items;
            this.ReplacementFilesGridView.Update();
        }
예제 #10
0
    /*private void ProcessFolder(IParent folder)
     * {
     *  foreach (IChild child in folder.Children.Values)
     *  {
     *      if (child is FileEntry)
     *          this.ProcessFile(child as FileEntry);
     *      else if (child is IParent)
     *          this.ProcessFolder(child as IParent);
     *  }
     * }*/

    /// <summary>
    /// Writes one entry under <c>OutputPath</c> and records it as extracted.
    /// </summary>
    /// <remarks>
    /// Does nothing when the entry has no bundle entries or its path hash is
    /// already in <c>ExtractedPaths</c>. The target directory is created if missing.
    /// </remarks>
    /// <param name="entry">File entry to write.</param>
    /// <param name="byt">
    /// Bytes to write. When null, <c>entry.FileBytes()</c> is used, and if that is
    /// also null an empty file is written.
    /// </param>
    private void WriteFile(FileEntry entry, byte[] byt = null)
    {
        Idstring key = HashIndex.Get(entry.Path);

        bool nothingToWrite = entry.BundleEntries.Count == 0;
        if (nothingToWrite || this.ExtractedPaths.Contains(key))
        {
            return;
        }

        string target    = Path.Combine(this.OutputPath, entry.Path);
        string targetDir = Path.GetDirectoryName(target);
        if (!Directory.Exists(targetDir))
        {
            Directory.CreateDirectory(targetDir);
        }

        byte[] payload = byt;
        if (payload == null)
        {
            payload = entry.FileBytes();
        }
        if (payload == null)
        {
            payload = new byte[0];
        }

        File.WriteAllBytes(target, payload);
        this.ExtractedPaths.Add(key);
    }
예제 #11
0
        /// <summary>
        /// Converts one row of string attribute values into an <c>RVFDatum</c>.
        /// </summary>
        /// <remarks>
        /// The value in <paramref name="classColumn"/> becomes the label. Columns
        /// listed in <paramref name="skip"/> are ignored. Each remaining column is
        /// given the next attribute number as its feature key; its value is added
        /// to that attribute's index and the value's position in the index is
        /// stored as the feature count.
        /// </remarks>
        /// <param name="values">Raw column values of one row.</param>
        /// <param name="classColumn">Column position holding the label.</param>
        /// <param name="skip">Column positions to ignore.</param>
        /// <param name="indices">
        /// Per-attribute value indices, keyed by attribute number. An index is
        /// created and stored here for any attribute that has none yet.
        /// </param>
        /// <returns>The datum built from the row.</returns>
        internal static RVFDatum <string, int> ReadDatum(string[] values, int classColumn, ICollection <int> skip, IDictionary <int, IIndex <string> > indices)
        {
            ClassicCounter <int>   c = new ClassicCounter <int>();
            RVFDatum <string, int> d = new RVFDatum <string, int>(c);
            int attrNo = 0;

            for (int index = 0; index < values.Length; index++)
            {
                if (index == classColumn)
                {
                    d.SetLabel(values[index]);
                    continue;
                }
                if (skip.Contains(index))
                {
                    continue;
                }
                int featKey = attrNo;

                // The dictionary indexer throws for a missing key, so look the
                // index up with TryGetValue and create it lazily when absent.
                IIndex <string> ind;
                if (!indices.TryGetValue(featKey, out ind) || ind == null)
                {
                    ind = new HashIndex <string>();
                    indices[featKey] = ind;
                }
                // No lock check needed here: the original notes that Add already
                // refuses to grow a locked index.
                ind.Add(values[index]);
                int valInd = ind.IndexOf(values[index]);
                if (valInd == -1)
                {
                    // Value is not in the index (e.g. the index rejected it); fall back to 0.
                    valInd = 0;
                    logger.Info("unknown attribute value " + values[index] + " of attribute " + attrNo);
                }
                c.IncrementCount(featKey, valInd);
                attrNo++;
            }
            return(d);
        }
        /// <summary>Restricts the dataset to the features found in a given set.</summary>
        /// <remarks>
        /// Members of <paramref name="featureSet"/> that are not in the current
        /// feature index are ignored. Kept features are renumbered in the order the
        /// set enumerates them, and each datum's feature id and value arrays are
        /// rewritten to match.
        /// </remarks>
        /// <param name="featureSet">Features to keep.</param>
        public virtual void SelectFeaturesFromSet(ICollection<F> featureSet)
        {
            HashIndex<F> selected = new HashIndex<F>();

            // remap[oldId] is the feature's new id; -1 (the fill value) means dropped.
            int[] remap = new int[featureIndex.Size()];
            Arrays.Fill(remap, -1);
            foreach (F candidate in featureSet)
            {
                int previousId = featureIndex.IndexOf(candidate);
                if (previousId < 0)
                {
                    // Not a feature of this dataset.
                    continue;
                }
                int assignedId = selected.AddToIndex(candidate);
                remap[previousId] = assignedId;
            }
            featureIndex = selected;

            // Rewrite each datum, keeping only entries whose feature was selected.
            for (int row = 0; row < size; row++)
            {
                List<int> keptIds = new List<int>(data[row].Length);
                List<double> keptValues = new List<double>(values[row].Length);
                for (int col = 0; col < data[row].Length; col++)
                {
                    int newId = remap[data[row][col]];
                    if (newId < 0)
                    {
                        continue;
                    }
                    keptIds.Add(newId);
                    keptValues.Add(values[row][col]);
                }
                data[row] = keptIds.ToArray();
                values[row] = keptValues.ToArray();
            }
        }
        /// <summary>Keeps the highest-scoring features and drops the rest.</summary>
        /// <remarks>
        /// Features are paired with their scores, sorted with
        /// <c>ScoredComparator.DescendingComparator</c>, and the first
        /// <paramref name="numFeatures"/> of them form the new feature index. Each
        /// datum's feature ids are then rewritten to the new ids, dropping ids of
        /// features that were not selected.
        /// </remarks>
        /// <param name="numFeatures">number of features to be selected.</param>
        /// <param name="scores">a vector of size total number of features in the data.</param>
        public virtual void SelectFeatures(int numFeatures, double[] scores)
        {
            IList <ScoredObject <F> > scoredFeatures = new List <ScoredObject <F> >();

            for (int i = 0; i < scores.Length; i++)
            {
                scoredFeatures.Add(new ScoredObject <F>(featureIndex.Get(i), scores[i]));
            }
            scoredFeatures.Sort(ScoredComparator.DescendingComparator);
            IIndex <F> newFeatureIndex = new HashIndex <F>();

            for (int i_1 = 0; i_1 < scoredFeatures.Count && i_1 < numFeatures; i_1++)
            {
                newFeatureIndex.Add(scoredFeatures[i_1].Object());
            }
            for (int i_2 = 0; i_2 < size; i_2++)
            {
                // Worst case every feature of the datum survives, so size for that.
                int[] newData  = new int[data[i_2].Length];
                int   curIndex = 0;
                for (int j = 0; j < data[i_2].Length; j++)
                {
                    int index = newFeatureIndex.IndexOf(featureIndex.Get(data[i_2][j]));
                    if (index != -1)
                    {
                        newData[curIndex++] = index;
                    }
                }
                // Both arrays are local to this method, so the copy needs no lock.
                int[] newDataTrimmed = new int[curIndex];
                System.Array.Copy(newData, 0, newDataTrimmed, 0, curIndex);
                data[i_2] = newDataTrimmed;
            }
            featureIndex = newFeatureIndex;
        }
예제 #14
0
    /// <summary>
    /// Extracts a world file, its sibling files, and the continent and mission
    /// files it references.
    /// </summary>
    /// <remarks>
    /// Steps, in order: write every <c>FileEntry</c> sibling in the world file's
    /// parent folder; write the world file and process its script data for
    /// environment references; then look up the "continents" and "mission" files
    /// next to the world file and call <c>ProcessFile</c> for each file they list.
    /// A missing continents or mission file is logged and skipped. An exception
    /// while handling either one is logged and ends the method early.
    /// </remarks>
    /// <param name="file">The world file entry to process.</param>
    private void ProcessWorld(FileEntry file)
    {
        foreach (KeyValuePair <string, IChild> child in file.Parent.Children)
        {
            if (child.Value is FileEntry)
            {
                this.WriteFile(child.Value as FileEntry);
            }
        }

        this.WriteFile(file);
        this.ProcessScriptData(file, new List <XMLTagLookup> {
            new XMLTagLookup {
                node_name = "environment_values", value = new[] { "environment" }, Converter = (hash) => { return(hash + ".environment"); }
            }
        });

        // Files are keyed by (path hash, language hash, extension hash).
        string   continents_file = Path.Combine(Path.GetDirectoryName(file.Path), "continents").Replace("\\", "/");
        Idstring ids             = HashIndex.Get(continents_file);
        var      t_ids           = new Tuple <Idstring, Idstring, Idstring>(ids, new Idstring(0), HashIndex.Get("continents"));

        if (this._browser.RawFiles.ContainsKey(t_ids))
        {
            FileEntry c_file = this._browser.RawFiles[t_ids];
            this.WriteFile(c_file);

            string xml = ScriptActions.GetConverter("scriptdata", "script_cxml").export(c_file.FileStream(), true);

            XmlDocument doc = new XmlDocument();

            try
            {
                doc.LoadXml(xml);
                // Each child names a continent stored at "<name>/<name>.continent".
                foreach (XmlNode child in doc.ChildNodes[0])
                {
                    this.ProcessFile(Path.Combine(Path.GetDirectoryName(file.Path), string.Format("{0}/{0}.continent", child.Attributes.GetNamedItem("name").Value)).Replace("\\", "/"));
                }
            }
            catch (Exception exc)
            {
                this.error_output.Write("Exception occured on file: {0}\n", c_file.Path);
                if (xml != null)
                {
                    this.error_output.Write(xml + "\n");
                }
                this.error_output.Write(exc.Message + "\n");
                this.error_output.Write(exc.StackTrace + "\n");
                this.error_output.Flush();
                return;
            }
        }
        else
        {
            this.error_output.Write("Continents file {0} does not exist!\n", continents_file);
        }

        string   mission_file = Path.Combine(Path.GetDirectoryName(file.Path), "mission").Replace("\\", "/");
        Idstring m_ids        = HashIndex.Get(mission_file);
        var      t_m_ids      = new Tuple <Idstring, Idstring, Idstring>(m_ids, new Idstring(0), HashIndex.Get("mission"));

        if (this._browser.RawFiles.ContainsKey(t_m_ids))
        {
            FileEntry m_file = this._browser.RawFiles[t_m_ids];
            this.WriteFile(m_file);

            string xml = ScriptActions.GetConverter("scriptdata", "script_cxml").export(m_file.FileStream(), true);

            XmlDocument doc = new XmlDocument();

            try
            {
                doc.LoadXml(xml);
                // Each child names a mission script stored at "<file>.mission".
                foreach (XmlNode child in doc.ChildNodes[0])
                {
                    this.ProcessFile(Path.Combine(Path.GetDirectoryName(file.Path), string.Format("{0}.mission", child.Attributes.GetNamedItem("file").Value)).Replace("\\", "/"));
                }
            }
            catch (Exception exc)
            {
                this.error_output.Write("Exception occured on file: {0}\n", m_file.Path);
                if (xml != null)
                {
                    this.error_output.Write(xml + "\n");
                }
                this.error_output.Write(exc.Message + "\n");
                this.error_output.Write(exc.StackTrace + "\n");
                this.error_output.Flush();
                return;
            }
        }
        else
        {
            // Report the mission path that was looked up, not the continents path.
            this.error_output.Write("Mission file {0} does not exist!\n", mission_file);
        }

        this.error_output.Flush();
    }
예제 #15
0
 /// <summary>
 /// Loads the hashlist stored in <paramref name="file"/> and then writes a
 /// generated hash list into <paramref name="workingPath"/>.
 /// </summary>
 /// <param name="workingPath">Directory that receives the generated file.</param>
 /// <param name="file">File containing the stored hashlist.</param>
 /// <param name="be">Entry locating the hashlist inside <paramref name="file"/>.</param>
 public static void GenerateHashlist(string workingPath, string file, PackageFileEntry be)
 {
     // Load first; the output path is only computed once loading has succeeded.
     ReadHashlistAndLoad(file, be);

     string outputFile = Path.Combine(workingPath, HashlistFile);
     HashIndex.GenerateHashList(outputFile);
 }
예제 #16
0
        /// <summary>
        /// Prints unknown-word signatures and their counts for a treebank.
        /// </summary>
        /// <remarks>
        /// Options: <c>-l</c> selects the language, <c>-e</c> the encoding; every
        /// other argument is loaded as a treebank path. Words seen fewer than
        /// twice so far, in trees after the first half of the treebank, have their
        /// signature counted. Signatures are printed in descending count order.
        /// The usage text is printed and the process exits with -1 when the
        /// arguments are too few, an option is unknown or lacks its value, or no
        /// treebank path is given.
        /// </remarks>
        /// <param name="args">Command-line arguments.</param>
        public static void Main(string[] args)
        {
            if (args.Length < minArgs)
            {
                System.Console.Out.WriteLine(usage.ToString());
                System.Environment.Exit(-1);
            }
            ITreebankLangParserParams tlpp = new EnglishTreebankParserParams();
            DiskTreebank tb       = null;
            string       encoding = "UTF-8";
            Language     lang     = Language.English;

            for (int i = 0; i < args.Length; i++)
            {
                if (args[i].StartsWith("-"))
                {
                    switch (args[i])
                    {
                    case "-l":
                    {
                        if (i + 1 >= args.Length)
                        {
                            // Option given without its value.
                            System.Console.Out.WriteLine(usage.ToString());
                            System.Environment.Exit(-1);
                        }
                        lang = Language.ValueOf(args[++i].Trim());
                        tlpp = lang.@params;
                        break;
                    }

                    case "-e":
                    {
                        if (i + 1 >= args.Length)
                        {
                            // Option given without its value.
                            System.Console.Out.WriteLine(usage.ToString());
                            System.Environment.Exit(-1);
                        }
                        encoding = args[++i];
                        break;
                    }

                    default:
                    {
                        System.Console.Out.WriteLine(usage.ToString());
                        System.Environment.Exit(-1);
                        break;
                    }
                    }
                }
                else
                {
                    // The treebank is created on the first path argument, using the
                    // language and encoding selected so far.
                    if (tb == null)
                    {
                        if (tlpp == null)
                        {
                            System.Console.Out.WriteLine(usage.ToString());
                            System.Environment.Exit(-1);
                        }
                        else
                        {
                            tlpp.SetInputEncoding(encoding);
                            tlpp.SetOutputEncoding(encoding);
                            tb = tlpp.DiskTreebank();
                        }
                    }
                    tb.LoadPath(args[i]);
                }
            }
            if (tb == null)
            {
                // Only options were supplied; there is no treebank to read.
                System.Console.Out.WriteLine(usage.ToString());
                System.Environment.Exit(-1);
            }
            PrintWriter pw = tlpp.Pw();
            Options     op = new Options();

            Options.LexOptions lexOptions = op.lexOptions;
            if (lang == Language.French)
            {
                lexOptions.useUnknownWordSignatures = 1;
                lexOptions.smartMutation            = false;
                lexOptions.unknownSuffixSize        = 2;
                lexOptions.unknownPrefixSize        = 1;
            }
            else
            {
                if (lang == Language.Arabic)
                {
                    lexOptions.smartMutation            = false;
                    lexOptions.useUnknownWordSignatures = 9;
                    lexOptions.unknownPrefixSize        = 1;
                    lexOptions.unknownSuffixSize        = 1;
                }
            }
            IIndex <string>   wordIndex    = new HashIndex <string>();
            IIndex <string>   tagIndex     = new HashIndex <string>();
            ILexicon          lex          = tlpp.Lex(op, wordIndex, tagIndex);
            // Signatures are only counted for trees after the first half.
            int               computeAfter = (int)(0.50 * tb.Count);
            ICounter <string> vocab        = new ClassicCounter <string>();
            ICounter <string> unkCounter   = new ClassicCounter <string>();
            int               treeId       = 0;

            foreach (Tree t in tb)
            {
                IList <ILabel> yield = t.Yield();
                int            posId = 0;
                foreach (ILabel word in yield)
                {
                    vocab.IncrementCount(word.Value());
                    if (treeId > computeAfter && vocab.GetCount(word.Value()) < 2.0)
                    {
                        // NOTE(review): posId only advances for counted words, so it is
                        // not the word's position in the sentence — confirm this is intended.
                        unkCounter.IncrementCount(lex.GetUnknownWordModel().GetSignature(word.Value(), posId++));
                    }
                }
                treeId++;
            }
            IList <string> biggestKeys = new List <string>(unkCounter.KeySet());

            biggestKeys.Sort(Counters.ToComparatorDescending(unkCounter));
            foreach (string wordType in biggestKeys)
            {
                pw.Printf("%s\t%d%n", wordType, (int)unkCounter.GetCount(wordType));
            }
            pw.Close();
        }
예제 #17
0
        /// <exception cref="System.Exception"/>
        protected internal override void LoadTextClassifier(BufferedReader br)
        {
            base.LoadTextClassifier(br);
            string line = br.ReadLine();

            string[] toks = line.Split("\\t");
            if (!toks[0].Equals("nodeFeatureIndicesMap.size()="))
            {
                throw new Exception("format error in nodeFeatureIndicesMap");
            }
            int nodeFeatureIndicesMapSize = System.Convert.ToInt32(toks[1]);

            nodeFeatureIndicesMap = new HashIndex <int>();
            int count = 0;

            while (count < nodeFeatureIndicesMapSize)
            {
                line = br.ReadLine();
                toks = line.Split("\\t");
                int idx = System.Convert.ToInt32(toks[0]);
                if (count != idx)
                {
                    throw new Exception("format error");
                }
                nodeFeatureIndicesMap.Add(System.Convert.ToInt32(toks[1]));
                count++;
            }
            line = br.ReadLine();
            toks = line.Split("\\t");
            if (!toks[0].Equals("edgeFeatureIndicesMap.size()="))
            {
                throw new Exception("format error");
            }
            int edgeFeatureIndicesMapSize = System.Convert.ToInt32(toks[1]);

            edgeFeatureIndicesMap = new HashIndex <int>();
            count = 0;
            while (count < edgeFeatureIndicesMapSize)
            {
                line = br.ReadLine();
                toks = line.Split("\\t");
                int idx = System.Convert.ToInt32(toks[0]);
                if (count != idx)
                {
                    throw new Exception("format error");
                }
                edgeFeatureIndicesMap.Add(System.Convert.ToInt32(toks[1]));
                count++;
            }
            int weightsLength = -1;

            if (flags.secondOrderNonLinear)
            {
                line = br.ReadLine();
                toks = line.Split("\\t");
                if (!toks[0].Equals("inputLayerWeights4Edge.length="))
                {
                    throw new Exception("format error");
                }
                weightsLength          = System.Convert.ToInt32(toks[1]);
                inputLayerWeights4Edge = new double[weightsLength][];
                count = 0;
                while (count < weightsLength)
                {
                    line = br.ReadLine();
                    toks = line.Split("\\t");
                    int weights2Length = System.Convert.ToInt32(toks[0]);
                    inputLayerWeights4Edge[count] = new double[weights2Length];
                    string[] weightsValue = toks[1].Split(" ");
                    if (weights2Length != weightsValue.Length)
                    {
                        throw new Exception("weights format error");
                    }
                    for (int i2 = 0; i2 < weights2Length; i2++)
                    {
                        inputLayerWeights4Edge[count][i2] = double.ParseDouble(weightsValue[i2]);
                    }
                    count++;
                }
                line = br.ReadLine();
                toks = line.Split("\\t");
                if (!toks[0].Equals("outputLayerWeights4Edge.length="))
                {
                    throw new Exception("format error");
                }
                weightsLength           = System.Convert.ToInt32(toks[1]);
                outputLayerWeights4Edge = new double[weightsLength][];
                count = 0;
                while (count < weightsLength)
                {
                    line = br.ReadLine();
                    toks = line.Split("\\t");
                    int weights2Length = System.Convert.ToInt32(toks[0]);
                    outputLayerWeights4Edge[count] = new double[weights2Length];
                    string[] weightsValue = toks[1].Split(" ");
                    if (weights2Length != weightsValue.Length)
                    {
                        throw new Exception("weights format error");
                    }
                    for (int i2 = 0; i2 < weights2Length; i2++)
                    {
                        outputLayerWeights4Edge[count][i2] = double.ParseDouble(weightsValue[i2]);
                    }
                    count++;
                }
            }
            else
            {
                line = br.ReadLine();
                toks = line.Split("\\t");
                if (!toks[0].Equals("linearWeights.length="))
                {
                    throw new Exception("format error");
                }
                weightsLength = System.Convert.ToInt32(toks[1]);
                linearWeights = new double[weightsLength][];
                count         = 0;
                while (count < weightsLength)
                {
                    line = br.ReadLine();
                    toks = line.Split("\\t");
                    int weights2Length = System.Convert.ToInt32(toks[0]);
                    linearWeights[count] = new double[weights2Length];
                    string[] weightsValue = toks[1].Split(" ");
                    if (weights2Length != weightsValue.Length)
                    {
                        throw new Exception("weights format error");
                    }
                    for (int i2 = 0; i2 < weights2Length; i2++)
                    {
                        linearWeights[count][i2] = double.ParseDouble(weightsValue[i2]);
                    }
                    count++;
                }
            }
            line = br.ReadLine();
            toks = line.Split("\\t");
            if (!toks[0].Equals("inputLayerWeights.length="))
            {
                throw new Exception("format error");
            }
            weightsLength     = System.Convert.ToInt32(toks[1]);
            inputLayerWeights = new double[weightsLength][];
            count             = 0;
            while (count < weightsLength)
            {
                line = br.ReadLine();
                toks = line.Split("\\t");
                int weights2Length = System.Convert.ToInt32(toks[0]);
                inputLayerWeights[count] = new double[weights2Length];
                string[] weightsValue = toks[1].Split(" ");
                if (weights2Length != weightsValue.Length)
                {
                    throw new Exception("weights format error");
                }
                for (int i2 = 0; i2 < weights2Length; i2++)
                {
                    inputLayerWeights[count][i2] = double.ParseDouble(weightsValue[i2]);
                }
                count++;
            }
            line = br.ReadLine();
            toks = line.Split("\\t");
            if (!toks[0].Equals("outputLayerWeights.length="))
            {
                throw new Exception("format error");
            }
            weightsLength      = System.Convert.ToInt32(toks[1]);
            outputLayerWeights = new double[weightsLength][];
            count = 0;
            while (count < weightsLength)
            {
                line = br.ReadLine();
                toks = line.Split("\\t");
                int weights2Length = System.Convert.ToInt32(toks[0]);
                outputLayerWeights[count] = new double[weights2Length];
                string[] weightsValue = toks[1].Split(" ");
                if (weights2Length != weightsValue.Length)
                {
                    throw new Exception("weights format error");
                }
                for (int i2 = 0; i2 < weights2Length; i2++)
                {
                    outputLayerWeights[count][i2] = double.ParseDouble(weightsValue[i2]);
                }
                count++;
            }
        }
        /// <summary>
        /// Command-line driver: optionally prints treebank statistics, trains or loads
        /// a parser and/or a lexicon, and then segments and/or parses a test treebank,
        /// optionally writing the output to a file and printing evaluation scores.
        /// </summary>
        /// <remarks>
        /// Flags registered with <c>StringUtils.ArgsToMap</c> together with an argument
        /// count: <c>-parser</c> (3), <c>-lex</c> (3), <c>-test</c> (2), <c>-out</c> (1),
        /// <c>-lengthPenalty</c> (1), <c>-penaltyType</c> (1), <c>-maxLength</c> (1),
        /// <c>-stats</c> (2). The presence of <c>-eval</c>, <c>-annotate</c>,
        /// <c>-norm</c>, <c>-combo</c> and <c>-rad</c> is also checked.
        /// Every argument is additionally offered to
        /// <c>ChineseTreebankParserParams.SetOptionFlag</c>.
        /// When <c>-stats</c> is given the process exits right after printing the statistics.
        /// </remarks>
        /// <param name="args">The raw command-line arguments.</param>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            // Flag name -> argument count handed to StringUtils.ArgsToMap.
            IDictionary <string, int> flagsToNumArgs = Generics.NewHashMap();

            flagsToNumArgs["-parser"]        = 3;
            flagsToNumArgs["-lex"]           = 3;
            flagsToNumArgs["-test"]          = 2;
            flagsToNumArgs["-out"]           = 1;
            flagsToNumArgs["-lengthPenalty"] = 1;
            flagsToNumArgs["-penaltyType"]   = 1;
            flagsToNumArgs["-maxLength"]     = 1;
            flagsToNumArgs["-stats"]         = 2;
            IDictionary <string, string[]> argMap = StringUtils.ArgsToMap(args, flagsToNumArgs);
            bool        eval = argMap.ContainsKey("-eval");
            PrintWriter pw   = null;

            if (argMap.ContainsKey("-out"))
            {
                // Output file is written in GB18030 with auto-flush enabled.
                pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream(argMap["-out"][0]), "GB18030"), true);
            }
            log.Info("ChineseCharacterBasedLexicon called with args:");
            ChineseTreebankParserParams ctpp = new ChineseTreebankParserParams();

            for (int i = 0; i < args.Length; i++)
            {
                ctpp.SetOptionFlag(args, i);
                log.Info(" " + args[i]);
            }
            log.Info();
            Options op = new Options(ctpp);

            // -stats <path> <ranges>: print statistics for the treebank and exit.
            if (argMap.ContainsKey("-stats"))
            {
                string[]       statArgs         = argMap["-stats"];
                MemoryTreebank rawTrainTreebank = op.tlpParams.MemoryTreebank();
                IFileFilter    trainFilt        = new NumberRangesFileFilter(statArgs[1], false);
                rawTrainTreebank.LoadPath(new File(statArgs[0]), trainFilt);
                log.Info("Done reading trees.");
                MemoryTreebank trainTreebank;
                if (argMap.ContainsKey("-annotate"))
                {
                    trainTreebank = new MemoryTreebank();
                    TreeAnnotator annotator = new TreeAnnotator(ctpp.HeadFinder(), ctpp, op);
                    foreach (Tree tree in rawTrainTreebank)
                    {
                        trainTreebank.Add(annotator.TransformTree(tree));
                    }
                    log.Info("Done annotating trees.");
                }
                else
                {
                    trainTreebank = rawTrainTreebank;
                }
                PrintStats(trainTreebank, pw);
                System.Environment.Exit(0);
            }
            // Local sentence-length cutoff used when testing; overridden by -maxLength.
            int maxLength = 1000000;

            //    Test.verbose = true;
            if (argMap.ContainsKey("-norm"))
            {
                op.testOptions.lengthNormalization = true;
            }
            if (argMap.ContainsKey("-maxLength"))
            {
                maxLength = System.Convert.ToInt32(argMap["-maxLength"][0]);
            }
            // NOTE(review): the option below is always set to 120, independently of the
            // local maxLength above; this mirrors the original code.
            op.testOptions.maxLength = 120;
            bool combo = argMap.ContainsKey("-combo");

            if (combo)
            {
                ctpp.useCharacterBasedLexicon = true;
                op.testOptions.maxSpanForTags = 10;
                op.doDep  = false;
                op.dcTags = false;
            }
            LexicalizedParser lp  = null;
            ILexicon          lex = null;

            // -parser <treebank> <ranges> [<outFile>]  trains (and optionally saves) a parser;
            // -parser <file>                            loads a parser model.
            if (argMap.ContainsKey("-parser"))
            {
                string[] parserArgs = argMap["-parser"];
                if (parserArgs.Length > 1)
                {
                    IFileFilter trainFilt = new NumberRangesFileFilter(parserArgs[1], false);
                    lp = LexicalizedParser.TrainFromTreebank(parserArgs[0], trainFilt, op);
                    if (parserArgs.Length == 3)
                    {
                        string filename = parserArgs[2];
                        log.Info("Writing parser in serialized format to file " + filename + " ");
                        System.Console.Error.Flush();
                        ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
                        try
                        {
                            @out.WriteObject(lp);
                        }
                        finally
                        {
                            // Close the stream even if serialization fails.
                            @out.Close();
                        }
                        log.Info("done.");
                    }
                }
                else
                {
                    string parserFile = parserArgs[0];
                    lp = LexicalizedParser.LoadModel(parserFile, op);
                }
                // From here on use the lexicon, options and params owned by the parser.
                lex  = lp.GetLexicon();
                op   = lp.GetOp();
                ctpp = (ChineseTreebankParserParams)op.tlpParams;
            }
            if (argMap.ContainsKey("-rad"))
            {
                ctpp.useUnknownCharacterModel = true;
            }
            if (argMap.ContainsKey("-lengthPenalty"))
            {
                // Parse culture-independently so "0.5" works regardless of the OS locale.
                ctpp.lengthPenalty = double.Parse(argMap["-lengthPenalty"][0], System.Globalization.CultureInfo.InvariantCulture);
            }
            if (argMap.ContainsKey("-penaltyType"))
            {
                ctpp.penaltyType = System.Convert.ToInt32(argMap["-penaltyType"][0]);
            }
            // -lex <treebank> <ranges> [<outFile>]  trains (and optionally saves) a lexicon;
            // -lex [<file>]                          loads a serialized lexicon.
            if (argMap.ContainsKey("-lex"))
            {
                string[] lexArgs = argMap["-lex"];
                if (lexArgs.Length > 1)
                {
                    IIndex <string> wordIndex = new HashIndex <string>();
                    IIndex <string> tagIndex  = new HashIndex <string>();
                    lex = ctpp.Lex(op, wordIndex, tagIndex);
                    MemoryTreebank rawTrainTreebank = op.tlpParams.MemoryTreebank();
                    IFileFilter    trainFilt        = new NumberRangesFileFilter(lexArgs[1], false);
                    rawTrainTreebank.LoadPath(new File(lexArgs[0]), trainFilt);
                    log.Info("Done reading trees.");
                    MemoryTreebank trainTreebank;
                    if (argMap.ContainsKey("-annotate"))
                    {
                        trainTreebank = new MemoryTreebank();
                        TreeAnnotator annotator = new TreeAnnotator(ctpp.HeadFinder(), ctpp, op);
                        foreach (Tree tree in rawTrainTreebank)
                        {
                            // A foreach iteration variable cannot be reassigned in C#,
                            // so the transformed tree is added directly.
                            trainTreebank.Add(annotator.TransformTree(tree));
                        }
                        log.Info("Done annotating trees.");
                    }
                    else
                    {
                        trainTreebank = rawTrainTreebank;
                    }
                    lex.InitializeTraining(trainTreebank.Count);
                    lex.Train(trainTreebank);
                    lex.FinishTraining();
                    log.Info("Done training lexicon.");
                    if (lexArgs.Length == 3)
                    {
                        string filename = lexArgs[2];
                        log.Info("Writing lexicon in serialized format to file " + filename + " ");
                        System.Console.Error.Flush();
                        ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
                        try
                        {
                            @out.WriteObject(lex);
                        }
                        finally
                        {
                            // Close the stream even if serialization fails.
                            @out.Close();
                        }
                        log.Info("done.");
                    }
                }
                else
                {
                    // Fall back to the default lexicon path when no file was given.
                    string lexFile = lexArgs.Length == 1 ? lexArgs[0] : "parsers/chineseCharLex.ser.gz";
                    log.Info("Reading Lexicon from file " + lexFile);
                    ObjectInputStream @in = IOUtils.ReadStreamFromString(lexFile);
                    try
                    {
                        lex = (ILexicon)@in.ReadObject();
                    }
                    catch (TypeLoadException e)
                    {
                        // Keep the original failure as the inner exception for diagnosis.
                        throw new Exception("Bad serialized file: " + lexFile, e);
                    }
                    finally
                    {
                        @in.Close();
                    }
                }
            }
            // -test <treebank> <ranges>: segment and/or parse each gold sentence.
            if (argMap.ContainsKey("-test"))
            {
                bool segmentWords = ctpp.segment;
                bool parse        = lp != null;
                System.Diagnostics.Debug.Assert((parse || segmentWords));
                //      WordCatConstituent.collinizeWords = argMap.containsKey("-collinizeWords");
                //      WordCatConstituent.collinizeTags = argMap.containsKey("-collinizeTags");
                IWordSegmenter seg = null;
                if (segmentWords)
                {
                    seg = (IWordSegmenter)lex;
                }
                string[]       testArgs     = argMap["-test"];
                MemoryTreebank testTreebank = op.tlpParams.MemoryTreebank();
                IFileFilter    testFilt     = new NumberRangesFileFilter(testArgs[1], false);
                testTreebank.LoadPath(new File(testArgs[0]), testFilt);
                ITreeTransformer          subcategoryStripper = op.tlpParams.SubcategoryStripper();
                ITreeTransformer          collinizer          = ctpp.Collinizer();
                WordCatEquivalenceClasser eqclass             = new WordCatEquivalenceClasser();
                WordCatEqualityChecker    eqcheck             = new WordCatEqualityChecker();
                EquivalenceClassEval      basicEval           = new EquivalenceClassEval(eqclass, eqcheck, "basic");
                EquivalenceClassEval      collinsEval         = new EquivalenceClassEval(eqclass, eqcheck, "collinized");
                IList <string>            evalTypes           = new List <string>(3);
                // When set, brackets common to both trees are added before scoring (see below).
                bool goodPOS = false;
                if (segmentWords)
                {
                    evalTypes.Add(WordCatConstituent.wordType);
                    if (ctpp.segmentMarkov && !parse)
                    {
                        evalTypes.Add(WordCatConstituent.tagType);
                        goodPOS = true;
                    }
                }
                if (parse)
                {
                    evalTypes.Add(WordCatConstituent.tagType);
                    evalTypes.Add(WordCatConstituent.catType);
                    if (combo)
                    {
                        evalTypes.Add(WordCatConstituent.wordType);
                        goodPOS = true;
                    }
                }
                TreeToBracketProcessor proc = new TreeToBracketProcessor(evalTypes);
                log.Info("Testing...");
                foreach (Tree goldTop in testTreebank)
                {
                    Tree             gold         = goldTop.FirstChild();
                    IList <IHasWord> goldSentence = gold.YieldHasWord();
                    if (goldSentence.Count > maxLength)
                    {
                        log.Info("Skipping sentence; too long: " + goldSentence.Count);
                        continue;
                    }
                    log.Info("Processing sentence; length: " + goldSentence.Count);

                    IList <IHasWord> s;
                    if (segmentWords)
                    {
                        // Concatenate the gold words into one string and re-segment it.
                        StringBuilder goldCharBuf = new StringBuilder();
                        foreach (IHasWord aGoldSentence in goldSentence)
                        {
                            StringLabel word = (StringLabel)aGoldSentence;
                            goldCharBuf.Append(word.Value());
                        }
                        string goldChars = goldCharBuf.ToString();
                        s = seg.Segment(goldChars);
                    }
                    else
                    {
                        s = goldSentence;
                    }
                    Tree tree;
                    if (parse)
                    {
                        tree = lp.ParseTree(s);
                        if (tree == null)
                        {
                            throw new Exception("PARSER RETURNED NULL!!!");
                        }
                    }
                    else
                    {
                        tree = Edu.Stanford.Nlp.Trees.Trees.ToFlatTree(s);
                        tree = subcategoryStripper.TransformTree(tree);
                    }
                    if (pw != null)
                    {
                        if (parse)
                        {
                            tree.PennPrint(pw);
                        }
                        else
                        {
                            // Print the words separated by single spaces. A foreach is used
                            // because IEnumerator.Current is not valid before the first
                            // MoveNext() call.
                            bool firstWord = true;
                            foreach (IHasWord item in s)
                            {
                                if (!firstWord)
                                {
                                    pw.Print(" ");
                                }
                                Word word = (Word)item;
                                pw.Print(word.Word());
                                firstWord = false;
                            }
                        }
                        pw.Println();
                    }
                    if (eval)
                    {
                        ICollection ourBrackets;
                        ICollection goldBrackets;
                        ourBrackets  = proc.AllBrackets(tree);
                        goldBrackets = proc.AllBrackets(gold);
                        if (goodPOS)
                        {
                            Sharpen.Collections.AddAll(ourBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(tree, gold));
                            Sharpen.Collections.AddAll(goldBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(gold, tree));
                        }
                        basicEval.Eval(ourBrackets, goldBrackets);
                        System.Console.Out.WriteLine("\nScores:");
                        basicEval.DisplayLast();
                        // Repeat the evaluation on the collinized versions of both trees.
                        Tree collinsTree = collinizer.TransformTree(tree);
                        Tree collinsGold = collinizer.TransformTree(gold);
                        ourBrackets  = proc.AllBrackets(collinsTree);
                        goldBrackets = proc.AllBrackets(collinsGold);
                        if (goodPOS)
                        {
                            Sharpen.Collections.AddAll(ourBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(collinsTree, collinsGold));
                            Sharpen.Collections.AddAll(goldBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(collinsGold, collinsTree));
                        }
                        collinsEval.Eval(ourBrackets, goldBrackets);
                        System.Console.Out.WriteLine("\nCollinized scores:");
                        collinsEval.DisplayLast();
                        System.Console.Out.WriteLine();
                    }
                }
                if (eval)
                {
                    // Summary scores over all processed sentences.
                    basicEval.Display();
                    System.Console.Out.WriteLine();
                    collinsEval.Display();
                }
            }
        }
// Example #19
// 0
        /// <summary>
        /// This method lets you train and test a segmenter relative to a
        /// Treebank.
        /// </summary>
        /// <remarks>
        /// This method lets you train and test a segmenter relative to a
        /// Treebank.
        /// <p>
        /// <i>Implementation note:</i> This method is largely cloned from
        /// LexicalizedParser's main method.  Should we try to have it be able
        /// to train segmenters to stop things going out of sync?
        /// </remarks>
        public static void Main(string[] args)
        {
            bool     train = false;
            bool     saveToSerializedFile      = false;
            bool     saveToTextFile            = false;
            string   serializedInputFileOrUrl  = null;
            string   textInputFileOrUrl        = null;
            string   serializedOutputFileOrUrl = null;
            string   textOutputFileOrUrl       = null;
            string   treebankPath = null;
            Treebank testTreebank = null;
            // Treebank tuneTreebank = null;
            string      testPath    = null;
            IFileFilter testFilter  = null;
            IFileFilter trainFilter = null;
            string      encoding    = null;
            // variables needed to process the files to be parsed
            ITokenizerFactory <Word> tokenizerFactory = null;
            //    DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor();
            bool tokenized = false;
            // whether or not the input file has already been tokenized
            IFunction <IList <IHasWord>, IList <IHasWord> > escaper = new ChineseEscaper();
            // int tagDelimiter = -1;
            // String sentenceDelimiter = "\n";
            // boolean fromXML = false;
            int argIndex = 0;

            if (args.Length < 1)
            {
                log.Info("usage: java edu.stanford.nlp.parser.lexparser." + "LexicalizedParser parserFileOrUrl filename*");
                return;
            }
            Options op = new Options();

            op.tlpParams = new ChineseTreebankParserParams();
            // while loop through option arguments
            while (argIndex < args.Length && args[argIndex][0] == '-')
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-train"))
                {
                    train = true;
                    saveToSerializedFile = true;
                    int numSubArgs = NumSubArgs(args, argIndex);
                    argIndex++;
                    if (numSubArgs > 1)
                    {
                        treebankPath = args[argIndex];
                        argIndex++;
                    }
                    else
                    {
                        throw new Exception("Error: -train option must have treebankPath as first argument.");
                    }
                    if (numSubArgs == 2)
                    {
                        trainFilter = new NumberRangesFileFilter(args[argIndex++], true);
                    }
                    else
                    {
                        if (numSubArgs >= 3)
                        {
                            try
                            {
                                int low  = System.Convert.ToInt32(args[argIndex]);
                                int high = System.Convert.ToInt32(args[argIndex + 1]);
                                trainFilter = new NumberRangeFileFilter(low, high, true);
                                argIndex   += 2;
                            }
                            catch (NumberFormatException)
                            {
                                // maybe it's a ranges expression?
                                trainFilter = new NumberRangesFileFilter(args[argIndex], true);
                                argIndex++;
                            }
                        }
                    }
                }
                else
                {
                    if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-encoding"))
                    {
                        // sets encoding for TreebankLangParserParams
                        encoding = args[argIndex + 1];
                        op.tlpParams.SetInputEncoding(encoding);
                        op.tlpParams.SetOutputEncoding(encoding);
                        argIndex += 2;
                    }
                    else
                    {
                        if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-loadFromSerializedFile"))
                        {
                            // load the parser from a binary serialized file
                            // the next argument must be the path to the parser file
                            serializedInputFileOrUrl = args[argIndex + 1];
                            argIndex += 2;
                        }
                        else
                        {
                            // doesn't make sense to load from TextFile -pichuan
                            //      } else if (args[argIndex].equalsIgnoreCase("-loadFromTextFile")) {
                            //        // load the parser from declarative text file
                            //        // the next argument must be the path to the parser file
                            //        textInputFileOrUrl = args[argIndex + 1];
                            //        argIndex += 2;
                            if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-saveToSerializedFile"))
                            {
                                saveToSerializedFile      = true;
                                serializedOutputFileOrUrl = args[argIndex + 1];
                                argIndex += 2;
                            }
                            else
                            {
                                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-saveToTextFile"))
                                {
                                    // save the parser to declarative text file
                                    saveToTextFile      = true;
                                    textOutputFileOrUrl = args[argIndex + 1];
                                    argIndex           += 2;
                                }
                                else
                                {
                                    if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-treebank"))
                                    {
                                        // the next argument is the treebank path and range for testing
                                        int numSubArgs = NumSubArgs(args, argIndex);
                                        argIndex++;
                                        if (numSubArgs == 1)
                                        {
                                            testFilter = new NumberRangesFileFilter(args[argIndex++], true);
                                        }
                                        else
                                        {
                                            if (numSubArgs > 1)
                                            {
                                                testPath = args[argIndex++];
                                                if (numSubArgs == 2)
                                                {
                                                    testFilter = new NumberRangesFileFilter(args[argIndex++], true);
                                                }
                                                else
                                                {
                                                    if (numSubArgs >= 3)
                                                    {
                                                        try
                                                        {
                                                            int low  = System.Convert.ToInt32(args[argIndex]);
                                                            int high = System.Convert.ToInt32(args[argIndex + 1]);
                                                            testFilter = new NumberRangeFileFilter(low, high, true);
                                                            argIndex  += 2;
                                                        }
                                                        catch (NumberFormatException)
                                                        {
                                                            // maybe it's a ranges expression?
                                                            testFilter = new NumberRangesFileFilter(args[argIndex++], true);
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    }
                                    else
                                    {
                                        int j = op.tlpParams.SetOptionFlag(args, argIndex);
                                        if (j == argIndex)
                                        {
                                            log.Info("Unknown option ignored: " + args[argIndex]);
                                            j++;
                                        }
                                        argIndex = j;
                                    }
                                }
                            }
                        }
                    }
                }
            }
            // end while loop through arguments
            ITreebankLangParserParams tlpParams = op.tlpParams;

            // all other arguments are order dependent and
            // are processed in order below
            Edu.Stanford.Nlp.Parser.Lexparser.ChineseLexiconAndWordSegmenter cs = null;
            if (!train && op.testOptions.verbose)
            {
                System.Console.Out.WriteLine("Currently " + new DateTime());
                PrintArgs(args, System.Console.Out);
            }
            if (train)
            {
                PrintArgs(args, System.Console.Out);
                // so we train a parser using the treebank
                if (treebankPath == null)
                {
                    // the next arg must be the treebank path, since it wasn't give earlier
                    treebankPath = args[argIndex];
                    argIndex++;
                    if (args.Length > argIndex + 1)
                    {
                        try
                        {
                            // the next two args might be the range
                            int low  = System.Convert.ToInt32(args[argIndex]);
                            int high = System.Convert.ToInt32(args[argIndex + 1]);
                            trainFilter = new NumberRangeFileFilter(low, high, true);
                            argIndex   += 2;
                        }
                        catch (NumberFormatException)
                        {
                            // maybe it's a ranges expression?
                            trainFilter = new NumberRangesFileFilter(args[argIndex], true);
                            argIndex++;
                        }
                    }
                }
                Treebank        trainTreebank = MakeTreebank(treebankPath, op, trainFilter);
                IIndex <string> wordIndex     = new HashIndex <string>();
                IIndex <string> tagIndex      = new HashIndex <string>();
                cs = new Edu.Stanford.Nlp.Parser.Lexparser.ChineseLexiconAndWordSegmenter(trainTreebank, op, wordIndex, tagIndex);
            }
            else
            {
                if (textInputFileOrUrl != null)
                {
                }
                else
                {
                    // so we load the segmenter from a text grammar file
                    // XXXXX fix later -pichuan
                    //cs = new LexicalizedParser(textInputFileOrUrl, true, op);
                    // so we load a serialized segmenter
                    if (serializedInputFileOrUrl == null)
                    {
                        // the next argument must be the path to the serialized parser
                        serializedInputFileOrUrl = args[argIndex];
                        argIndex++;
                    }
                    try
                    {
                        cs = new Edu.Stanford.Nlp.Parser.Lexparser.ChineseLexiconAndWordSegmenter(serializedInputFileOrUrl, op);
                    }
                    catch (ArgumentException)
                    {
                        log.Info("Error loading segmenter, exiting...");
                        System.Environment.Exit(0);
                    }
                }
            }
            // the following has to go after reading parser to make sure
            // op and tlpParams are the same for train and test
            TreePrint treePrint = op.testOptions.TreePrint(tlpParams);

            if (testFilter != null)
            {
                if (testPath == null)
                {
                    if (treebankPath == null)
                    {
                        throw new Exception("No test treebank path specified...");
                    }
                    else
                    {
                        log.Info("No test treebank path specified.  Using train path: \"" + treebankPath + "\"");
                        testPath = treebankPath;
                    }
                }
                testTreebank = tlpParams.TestMemoryTreebank();
                testTreebank.LoadPath(testPath, testFilter);
            }
            op.trainOptions.sisterSplitters = Generics.NewHashSet(Arrays.AsList(tlpParams.SisterSplitters()));
            // at this point we should be sure that op.tlpParams is
            // set appropriately (from command line, or from grammar file),
            // and will never change again.  We also set the tlpParams of the
            // LexicalizedParser instance to be the same object.  This is
            // redundancy that we probably should take out eventually.
            //
            // -- Roger
            if (op.testOptions.verbose)
            {
                log.Info("Lexicon is " + cs.GetType().FullName);
            }
            PrintWriter pwOut = tlpParams.Pw();
            PrintWriter pwErr = tlpParams.Pw(System.Console.Error);

            // Now what do we do with the parser we've made
            if (saveToTextFile)
            {
                // save the parser to textGrammar format
                if (textOutputFileOrUrl != null)
                {
                    SaveSegmenterDataToText(cs, textOutputFileOrUrl);
                }
                else
                {
                    log.Info("Usage: must specify a text segmenter data output path");
                }
            }
            if (saveToSerializedFile)
            {
                if (serializedOutputFileOrUrl == null && argIndex < args.Length)
                {
                    // the next argument must be the path to serialize to
                    serializedOutputFileOrUrl = args[argIndex];
                    argIndex++;
                }
                if (serializedOutputFileOrUrl != null)
                {
                    SaveSegmenterDataToSerialized(cs, serializedOutputFileOrUrl);
                }
                else
                {
                    if (textOutputFileOrUrl == null && testTreebank == null)
                    {
                        // no saving/parsing request has been specified
                        log.Info("usage: " + "java edu.stanford.nlp.parser.lexparser.ChineseLexiconAndWordSegmenter" + "-train trainFilesPath [start stop] serializedParserFilename");
                    }
                }
            }
            /* --------------------- Testing part!!!! ----------------------- */
            if (op.testOptions.verbose)
            {
            }
            //      printOptions(false, op);
            if (testTreebank != null || (argIndex < args.Length && Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-treebank")))
            {
                // test parser on treebank
                if (testTreebank == null)
                {
                    // the next argument is the treebank path and range for testing
                    testTreebank = tlpParams.TestMemoryTreebank();
                    if (args.Length < argIndex + 4)
                    {
                        testTreebank.LoadPath(args[argIndex + 1]);
                    }
                    else
                    {
                        int testlow  = System.Convert.ToInt32(args[argIndex + 2]);
                        int testhigh = System.Convert.ToInt32(args[argIndex + 3]);
                        testTreebank.LoadPath(args[argIndex + 1], new NumberRangeFileFilter(testlow, testhigh, true));
                    }
                }
            }
        }
예제 #20
0
        /// <summary>
        /// Reads the lookup tables from <paramref name="_br"/>. Soundbank names are appended to
        /// <c>Soundbanks</c>; every string entry whose hash also appears in the sound table is
        /// registered in <c>SoundLookups</c> under that sound's id.
        /// </summary>
        /// <param name="_br">Reader to parse; it is also stored in the <c>br</c> field.</param>
        protected void ReadFile(BinaryReader _br)
        {
            br = _br;

            // Soundbank table descriptor. The four bytes following each count are skipped.
            uint bankCount = br.ReadUInt32();
            br.BaseStream.Position += 4;
            uint bankTableOffset = br.ReadUInt32();
            br.ReadUInt32(); // section pointer: consumed but not needed
            br.ReadUInt32(); // unknown field: consumed but not needed

            // Sound table descriptor.
            uint soundCount = br.ReadUInt32();
            br.BaseStream.Position += 4;
            uint soundTableOffset = br.ReadUInt32();

            // Jump over the section pointer and the two unknown fields (3 x 4 bytes).
            br.BaseStream.Position += 12;

            // Descriptor of the third table (hash -> string entries).
            uint stringCount = br.ReadUInt32();
            br.BaseStream.Position += 4;
            uint stringTableOffset = br.ReadUInt32();

            // --- Soundbank names ---
            br.BaseStream.Position = bankTableOffset;
            for (int bank = 0; bank < bankCount; bank++)
            {
                br.BaseStream.Position += 4;
                uint namePosition = br.ReadUInt32();

                // Follow the pointer to the name, then resume at the next table entry.
                this.SeekPush();
                br.BaseStream.Position = namePosition;
                string bankName = this.ReadString();
                Soundbanks.Add(bankName);
                this.SeekPop();
            }

            // --- Sound table: hash -> id (the first id seen for a hash is kept) ---
            br.BaseStream.Position = soundTableOffset;
            Dictionary<ulong, uint> idsByHash = new Dictionary<ulong, uint>();
            for (int sound = 0; sound < soundCount; sound++)
            {
                uint soundId = (uint)br.ReadUInt64();
                ulong soundHash = br.ReadUInt64();
                if (!idsByHash.ContainsKey(soundHash))
                {
                    idsByHash.Add(soundHash, soundId);
                }
            }

            // --- String table: attach a string to each known sound ---
            br.BaseStream.Position = stringTableOffset;
            for (int entry = 0; entry < stringCount; entry++)
            {
                ulong entryHash = br.ReadUInt64();
                br.BaseStream.Position += 4;
                uint stringPosition = br.ReadUInt32();

                this.SeekPush();
                br.BaseStream.Position = stringPosition;
                string text = this.ReadString();
                this.SeekPop();

                // Skip entries without a matching sound and ids that are already registered.
                uint matchedId;
                if (!idsByHash.TryGetValue(entryHash, out matchedId) || SoundLookups.ContainsKey(matchedId))
                {
                    continue;
                }

                Idstring ids = HashIndex.Get(entryHash);

                // A string that is just the id printed as text is stored as null.
                string label = (matchedId.ToString() != text) ? text : null;
                SoundLookups.Add(matchedId, new Tuple<string, Idstring>(label, ids));
            }
        }
예제 #21
0
        /// <summary>
        /// Adds three included index paths and one excluded path to the indexing policy of the
        /// named collection, then replaces the collection so the updated policy is submitted.
        /// </summary>
        /// <param name="client">Client used to read and replace the collection.</param>
        /// <param name="collectionName">Name of the collection inside <c>databaseId</c>.</param>
        private static async Task CreateIndex(DocumentClient client, string collectionName)
        {
            Console.WriteLine("Set up Indexes");

            Uri collectionUri = UriFactory.CreateDocumentCollectionUri(databaseId, collectionName);
            DocumentCollection collection = await client.ReadDocumentCollectionAsync(collectionUri);
            var policy = collection.IndexingPolicy;

            // Range index over /prop/? (or /*) serves equality, range and ORDER BY queries:
            //   SELECT * FROM collection c WHERE c.prop = "value"
            //   SELECT * FROM collection c WHERE c.prop > 5
            //   SELECT * FROM collection c ORDER BY c.prop
            policy.IncludedPaths.Add(new IncludedPath
            {
                Indexes = new Collection<Index> { new RangeIndex(DataType.Number) },
                Path = @"/FamilyId/?"
            });

            // Hash index over /prop/? (or /*) serves equality queries:
            //   SELECT * FROM collection c WHERE c.prop = "value"
            policy.IncludedPaths.Add(new IncludedPath
            {
                Indexes = new Collection<Index> { new HashIndex(DataType.String) },
                Path = @"/Address/*"
            });

            // Hash index over /props/[]/? (or /* or /props/*) serves equality queries on array elements:
            //   SELECT tag FROM collection c JOIN tag IN c.props WHERE tag = 5
            policy.IncludedPaths.Add(new IncludedPath
            {
                Indexes = new Collection<Index> { new HashIndex(DataType.String) },
                Path = @"/Children/[]/?"
            });

            // Everything under /Parents is left out of the index.
            policy.ExcludedPaths.Add(new ExcludedPath { Path = @"/Parents/*" });

            await client.ReplaceDocumentCollectionAsync(collection);
        }
예제 #22
0
        /// <summary>
        ///     Loads every locally available mod into the mods list: <c>*.pdmod</c> files from the
        ///     mods directory, folders found under <c>mod_overrides</c>, and BLT hook mods found
        ///     under the game's <c>mods</c> folder. Timing of each stage is written to the console.
        /// </summary>
        /// <param name="overrride">
        ///     When <c>true</c> (and the mods directory exists), the current mods list is cleared
        ///     before the <c>*.pdmod</c> files are loaded.
        /// </param>
        public void LoadMods(bool overrride = false)
        {
            var watch = Stopwatch.StartNew();

            // ---------------------------------------------------------------- *.pdmod files
            if (Directory.Exists(modsDirectory))
            {
                watch.Restart();

                if (overrride)
                {
                    this._modsList.Clear();
                }

                // Keys that are not confirmed by a successful LoadSingleMod below are
                // removed from the list once all files have been processed.
                List<string> leftovers = this.modsList.Keys.ToList();

                foreach (string file in Directory.GetFiles(modsDirectory, "*.pdmod"))
                {
                    // A file can disappear between enumeration and loading. Skip only that
                    // file so the clean-up, mod_overrides and BLT stages still run.
                    if (!File.Exists(file))
                    {
                        continue;
                    }

                    if (LoadSingleMod(file))
                    {
                        leftovers.Remove(file);
                    }
                }

                watch.Stop();
                Console.WriteLine("LoadLocalMods.pdmods - " + watch.ElapsedMilliseconds + " ms");

                watch.Restart();
                foreach (string left in leftovers)
                {
                    RemoveModsList(left);
                }

                watch.Stop();
                Console.WriteLine("LoadLocalMods.pdmods.leftovers - " + watch.ElapsedMilliseconds + " ms");
            }
            else
            {
                if (this.CanCreateDirectory(modsDirectory))
                {
                    Directory.CreateDirectory(modsDirectory);
                }
            }

            watch.Restart();

            // ---------------------------------------------------------------- mod_overrides
            string overridesRoot = Path.Combine(StaticStorage.settings.AssetsFolder, "mod_overrides");
            if (Directory.Exists(overridesRoot))
            {
                List<BundleMod> mod_overrides_mods = new List<BundleMod>();
                string[] mod_overrides = Directory.EnumerateDirectories(overridesRoot).ToArray();

                foreach (string mo in mod_overrides)
                {
                    if (!Directory.Exists(mo))
                    {
                        continue;
                    }

                    string folderName = new DirectoryInfo(mo).Name;
                    if (folderName == "Bundle_Modder_Shared")
                    {
                        continue;
                    }

                    string[] allfiles = Directory.GetFiles(mo, "*.*", SearchOption.AllDirectories);

                    // Defaults describe an override folder nothing is known about; a readable
                    // mod.txt upgrades them below.
                    BundleMod mo_mod = new BundleMod
                    {
                        Name = folderName,
                        Author = "<UNKNOWN>",
                        Description = "This mod is installed in \"mod_overrides\" folder. No description for this mod is availiable. This mod was not matched with any local mods. You can only uninstall this mod.",
                        file = mo,
                        status = BundleMod.ModStatus.Unrecognized,
                        type = BundleMod.ModType.mod_override,
                        actionStatus = BundleMod.ModActionStatus.Missing,
                        UtilizesOverride = true
                    };

                    string overrideInfoPath = Path.Combine(mo, "mod.txt");
                    if (File.Exists(overrideInfoPath))
                    {
                        try
                        {
                            OverrideMod overrideModInformation = OverrideMod.Deserialize(File.ReadAllText(overrideInfoPath));

                            if (!String.IsNullOrWhiteSpace(overrideModInformation.Name))
                            {
                                mo_mod.Name = overrideModInformation.Name;
                            }

                            if (!String.IsNullOrWhiteSpace(overrideModInformation.Author))
                            {
                                mo_mod.Author = overrideModInformation.Author;
                            }

                            if (!String.IsNullOrWhiteSpace(overrideModInformation.Description))
                            {
                                mo_mod.Description = overrideModInformation.Description;
                            }

                            mo_mod.status = BundleMod.ModStatus.Installed;
                            mo_mod.actionStatus = BundleMod.ModActionStatus.None;
                        }
                        catch (Exception exc)
                        {
                            // Best effort: the mod keeps its Unrecognized/Missing status (those are
                            // only changed at the end of the try block). Report instead of hiding it.
                            Console.WriteLine("LoadLocalMods.overrides - failed reading " + overrideInfoPath + ": " + exc.Message);
                        }
                    }

                    foreach (string mo_entry in allfiles)
                    {
                        if (mo_entry.EndsWith("mod.txt"))
                        {
                            continue;
                        }

                        BundleRewriteItem mo_bri = new BundleRewriteItem();

                        // Path relative to the override folder, with forward slashes.
                        string filepath = mo_entry.Substring(mo.Length + 1).Replace('\\', '/');

                        // Accepted shapes: "path.extension" or "path.language.extension".
                        string[] pathelements = filepath.Split('.');
                        if (pathelements.Length == 2)
                        {
                            mo_bri.BundlePath = Hash64.HashString(pathelements[0]);
                            mo_bri.BundleExtension = Hash64.HashString(pathelements[1]);
                        }
                        else if (pathelements.Length == 3)
                        {
                            mo_bri.BundlePath = Hash64.HashString(pathelements[0]);
                            UInt32 lang = 0;
                            if (UInt32.TryParse(pathelements[1], out lang))
                            {
                                mo_bri.BundleLanguage = lang;
                            }
                            mo_bri.BundleExtension = Hash64.HashString(pathelements[2]);
                            mo_bri.IsLanguageSpecific = true;
                        }
                        else
                        {
                            continue;
                        }

                        mo_bri.ModName = mo_mod.Name;
                        mo_bri.ModAuthor = mo_mod.Author;
                        mo_bri.ModDescription = mo_mod.Description;
                        mo_bri.ReplacementFile = "";

                        // Overrideable items are kept only when both hashes can be un-hashed.
                        if (mo_bri.isOverrideable()
                            && (string.IsNullOrEmpty(HashIndex.GetUnhashed(mo_bri.BundlePath))
                                || string.IsNullOrEmpty(HashIndex.GetUnhashed(mo_bri.BundleExtension))))
                        {
                            continue;
                        }

                        mo_mod.ItemQueue.Add(mo_bri);
                    }

                    mod_overrides_mods.Add(mo_mod);
                }

                // Compare each override folder against the mods that are already listed.
                Dictionary<string, BundleMod> knownMods = this.modsList;

                foreach (BundleMod mo_bm in mod_overrides_mods)
                {
                    // Materialised up front because AddModsList is called inside the loop below.
                    List<BundleMod> matched_mods = knownMods.Values
                        .Where(mod => mod.getEscapedName().Equals(mo_bm.Name) || mod.Name.Equals(mo_bm.Name))
                        .ToList();

                    if (matched_mods.Count == 0)
                    {
                        mo_bm.canInstall = false;
                        mo_bm.canUninstall = true;

                        AddModsList(mo_bm.file, mo_bm);
                        continue;
                    }

                    foreach (BundleMod bm in matched_mods)
                    {
                        // The override folder counts as a copy of the local mod only when it has
                        // no more files than the mod and each of its files matches a mod item.
                        bool coveredByLocalMod =
                            mo_bm.ItemQueue.Count <= bm.ItemQueue.Count
                            && mo_bm.ItemQueue.All(item => bm.ItemQueue.Any(
                                x => x.BundlePath == item.BundlePath && x.BundleExtension == item.BundleExtension));

                        if (!coveredByLocalMod)
                        {
                            mo_bm.canInstall = false;
                            mo_bm.canUninstall = true;
                            mo_bm.actionStatus = BundleMod.ModActionStatus.Missing;
                            mo_bm.status = BundleMod.ModStatus.Unrecognized;

                            AddModsList(mo_bm.file, mo_bm);
                        }
                        else if (bm.ItemQueue.All(x => x.isOverrideable()) || InstalledModsListContains(bm) > -1)
                        {
                            bm.status = BundleMod.ModStatus.Installed;
                        }
                        else
                        {
                            // Override files are present but the mod is not in the installed list:
                            // flag it for a forced reinstall of every item.
                            bm.actionStatus = BundleMod.ModActionStatus.ForcedReinstall;
                            bm.status = BundleMod.ModStatus.ParticallyInstalled;

                            foreach (BundleRewriteItem bri in bm.ItemQueue)
                            {
                                bri.toReinstall = true;
                            }
                        }
                    }
                }
            }

            watch.Stop();
            Console.WriteLine("LoadLocalMods.overrides - " + watch.ElapsedMilliseconds + " ms");

            watch.Restart();

            // ---------------------------------------------------------------- BLT mods
            string bltRoot = Path.Combine(StaticStorage.settings.AssetsFolder, "..", "mods");
            if (Directory.Exists(bltRoot))
            {
                // Sub-folders are scanned only when the "base" folder exists.
                if (Directory.Exists(Path.Combine(bltRoot, "base")))
                {
                    List<string> bltmods = Directory.EnumerateDirectories(bltRoot).ToList();

                    foreach (string bltmod in bltmods)
                    {
                        if (!Directory.Exists(bltmod))
                        {
                            continue;
                        }

                        string bltFolderName = Path.GetFileNameWithoutExtension(bltmod);
                        if (bltFolderName.Equals("log") || bltFolderName.Equals("base"))
                        {
                            continue;
                        }

                        string bltInfoPath = Path.Combine(bltmod, "mod.txt");
                        if (!File.Exists(bltInfoPath))
                        {
                            continue;
                        }

                        BundleMod blt_mod = new BundleMod
                        {
                            Name = new DirectoryInfo(bltmod).Name,
                            Author = "<UNKNOWN>",
                            Description = "This is a BLT Hook mod. No description for this mod is availiable. This mod doesn't have a proper description. You can enable/disable this mod as well as uninstall it.",
                            file = bltmod,
                            status = BundleMod.ModStatus.Installed,
                            type = BundleMod.ModType.lua,
                            actionStatus = BundleMod.ModActionStatus.None,
                            UtilizesOverride = false,
                            UtilizesBundles = false,
                            enabled = true
                        };

                        try
                        {
                            // File.ReadAllText opens the file for reading only, so a
                            // write-protected mod.txt can still be parsed.
                            dynamic jsonDe = JsonConvert.DeserializeObject(File.ReadAllText(bltInfoPath));
                            if (jsonDe != null)
                            {
                                if (jsonDe.name != null)
                                {
                                    blt_mod.Name = jsonDe.name;
                                }
                                if (jsonDe.author != null)
                                {
                                    blt_mod.Author = jsonDe.author;
                                }
                                if (jsonDe.description != null)
                                {
                                    blt_mod.Description = jsonDe.description;
                                }
                            }
                        }
                        catch (Exception exc)
                        {
                            // The mod is still listed; the failure is surfaced in its description.
                            blt_mod.Description += " Failed parsing mods.txt of " + bltFolderName + ", Message: " + exc.Message;
                        }

                        AddModsList(bltmod, blt_mod, true);
                    }
                }

                LoadBLTModManagement();
            }

            watch.Stop();
            Console.WriteLine("LoadLocalMods.blt_mods - " + watch.ElapsedMilliseconds + " ms");
        }
예제 #23
0
        /// <summary>
        /// Updates the running chunk-level and token-level counters with one pair of tags
        /// (the guessed tag and the correct tag for the same token).
        /// </summary>
        /// <param name="guess">The guessed tag for the current token.</param>
        /// <param name="correct">The correct tag for the current token.</param>
        /// <param name="addUnknownLabels">
        /// When true, the type labels of both tags are added to <c>labelIndex</c>, which is
        /// created first if it does not exist yet.
        /// </param>
        protected internal virtual void AddGuess(LabeledChunkIdentifier.LabelTagType guess, LabeledChunkIdentifier.LabelTagType correct, bool addUnknownLabels)
        {
            if (addUnknownLabels)
            {
                if (labelIndex == null)
                {
                    labelIndex = new HashIndex<string>();
                }
                labelIndex.Add(GetTypeLabel(guess));
                labelIndex.Add(GetTypeLabel(correct));
            }

            // While inside a chunk that has matched so far, decide whether it just ended
            // as a correct guess or has stopped matching.
            if (inCorrect)
            {
                bool correctChunkEnded = LabeledChunkIdentifier.IsEndOfChunk(prevCorrect, correct);
                bool guessChunkEnded   = LabeledChunkIdentifier.IsEndOfChunk(prevGuess, guess);

                if (correctChunkEnded && guessChunkEnded && prevGuess.TypeMatches(prevCorrect))
                {
                    // Both chunks closed together with matching types: count a correct chunk.
                    inCorrect = false;
                    correctGuesses.IncrementCount(GetTypeLabel(prevCorrect));
                }
                else if (correctChunkEnded != guessChunkEnded || !guess.TypeMatches(correct))
                {
                    // Only one side ended, or the types now differ: the chunk no longer matches.
                    inCorrect = false;
                }
            }

            bool correctChunkStarted = LabeledChunkIdentifier.IsStartOfChunk(prevCorrect, correct);
            bool guessChunkStarted   = LabeledChunkIdentifier.IsStartOfChunk(prevGuess, guess);

            // Start tracking a potentially correct chunk when both sides open one of the same type.
            if (correctChunkStarted && guessChunkStarted && guess.TypeMatches(correct))
            {
                inCorrect = true;
            }
            if (correctChunkStarted)
            {
                foundCorrect.IncrementCount(GetTypeLabel(correct));
            }
            if (guessChunkStarted)
            {
                foundGuessed.IncrementCount(GetTypeLabel(guess));
            }

            // Token-level match: compare by type when the chunker ignores the provided tag,
            // otherwise compare the labels themselves.
            bool tokenMatches = chunker.IsIgnoreProvidedTag()
                ? guess.TypeMatches(correct)
                : guess.label.Equals(correct.label);
            if (tokenMatches)
            {
                tokensCorrect++;
            }
            tokensCount++;

            // Remember this pair for the next call.
            prevGuess   = guess;
            prevCorrect = correct;
        }
        /// <summary>
        /// Command-line entry point: trains a <c>FactoredLexicon</c> on the training treebank, scores
        /// every event of the dev treebank, and prints the accuracy plus the share of errors per gold tag.
        /// </summary>
        /// <param name="args">Exactly four values: language, comma-separated feature list, train file, dev file.</param>
        public static void Main(string[] args)
        {
            if (args.Length != 4)
            {
                System.Console.Error.Printf("Usage: java %s language features train_file dev_file%n", typeof(Edu.Stanford.Nlp.Parser.Lexparser.FactoredLexicon).FullName);
                System.Environment.Exit(-1);
            }

            // Positional command line options.
            Language language = Language.ValueOf(args[0]);
            string featureList = args[1];
            ITreebankLangParserParams tlpp = language.@params;

            Treebank trainTreebank = tlpp.DiskTreebank();
            trainTreebank.LoadPath(args[2]);

            Treebank devTreebank = tlpp.DiskTreebank();
            devTreebank.LoadPath(args[3]);

            Options options = GetOptions(language);

            // Only Arabic and French have a morphological feature specification here.
            MorphoFeatureSpecification morphoSpec;
            string factoredFlag;
            if (language.Equals(Language.Arabic))
            {
                morphoSpec   = new ArabicMorphoFeatureSpecification();
                factoredFlag = "-arabicFactored";
            }
            else if (language.Equals(Language.French))
            {
                morphoSpec   = new FrenchMorphoFeatureSpecification();
                factoredFlag = "-frenchFactored";
            }
            else
            {
                throw new NotSupportedException();
            }
            tlpp.SetOptionFlag(new string[] { factoredFlag }, 0);

            // Activate each requested morphological feature.
            string[] features = featureList.Trim().Split(",");
            foreach (string featureName in features)
            {
                morphoSpec.Activate(MorphoFeatureSpecification.MorphoFeatureType.ValueOf(featureName));
            }
            System.Console.Out.WriteLine("Language: " + language.ToString());
            System.Console.Out.WriteLine("Features: " + args[1]);

            // Create word and tag indices.
            // The trees are collected in a list because the lexicon training interface takes a collection.
            System.Console.Out.Write("Loading training trees...");
            IList<Tree>    trainTrees = new List<Tree>(19000);
            IIndex<string> wordIndex  = new HashIndex<string>();
            IIndex<string> tagIndex   = new HashIndex<string>();
            foreach (Tree root in trainTreebank)
            {
                foreach (Tree node in root)
                {
                    if (node.IsLeaf())
                    {
                        continue;
                    }
                    tlpp.TransformTree(node, root);
                }
                trainTrees.Add(root);
            }
            System.Console.Out.Printf("Done! (%d trees)%n", trainTrees.Count);

            // Set up and train the lexicon.
            System.Console.Out.Write("Collecting sufficient statistics for lexicon...");
            var lexicon = new Edu.Stanford.Nlp.Parser.Lexparser.FactoredLexicon(options, morphoSpec, wordIndex, tagIndex);
            lexicon.InitializeTraining(trainTrees.Count);
            lexicon.Train(trainTrees, null);
            lexicon.FinishTraining();
            System.Console.Out.WriteLine("Done!");
            trainTrees = null;

            // Load the tuning set.
            System.Console.Out.Write("Loading tuning set...");
            IList<FactoredLexiconEvent> tuningSet = GetTuningSet(devTreebank, lexicon, tlpp);
            System.Console.Out.Printf("...Done! (%d events)%n", tuningSet.Count);

            // Score every tuning event and tally the mistakes by gold tag.
            // TODO(spenceg): Implement tagging accuracy with FactLex
            int correctCount = 0;
            ICounter<string> errors = new ClassicCounter<string>();
            foreach (FactoredLexiconEvent tuningEvent in tuningSet)
            {
                IEnumerator<IntTaggedWord> taggings = lexicon.RuleIteratorByWord(tuningEvent.Word(), tuningEvent.GetLoc(), tuningEvent.FeatureStr());
                ICounter<int> logScores = new ClassicCounter<int>();
                bool sawTagging = false;
                int  goldTagId  = -1;

                while (taggings.MoveNext())
                {
                    sawTagging = true;
                    IntTaggedWord candidate = taggings.Current;
                    if (candidate.Tag() == tuningEvent.TagId())
                    {
                        log.Info("GOLD-");
                        goldTagId = candidate.Tag();
                    }
                    float tagScore = lexicon.Score(candidate, tuningEvent.GetLoc(), tuningEvent.Word(), tuningEvent.FeatureStr());
                    logScores.IncrementCount(candidate.Tag(), tagScore);
                }

                if (!sawTagging)
                {
                    System.Console.Error.Printf("NO TAGGINGS: %s %s%n", tuningEvent.Word(), tuningEvent.FeatureStr());
                }
                else
                {
                    // The hypothesis is the highest-scoring tag.
                    int hypTagId = Counters.Argmax(logScores);
                    if (hypTagId == goldTagId)
                    {
                        ++correctCount;
                    }
                    else
                    {
                        // A gold tag that was never proposed is booked under "UNSEEN".
                        string goldTag;
                        if (goldTagId < 0)
                        {
                            goldTag = "UNSEEN";
                        }
                        else
                        {
                            goldTag = lexicon.tagIndex.Get(goldTagId);
                        }
                        errors.IncrementCount(goldTag);
                    }
                }
                log.Info();
            }

            // Output accuracy.
            double acc = correctCount / (double)tuningSet.Count;
            System.Console.Error.Printf("%n%nACCURACY: %.2f%n%n", acc * 100.0);

            // Keys are sorted on the raw counts first; the counter is normalized afterwards for printing.
            log.Info("% of errors by type:");
            IList<string> biggestKeys = new List<string>(errors.KeySet());
            biggestKeys.Sort(Counters.ToComparator(errors, false, true));
            Counters.Normalize(errors);
            foreach (string tag in biggestKeys)
            {
                System.Console.Error.Printf("%s\t%.2f%n", tag, errors.GetCount(tag) * 100.0);
            }
        }
예제 #25
0
        /* some documentation for Roger's convenience
         * {pcfg,dep,combo}{PE,DE,TE} are precision/dep/tagging evals for the models
         *
         * parser is the PCFG parser
         * dparser is the dependency parser
         * bparser is the combining parser
         *
         * during testing:
         * tree is the test tree (gold tree)
         * binaryTree is the gold tree binarized
         * tree2b is the best PCFG parse, binarized
         * tree2 is the best PCFG parse (debinarized)
         * tree3 is the dependency parse, binarized
         * tree3db is the dependency parser, debinarized
         * tree4 is the best combo parse, binarized and then debinarized
         * tree4b is the best combo parse, binarized
         */
        public static void Main(string[] args)
        {
            Options op = new Options(new EnglishTreebankParserParams());

            // op.tlpParams may be changed to something else later, so don't use it till
            // after options are parsed.
            StringUtils.LogInvocationString(log, args);
            string path          = "/u/nlp/stuff/corpora/Treebank3/parsed/mrg/wsj";
            int    trainLow      = 200;
            int    trainHigh     = 2199;
            int    testLow       = 2200;
            int    testHigh      = 2219;
            string serializeFile = null;
            int    i             = 0;

            while (i < args.Length && args[i].StartsWith("-"))
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-path") && (i + 1 < args.Length))
                {
                    path = args[i + 1];
                    i   += 2;
                }
                else
                {
                    if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-train") && (i + 2 < args.Length))
                    {
                        trainLow  = System.Convert.ToInt32(args[i + 1]);
                        trainHigh = System.Convert.ToInt32(args[i + 2]);
                        i        += 3;
                    }
                    else
                    {
                        if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-test") && (i + 2 < args.Length))
                        {
                            testLow  = System.Convert.ToInt32(args[i + 1]);
                            testHigh = System.Convert.ToInt32(args[i + 2]);
                            i       += 3;
                        }
                        else
                        {
                            if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-serialize") && (i + 1 < args.Length))
                            {
                                serializeFile = args[i + 1];
                                i            += 2;
                            }
                            else
                            {
                                if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-tLPP") && (i + 1 < args.Length))
                                {
                                    try
                                    {
                                        op.tlpParams = (ITreebankLangParserParams)System.Activator.CreateInstance(Sharpen.Runtime.GetType(args[i + 1]));
                                    }
                                    catch (TypeLoadException e)
                                    {
                                        log.Info("Class not found: " + args[i + 1]);
                                        throw new Exception(e);
                                    }
                                    catch (InstantiationException e)
                                    {
                                        log.Info("Couldn't instantiate: " + args[i + 1] + ": " + e.ToString());
                                        throw new Exception(e);
                                    }
                                    catch (MemberAccessException e)
                                    {
                                        log.Info("illegal access" + e);
                                        throw new Exception(e);
                                    }
                                    i += 2;
                                }
                                else
                                {
                                    if (args[i].Equals("-encoding"))
                                    {
                                        // sets encoding for TreebankLangParserParams
                                        op.tlpParams.SetInputEncoding(args[i + 1]);
                                        op.tlpParams.SetOutputEncoding(args[i + 1]);
                                        i += 2;
                                    }
                                    else
                                    {
                                        i = op.SetOptionOrWarn(args, i);
                                    }
                                }
                            }
                        }
                    }
                }
            }
            // System.out.println(tlpParams.getClass());
            ITreebankLanguagePack tlp = op.tlpParams.TreebankLanguagePack();

            op.trainOptions.sisterSplitters = Generics.NewHashSet(Arrays.AsList(op.tlpParams.SisterSplitters()));
            //    BinarizerFactory.TreeAnnotator.setTreebankLang(tlpParams);
            PrintWriter pw = op.tlpParams.Pw();

            op.testOptions.Display();
            op.trainOptions.Display();
            op.Display();
            op.tlpParams.Display();
            // setup tree transforms
            Treebank       trainTreebank = op.tlpParams.MemoryTreebank();
            MemoryTreebank testTreebank  = op.tlpParams.TestMemoryTreebank();

            // Treebank blippTreebank = ((EnglishTreebankParserParams) tlpParams).diskTreebank();
            // String blippPath = "/afs/ir.stanford.edu/data/linguistic-data/BLLIP-WSJ/";
            // blippTreebank.loadPath(blippPath, "", true);
            Timing.StartTime();
            log.Info("Reading trees...");
            testTreebank.LoadPath(path, new NumberRangeFileFilter(testLow, testHigh, true));
            if (op.testOptions.increasingLength)
            {
                testTreebank.Sort(new TreeLengthComparator());
            }
            trainTreebank.LoadPath(path, new NumberRangeFileFilter(trainLow, trainHigh, true));
            Timing.Tick("done.");
            log.Info("Binarizing trees...");
            TreeAnnotatorAndBinarizer binarizer;

            if (!op.trainOptions.leftToRight)
            {
                binarizer = new TreeAnnotatorAndBinarizer(op.tlpParams, op.forceCNF, !op.trainOptions.OutsideFactor(), true, op);
            }
            else
            {
                binarizer = new TreeAnnotatorAndBinarizer(op.tlpParams.HeadFinder(), new LeftHeadFinder(), op.tlpParams, op.forceCNF, !op.trainOptions.OutsideFactor(), true, op);
            }
            CollinsPuncTransformer collinsPuncTransformer = null;

            if (op.trainOptions.collinsPunc)
            {
                collinsPuncTransformer = new CollinsPuncTransformer(tlp);
            }
            ITreeTransformer debinarizer      = new Debinarizer(op.forceCNF);
            IList <Tree>     binaryTrainTrees = new List <Tree>();

            if (op.trainOptions.selectiveSplit)
            {
                op.trainOptions.splitters = ParentAnnotationStats.GetSplitCategories(trainTreebank, op.trainOptions.tagSelectiveSplit, 0, op.trainOptions.selectiveSplitCutOff, op.trainOptions.tagSelectiveSplitCutOff, op.tlpParams.TreebankLanguagePack());
                if (op.trainOptions.deleteSplitters != null)
                {
                    IList <string> deleted = new List <string>();
                    foreach (string del in op.trainOptions.deleteSplitters)
                    {
                        string baseDel    = tlp.BasicCategory(del);
                        bool   checkBasic = del.Equals(baseDel);
                        for (IEnumerator <string> it = op.trainOptions.splitters.GetEnumerator(); it.MoveNext();)
                        {
                            string elem     = it.Current;
                            string baseElem = tlp.BasicCategory(elem);
                            bool   delStr   = checkBasic && baseElem.Equals(baseDel) || elem.Equals(del);
                            if (delStr)
                            {
                                it.Remove();
                                deleted.Add(elem);
                            }
                        }
                    }
                    log.Info("Removed from vertical splitters: " + deleted);
                }
            }
            if (op.trainOptions.selectivePostSplit)
            {
                ITreeTransformer myTransformer = new TreeAnnotator(op.tlpParams.HeadFinder(), op.tlpParams, op);
                Treebank         annotatedTB   = trainTreebank.Transform(myTransformer);
                op.trainOptions.postSplitters = ParentAnnotationStats.GetSplitCategories(annotatedTB, true, 0, op.trainOptions.selectivePostSplitCutOff, op.trainOptions.tagSelectivePostSplitCutOff, op.tlpParams.TreebankLanguagePack());
            }
            if (op.trainOptions.hSelSplit)
            {
                binarizer.SetDoSelectiveSplit(false);
                foreach (Tree tree in trainTreebank)
                {
                    if (op.trainOptions.collinsPunc)
                    {
                        tree = collinsPuncTransformer.TransformTree(tree);
                    }
                    //tree.pennPrint(tlpParams.pw());
                    tree = binarizer.TransformTree(tree);
                }
                //binaryTrainTrees.add(tree);
                binarizer.SetDoSelectiveSplit(true);
            }
            foreach (Tree tree_1 in trainTreebank)
            {
                if (op.trainOptions.collinsPunc)
                {
                    tree_1 = collinsPuncTransformer.TransformTree(tree_1);
                }
                tree_1 = binarizer.TransformTree(tree_1);
                binaryTrainTrees.Add(tree_1);
            }
            if (op.testOptions.verbose)
            {
                binarizer.DumpStats();
            }
            IList <Tree> binaryTestTrees = new List <Tree>();

            foreach (Tree tree_2 in testTreebank)
            {
                if (op.trainOptions.collinsPunc)
                {
                    tree_2 = collinsPuncTransformer.TransformTree(tree_2);
                }
                tree_2 = binarizer.TransformTree(tree_2);
                binaryTestTrees.Add(tree_2);
            }
            Timing.Tick("done.");
            // binarization
            BinaryGrammar      bg = null;
            UnaryGrammar       ug = null;
            IDependencyGrammar dg = null;
            // DependencyGrammar dgBLIPP = null;
            ILexicon        lex        = null;
            IIndex <string> stateIndex = new HashIndex <string>();
            // extract grammars
            IExtractor <Pair <UnaryGrammar, BinaryGrammar> > bgExtractor = new BinaryGrammarExtractor(op, stateIndex);

            //Extractor bgExtractor = new SmoothedBinaryGrammarExtractor();//new BinaryGrammarExtractor();
            // Extractor lexExtractor = new LexiconExtractor();
            //Extractor dgExtractor = new DependencyMemGrammarExtractor();
            if (op.doPCFG)
            {
                log.Info("Extracting PCFG...");
                Pair <UnaryGrammar, BinaryGrammar> bgug = null;
                if (op.trainOptions.cheatPCFG)
                {
                    IList <Tree> allTrees = new List <Tree>(binaryTrainTrees);
                    Sharpen.Collections.AddAll(allTrees, binaryTestTrees);
                    bgug = bgExtractor.Extract(allTrees);
                }
                else
                {
                    bgug = bgExtractor.Extract(binaryTrainTrees);
                }
                bg = bgug.second;
                bg.SplitRules();
                ug = bgug.first;
                ug.PurgeRules();
                Timing.Tick("done.");
            }
            log.Info("Extracting Lexicon...");
            IIndex <string> wordIndex = new HashIndex <string>();
            IIndex <string> tagIndex  = new HashIndex <string>();

            lex = op.tlpParams.Lex(op, wordIndex, tagIndex);
            lex.InitializeTraining(binaryTrainTrees.Count);
            lex.Train(binaryTrainTrees);
            lex.FinishTraining();
            Timing.Tick("done.");
            if (op.doDep)
            {
                log.Info("Extracting Dependencies...");
                binaryTrainTrees.Clear();
                IExtractor <IDependencyGrammar> dgExtractor = new MLEDependencyGrammarExtractor(op, wordIndex, tagIndex);
                // dgBLIPP = (DependencyGrammar) dgExtractor.extract(new ConcatenationIterator(trainTreebank.iterator(),blippTreebank.iterator()),new TransformTreeDependency(tlpParams,true));
                // DependencyGrammar dg1 = dgExtractor.extract(trainTreebank.iterator(), new TransformTreeDependency(op.tlpParams, true));
                //dgBLIPP=(DependencyGrammar)dgExtractor.extract(blippTreebank.iterator(),new TransformTreeDependency(tlpParams));
                //dg = (DependencyGrammar) dgExtractor.extract(new ConcatenationIterator(trainTreebank.iterator(),blippTreebank.iterator()),new TransformTreeDependency(tlpParams));
                // dg=new DependencyGrammarCombination(dg1,dgBLIPP,2);
                dg = dgExtractor.Extract(binaryTrainTrees);
                //uses information whether the words are known or not, discards unknown words
                Timing.Tick("done.");
                //System.out.print("Extracting Unknown Word Model...");
                //UnknownWordModel uwm = (UnknownWordModel)uwmExtractor.extract(binaryTrainTrees);
                //Timing.tick("done.");
                System.Console.Out.Write("Tuning Dependency Model...");
                dg.Tune(binaryTestTrees);
                //System.out.println("TUNE DEPS: "+tuneDeps);
                Timing.Tick("done.");
            }
            BinaryGrammar      boundBG = bg;
            UnaryGrammar       boundUG = ug;
            IGrammarProjection gp      = new NullGrammarProjection(bg, ug);

            // serialization
            if (serializeFile != null)
            {
                log.Info("Serializing parser...");
                LexicalizedParser parser = new LexicalizedParser(lex, bg, ug, dg, stateIndex, wordIndex, tagIndex, op);
                parser.SaveParserToSerialized(serializeFile);
                Timing.Tick("done.");
            }
            // test: pcfg-parse and output
            ExhaustivePCFGParser parser_1 = null;

            if (op.doPCFG)
            {
                parser_1 = new ExhaustivePCFGParser(boundBG, boundUG, lex, op, stateIndex, wordIndex, tagIndex);
            }
            ExhaustiveDependencyParser dparser = ((op.doDep && !op.testOptions.useFastFactored) ? new ExhaustiveDependencyParser(dg, lex, op, wordIndex, tagIndex) : null);
            IScorer scorer = (op.doPCFG ? new TwinScorer(new ProjectionScorer(parser_1, gp, op), dparser) : null);
            //Scorer scorer = parser;
            BiLexPCFGParser bparser = null;

            if (op.doPCFG && op.doDep)
            {
                bparser = (op.testOptions.useN5) ? new BiLexPCFGParser.N5BiLexPCFGParser(scorer, parser_1, dparser, bg, ug, dg, lex, op, gp, stateIndex, wordIndex, tagIndex) : new BiLexPCFGParser(scorer, parser_1, dparser, bg, ug, dg, lex, op, gp, stateIndex
                                                                                                                                                                                                    , wordIndex, tagIndex);
            }
            Evalb        pcfgPE         = new Evalb("pcfg  PE", true);
            Evalb        comboPE        = new Evalb("combo PE", true);
            AbstractEval pcfgCB         = new Evalb.CBEval("pcfg  CB", true);
            AbstractEval pcfgTE         = new TaggingEval("pcfg  TE");
            AbstractEval comboTE        = new TaggingEval("combo TE");
            AbstractEval pcfgTEnoPunct  = new TaggingEval("pcfg nopunct TE");
            AbstractEval comboTEnoPunct = new TaggingEval("combo nopunct TE");
            AbstractEval depTE          = new TaggingEval("depnd TE");
            AbstractEval depDE          = new UnlabeledAttachmentEval("depnd DE", true, null, tlp.PunctuationWordRejectFilter());
            AbstractEval comboDE        = new UnlabeledAttachmentEval("combo DE", true, null, tlp.PunctuationWordRejectFilter());

            if (op.testOptions.evalb)
            {
                EvalbFormatWriter.InitEVALBfiles(op.tlpParams);
            }
            // int[] countByLength = new int[op.testOptions.maxLength+1];
            // Use a reflection ruse, so one can run this without needing the
            // tagger.  Using a function rather than a MaxentTagger means we
            // can distribute a version of the parser that doesn't include the
            // entire tagger.
            IFunction <IList <IHasWord>, List <TaggedWord> > tagger = null;

            if (op.testOptions.preTag)
            {
                try
                {
                    Type[]   argsClass = new Type[] { typeof(string) };
                    object[] arguments = new object[] { op.testOptions.taggerSerializedFile };
                    tagger = (IFunction <IList <IHasWord>, List <TaggedWord> >)Sharpen.Runtime.GetType("edu.stanford.nlp.tagger.maxent.MaxentTagger").GetConstructor(argsClass).NewInstance(arguments);
                }
                catch (Exception e)
                {
                    log.Info(e);
                    log.Info("Warning: No pretagging of sentences will be done.");
                }
            }
            for (int tNum = 0; tNum < ttSize; tNum++)
            {
                Tree tree        = testTreebank[tNum];
                int  testTreeLen = tree_2.Yield().Count;
                if (testTreeLen > op.testOptions.maxLength)
                {
                    continue;
                }
                Tree binaryTree = binaryTestTrees[tNum];
                // countByLength[testTreeLen]++;
                System.Console.Out.WriteLine("-------------------------------------");
                System.Console.Out.WriteLine("Number: " + (tNum + 1));
                System.Console.Out.WriteLine("Length: " + testTreeLen);
                //tree.pennPrint(pw);
                // System.out.println("XXXX The binary tree is");
                // binaryTree.pennPrint(pw);
                //System.out.println("Here are the tags in the lexicon:");
                //System.out.println(lex.showTags());
                //System.out.println("Here's the tagnumberer:");
                //System.out.println(Numberer.getGlobalNumberer("tags").toString());
                long timeMil1 = Runtime.CurrentTimeMillis();
                Timing.Tick("Starting parse.");
                if (op.doPCFG)
                {
                    //log.info(op.testOptions.forceTags);
                    if (op.testOptions.forceTags)
                    {
                        if (tagger != null)
                        {
                            //System.out.println("Using a tagger to set tags");
                            //System.out.println("Tagged sentence as: " + tagger.processSentence(cutLast(wordify(binaryTree.yield()))).toString(false));
                            parser_1.Parse(AddLast(tagger.Apply(CutLast(Wordify(binaryTree.Yield())))));
                        }
                        else
                        {
                            //System.out.println("Forcing tags to match input.");
                            parser_1.Parse(CleanTags(binaryTree.TaggedYield(), tlp));
                        }
                    }
                    else
                    {
                        // System.out.println("XXXX Parsing " + binaryTree.yield());
                        parser_1.Parse(binaryTree.YieldHasWord());
                    }
                }
                //Timing.tick("Done with pcfg phase.");
                if (op.doDep)
                {
                    dparser.Parse(binaryTree.YieldHasWord());
                }
                //Timing.tick("Done with dependency phase.");
                bool bothPassed = false;
                if (op.doPCFG && op.doDep)
                {
                    bothPassed = bparser.Parse(binaryTree.YieldHasWord());
                }
                //Timing.tick("Done with combination phase.");
                long timeMil2 = Runtime.CurrentTimeMillis();
                long elapsed  = timeMil2 - timeMil1;
                log.Info("Time: " + ((int)(elapsed / 100)) / 10.00 + " sec.");
                //System.out.println("PCFG Best Parse:");
                Tree tree2b = null;
                Tree tree2  = null;
                //System.out.println("Got full best parse...");
                if (op.doPCFG)
                {
                    tree2b = parser_1.GetBestParse();
                    tree2  = debinarizer.TransformTree(tree2b);
                }
                //System.out.println("Debinarized parse...");
                //tree2.pennPrint();
                //System.out.println("DepG Best Parse:");
                Tree tree3   = null;
                Tree tree3db = null;
                if (op.doDep)
                {
                    tree3 = dparser.GetBestParse();
                    // was: but wrong Tree tree3db = debinarizer.transformTree(tree2);
                    tree3db = debinarizer.TransformTree(tree3);
                    tree3.PennPrint(pw);
                }
                //tree.pennPrint();
                //((Tree)binaryTrainTrees.get(tNum)).pennPrint();
                //System.out.println("Combo Best Parse:");
                Tree tree4 = null;
                if (op.doPCFG && op.doDep)
                {
                    try
                    {
                        tree4 = bparser.GetBestParse();
                        if (tree4 == null)
                        {
                            tree4 = tree2b;
                        }
                    }
                    catch (ArgumentNullException)
                    {
                        log.Info("Blocked, using PCFG parse!");
                        tree4 = tree2b;
                    }
                }
                if (op.doPCFG && !bothPassed)
                {
                    tree4 = tree2b;
                }
                //tree4.pennPrint();
                if (op.doDep)
                {
                    depDE.Evaluate(tree3, binaryTree, pw);
                    depTE.Evaluate(tree3db, tree_2, pw);
                }
                ITreeTransformer tc      = op.tlpParams.Collinizer();
                ITreeTransformer tcEvalb = op.tlpParams.CollinizerEvalb();
                if (op.doPCFG)
                {
                    // System.out.println("XXXX Best PCFG was: ");
                    // tree2.pennPrint();
                    // System.out.println("XXXX Transformed best PCFG is: ");
                    // tc.transformTree(tree2).pennPrint();
                    //System.out.println("True Best Parse:");
                    //tree.pennPrint();
                    //tc.transformTree(tree).pennPrint();
                    pcfgPE.Evaluate(tc.TransformTree(tree2), tc.TransformTree(tree_2), pw);
                    pcfgCB.Evaluate(tc.TransformTree(tree2), tc.TransformTree(tree_2), pw);
                    Tree tree4b = null;
                    if (op.doDep)
                    {
                        comboDE.Evaluate((bothPassed ? tree4 : tree3), binaryTree, pw);
                        tree4b = tree4;
                        tree4  = debinarizer.TransformTree(tree4);
                        if (op.nodePrune)
                        {
                            NodePruner np = new NodePruner(parser_1, debinarizer);
                            tree4 = np.Prune(tree4);
                        }
                        //tree4.pennPrint();
                        comboPE.Evaluate(tc.TransformTree(tree4), tc.TransformTree(tree_2), pw);
                    }
                    //pcfgTE.evaluate(tree2, tree);
                    pcfgTE.Evaluate(tcEvalb.TransformTree(tree2), tcEvalb.TransformTree(tree_2), pw);
                    pcfgTEnoPunct.Evaluate(tc.TransformTree(tree2), tc.TransformTree(tree_2), pw);
                    if (op.doDep)
                    {
                        comboTE.Evaluate(tcEvalb.TransformTree(tree4), tcEvalb.TransformTree(tree_2), pw);
                        comboTEnoPunct.Evaluate(tc.TransformTree(tree4), tc.TransformTree(tree_2), pw);
                    }
                    System.Console.Out.WriteLine("PCFG only: " + parser_1.ScoreBinarizedTree(tree2b, 0));
                    //tc.transformTree(tree2).pennPrint();
                    tree2.PennPrint(pw);
                    if (op.doDep)
                    {
                        System.Console.Out.WriteLine("Combo: " + parser_1.ScoreBinarizedTree(tree4b, 0));
                        // tc.transformTree(tree4).pennPrint(pw);
                        tree4.PennPrint(pw);
                    }
                    System.Console.Out.WriteLine("Correct:" + parser_1.ScoreBinarizedTree(binaryTree, 0));

                    /*
                     * if (parser.scoreBinarizedTree(tree2b,true) < parser.scoreBinarizedTree(binaryTree,true)) {
                     * System.out.println("SCORE INVERSION");
                     * parser.validateBinarizedTree(binaryTree,0);
                     * }
                     */
                    tree_2.PennPrint(pw);
                }
                // end if doPCFG
                if (op.testOptions.evalb)
                {
                    if (op.doPCFG && op.doDep)
                    {
                        EvalbFormatWriter.WriteEVALBline(tcEvalb.TransformTree(tree_2), tcEvalb.TransformTree(tree4));
                    }
                    else
                    {
                        if (op.doPCFG)
                        {
                            EvalbFormatWriter.WriteEVALBline(tcEvalb.TransformTree(tree_2), tcEvalb.TransformTree(tree2));
                        }
                        else
                        {
                            if (op.doDep)
                            {
                                EvalbFormatWriter.WriteEVALBline(tcEvalb.TransformTree(tree_2), tcEvalb.TransformTree(tree3db));
                            }
                        }
                    }
                }
            }
            // end for each tree in test treebank
            if (op.testOptions.evalb)
            {
                EvalbFormatWriter.CloseEVALBfiles();
            }
            // op.testOptions.display();
            if (op.doPCFG)
            {
                pcfgPE.Display(false, pw);
                System.Console.Out.WriteLine("Grammar size: " + stateIndex.Size());
                pcfgCB.Display(false, pw);
                if (op.doDep)
                {
                    comboPE.Display(false, pw);
                }
                pcfgTE.Display(false, pw);
                pcfgTEnoPunct.Display(false, pw);
                if (op.doDep)
                {
                    comboTE.Display(false, pw);
                    comboTEnoPunct.Display(false, pw);
                }
            }
            if (op.doDep)
            {
                depTE.Display(false, pw);
                depDE.Display(false, pw);
            }
            if (op.doPCFG && op.doDep)
            {
                comboDE.Display(false, pw);
            }
        }
예제 #26
0
 /// <summary>
 /// Invokes <c>HashIndex.GenerateHashList</c> with the relative path "./FullHashlist".
 /// </summary>
 public void execute()
 {
     const string hashlistPath = "./FullHashlist";

     HashIndex.GenerateHashList(hashlistPath);
 }
예제 #27
0
    /// <summary>
    /// Looks up the raw file registered for <paramref name="path"/> and writes it out,
    /// either through the processor registered for its extension or through
    /// <c>WriteFile</c>.
    /// </summary>
    /// <remarks>
    /// A missing entry is reported on <c>error_output</c>. Entries with no bundle entries,
    /// or whose name id is already in <c>ExtractedPaths</c>, are skipped silently. For
    /// ".object" files the sibling "model" and "cooked_physics" entries with the same
    /// path are written first when they exist. Any exception raised while writing is
    /// reported on <c>error_output</c> and not rethrown.
    /// </remarks>
    /// <param name="path">Path of the file to process, including its extension.</param>
    private void ProcessFile(string path)
    {
        string   extension = Path.GetExtension(path);
        Idstring nameId    = HashIndex.Get(Path.GetFileNameWithoutExtension(path));
        var      fileKey   = new Tuple <Idstring, Idstring, Idstring>(nameId, new Idstring(0), HashIndex.Get(extension));

        if (!_browser.RawFiles.ContainsKey(fileKey))
        {
            error_output.Write(string.Format("File with path {0} does not exist!\n", path));
            error_output.Flush();
            return;
        }

        FileEntry entry = _browser.RawFiles[fileKey];
        bool nothingToExtract = entry.BundleEntries.Count == 0 || ExtractedPaths.Contains(nameId);

        if (nothingToExtract)
        {
            return;
        }

        try
        {
            if (extension == ".object")
            {
                // Companion entries share the object's directory and base name and differ
                // only in the extension id; "model" is handled before "cooked_physics".
                foreach (string companionExtension in new[] { "model", "cooked_physics" })
                {
                    string   companionPath = Path.Combine(Path.GetDirectoryName(path), Path.GetFileNameWithoutExtension(path)).Replace("\\", "/");
                    Idstring companionId   = HashIndex.Get(companionPath);
                    var      companionKey  = new Tuple <Idstring, Idstring, Idstring>(companionId, new Idstring(0), HashIndex.Get(companionExtension));

                    if (_browser.RawFiles.ContainsKey(companionKey))
                    {
                        WriteFile(_browser.RawFiles[companionKey]);
                    }
                }
            }

            // Prefer a registered processor for this extension; otherwise write the file as is.
            string processorKey = entry._extension.ToString();

            if (FileProcessors.ContainsKey(processorKey))
            {
                FileProcessors[processorKey].Invoke(entry);
            }
            else
            {
                WriteFile(entry);
            }
        }
        catch (Exception ex)
        {
            error_output.Write("Exception occured on file: {0}\n", entry.Path);
            error_output.Write(ex.Message + "\n");
            error_output.Write(ex.StackTrace + "\n");
            error_output.Flush();
        }
    }
예제 #28
0
        /// <summary>
        /// Provides some testing and opportunities for exploration of the
        /// probabilities of a BaseLexicon.
        /// </summary>
        /// <remarks>
        /// Provides some testing and opportunities for exploration of the
        /// probabilities of a BaseLexicon.  What's here currently probably
        /// only works for the English Penn Treeebank, as it uses default
        /// constructors.  Of the words given to test on,
        /// the first is treated as sentence initial, and the rest as not
        /// sentence initial.
        /// </remarks>
        /// <param name="args">
        /// The command line arguments:
        /// java BaseLexicon treebankPath fileRange unknownWordModel words
        /// </param>
        public static void Main(string[] args)
        {
            // Three leading arguments are mandatory (treebankPath, fileRange,
            // unknownWordModel); any further arguments are the words to inspect.
            if (args.Length < 3)
            {
                log.Info("java BaseLexicon treebankPath fileRange unknownWordModel words*");
                return;
            }
            System.Console.Out.Write("Training BaseLexicon from " + args[0] + ' ' + args[1] + " ... ");
            Treebank tb = new DiskTreebank();

            tb.LoadPath(args[0], new NumberRangesFileFilter(args[1], true));
            // TODO: change this interface so the lexicon creates its own indices?
            IIndex <string> wordIndex = new HashIndex <string>();
            IIndex <string> tagIndex  = new HashIndex <string>();
            Options         op        = new Options();

            op.lexOptions.useUnknownWordSignatures = System.Convert.ToInt32(args[2]);
            Edu.Stanford.Nlp.Parser.Lexparser.BaseLexicon lex = new Edu.Stanford.Nlp.Parser.Lexparser.BaseLexicon(op, wordIndex, tagIndex);
            lex.InitializeTraining(tb.Count);
            lex.Train(tb);
            lex.FinishTraining();
            System.Console.Out.WriteLine("done.");
            System.Console.Out.WriteLine();
            NumberFormat nf = NumberFormat.GetNumberInstance();

            nf.SetMaximumFractionDigits(4);
            IList <string> impos = new List <string>();

            // args[3] is the first test word, so (i - 3) is the word's position
            // in the test "sentence": position 0 is the sentence-initial word.
            for (int i = 3; i < args.Length; i++)
            {
                if (lex.IsKnown(args[i]))
                {
                    System.Console.Out.WriteLine(args[i] + " is a known word.  Log probabilities [log P(w|t)] for its taggings are:");
                    for (IEnumerator <IntTaggedWord> it = lex.RuleIteratorByWord(wordIndex.AddToIndex(args[i]), i - 3, null); it.MoveNext();)
                    {
                        IntTaggedWord iTW = it.Current;
                        System.Console.Out.WriteLine(StringUtils.Pad(iTW, 24) + nf.Format(lex.Score(iTW, i - 3, wordIndex.Get(iTW.word), null)));
                    }
                }
                else
                {
                    string sig = lex.GetUnknownWordModel().GetSignature(args[i], i - 3);
                    // Both alternatives carry a leading space so the unknown-word level
                    // and the position label never run together in the output.
                    System.Console.Out.WriteLine(args[i] + " is an unknown word.  Signature with uwm " + lex.GetUnknownWordModel().GetUnknownLevel() + ((i == 3) ? " init" : " non-init") + " is: " + sig);
                    impos.Clear();
                    IList <string> lis = new List <string>(tagIndex.ObjectsList());
                    lis.Sort();
                    // Score the word against every tag; tags scoring negative infinity
                    // are collected and reported together after the scored ones.
                    foreach (string tStr in lis)
                    {
                        IntTaggedWord iTW   = new IntTaggedWord(args[i], tStr, wordIndex, tagIndex);
                        double        score = lex.Score(iTW, 1, args[i], null);
                        if (score == float.NegativeInfinity)
                        {
                            impos.Add(tStr);
                        }
                        else
                        {
                            System.Console.Out.WriteLine(StringUtils.Pad(iTW, 24) + nf.Format(score));
                        }
                    }
                    if (impos.Count > 0)
                    {
                        // Concatenating the list itself would print its type name,
                        // so the tags are joined explicitly into "[a, b, c]" form.
                        System.Console.Out.WriteLine(args[i] + " impossible tags: " + "[" + string.Join(", ", impos) + "]");
                    }
                }
                System.Console.Out.WriteLine();
            }
        }
예제 #29
0
 /// <summary>
 /// Calls <c>HashIndex.GenerateHashList</c> with the argument "./FullHashlist".
 /// </summary>
 /// <param name="browser">Not read by this method.</param>
 public void execute(PackageBrowser browser) => HashIndex.GenerateHashList("./FullHashlist");
예제 #30
0
        /// <summary>
        /// Builds a "folder/name" string for an entry from its hashes and raw data.
        /// </summary>
        /// <remarks>
        /// The folder part is the <c>HashList</c> entry for the parent hash when one
        /// exists, otherwise the parent hash in hexadecimal. The name part comes from
        /// <c>GetBinaryHeaderName</c> for the .bntx/.bfres/.bnsh/.bfsha extensions, and
        /// otherwise from <c>HashList</c> (or the file hash in hexadecimal when absent).
        /// Unless both the folder and the name were resolved, the result is suffixed
        /// with <c>[FullHash=...]</c> followed by the extension.
        /// </remarks>
        /// <param name="fullHash">Hash written (in hexadecimal) into the FullHash tag.</param>
        /// <param name="fileHashIndex">Supplies the file hash and, via Parent, the folder hash.</param>
        /// <param name="Data">Raw bytes handed to <c>FindMatch</c> and <c>GetBinaryHeaderName</c>.</param>
        /// <returns>The composed string described above.</returns>
        private string GetString(ulong fullHash, HashIndex fileHashIndex, byte[] Data)
        {
            var parentHash = fileHashIndex.Parent.hash;
            var entryHash  = fileHashIndex.hash;

            // A known folder uses its listed name; an unknown one falls back to hex.
            bool folderKnown = HashList.ContainsKey(parentHash);
            string prefix = folderKnown
                ? $"{HashList[parentHash]}/"
                : $"{parentHash.ToString("X")}/";

            string ext = FindMatch(Data);

            string name;
            bool   nameKnown;

            switch (ext)
            {
                case ".bntx":
                case ".bfres":
                case ".bnsh":
                case ".bfsha":
                    // These formats take their name from the binary header.
                    name      = GetBinaryHeaderName(Data);
                    nameKnown = true;
                    if (ext != ".bnsh")
                    {
                        name = $"{name}{ext}";
                    }
                    // Check for matches for shaders: keep whichever shader suffix
                    // reproduces the file hash; if neither does, the bare name stays.
                    else if (FNV64A1.Calculate($"{name}.bnsh_fsh") == entryHash)
                    {
                        name = $"{name}.bnsh_fsh";
                    }
                    else if (FNV64A1.Calculate($"{name}.bnsh_vsh") == entryHash)
                    {
                        name = $"{name}.bnsh_vsh";
                    }
                    break;

                default:
                    nameKnown = HashList.ContainsKey(entryHash);
                    name = nameKnown
                        ? $"{HashList[entryHash]}"
                        : $"{entryHash.ToString("X")}";
                    break;
            }

            // Only a fully resolved folder + name is returned without the hash tag.
            if (folderKnown && nameKnown)
            {
                return $"{prefix}{name}";
            }

            return $"{prefix}{name}[FullHash={fullHash.ToString("X")}]{ext}";
        }