/// <summary>Applies a feature count threshold to the RVFDataset.</summary>
/// <remarks>
/// Applies a feature count threshold to the RVFDataset. All features that
/// occur fewer than <i>k</i> times are expunged.
/// </remarks>
public override void ApplyFeatureCountThreshold(int k)
{
    float[] featureCounts = GetFeatureCounts();
    // Build a compacted index of surviving features, recording for every
    // old feature id either its new id or -1 (expunged).
    HashIndex<F> keptIndex = new HashIndex<F>();
    int[] remap = new int[featureIndex.Size()];
    for (int old = 0; old < remap.Length; old++)
    {
        F feature = featureIndex.Get(old);
        if (featureCounts[old] >= k)
        {
            remap[old] = keptIndex.Count;
            keptIndex.Add(feature);
        }
        else
        {
            remap[old] = -1;
        }
    }
    featureIndex = keptIndex;
    // Rewrite every datum's feature/value arrays in terms of the new ids,
    // sizing the result arrays exactly before filling them.
    for (int d = 0; d < size; d++)
    {
        int kept = 0;
        for (int j = 0; j < data[d].Length; j++)
        {
            if (remap[data[d][j]] >= 0)
            {
                kept++;
            }
        }
        int[] newData = new int[kept];
        double[] newValues = new double[kept];
        int w = 0;
        for (int j = 0; j < data[d].Length; j++)
        {
            int mapped = remap[data[d][j]];
            if (mapped >= 0)
            {
                newData[w] = mapped;
                newValues[w] = values[d][j];
                w++;
            }
        }
        data[d] = newData;
        values[d] = newValues;
    }
}
/// <summary>
/// Reads a NUL-separated hashlist blob out of a package file at the entry's
/// address and loads the resulting path strings into the global HashIndex.
/// </summary>
/// <param name="file">Path of the package file containing the hashlist.</param>
/// <param name="be">Entry describing the blob's address/length; Length == -1 means "read to end of file".</param>
public static void ReadHashlistAndLoad(string file, PackageFileEntry be)
{
    using FileStream fs = new FileStream(file, FileMode.Open, FileAccess.Read);
    using BinaryReader br = new BinaryReader(fs);
    fs.Position = be.Address;
    byte[] data = be.Length == -1
        ? br.ReadBytes((int)(fs.Length - fs.Position))
        : br.ReadBytes((int)be.Length);
    // Each byte is widened to a char (effectively a Latin-1 decode); entries
    // in the blob are separated by NUL characters.
    StringBuilder sb = new StringBuilder(data.Length);
    foreach (byte read in data)
    {
        sb.Append((char)read);
    }
    HashSet<string> new_paths = new HashSet<string>(sb.ToString().Split('\0'));
    // Well-known names that must always be present in the lookup.
    new_paths.Add("idstring_lookup");
    new_paths.Add("existing_banks");
    new_paths.Add("engine-package");
    HashIndex.Load(ref new_paths);
    new_paths.Clear();
    // FIX: removed the explicit br.Close() — the using declarations already
    // dispose both the reader and the stream; closing twice was redundant.
}
/// <summary>
/// Parses a GFLXPACK archive: validates the signature, reads the header,
/// then loads the hash table, hash-index records, and file entries.
/// </summary>
/// <param name="reader">Positioned reader over the archive data.</param>
/// <exception cref="Exception">Thrown when the 8-byte signature is not "GFLXPACK".</exception>
public void Read(FileReader reader)
{
    string magic = reader.ReadString(8, Encoding.ASCII);
    if (magic != "GFLXPACK")
    {
        throw new Exception($"Invalid signature {magic}! Expected GFLXPACK.");
    }
    // Fixed-layout header fields.
    version = reader.ReadInt32();
    _ = reader.ReadUInt32(); // padding dword; read only to advance the stream
    uint fileCount = reader.ReadUInt32();
    FolderCount = reader.ReadInt32();
    ulong fileInfoOffset = reader.ReadUInt64();
    ulong hashArrayOffset = reader.ReadUInt64();
    ulong hashArrayIndexOffset = reader.ReadUInt64();
    // Hash table: one 64-bit hash per file.
    reader.Seek((long)hashArrayOffset, SeekOrigin.Begin);
    for (int i = 0; i < fileCount; i++)
    {
        hashes.Add(reader.ReadUInt64());
    }
    // Hash index records, one per file.
    reader.Seek((long)hashArrayIndexOffset, SeekOrigin.Begin);
    for (int i = 0; i < fileCount; i++)
    {
        HashIndex entry = new HashIndex();
        entry.Read(reader);
        hashIndices.Add(entry);
    }
    // File entries; each tree node is labelled with its corresponding hash.
    reader.Seek((long)fileInfoOffset, SeekOrigin.Begin);
    for (int i = 0; i < fileCount; i++)
    {
        FileEntry fileEntry = new FileEntry();
        fileEntry.Read(reader);
        fileEntry.Text = hashes[i].ToString();
        Nodes.Add(fileEntry);
        files.Add(fileEntry);
    }
}
/// <summary>
/// Runs the heist extraction: locates the world file in the browser's raw
/// files, extracts it (and everything it references), then writes an add.xml
/// manifest of all extracted paths. Progress and errors go to heist_extractor.log.
/// </summary>
/// <param name="browser">Package browser holding the raw file table.</param>
public void execute(PackageBrowser browser)
{
    ExtractedPaths = new HashSet<Idstring>();
    error_output = new StreamWriter("./heist_extractor.log");
    _browser = browser;
    var clock = new System.Diagnostics.Stopwatch();
    clock.Start();
    error_output.Write("Heist Extractor executed\n");
    error_output.Flush();
    // The world file is keyed by (path-hash, language 0, extension-hash).
    Idstring worldName = HashIndex.Get(heist_world);
    Idstring worldExt = HashIndex.Get("world");
    var key = new Tuple<Idstring, Idstring, Idstring>(worldName, new Idstring(0), worldExt);
    if (browser.RawFiles.ContainsKey(key))
    {
        ProcessWorld(browser.RawFiles[key]);
    }
    else
    {
        Console.WriteLine("World File does not exist");
    }
    // Emit an add.xml manifest: one <ext path="..." force="true"/> per path.
    using (StreamWriter str = new StreamWriter(new FileStream(Path.Combine(OutputPath, "add.xml"), FileMode.Create, FileAccess.Write)))
    {
        str.Write("<table>\n");
        foreach (Idstring extracted in ExtractedPaths)
        {
            string[] split = extracted.ToString().Split('.');
            str.Write(String.Format("\t<{0} path=\"{1}\" force=\"true\"/>\n", split[1], split[0]));
        }
        str.Write("</table>\n");
    }
    clock.Stop();
    error_output.Write("Scrape operation took {0} seconds\n", clock.Elapsed.TotalSeconds.ToString());
    error_output.Close();
}
// Application entry ctor: wires crash handling, attaches a console,
// and loads the local hashlist before converters.
public App()
{
#if !DEBUG
    // Release builds: route unhandled dispatcher exceptions to OnException,
    // and only attach a console when a marker file named "debug" exists.
    // (In DEBUG builds both lines below are compiled out, so AllocConsole()
    // runs unconditionally.)
    Dispatcher.UnhandledException += OnException;
    if (File.Exists("debug"))
#endif
    AllocConsole();
    Console.WriteLine("Loading local hashlist");
    if (File.Exists("Data/hashlist"))
    {
        HashIndex.LoadParallel("Data/hashlist");
    }
    else
    {
        Console.WriteLine("Local hashlist is missing!");
    }
    LoadConverters();
}
/// <summary>
/// Builds the tag-to-bin mapping: each tag is projected (via tagProjection,
/// or used verbatim when no projection is set) and assigned a compact bin id.
/// Fills <c>tagBin</c> and sets <c>numTagBins</c>.
/// </summary>
protected internal virtual void InitTagBins()
{
    IIndex<string> binIndex = new HashIndex<string>();
    tagBin = new int[tagIndex.Size()];
    for (int t = 0; t < tagBin.Length; t++)
    {
        string tag = tagIndex.Get(t);
        string bin = tagProjection == null ? tag : tagProjection.Project(tag);
        tagBin[t] = binIndex.AddToIndex(bin);
    }
    numTagBins = binIndex.Size();
}
/// <summary>
/// Trains a complete LexicalizedParser from the given treebank: binarizes the
/// training trees, extracts the PCFG (unary + binary grammars), trains the
/// lexicon, and — when op.doDep is set — extracts a dependency grammar.
/// </summary>
/// <param name="trainTreebank">Treebank of training trees.</param>
/// <returns>A parser bundling lexicon, grammars, dependency grammar (may be null), and the shared indices.</returns>
public virtual LexicalizedParser GetParserDataFromTreebank(Treebank trainTreebank)
{
    log.Info("Binarizing training trees...");
    IList<Tree> binaryTrainTrees = GetAnnotatedBinaryTreebankFromTreebank(trainTreebank);
    Timing.Tick("done.");
    // Shared state index is filled in by the grammar extractor.
    IIndex<string> stateIndex = new HashIndex<string>();
    log.Info("Extracting PCFG...");
    IExtractor<Pair<UnaryGrammar, BinaryGrammar>> bgExtractor = new BinaryGrammarExtractor(op, stateIndex);
    Pair<UnaryGrammar, BinaryGrammar> bgug = bgExtractor.Extract(binaryTrainTrees);
    BinaryGrammar bg = bgug.second;
    bg.SplitRules();
    UnaryGrammar ug = bgug.first;
    ug.PurgeRules();
    Timing.Tick("done.");
    log.Info("Extracting Lexicon...");
    // Word/tag indices are shared between the lexicon and the dependency extractor.
    IIndex<string> wordIndex = new HashIndex<string>();
    IIndex<string> tagIndex = new HashIndex<string>();
    ILexicon lex = op.tlpParams.Lex(op, wordIndex, tagIndex);
    lex.InitializeTraining(binaryTrainTrees.Count);
    lex.Train(binaryTrainTrees);
    lex.FinishTraining();
    Timing.Tick("done.");
    IExtractor<IDependencyGrammar> dgExtractor = op.tlpParams.DependencyGrammarExtractor(op, wordIndex, tagIndex);
    IDependencyGrammar dg = null;
    if (op.doDep)
    {
        log.Info("Extracting Dependencies...");
        dg = dgExtractor.Extract(binaryTrainTrees);
        dg.SetLexicon(lex);
        Timing.Tick("done.");
    }
    log.Info("Done extracting grammars and lexicon.");
    return(new LexicalizedParser(lex, bg, ug, dg, stateIndex, wordIndex, tagIndex, op));
}
/// <summary>Retains the given features in the Dataset.</summary>
/// <remarks>
/// Retains the given features in the Dataset. All features that
/// do not occur in features are expunged.
/// </remarks>
public virtual void RetainFeatures(ICollection<F> features)
{
    // Map each old feature id to its compacted id, or -1 when dropped.
    IIndex<F> keptIndex = new HashIndex<F>();
    int[] remap = new int[featureIndex.Size()];
    for (int old = 0; old < remap.Length; old++)
    {
        F feature = featureIndex.Get(old);
        if (features.Contains(feature))
        {
            remap[old] = keptIndex.Size();
            keptIndex.Add(feature);
        }
        else
        {
            remap[old] = -1;
        }
    }
    featureIndex = keptIndex;
    // Compact every datum's feature array, keeping only retained features.
    for (int d = 0; d < size; d++)
    {
        int kept = 0;
        foreach (int f in data[d])
        {
            if (remap[f] >= 0)
            {
                kept++;
            }
        }
        int[] compacted = new int[kept];
        int w = 0;
        foreach (int f in data[d])
        {
            if (remap[f] >= 0)
            {
                compacted[w++] = remap[f];
            }
        }
        data[d] = compacted;
    }
}
/// <summary>
/// Populates the details dialog from the mod's metadata and builds the
/// replacement-files grid. Items whose bundle path or extension cannot be
/// unhashed are skipped.
/// </summary>
private void ModDetails_Load(object sender, EventArgs e)
{
    Text = "Mod Details - " + _mod.Name;
    ModNameText.Text = _mod.Name;
    AuthorText.Text = _mod.Author;
    DescriptionText.Text = _mod.Description;
    foreach (BundleRewriteItem item in _mod.ItemQueue)
    {
        string path = HashIndex.GetUnhashed(item.BundlePath);
        string extension = HashIndex.GetUnhashed(item.BundleExtension);
        // Only list items we can resolve back to readable names.
        if (string.IsNullOrEmpty(path) || string.IsNullOrEmpty(extension))
        {
            continue;
        }
        BundleRewriteItem row = new BundleRewriteItem();
        row.ReplacementFile = Path.GetFileName(_mod.file) + "/" + item.ReplacementFile;
        row.SourceFile = item.IsLanguageSpecific
            ? path + "." + item.BundleLanguage + "." + extension
            : path + "." + extension;
        _items.Add(row);
    }
    ReplacementFilesGridView.DataSource = _items;
    ReplacementFilesGridView.Update();
}
/// <summary>
/// Writes one extracted file to the output directory and records its Idstring
/// so it is not extracted twice. Skips entries with no bundle backing.
/// </summary>
/// <param name="entry">The file entry to extract.</param>
/// <param name="byt">Optional pre-computed bytes; falls back to entry.FileBytes(), then to empty.</param>
private void WriteFile(FileEntry entry, byte[] byt = null)
{
    Idstring ids = HashIndex.Get(entry.Path);
    // Nothing to write, or already written on an earlier pass.
    if (entry.BundleEntries.Count == 0 || ExtractedPaths.Contains(ids))
    {
        return;
    }
    string outPath = Path.Combine(OutputPath, entry.Path);
    string dir = Path.GetDirectoryName(outPath);
    if (!Directory.Exists(dir))
    {
        Directory.CreateDirectory(dir);
    }
    File.WriteAllBytes(outPath, byt ?? entry.FileBytes() ?? new byte[0]);
    ExtractedPaths.Add(ids);
}
/// <summary>
/// Parses one row of column-separated values into an RVFDatum: the class
/// column becomes the label, skipped columns are ignored, and every other
/// column becomes a (feature-number, value-index) count, where value indices
/// are interned per-feature in <paramref name="indices"/>.
/// </summary>
/// <param name="values">The row's column values.</param>
/// <param name="classColumn">Index of the column holding the class label.</param>
/// <param name="skip">Column indices to ignore entirely.</param>
/// <param name="indices">Per-feature value indices, created on demand.</param>
/// <returns>The populated datum.</returns>
internal static RVFDatum<string, int> ReadDatum(string[] values, int classColumn, ICollection<int> skip, IDictionary<int, IIndex<string>> indices)
{
    ClassicCounter<int> c = new ClassicCounter<int>();
    RVFDatum<string, int> d = new RVFDatum<string, int>(c);
    int attrNo = 0;
    for (int index = 0; index < values.Length; index++)
    {
        if (index == classColumn)
        {
            d.SetLabel(values[index]);
            continue;
        }
        // FIX: was skip.Contains(int.Parse(index)) — int.Parse applied to an
        // int is a leftover from Java's Integer.valueOf and does not compile.
        if (skip.Contains(index))
        {
            continue;
        }
        // FIX: was int.Parse(attrNo); attrNo is already an int.
        int featKey = attrNo;
        // NOTE(review): in Java, map.get returned null for a missing key; the
        // C# indexer throws on BCL dictionaries — verify the dictionary shim
        // used here returns null for absent keys.
        IIndex<string> ind = indices[featKey];
        if (ind == null)
        {
            ind = new HashIndex<string>();
            indices[featKey] = ind;
        }
        // MG: condition on isLocked is useless, since add(E) contains such a condition.
        ind.Add(values[index]);
        int valInd = ind.IndexOf(values[index]);
        if (valInd == -1)
        {
            // Unknown value on a locked index: fall back to index 0 and log.
            valInd = 0;
            logger.Info("unknown attribute value " + values[index] + " of attribute " + attrNo);
        }
        c.IncrementCount(featKey, valInd);
        attrNo++;
    }
    return(d);
}
/// <summary>Removes all features from the dataset that are not in featureSet.</summary>
/// <param name="featureSet"/>
public virtual void SelectFeaturesFromSet(ICollection<F> featureSet)
{
    // remap[oldId] is the feature's id in the new index, or -1 when dropped.
    HashIndex<F> keptIndex = new HashIndex<F>();
    int[] remap = new int[featureIndex.Size()];
    for (int i = 0; i < remap.Length; i++)
    {
        remap[i] = -1;
    }
    foreach (F feature in featureSet)
    {
        int oldID = featureIndex.IndexOf(feature);
        if (oldID >= 0)
        {
            // Valid feature in the current index: keep it.
            remap[oldID] = keptIndex.AddToIndex(feature);
        }
    }
    featureIndex = keptIndex;
    // Rewrite each datum's parallel feature/value arrays with the new ids.
    for (int d = 0; d < size; d++)
    {
        int kept = 0;
        for (int j = 0; j < data[d].Length; j++)
        {
            if (remap[data[d][j]] >= 0)
            {
                kept++;
            }
        }
        int[] newData = new int[kept];
        double[] newValues = new double[kept];
        int w = 0;
        for (int j = 0; j < data[d].Length; j++)
        {
            int mapped = remap[data[d][j]];
            if (mapped >= 0)
            {
                newData[w] = mapped;
                newValues[w] = values[d][j];
                w++;
            }
        }
        data[d] = newData;
        values[d] = newValues;
    }
}
/// <summary>Generic method to select features based on the feature scores vector provided as an argument.</summary>
/// <param name="numFeatures">number of features to be selected.</param>
/// <param name="scores">a vector of size total number of features in the data.</param>
public virtual void SelectFeatures(int numFeatures, double[] scores)
{
    // Rank all features by score, descending.
    IList<ScoredObject<F>> scoredFeatures = new List<ScoredObject<F>>();
    for (int i = 0; i < scores.Length; i++)
    {
        scoredFeatures.Add(new ScoredObject<F>(featureIndex.Get(i), scores[i]));
    }
    scoredFeatures.Sort(ScoredComparator.DescendingComparator);
    // Keep only the top numFeatures features in a fresh index.
    IIndex<F> newFeatureIndex = new HashIndex<F>();
    for (int i = 0; i < scoredFeatures.Count && i < numFeatures; i++)
    {
        newFeatureIndex.Add(scoredFeatures[i].Object());
    }
    // Re-map every datum onto the reduced feature index, dropping features
    // that did not make the cut.
    for (int d = 0; d < size; d++)
    {
        int[] newData = new int[data[d].Length];
        int curIndex = 0;
        for (int j = 0; j < data[d].Length; j++)
        {
            int index;
            if ((index = newFeatureIndex.IndexOf(featureIndex.Get(data[d][j]))) != -1)
            {
                newData[curIndex++] = index;
            }
        }
        int[] newDataTrimmed = new int[curIndex];
        // FIX: removed the lock (typeof(Runtime)) that wrapped this copy.
        // Locking on a Type object is an anti-pattern, and both arrays are
        // method-local, so the lock provided no synchronization benefit.
        System.Array.Copy(newData, 0, newDataTrimmed, 0, curIndex);
        data[d] = newDataTrimmed;
    }
    featureIndex = newFeatureIndex;
}
/// <summary>
/// Extracts a heist world: the world file and its siblings, referenced
/// environments, then the continents index (and each *.continent it lists)
/// and the mission index (and each *.mission it lists). Conversion or XML
/// errors are logged to error_output and abort the remaining processing.
/// </summary>
/// <param name="file">The world file entry.</param>
private void ProcessWorld(FileEntry file)
{
    // Dump every file that sits next to the world file.
    foreach (KeyValuePair<string, IChild> child in file.Parent.Children)
    {
        if (child.Value is FileEntry)
        {
            this.WriteFile(child.Value as FileEntry);
        }
    }
    this.WriteFile(file);
    // Pull environment references out of the world's scriptdata.
    this.ProcessScriptData(file, new List<XMLTagLookup>
    {
        new XMLTagLookup
        {
            node_name = "environment_values",
            value = new[] { "environment" },
            Converter = (hash) => { return (hash + ".environment"); }
        }
    });
    // --- continents ---
    string continents_file = Path.Combine(Path.GetDirectoryName(file.Path), "continents").Replace("\\", "/");
    Idstring ids = HashIndex.Get(continents_file);
    var t_ids = new Tuple<Idstring, Idstring, Idstring>(ids, new Idstring(0), HashIndex.Get("continents"));
    if (this._browser.RawFiles.ContainsKey(t_ids))
    {
        FileEntry c_file = this._browser.RawFiles[t_ids];
        this.WriteFile(c_file);
        string xml = ScriptActions.GetConverter("scriptdata", "script_cxml").export(c_file.FileStream(), true);
        XmlDocument doc = new XmlDocument();
        try
        {
            doc.LoadXml(xml);
            // Each child names a continent; process its <name>/<name>.continent file.
            foreach (XmlNode child in doc.ChildNodes[0])
            {
                this.ProcessFile(Path.Combine(Path.GetDirectoryName(file.Path), string.Format("{0}/{0}.continent", child.Attributes.GetNamedItem("name").Value)).Replace("\\", "/"));
            }
        }
        catch (Exception exc)
        {
            this.error_output.Write("Exception occured on file: {0}\n", c_file.Path);
            if (xml != null)
            {
                this.error_output.Write(xml + "\n");
            }
            this.error_output.Write(exc.Message + "\n");
            this.error_output.Write(exc.StackTrace + "\n");
            this.error_output.Flush();
            return;
        }
    }
    else
    {
        this.error_output.Write("Continents file {0} does not exist!\n", continents_file);
    }
    // --- mission ---
    string mission_file = Path.Combine(Path.GetDirectoryName(file.Path), "mission").Replace("\\", "/");
    Idstring m_ids = HashIndex.Get(mission_file);
    var t_m_ids = new Tuple<Idstring, Idstring, Idstring>(m_ids, new Idstring(0), HashIndex.Get("mission"));
    if (this._browser.RawFiles.ContainsKey(t_m_ids))
    {
        FileEntry m_file = this._browser.RawFiles[t_m_ids];
        this.WriteFile(m_file);
        string xml = ScriptActions.GetConverter("scriptdata", "script_cxml").export(m_file.FileStream(), true);
        XmlDocument doc = new XmlDocument();
        try
        {
            doc.LoadXml(xml);
            // Each child references a mission script; process <file>.mission.
            foreach (XmlNode child in doc.ChildNodes[0])
            {
                this.ProcessFile(Path.Combine(Path.GetDirectoryName(file.Path), string.Format("{0}.mission", child.Attributes.GetNamedItem("file").Value)).Replace("\\", "/"));
            }
        }
        catch (Exception exc)
        {
            this.error_output.Write("Exception occured on file: {0}\n", m_file.Path);
            if (xml != null)
            {
                this.error_output.Write(xml + "\n");
            }
            this.error_output.Write(exc.Message + "\n");
            this.error_output.Write(exc.StackTrace + "\n");
            this.error_output.Flush();
            return;
        }
    }
    else
    {
        // FIX: this message previously logged continents_file; the missing
        // file here is the mission index, so log mission_file instead.
        this.error_output.Write("Mission file {0} does not exist!\n", mission_file);
    }
    this.error_output.Flush();
}
/// <summary>
/// Loads the hashlist entry from the given package file, then writes the
/// combined hashlist out under the working path.
/// </summary>
/// <param name="workingPath">Directory to receive the generated hashlist.</param>
/// <param name="file">Package file containing the hashlist blob.</param>
/// <param name="be">Entry locating the blob inside the package.</param>
public static void GenerateHashlist(string workingPath, string file, PackageFileEntry be)
{
    ReadHashlistAndLoad(file, be);
    string destination = Path.Combine(workingPath, HashlistFile);
    HashIndex.GenerateHashList(destination);
}
/// <summary>
/// Command-line tool: loads a treebank, trains a lexicon over it, and — for
/// words first seen in the last 50% of trees and with corpus count &lt; 2 —
/// tallies their unknown-word signatures, printing the signature histogram
/// in descending order. Flags: -l LANGUAGE, -e ENCODING, then treebank paths.
/// </summary>
public static void Main(string[] args)
{
    if (args.Length < minArgs)
    {
        System.Console.Out.WriteLine(usage.ToString());
        System.Environment.Exit(-1);
    }
    ITreebankLangParserParams tlpp = new EnglishTreebankParserParams();
    DiskTreebank tb = null;
    string encoding = "UTF-8";
    Language lang = Language.English;
    for (int i = 0; i < args.Length; i++)
    {
        if (args[i].StartsWith("-"))
        {
            switch (args[i])
            {
                case "-l":
                {
                    lang = Language.ValueOf(args[++i].Trim());
                    tlpp = lang.@params;
                    break;
                }

                case "-e":
                {
                    encoding = args[++i];
                    break;
                }

                default:
                {
                    System.Console.Out.WriteLine(usage.ToString());
                    System.Environment.Exit(-1);
                    break;
                }
            }
        }
        else
        {
            // First non-flag argument: lazily create the treebank, then load
            // every path argument into it.
            if (tb == null)
            {
                if (tlpp == null)
                {
                    System.Console.Out.WriteLine(usage.ToString());
                    System.Environment.Exit(-1);
                }
                else
                {
                    tlpp.SetInputEncoding(encoding);
                    tlpp.SetOutputEncoding(encoding);
                    tb = tlpp.DiskTreebank();
                }
            }
            tb.LoadPath(args[i]);
        }
    }
    PrintWriter pw = tlpp.Pw();
    Options op = new Options();
    Options.LexOptions lexOptions = op.lexOptions;
    // Language-specific unknown-word model settings.
    if (lang == Language.French)
    {
        lexOptions.useUnknownWordSignatures = 1;
        lexOptions.smartMutation = false;
        lexOptions.unknownSuffixSize = 2;
        lexOptions.unknownPrefixSize = 1;
    }
    else
    {
        if (lang == Language.Arabic)
        {
            lexOptions.smartMutation = false;
            lexOptions.useUnknownWordSignatures = 9;
            lexOptions.unknownPrefixSize = 1;
            lexOptions.unknownSuffixSize = 1;
        }
    }
    IIndex<string> wordIndex = new HashIndex<string>();
    IIndex<string> tagIndex = new HashIndex<string>();
    ILexicon lex = tlpp.Lex(op, wordIndex, tagIndex);
    // Only start counting unknowns after the first half of the treebank.
    int computeAfter = (int)(0.50 * tb.Count);
    ICounter<string> vocab = new ClassicCounter<string>();
    ICounter<string> unkCounter = new ClassicCounter<string>();
    int treeId = 0;
    foreach (Tree t in tb)
    {
        IList<ILabel> yield = t.Yield();
        int posId = 0;
        foreach (ILabel word in yield)
        {
            vocab.IncrementCount(word.Value());
            if (treeId > computeAfter && vocab.GetCount(word.Value()) < 2.0)
            {
                // NOTE(review): posId only advances for rare words, mirroring
                // the original (and its commented-out variant below) — verify
                // the signature position argument is intended to work this way.
                // if(lex.getUnknownWordModel().getSignature(word.value(), posId++).equals("UNK"))
                //   pw.println(word.value());
                unkCounter.IncrementCount(lex.GetUnknownWordModel().GetSignature(word.Value(), posId++));
            }
        }
        treeId++;
    }
    IList<string> biggestKeys = new List<string>(unkCounter.KeySet());
    biggestKeys.Sort(Counters.ToComparatorDescending(unkCounter));
    foreach (string wordType in biggestKeys)
    {
        pw.Printf("%s\t%d%n", wordType, (int)unkCounter.GetCount(wordType));
    }
    // FIX: pw.Close() was called twice; a single close suffices.
    pw.Close();
}
/// <summary>
/// Loads the classifier's text-format model on top of the base classifier
/// state: the node/edge feature index maps, then either the second-order
/// non-linear edge weight matrices or the linear weights, followed by the
/// input/output layer weights.
/// </summary>
/// <exception cref="System.Exception"/>
protected internal override void LoadTextClassifier(BufferedReader br)
{
    base.LoadTextClassifier(br);
    // The two index maps share a format; only the header token and the
    // header-mismatch message differ (messages preserved from the original).
    nodeFeatureIndicesMap = ReadIndexMap(br, "nodeFeatureIndicesMap.size()=", "format error in nodeFeatureIndicesMap");
    edgeFeatureIndicesMap = ReadIndexMap(br, "edgeFeatureIndicesMap.size()=", "format error");
    if (flags.secondOrderNonLinear)
    {
        inputLayerWeights4Edge = ReadWeightMatrix(br, "inputLayerWeights4Edge.length=");
        outputLayerWeights4Edge = ReadWeightMatrix(br, "outputLayerWeights4Edge.length=");
    }
    else
    {
        linearWeights = ReadWeightMatrix(br, "linearWeights.length=");
    }
    inputLayerWeights = ReadWeightMatrix(br, "inputLayerWeights.length=");
    outputLayerWeights = ReadWeightMatrix(br, "outputLayerWeights.length=");
}

/// <summary>
/// Reads one "header=\tN" line followed by N "index\tvalue" lines into a
/// HashIndex, verifying that indices are sequential from 0.
/// </summary>
/// <param name="br">Reader positioned at the size header line.</param>
/// <param name="sizeHeader">Expected header token, e.g. "nodeFeatureIndicesMap.size()=".</param>
/// <param name="headerError">Exception message when the header does not match.</param>
private static HashIndex<int> ReadIndexMap(BufferedReader br, string sizeHeader, string headerError)
{
    // NOTE(review): line.Split("\\t") follows the Sharpen-converted Java idiom
    // (regex split on a tab) throughout this class — do not "fix" locally.
    string line = br.ReadLine();
    string[] toks = line.Split("\\t");
    if (!toks[0].Equals(sizeHeader))
    {
        throw new Exception(headerError);
    }
    int mapSize = System.Convert.ToInt32(toks[1]);
    HashIndex<int> index = new HashIndex<int>();
    int count = 0;
    while (count < mapSize)
    {
        line = br.ReadLine();
        toks = line.Split("\\t");
        int idx = System.Convert.ToInt32(toks[0]);
        if (count != idx)
        {
            throw new Exception("format error");
        }
        index.Add(System.Convert.ToInt32(toks[1]));
        count++;
    }
    return index;
}

/// <summary>
/// Reads one "header=\tN" line followed by N rows of the form
/// "rowLength\tv0 v1 v2 ..." into a jagged double matrix.
/// This replaces five copy-pasted inline readers in the original.
/// </summary>
/// <param name="br">Reader positioned at the length header line.</param>
/// <param name="lengthHeader">Expected header token, e.g. "linearWeights.length=".</param>
private static double[][] ReadWeightMatrix(BufferedReader br, string lengthHeader)
{
    string line = br.ReadLine();
    string[] toks = line.Split("\\t");
    if (!toks[0].Equals(lengthHeader))
    {
        throw new Exception("format error");
    }
    int rows = System.Convert.ToInt32(toks[1]);
    double[][] weights = new double[rows][];
    int count = 0;
    while (count < rows)
    {
        line = br.ReadLine();
        toks = line.Split("\\t");
        int weights2Length = System.Convert.ToInt32(toks[0]);
        weights[count] = new double[weights2Length];
        string[] weightsValue = toks[1].Split(" ");
        if (weights2Length != weightsValue.Length)
        {
            throw new Exception("weights format error");
        }
        for (int i2 = 0; i2 < weights2Length; i2++)
        {
            weights[count][i2] = double.ParseDouble(weightsValue[i2]);
        }
        count++;
    }
    return weights;
}
/// <summary>
/// Command-line driver for the Chinese character-based lexicon: supports
/// corpus stats (-stats), parser training/loading (-parser), lexicon
/// training/loading (-lex), and evaluation over a test treebank (-test),
/// with optional word segmentation and bracket-based scoring (-eval).
/// </summary>
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    // Map each flag to its number of arguments for the argument parser.
    // NOTE(review): int.Parse(3) on an int literal is a Sharpen conversion
    // artifact of Java's Integer.parseInt/valueOf — relies on a shim; verify.
    IDictionary<string, int> flagsToNumArgs = Generics.NewHashMap();
    flagsToNumArgs["-parser"] = int.Parse(3);
    flagsToNumArgs["-lex"] = int.Parse(3);
    flagsToNumArgs["-test"] = int.Parse(2);
    flagsToNumArgs["-out"] = int.Parse(1);
    flagsToNumArgs["-lengthPenalty"] = int.Parse(1);
    flagsToNumArgs["-penaltyType"] = int.Parse(1);
    flagsToNumArgs["-maxLength"] = int.Parse(1);
    flagsToNumArgs["-stats"] = int.Parse(2);
    IDictionary<string, string[]> argMap = StringUtils.ArgsToMap(args, flagsToNumArgs);
    bool eval = argMap.Contains("-eval");
    PrintWriter pw = null;
    if (argMap.Contains("-out"))
    {
        // Output file is written in the GB18030 encoding.
        pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream((argMap["-out"])[0]), "GB18030"), true);
    }
    log.Info("ChineseCharacterBasedLexicon called with args:");
    ChineseTreebankParserParams ctpp = new ChineseTreebankParserParams();
    for (int i = 0; i < args.Length; i++)
    {
        ctpp.SetOptionFlag(args, i);
        log.Info(" " + args[i]);
    }
    log.Info();
    Options op = new Options(ctpp);
    // --- -stats: print treebank statistics and exit ---
    if (argMap.Contains("-stats"))
    {
        string[] statArgs = (argMap["-stats"]);
        MemoryTreebank rawTrainTreebank = op.tlpParams.MemoryTreebank();
        IFileFilter trainFilt = new NumberRangesFileFilter(statArgs[1], false);
        rawTrainTreebank.LoadPath(new File(statArgs[0]), trainFilt);
        log.Info("Done reading trees.");
        MemoryTreebank trainTreebank;
        if (argMap.Contains("-annotate"))
        {
            trainTreebank = new MemoryTreebank();
            TreeAnnotator annotator = new TreeAnnotator(ctpp.HeadFinder(), ctpp, op);
            foreach (Tree tree in rawTrainTreebank)
            {
                trainTreebank.Add(annotator.TransformTree(tree));
            }
            log.Info("Done annotating trees.");
        }
        else
        {
            trainTreebank = rawTrainTreebank;
        }
        PrintStats(trainTreebank, pw);
        System.Environment.Exit(0);
    }
    // maxLength only governs skipping long test sentences below.
    int maxLength = 1000000;
    // Test.verbose = true;
    if (argMap.Contains("-norm"))
    {
        op.testOptions.lengthNormalization = true;
    }
    if (argMap.Contains("-maxLength"))
    {
        maxLength = System.Convert.ToInt32((argMap["-maxLength"])[0]);
    }
    // NOTE(review): op.testOptions.maxLength is hard-coded to 120 regardless
    // of the -maxLength flag (which only affects the local skip threshold) —
    // verify this is intentional.
    op.testOptions.maxLength = 120;
    bool combo = argMap.Contains("-combo");
    if (combo)
    {
        ctpp.useCharacterBasedLexicon = true;
        op.testOptions.maxSpanForTags = 10;
        op.doDep = false;
        op.dcTags = false;
    }
    LexicalizedParser lp = null;
    ILexicon lex = null;
    // --- -parser: train from a treebank (2-3 args) or load a model (1 arg) ---
    if (argMap.Contains("-parser"))
    {
        string[] parserArgs = (argMap["-parser"]);
        if (parserArgs.Length > 1)
        {
            IFileFilter trainFilt = new NumberRangesFileFilter(parserArgs[1], false);
            lp = LexicalizedParser.TrainFromTreebank(parserArgs[0], trainFilt, op);
            if (parserArgs.Length == 3)
            {
                // Optional third argument: serialize the trained parser.
                string filename = parserArgs[2];
                log.Info("Writing parser in serialized format to file " + filename + " ");
                System.Console.Error.Flush();
                ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
                @out.WriteObject(lp);
                @out.Close();
                log.Info("done.");
            }
        }
        else
        {
            string parserFile = parserArgs[0];
            lp = LexicalizedParser.LoadModel(parserFile, op);
        }
        // Adopt the loaded/trained parser's lexicon and options.
        lex = lp.GetLexicon();
        op = lp.GetOp();
        ctpp = (ChineseTreebankParserParams)op.tlpParams;
    }
    if (argMap.Contains("-rad"))
    {
        ctpp.useUnknownCharacterModel = true;
    }
    if (argMap.Contains("-lengthPenalty"))
    {
        ctpp.lengthPenalty = double.Parse((argMap["-lengthPenalty"])[0]);
    }
    if (argMap.Contains("-penaltyType"))
    {
        ctpp.penaltyType = System.Convert.ToInt32((argMap["-penaltyType"])[0]);
    }
    // --- -lex: train a lexicon (2-3 args) or deserialize one (1 arg) ---
    if (argMap.Contains("-lex"))
    {
        string[] lexArgs = (argMap["-lex"]);
        if (lexArgs.Length > 1)
        {
            IIndex<string> wordIndex = new HashIndex<string>();
            IIndex<string> tagIndex = new HashIndex<string>();
            lex = ctpp.Lex(op, wordIndex, tagIndex);
            MemoryTreebank rawTrainTreebank = op.tlpParams.MemoryTreebank();
            IFileFilter trainFilt = new NumberRangesFileFilter(lexArgs[1], false);
            rawTrainTreebank.LoadPath(new File(lexArgs[0]), trainFilt);
            log.Info("Done reading trees.");
            MemoryTreebank trainTreebank;
            if (argMap.Contains("-annotate"))
            {
                trainTreebank = new MemoryTreebank();
                TreeAnnotator annotator = new TreeAnnotator(ctpp.HeadFinder(), ctpp, op);
                foreach (Tree tree in rawTrainTreebank)
                {
                    tree = annotator.TransformTree(tree);
                    trainTreebank.Add(tree);
                }
                log.Info("Done annotating trees.");
            }
            else
            {
                trainTreebank = rawTrainTreebank;
            }
            lex.InitializeTraining(trainTreebank.Count);
            lex.Train(trainTreebank);
            lex.FinishTraining();
            log.Info("Done training lexicon.");
            if (lexArgs.Length == 3)
            {
                // Optional third argument: serialize the trained lexicon.
                string filename = lexArgs.Length == 3 ? lexArgs[2] : "parsers/chineseCharLex.ser.gz";
                log.Info("Writing lexicon in serialized format to file " + filename + " ");
                System.Console.Error.Flush();
                ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
                @out.WriteObject(lex);
                @out.Close();
                log.Info("done.");
            }
        }
        else
        {
            string lexFile = lexArgs.Length == 1 ? lexArgs[0] : "parsers/chineseCharLex.ser.gz";
            log.Info("Reading Lexicon from file " + lexFile);
            ObjectInputStream @in = IOUtils.ReadStreamFromString(lexFile);
            try
            {
                lex = (ILexicon)@in.ReadObject();
            }
            catch (TypeLoadException)
            {
                throw new Exception("Bad serialized file: " + lexFile);
            }
            @in.Close();
        }
    }
    // --- -test: segment and/or parse the test treebank, optionally scoring ---
    if (argMap.Contains("-test"))
    {
        bool segmentWords = ctpp.segment;
        bool parse = lp != null;
        System.Diagnostics.Debug.Assert((parse || segmentWords));
        // WordCatConstituent.collinizeWords = argMap.containsKey("-collinizeWords");
        // WordCatConstituent.collinizeTags = argMap.containsKey("-collinizeTags");
        IWordSegmenter seg = null;
        if (segmentWords)
        {
            seg = (IWordSegmenter)lex;
        }
        string[] testArgs = (argMap["-test"]);
        MemoryTreebank testTreebank = op.tlpParams.MemoryTreebank();
        IFileFilter testFilt = new NumberRangesFileFilter(testArgs[1], false);
        testTreebank.LoadPath(new File(testArgs[0]), testFilt);
        ITreeTransformer subcategoryStripper = op.tlpParams.SubcategoryStripper();
        ITreeTransformer collinizer = ctpp.Collinizer();
        WordCatEquivalenceClasser eqclass = new WordCatEquivalenceClasser();
        WordCatEqualityChecker eqcheck = new WordCatEqualityChecker();
        EquivalenceClassEval basicEval = new EquivalenceClassEval(eqclass, eqcheck, "basic");
        EquivalenceClassEval collinsEval = new EquivalenceClassEval(eqclass, eqcheck, "collinized");
        // Decide which bracket types participate in evaluation.
        IList<string> evalTypes = new List<string>(3);
        bool goodPOS = false;
        if (segmentWords)
        {
            evalTypes.Add(WordCatConstituent.wordType);
            if (ctpp.segmentMarkov && !parse)
            {
                evalTypes.Add(WordCatConstituent.tagType);
                goodPOS = true;
            }
        }
        if (parse)
        {
            evalTypes.Add(WordCatConstituent.tagType);
            evalTypes.Add(WordCatConstituent.catType);
            if (combo)
            {
                evalTypes.Add(WordCatConstituent.wordType);
                goodPOS = true;
            }
        }
        TreeToBracketProcessor proc = new TreeToBracketProcessor(evalTypes);
        log.Info("Testing...");
        foreach (Tree goldTop in testTreebank)
        {
            Tree gold = goldTop.FirstChild();
            IList<IHasWord> goldSentence = gold.YieldHasWord();
            if (goldSentence.Count > maxLength)
            {
                log.Info("Skipping sentence; too long: " + goldSentence.Count);
                continue;
            }
            else
            {
                log.Info("Processing sentence; length: " + goldSentence.Count);
            }
            IList<IHasWord> s;
            if (segmentWords)
            {
                // Re-segment the gold character sequence from scratch.
                StringBuilder goldCharBuf = new StringBuilder();
                foreach (IHasWord aGoldSentence in goldSentence)
                {
                    StringLabel word = (StringLabel)aGoldSentence;
                    goldCharBuf.Append(word.Value());
                }
                string goldChars = goldCharBuf.ToString();
                s = seg.Segment(goldChars);
            }
            else
            {
                s = goldSentence;
            }
            Tree tree;
            if (parse)
            {
                tree = lp.ParseTree(s);
                if (tree == null)
                {
                    throw new Exception("PARSER RETURNED NULL!!!");
                }
            }
            else
            {
                tree = Edu.Stanford.Nlp.Trees.Trees.ToFlatTree(s);
                tree = subcategoryStripper.TransformTree(tree);
            }
            if (pw != null)
            {
                if (parse)
                {
                    tree.PennPrint(pw);
                }
                else
                {
                    // NOTE(review): sentIter.Current is read before the first
                    // MoveNext(), which is undefined for BCL enumerators —
                    // looks like a Java Iterator.next() conversion artifact;
                    // verify against the enumerator implementation in use.
                    IEnumerator sentIter = s.GetEnumerator();
                    for (; ;)
                    {
                        Word word = (Word)sentIter.Current;
                        pw.Print(word.Word());
                        if (sentIter.MoveNext())
                        {
                            pw.Print(" ");
                        }
                        else
                        {
                            break;
                        }
                    }
                }
                pw.Println();
            }
            if (eval)
            {
                // Score raw brackets, then Collins-normalized brackets.
                ICollection ourBrackets;
                ICollection goldBrackets;
                ourBrackets = proc.AllBrackets(tree);
                goldBrackets = proc.AllBrackets(gold);
                if (goodPOS)
                {
                    Sharpen.Collections.AddAll(ourBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(tree, gold));
                    Sharpen.Collections.AddAll(goldBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(gold, tree));
                }
                basicEval.Eval(ourBrackets, goldBrackets);
                System.Console.Out.WriteLine("\nScores:");
                basicEval.DisplayLast();
                Tree collinsTree = collinizer.TransformTree(tree);
                Tree collinsGold = collinizer.TransformTree(gold);
                ourBrackets = proc.AllBrackets(collinsTree);
                goldBrackets = proc.AllBrackets(collinsGold);
                if (goodPOS)
                {
                    Sharpen.Collections.AddAll(ourBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(collinsTree, collinsGold));
                    Sharpen.Collections.AddAll(goldBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(collinsGold, collinsTree));
                }
                collinsEval.Eval(ourBrackets, goldBrackets);
                System.Console.Out.WriteLine("\nCollinized scores:");
                collinsEval.DisplayLast();
                System.Console.Out.WriteLine();
            }
        }
        if (eval)
        {
            basicEval.Display();
            System.Console.Out.WriteLine();
            collinsEval.Display();
        }
    }
}
/// <summary>
/// This method lets you train and test a segmenter relative to a
/// Treebank.
/// </summary>
/// <remarks>
/// This method lets you train and test a segmenter relative to a
/// Treebank.
/// <p>
/// <i>Implementation note:</i> This method is largely cloned from
/// LexicalizedParser's main method. Should we try to have it be able
/// to train segmenters to stop things going out of sync?
/// </remarks>
public static void Main(string[] args)
{
    // Mode flags and file locations gathered from the command line.
    bool train = false;
    bool saveToSerializedFile = false;
    bool saveToTextFile = false;
    string serializedInputFileOrUrl = null;
    string textInputFileOrUrl = null;
    string serializedOutputFileOrUrl = null;
    string textOutputFileOrUrl = null;
    string treebankPath = null;
    Treebank testTreebank = null;
    // Treebank tuneTreebank = null;
    string testPath = null;
    IFileFilter testFilter = null;
    IFileFilter trainFilter = null;
    string encoding = null;
    // variables needed to process the files to be parsed
    ITokenizerFactory <Word> tokenizerFactory = null;
    // DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor();
    bool tokenized = false;
    // whether or not the input file has already been tokenized
    IFunction <IList <IHasWord>, IList <IHasWord> > escaper = new ChineseEscaper();
    // int tagDelimiter = -1;
    // String sentenceDelimiter = "\n";
    // boolean fromXML = false;
    int argIndex = 0;
    if (args.Length < 1)
    {
        log.Info("usage: java edu.stanford.nlp.parser.lexparser." + "LexicalizedParser parserFileOrUrl filename*");
        return;
    }
    Options op = new Options();
    op.tlpParams = new ChineseTreebankParserParams();
    // while loop through option arguments
    while (argIndex < args.Length && args[argIndex][0] == '-')
    {
        if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-train"))
        {
            // -train treebankPath [range | low high]: train a new segmenter
            // and (implicitly) serialize it afterwards.
            train = true;
            saveToSerializedFile = true;
            int numSubArgs = NumSubArgs(args, argIndex);
            argIndex++;
            if (numSubArgs > 1)
            {
                treebankPath = args[argIndex];
                argIndex++;
            }
            else
            {
                throw new Exception("Error: -train option must have treebankPath as first argument.");
            }
            if (numSubArgs == 2)
            {
                trainFilter = new NumberRangesFileFilter(args[argIndex++], true);
            }
            else
            {
                if (numSubArgs >= 3)
                {
                    // two extra args: either a numeric low/high pair...
                    try
                    {
                        int low = System.Convert.ToInt32(args[argIndex]);
                        int high = System.Convert.ToInt32(args[argIndex + 1]);
                        trainFilter = new NumberRangeFileFilter(low, high, true);
                        argIndex += 2;
                    }
                    catch (NumberFormatException)
                    {
                        // maybe it's a ranges expression?
                        trainFilter = new NumberRangesFileFilter(args[argIndex], true);
                        argIndex++;
                    }
                }
            }
        }
        else
        {
            if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-encoding"))
            {
                // sets encoding for TreebankLangParserParams
                encoding = args[argIndex + 1];
                op.tlpParams.SetInputEncoding(encoding);
                op.tlpParams.SetOutputEncoding(encoding);
                argIndex += 2;
            }
            else
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-loadFromSerializedFile"))
                {
                    // load the parser from a binary serialized file
                    // the next argument must be the path to the parser file
                    serializedInputFileOrUrl = args[argIndex + 1];
                    argIndex += 2;
                }
                else
                {
                    // doesn't make sense to load from TextFile -pichuan
                    // } else if (args[argIndex].equalsIgnoreCase("-loadFromTextFile")) {
                    //   // load the parser from declarative text file
                    //   // the next argument must be the path to the parser file
                    //   textInputFileOrUrl = args[argIndex + 1];
                    //   argIndex += 2;
                    if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-saveToSerializedFile"))
                    {
                        saveToSerializedFile = true;
                        serializedOutputFileOrUrl = args[argIndex + 1];
                        argIndex += 2;
                    }
                    else
                    {
                        if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-saveToTextFile"))
                        {
                            // save the parser to declarative text file
                            saveToTextFile = true;
                            textOutputFileOrUrl = args[argIndex + 1];
                            argIndex += 2;
                        }
                        else
                        {
                            if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-treebank"))
                            {
                                // the next argument is the treebank path and range for testing
                                int numSubArgs = NumSubArgs(args, argIndex);
                                argIndex++;
                                if (numSubArgs == 1)
                                {
                                    testFilter = new NumberRangesFileFilter(args[argIndex++], true);
                                }
                                else
                                {
                                    if (numSubArgs > 1)
                                    {
                                        testPath = args[argIndex++];
                                        if (numSubArgs == 2)
                                        {
                                            testFilter = new NumberRangesFileFilter(args[argIndex++], true);
                                        }
                                        else
                                        {
                                            if (numSubArgs >= 3)
                                            {
                                                try
                                                {
                                                    int low = System.Convert.ToInt32(args[argIndex]);
                                                    int high = System.Convert.ToInt32(args[argIndex + 1]);
                                                    testFilter = new NumberRangeFileFilter(low, high, true);
                                                    argIndex += 2;
                                                }
                                                catch (NumberFormatException)
                                                {
                                                    // maybe it's a ranges expression?
                                                    testFilter = new NumberRangesFileFilter(args[argIndex++], true);
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                            else
                            {
                                // unrecognized here: let the language pack try to consume it
                                int j = op.tlpParams.SetOptionFlag(args, argIndex);
                                if (j == argIndex)
                                {
                                    log.Info("Unknown option ignored: " + args[argIndex]);
                                    j++;
                                }
                                argIndex = j;
                            }
                        }
                    }
                }
            }
        }
    }
    // end while loop through arguments
    ITreebankLangParserParams tlpParams = op.tlpParams;
    // all other arguments are order dependent and
    // are processed in order below
    Edu.Stanford.Nlp.Parser.Lexparser.ChineseLexiconAndWordSegmenter cs = null;
    if (!train && op.testOptions.verbose)
    {
        System.Console.Out.WriteLine("Currently " + new DateTime());
        PrintArgs(args, System.Console.Out);
    }
    if (train)
    {
        PrintArgs(args, System.Console.Out);
        // so we train a parser using the treebank
        if (treebankPath == null)
        {
            // the next arg must be the treebank path, since it wasn't give earlier
            treebankPath = args[argIndex];
            argIndex++;
            if (args.Length > argIndex + 1)
            {
                try
                {
                    // the next two args might be the range
                    int low = System.Convert.ToInt32(args[argIndex]);
                    int high = System.Convert.ToInt32(args[argIndex + 1]);
                    trainFilter = new NumberRangeFileFilter(low, high, true);
                    argIndex += 2;
                }
                catch (NumberFormatException)
                {
                    // maybe it's a ranges expression?
                    trainFilter = new NumberRangesFileFilter(args[argIndex], true);
                    argIndex++;
                }
            }
        }
        Treebank trainTreebank = MakeTreebank(treebankPath, op, trainFilter);
        IIndex <string> wordIndex = new HashIndex <string>();
        IIndex <string> tagIndex = new HashIndex <string>();
        cs = new Edu.Stanford.Nlp.Parser.Lexparser.ChineseLexiconAndWordSegmenter(trainTreebank, op, wordIndex, tagIndex);
    }
    else
    {
        if (textInputFileOrUrl != null)
        {
        }
        else
        {
            // so we load the segmenter from a text grammar file
            // XXXXX fix later -pichuan
            //cs = new LexicalizedParser(textInputFileOrUrl, true, op);
            // so we load a serialized segmenter
            if (serializedInputFileOrUrl == null)
            {
                // the next argument must be the path to the serialized parser
                serializedInputFileOrUrl = args[argIndex];
                argIndex++;
            }
            try
            {
                cs = new Edu.Stanford.Nlp.Parser.Lexparser.ChineseLexiconAndWordSegmenter(serializedInputFileOrUrl, op);
            }
            catch (ArgumentException)
            {
                log.Info("Error loading segmenter, exiting...");
                System.Environment.Exit(0);
            }
        }
    }
    // the following has to go after reading parser to make sure
    // op and tlpParams are the same for train and test
    TreePrint treePrint = op.testOptions.TreePrint(tlpParams);
    if (testFilter != null)
    {
        if (testPath == null)
        {
            if (treebankPath == null)
            {
                throw new Exception("No test treebank path specified...");
            }
            else
            {
                log.Info("No test treebank path specified. Using train path: \"" + treebankPath + "\"");
                testPath = treebankPath;
            }
        }
        testTreebank = tlpParams.TestMemoryTreebank();
        testTreebank.LoadPath(testPath, testFilter);
    }
    op.trainOptions.sisterSplitters = Generics.NewHashSet(Arrays.AsList(tlpParams.SisterSplitters()));
    // at this point we should be sure that op.tlpParams is
    // set appropriately (from command line, or from grammar file),
    // and will never change again. We also set the tlpParams of the
    // LexicalizedParser instance to be the same object. This is
    // redundancy that we probably should take out eventually.
    //
    // -- Roger
    if (op.testOptions.verbose)
    {
        log.Info("Lexicon is " + cs.GetType().FullName);
    }
    PrintWriter pwOut = tlpParams.Pw();
    PrintWriter pwErr = tlpParams.Pw(System.Console.Error);
    // Now what do we do with the parser we've made
    if (saveToTextFile)
    {
        // save the parser to textGrammar format
        if (textOutputFileOrUrl != null)
        {
            SaveSegmenterDataToText(cs, textOutputFileOrUrl);
        }
        else
        {
            log.Info("Usage: must specify a text segmenter data output path");
        }
    }
    if (saveToSerializedFile)
    {
        if (serializedOutputFileOrUrl == null && argIndex < args.Length)
        {
            // the next argument must be the path to serialize to
            serializedOutputFileOrUrl = args[argIndex];
            argIndex++;
        }
        if (serializedOutputFileOrUrl != null)
        {
            SaveSegmenterDataToSerialized(cs, serializedOutputFileOrUrl);
        }
        else
        {
            if (textOutputFileOrUrl == null && testTreebank == null)
            {
                // no saving/parsing request has been specified
                log.Info("usage: " + "java edu.stanford.nlp.parser.lexparser.ChineseLexiconAndWordSegmenter" + "-train trainFilesPath [start stop] serializedParserFilename");
            }
        }
    }
    /* --------------------- Testing part!!!! ----------------------- */
    if (op.testOptions.verbose)
    {
    }
    // printOptions(false, op);
    if (testTreebank != null || (argIndex < args.Length && Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-treebank")))
    {
        // test parser on treebank
        if (testTreebank == null)
        {
            // the next argument is the treebank path and range for testing
            testTreebank = tlpParams.TestMemoryTreebank();
            if (args.Length < argIndex + 4)
            {
                testTreebank.LoadPath(args[argIndex + 1]);
            }
            else
            {
                int testlow = System.Convert.ToInt32(args[argIndex + 2]);
                int testhigh = System.Convert.ToInt32(args[argIndex + 3]);
                testTreebank.LoadPath(args[argIndex + 1], new NumberRangeFileFilter(testlow, testhigh, true));
            }
        }
    }
}
protected void ReadFile(BinaryReader _br) { br = _br; uint bnk_count = br.ReadUInt32(); //Skip second count br.BaseStream.Position += 4; uint bnk_offset = br.ReadUInt32(); uint section_pointer = br.ReadUInt32(); uint unknown1 = br.ReadUInt32(); uint sound_count = br.ReadUInt32(); //Skip second count br.BaseStream.Position += 4; uint sound_offset = br.ReadUInt32(); //Skips section pointer, unknown1, unknown2 br.BaseStream.Position += 12; uint u_count = br.ReadUInt32(); //Skip second count br.BaseStream.Position += 4; uint u_offset = br.ReadUInt32(); br.BaseStream.Position = bnk_offset; for (int i = 0; i < bnk_count; i++) { br.BaseStream.Position += 4; uint position = br.ReadUInt32(); this.SeekPush(); br.BaseStream.Position = position; Soundbanks.Add(this.ReadString()); this.SeekPop(); } br.BaseStream.Position = sound_offset; Dictionary <ulong, uint> sound_lookups = new Dictionary <ulong, uint>(); for (int i = 0; i < sound_count; i++) { uint id = (uint)br.ReadUInt64(); ulong hash = br.ReadUInt64(); if (sound_lookups.ContainsKey(hash)) { uint other_id = sound_lookups[hash]; continue; } sound_lookups.Add(hash, id); } br.BaseStream.Position = u_offset; for (int i = 0; i < u_count; i++) { ulong hash = br.ReadUInt64(); br.BaseStream.Position += 4; uint string_pos = br.ReadUInt32(); this.SeekPush(); br.BaseStream.Position = string_pos; string str = this.ReadString(); this.SeekPop(); if (!sound_lookups.ContainsKey(hash)) { continue; } uint id = sound_lookups[hash]; if (SoundLookups.ContainsKey(id)) { continue; } Idstring ids = HashIndex.Get(hash); SoundLookups.Add(id, new Tuple <string, Idstring>(id.ToString() != str ? str : null, ids)); } }
private static async Task CreateIndex(DocumentClient client, string collectionName) { Console.WriteLine("Set up Indexes"); DocumentCollection collection = await client.ReadDocumentCollectionAsync(UriFactory.CreateDocumentCollectionUri(databaseId, collectionName)); /* * Range over /prop/? (or /*) can be used to serve the following queries efficiently: * SELECT * FROM collection c WHERE c.prop = "value" * SELECT * FROM collection c WHERE c.prop > 5 * SELECT * FROM collection c ORDER BY c.prop */ Index indexNum = new RangeIndex(DataType.Number); collection.IndexingPolicy.IncludedPaths.Add(new IncludedPath() { Indexes = new Collection <Index>() { indexNum }, Path = @"/FamilyId/?" }); /* * Hash over /prop/? (or /*) can be used to serve the following queries efficiently: * SELECT * FROM collection c WHERE c.prop = "value" */ Index indexArray = new HashIndex(DataType.String); collection.IndexingPolicy.IncludedPaths.Add(new IncludedPath() { Indexes = new Collection <Index>() { indexArray }, Path = @"/Address/*" }); /* * Hash over /props/[]/? (or /* or /props/*) can be used to serve the following queries efficiently: * SELECT tag FROM collection c JOIN tag IN c.props WHERE tag = 5 */ Index indexArr = new HashIndex(DataType.String); collection.IndexingPolicy.IncludedPaths.Add(new IncludedPath() { Indexes = new Collection <Index>() { indexArr }, Path = @"/Children/[]/?" }); /* exclude from index Parents */ collection.IndexingPolicy.ExcludedPaths.Add(new ExcludedPath() { Path = @"/Parents/*" }); await client.ReplaceDocumentCollectionAsync(collection); }
/// <summary>
/// Scans all known mod locations and (re)builds the mods list:
/// *.pdmod files in the mods directory, "mod_overrides" folders under the
/// assets folder (matched against known local mods), and BLT hook mods under
/// the game's "mods" directory.
/// </summary>
/// <param name="overrride">When true, the existing in-memory mods list is cleared before scanning.
/// (Parameter name kept, including its typo, for call-site compatibility.)</param>
public void LoadMods(bool overrride = false)
{
    var watch = Stopwatch.StartNew();

    // --- Phase 1: *.pdmod files. ---
    if (Directory.Exists(modsDirectory))
    {
        watch.Restart();
        if (overrride)
            this._modsList.Clear();
        // Anything left in this list after scanning no longer exists on disk.
        List<string> leftovers = this.modsList.Keys.ToList();
        string[] pdmods = Directory.GetFiles(modsDirectory, "*.pdmod");
        foreach (string file in pdmods)
        {
            // BUG FIX: was `return;` (a leftover from a Parallel.ForEach lambda,
            // where return == continue), which aborted the entire scan when a
            // file disappeared between GetFiles and this check.
            if (!File.Exists(file))
                continue;
            if (LoadSingleMod(file))
            {
                leftovers.Remove(file);
            }
        }
        watch.Stop();
        Console.WriteLine("LoadLocalMods.pdmods - " + watch.ElapsedMilliseconds + " ms");
        watch.Restart();
        foreach (string left in leftovers)
            RemoveModsList(left);
        watch.Stop();
        Console.WriteLine("LoadLocalMods.pdmods.leftovers - " + watch.ElapsedMilliseconds + " ms");
    }
    else
    {
        if (this.CanCreateDirectory(modsDirectory))
        {
            Directory.CreateDirectory(modsDirectory);
        }
    }

    watch.Restart();
    // --- Phase 2: "mod_overrides" folders. ---
    if (Directory.Exists(Path.Combine(StaticStorage.settings.AssetsFolder, "mod_overrides")))
    {
        List<BundleMod> mod_overrides_mods = new List<BundleMod>();
        string[] mod_overrides = Directory.EnumerateDirectories(Path.Combine(StaticStorage.settings.AssetsFolder, "mod_overrides")).ToArray();
        foreach (string mo in mod_overrides)
        {
            if (!Directory.Exists(mo))
                continue;
            if (new DirectoryInfo(mo).Name == "Bundle_Modder_Shared")
                continue;
            string[] allfiles = System.IO.Directory.GetFiles(mo, "*.*", System.IO.SearchOption.AllDirectories);
            // Defaults assume an unrecognized, manually installed override mod;
            // mod.txt (if present and parseable) upgrades it below.
            BundleMod mo_mod = new BundleMod();
            mo_mod.Name = new DirectoryInfo(mo).Name;
            mo_mod.Author = "<UNKNOWN>";
            mo_mod.Description = "This mod is installed in \"mod_overrides\" folder. No description for this mod is availiable. This mod was not matched with any local mods. You can only uninstall this mod.";
            mo_mod.file = mo;
            mo_mod.status = BundleMod.ModStatus.Unrecognized;
            mo_mod.type = BundleMod.ModType.mod_override;
            mo_mod.actionStatus = BundleMod.ModActionStatus.Missing;
            mo_mod.UtilizesOverride = true;
            if (File.Exists(Path.Combine(mo, "mod.txt")))
            {
                try
                {
                    OverrideMod overrideModInformation = OverrideMod.Deserialize(File.ReadAllText(Path.Combine(mo, "mod.txt")));
                    if (!String.IsNullOrWhiteSpace(overrideModInformation.Name))
                        mo_mod.Name = overrideModInformation.Name;
                    if (!String.IsNullOrWhiteSpace(overrideModInformation.Author))
                        mo_mod.Author = overrideModInformation.Author;
                    if (!String.IsNullOrWhiteSpace(overrideModInformation.Description))
                        mo_mod.Description = overrideModInformation.Description;
                    mo_mod.status = BundleMod.ModStatus.Installed;
                    mo_mod.actionStatus = BundleMod.ModActionStatus.None;
                }
                catch (Exception)
                {
                    // Deliberate best-effort: a malformed mod.txt just leaves the
                    // folder-derived defaults in place.
                }
            }
            // Build the item queue from the files in the override folder.
            // Paths look like "bundle/path.extension" or "bundle/path.lang.extension".
            foreach (string mo_entry in allfiles)
            {
                if (mo_entry.EndsWith("mod.txt"))
                    continue;
                BundleRewriteItem mo_bri = new BundleRewriteItem();
                string filepath = mo_entry.Substring(mo.Length + 1).Replace('\\', '/');
                string[] pathelements = filepath.Split('.');
                if (pathelements.Length > 3)
                    continue;
                if (pathelements.Length == 2)
                {
                    mo_bri.BundlePath = Hash64.HashString(pathelements[0]);
                    mo_bri.BundleExtension = Hash64.HashString(pathelements[1]);
                }
                else if (pathelements.Length == 3)
                {
                    mo_bri.BundlePath = Hash64.HashString(pathelements[0]);
                    UInt32 lang = 0;
                    if (UInt32.TryParse(pathelements[1], out lang))
                        mo_bri.BundleLanguage = lang;
                    mo_bri.BundleExtension = Hash64.HashString(pathelements[2]);
                    mo_bri.IsLanguageSpecific = true;
                }
                else
                    continue;
                mo_bri.ModName = mo_mod.Name;
                mo_bri.ModAuthor = mo_mod.Author;
                mo_bri.ModDescription = mo_mod.Description;
                mo_bri.ReplacementFile = "";
                if (mo_bri.isOverrideable())
                {
                    // Overrideable entries whose path/extension hashes are not in the
                    // hashlist cannot be addressed — drop them.
                    if (string.IsNullOrEmpty(HashIndex.GetUnhashed(mo_bri.BundlePath)) || string.IsNullOrEmpty(HashIndex.GetUnhashed(mo_bri.BundleExtension)))
                    {
                        continue;
                    }
                }
                mo_mod.ItemQueue.Add(mo_bri);
            }
            mod_overrides_mods.Add(mo_mod);
        }
        // Match every override folder against the known local mods by name.
        Dictionary<string, BundleMod> temporarylocalModsList_master = this.modsList;
        foreach (BundleMod mo_bm in mod_overrides_mods)
        {
            bool modMatch = false;
            List<BundleMod> matched_mods = temporarylocalModsList_master.Values.Where(mod => mod.getEscapedName().Equals(mo_bm.Name) || mod.Name.Equals(mo_bm.Name)).ToList();
            foreach (BundleMod bm in matched_mods)
            {
                modMatch = true;
                if (mo_bm.ItemQueue.Count > bm.ItemQueue.Count)
                {
                    // The override folder contains more files than the local mod:
                    // not the same mod — keep it as unrecognized.
                    mo_bm.canInstall = false;
                    mo_bm.canUninstall = true;
                    mo_bm.actionStatus = BundleMod.ModActionStatus.Missing;
                    mo_bm.status = BundleMod.ModStatus.Unrecognized;
                    AddModsList(mo_bm.file, mo_bm);
                }
                else
                {
                    // Check that every override item appears in the local mod's queue.
                    bool[] mo_checklist = new bool[mo_bm.ItemQueue.Count];
                    int checklist_i = 0;
                    bool mo_onlyfolder = !(bm.ItemQueue.Any(x => !x.isOverrideable()));
                    foreach (BundleRewriteItem mo_bri in mo_bm.ItemQueue)
                    {
                        if (bm.ItemQueue.Any(x => x.BundlePath == mo_bri.BundlePath && x.BundleExtension == mo_bri.BundleExtension))
                            mo_checklist[checklist_i] = true;
                        checklist_i++;
                    }
                    bool mo_equal = !mo_checklist.Any(x => !x);
                    if (!mo_equal)
                    {
                        mo_bm.canInstall = false;
                        mo_bm.canUninstall = true;
                        mo_bm.actionStatus = BundleMod.ModActionStatus.Missing;
                        mo_bm.status = BundleMod.ModStatus.Unrecognized;
                        AddModsList(mo_bm.file, mo_bm);
                    }
                    else
                    {
                        if (mo_onlyfolder || InstalledModsListContains(bm) > -1)
                        {
                            bm.status = BundleMod.ModStatus.Installed;
                        }
                        else
                        {
                            // Matched but bundle part not installed: force a reinstall.
                            bm.actionStatus = BundleMod.ModActionStatus.ForcedReinstall;
                            bm.status = BundleMod.ModStatus.ParticallyInstalled;
                            foreach (BundleRewriteItem bri in bm.ItemQueue)
                                bri.toReinstall = true;
                        }
                    }
                }
            }
            if (!modMatch)
            {
                mo_bm.canInstall = false;
                mo_bm.canUninstall = true;
                AddModsList(mo_bm.file, mo_bm);
            }
        }
    }
    watch.Stop();
    Console.WriteLine("LoadLocalMods.overrides - " + watch.ElapsedMilliseconds + " ms");

    watch.Restart();
    // --- Phase 3: BLT hook mods (only when the BLT "base" mod is present). ---
    if (Directory.Exists(Path.Combine(StaticStorage.settings.AssetsFolder, "..", "mods")))
    {
        if (Directory.Exists(Path.Combine(StaticStorage.settings.AssetsFolder, "..", "mods", "base")))
        {
            List<string> bltmods = Directory.EnumerateDirectories(Path.Combine(StaticStorage.settings.AssetsFolder, "..", "mods")).ToList();
            foreach (string bltmod in bltmods)
            {
                if (!Directory.Exists(bltmod))
                    continue;
                if (Path.GetFileNameWithoutExtension(bltmod).Equals("log") || Path.GetFileNameWithoutExtension(bltmod).Equals("base"))
                    continue;
                if (!File.Exists(Path.Combine(bltmod, "mod.txt")))
                    continue;
                BundleMod blt_mod = new BundleMod();
                blt_mod.Name = new DirectoryInfo(bltmod).Name;
                blt_mod.Author = "<UNKNOWN>";
                blt_mod.Description = "This is a BLT Hook mod. No description for this mod is availiable. This mod doesn't have a proper description. You can enable/disable this mod as well as uninstall it.";
                blt_mod.file = bltmod;
                blt_mod.status = BundleMod.ModStatus.Installed;
                blt_mod.type = BundleMod.ModType.lua;
                blt_mod.actionStatus = BundleMod.ModActionStatus.None;
                blt_mod.UtilizesOverride = false;
                blt_mod.UtilizesBundles = false;
                blt_mod.enabled = true;
                try
                {
                    // The StreamReader's using-block also disposes the underlying stream.
                    FileStream bltModfs = new FileStream(Path.Combine(bltmod, "mod.txt"), FileMode.Open);
                    using (StreamReader bltModsr = new StreamReader(bltModfs))
                    {
                        try
                        {
                            dynamic jsonDe = JsonConvert.DeserializeObject(bltModsr.ReadToEnd());
                            if (jsonDe != null)
                            {
                                if (jsonDe.name != null)
                                {
                                    blt_mod.Name = jsonDe.name;
                                }
                                if (jsonDe.author != null)
                                {
                                    blt_mod.Author = jsonDe.author;
                                }
                                if (jsonDe.description != null)
                                {
                                    blt_mod.Description = jsonDe.description;
                                }
                            }
                        }
                        catch (Exception exc)
                        {
                            blt_mod.Description += " Failed parsing mods.txt of " + Path.GetFileNameWithoutExtension(bltmod) + ", Message: " + exc.Message;
                        }
                    }
                }
                catch (Exception e)
                {
                    blt_mod.Description += " Failed parsing mods.txt of " + Path.GetFileNameWithoutExtension(bltmod) + ", Message: " + e.Message;
                }
                AddModsList(bltmod, blt_mod, true);
            }
        }
        LoadBLTModManagement();
    }
    watch.Stop();
    Console.WriteLine("LoadLocalMods.blt_mods - " + watch.ElapsedMilliseconds + " ms");
}
protected internal virtual void AddGuess(LabeledChunkIdentifier.LabelTagType guess, LabeledChunkIdentifier.LabelTagType correct, bool addUnknownLabels) { if (addUnknownLabels) { if (labelIndex == null) { labelIndex = new HashIndex <string>(); } labelIndex.Add(GetTypeLabel(guess)); labelIndex.Add(GetTypeLabel(correct)); } if (inCorrect) { bool prevCorrectEnded = LabeledChunkIdentifier.IsEndOfChunk(prevCorrect, correct); bool prevGuessEnded = LabeledChunkIdentifier.IsEndOfChunk(prevGuess, guess); if (prevCorrectEnded && prevGuessEnded && prevGuess.TypeMatches(prevCorrect)) { inCorrect = false; correctGuesses.IncrementCount(GetTypeLabel(prevCorrect)); } else { if (prevCorrectEnded != prevGuessEnded || !guess.TypeMatches(correct)) { inCorrect = false; } } } bool correctStarted = LabeledChunkIdentifier.IsStartOfChunk(prevCorrect, correct); bool guessStarted = LabeledChunkIdentifier.IsStartOfChunk(prevGuess, guess); if (correctStarted && guessStarted && guess.TypeMatches(correct)) { inCorrect = true; } if (correctStarted) { foundCorrect.IncrementCount(GetTypeLabel(correct)); } if (guessStarted) { foundGuessed.IncrementCount(GetTypeLabel(guess)); } if (chunker.IsIgnoreProvidedTag()) { if (guess.TypeMatches(correct)) { tokensCorrect++; } } else { if (guess.label.Equals(correct.label)) { tokensCorrect++; } } tokensCount++; prevGuess = guess; prevCorrect = correct; }
/// <summary>
/// Trains a FactoredLexicon on a treebank and reports tagging accuracy on a
/// dev set. Usage: language features train_file dev_file, where features is a
/// comma-separated list of MorphoFeatureType names. Only Arabic and French
/// are supported.
/// </summary>
/// <param name="args"/>
public static void Main(string[] args)
{
    if (args.Length != 4)
    {
        System.Console.Error.Printf("Usage: java %s language features train_file dev_file%n", typeof(Edu.Stanford.Nlp.Parser.Lexparser.FactoredLexicon).FullName);
        System.Environment.Exit(-1);
    }
    // Command line options
    Language language = Language.ValueOf(args[0]);
    ITreebankLangParserParams tlpp = language.@params;
    Treebank trainTreebank = tlpp.DiskTreebank();
    trainTreebank.LoadPath(args[2]);
    Treebank devTreebank = tlpp.DiskTreebank();
    devTreebank.LoadPath(args[3]);
    MorphoFeatureSpecification morphoSpec;
    Options options = GetOptions(language);
    // Language-specific morphology setup; anything other than Arabic/French
    // is rejected.
    if (language.Equals(Language.Arabic))
    {
        morphoSpec = new ArabicMorphoFeatureSpecification();
        string[] languageOptions = new string[] { "-arabicFactored" };
        tlpp.SetOptionFlag(languageOptions, 0);
    }
    else
    {
        if (language.Equals(Language.French))
        {
            morphoSpec = new FrenchMorphoFeatureSpecification();
            string[] languageOptions = new string[] { "-frenchFactored" };
            tlpp.SetOptionFlag(languageOptions, 0);
        }
        else
        {
            throw new NotSupportedException();
        }
    }
    // Activate each requested morphological feature by name.
    string featureList = args[1];
    string[] features = featureList.Trim().Split(",");
    foreach (string feature in features)
    {
        morphoSpec.Activate(MorphoFeatureSpecification.MorphoFeatureType.ValueOf(feature));
    }
    System.Console.Out.WriteLine("Language: " + language.ToString());
    System.Console.Out.WriteLine("Features: " + args[1]);
    // Create word and tag indices
    // Save trees in a collection since the interface requires that....
    System.Console.Out.Write("Loading training trees...");
    IList <Tree> trainTrees = new List <Tree>(19000);
    IIndex <string> wordIndex = new HashIndex <string>();
    IIndex <string> tagIndex = new HashIndex <string>();
    foreach (Tree tree in trainTreebank)
    {
        // Transform all non-leaf nodes in place before collecting the tree.
        foreach (Tree subTree in tree)
        {
            if (!subTree.IsLeaf())
            {
                tlpp.TransformTree(subTree, tree);
            }
        }
        trainTrees.Add(tree);
    }
    System.Console.Out.Printf("Done! (%d trees)%n", trainTrees.Count);
    // Setup and train the lexicon.
    System.Console.Out.Write("Collecting sufficient statistics for lexicon...");
    Edu.Stanford.Nlp.Parser.Lexparser.FactoredLexicon lexicon = new Edu.Stanford.Nlp.Parser.Lexparser.FactoredLexicon(options, morphoSpec, wordIndex, tagIndex);
    lexicon.InitializeTraining(trainTrees.Count);
    lexicon.Train(trainTrees, null);
    lexicon.FinishTraining();
    System.Console.Out.WriteLine("Done!");
    // Release the training trees before scoring the tuning set.
    trainTrees = null;
    // Load the tuning set
    System.Console.Out.Write("Loading tuning set...");
    IList <FactoredLexiconEvent> tuningSet = GetTuningSet(devTreebank, lexicon, tlpp);
    System.Console.Out.Printf("...Done! (%d events)%n", tuningSet.Count);
    // Print the probabilities that we obtain
    // TODO(spenceg): Implement tagging accuracy with FactLex
    int nCorrect = 0;
    ICounter <string> errors = new ClassicCounter <string>();
    foreach (FactoredLexiconEvent @event in tuningSet)
    {
        // Score every tag the lexicon proposes for this word/context.
        IEnumerator <IntTaggedWord> itr = lexicon.RuleIteratorByWord(@event.Word(), @event.GetLoc(), @event.FeatureStr());
        ICounter <int> logScores = new ClassicCounter <int>();
        bool noRules = true;
        int goldTagId = -1;
        while (itr.MoveNext())
        {
            noRules = false;
            IntTaggedWord iTW = itr.Current;
            if (iTW.Tag() == @event.TagId())
            {
                log.Info("GOLD-");
                goldTagId = iTW.Tag();
            }
            float tagScore = lexicon.Score(iTW, @event.GetLoc(), @event.Word(), @event.FeatureStr());
            logScores.IncrementCount(iTW.Tag(), tagScore);
        }
        if (noRules)
        {
            System.Console.Error.Printf("NO TAGGINGS: %s %s%n", @event.Word(), @event.FeatureStr());
        }
        else
        {
            // Score the tagging: the argmax tag is the hypothesis.
            int hypTagId = Counters.Argmax(logScores);
            if (hypTagId == goldTagId)
            {
                ++nCorrect;
            }
            else
            {
                // goldTagId stays -1 when the gold tag was never proposed.
                string goldTag = goldTagId < 0 ? "UNSEEN" : lexicon.tagIndex.Get(goldTagId);
                errors.IncrementCount(goldTag);
            }
        }
        log.Info();
    }
    // Output accuracy
    double acc = (double)nCorrect / (double)tuningSet.Count;
    System.Console.Error.Printf("%n%nACCURACY: %.2f%n%n", acc * 100.0);
    log.Info("% of errors by type:");
    // Report error mass per gold tag, largest first.
    IList <string> biggestKeys = new List <string>(errors.KeySet());
    biggestKeys.Sort(Counters.ToComparator(errors, false, true));
    Counters.Normalize(errors);
    foreach (string key in biggestKeys)
    {
        System.Console.Error.Printf("%s\t%.2f%n", key, errors.GetCount(key) * 100.0);
    }
}
/* some documentation for Roger's convenience * {pcfg,dep,combo}{PE,DE,TE} are precision/dep/tagging evals for the models * * parser is the PCFG parser * dparser is the dependency parser * bparser is the combining parser * * during testing: * tree is the test tree (gold tree) * binaryTree is the gold tree binarized * tree2b is the best PCFG paser, binarized * tree2 is the best PCFG parse (debinarized) * tree3 is the dependency parse, binarized * tree3db is the dependency parser, debinarized * tree4 is the best combo parse, binarized and then debinarized * tree4b is the best combo parse, binarized */ public static void Main(string[] args) { Options op = new Options(new EnglishTreebankParserParams()); // op.tlpParams may be changed to something else later, so don't use it till // after options are parsed. StringUtils.LogInvocationString(log, args); string path = "/u/nlp/stuff/corpora/Treebank3/parsed/mrg/wsj"; int trainLow = 200; int trainHigh = 2199; int testLow = 2200; int testHigh = 2219; string serializeFile = null; int i = 0; while (i < args.Length && args[i].StartsWith("-")) { if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-path") && (i + 1 < args.Length)) { path = args[i + 1]; i += 2; } else { if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-train") && (i + 2 < args.Length)) { trainLow = System.Convert.ToInt32(args[i + 1]); trainHigh = System.Convert.ToInt32(args[i + 2]); i += 3; } else { if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-test") && (i + 2 < args.Length)) { testLow = System.Convert.ToInt32(args[i + 1]); testHigh = System.Convert.ToInt32(args[i + 2]); i += 3; } else { if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-serialize") && (i + 1 < args.Length)) { serializeFile = args[i + 1]; i += 2; } else { if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-tLPP") && (i + 1 < args.Length)) { try { op.tlpParams = (ITreebankLangParserParams)System.Activator.CreateInstance(Sharpen.Runtime.GetType(args[i + 1])); } catch (TypeLoadException e) { log.Info("Class not 
found: " + args[i + 1]); throw new Exception(e); } catch (InstantiationException e) { log.Info("Couldn't instantiate: " + args[i + 1] + ": " + e.ToString()); throw new Exception(e); } catch (MemberAccessException e) { log.Info("illegal access" + e); throw new Exception(e); } i += 2; } else { if (args[i].Equals("-encoding")) { // sets encoding for TreebankLangParserParams op.tlpParams.SetInputEncoding(args[i + 1]); op.tlpParams.SetOutputEncoding(args[i + 1]); i += 2; } else { i = op.SetOptionOrWarn(args, i); } } } } } } } // System.out.println(tlpParams.getClass()); ITreebankLanguagePack tlp = op.tlpParams.TreebankLanguagePack(); op.trainOptions.sisterSplitters = Generics.NewHashSet(Arrays.AsList(op.tlpParams.SisterSplitters())); // BinarizerFactory.TreeAnnotator.setTreebankLang(tlpParams); PrintWriter pw = op.tlpParams.Pw(); op.testOptions.Display(); op.trainOptions.Display(); op.Display(); op.tlpParams.Display(); // setup tree transforms Treebank trainTreebank = op.tlpParams.MemoryTreebank(); MemoryTreebank testTreebank = op.tlpParams.TestMemoryTreebank(); // Treebank blippTreebank = ((EnglishTreebankParserParams) tlpParams).diskTreebank(); // String blippPath = "/afs/ir.stanford.edu/data/linguistic-data/BLLIP-WSJ/"; // blippTreebank.loadPath(blippPath, "", true); Timing.StartTime(); log.Info("Reading trees..."); testTreebank.LoadPath(path, new NumberRangeFileFilter(testLow, testHigh, true)); if (op.testOptions.increasingLength) { testTreebank.Sort(new TreeLengthComparator()); } trainTreebank.LoadPath(path, new NumberRangeFileFilter(trainLow, trainHigh, true)); Timing.Tick("done."); log.Info("Binarizing trees..."); TreeAnnotatorAndBinarizer binarizer; if (!op.trainOptions.leftToRight) { binarizer = new TreeAnnotatorAndBinarizer(op.tlpParams, op.forceCNF, !op.trainOptions.OutsideFactor(), true, op); } else { binarizer = new TreeAnnotatorAndBinarizer(op.tlpParams.HeadFinder(), new LeftHeadFinder(), op.tlpParams, op.forceCNF, !op.trainOptions.OutsideFactor(), true, 
op); } CollinsPuncTransformer collinsPuncTransformer = null; if (op.trainOptions.collinsPunc) { collinsPuncTransformer = new CollinsPuncTransformer(tlp); } ITreeTransformer debinarizer = new Debinarizer(op.forceCNF); IList <Tree> binaryTrainTrees = new List <Tree>(); if (op.trainOptions.selectiveSplit) { op.trainOptions.splitters = ParentAnnotationStats.GetSplitCategories(trainTreebank, op.trainOptions.tagSelectiveSplit, 0, op.trainOptions.selectiveSplitCutOff, op.trainOptions.tagSelectiveSplitCutOff, op.tlpParams.TreebankLanguagePack()); if (op.trainOptions.deleteSplitters != null) { IList <string> deleted = new List <string>(); foreach (string del in op.trainOptions.deleteSplitters) { string baseDel = tlp.BasicCategory(del); bool checkBasic = del.Equals(baseDel); for (IEnumerator <string> it = op.trainOptions.splitters.GetEnumerator(); it.MoveNext();) { string elem = it.Current; string baseElem = tlp.BasicCategory(elem); bool delStr = checkBasic && baseElem.Equals(baseDel) || elem.Equals(del); if (delStr) { it.Remove(); deleted.Add(elem); } } } log.Info("Removed from vertical splitters: " + deleted); } } if (op.trainOptions.selectivePostSplit) { ITreeTransformer myTransformer = new TreeAnnotator(op.tlpParams.HeadFinder(), op.tlpParams, op); Treebank annotatedTB = trainTreebank.Transform(myTransformer); op.trainOptions.postSplitters = ParentAnnotationStats.GetSplitCategories(annotatedTB, true, 0, op.trainOptions.selectivePostSplitCutOff, op.trainOptions.tagSelectivePostSplitCutOff, op.tlpParams.TreebankLanguagePack()); } if (op.trainOptions.hSelSplit) { binarizer.SetDoSelectiveSplit(false); foreach (Tree tree in trainTreebank) { if (op.trainOptions.collinsPunc) { tree = collinsPuncTransformer.TransformTree(tree); } //tree.pennPrint(tlpParams.pw()); tree = binarizer.TransformTree(tree); } //binaryTrainTrees.add(tree); binarizer.SetDoSelectiveSplit(true); } foreach (Tree tree_1 in trainTreebank) { if (op.trainOptions.collinsPunc) { tree_1 = 
collinsPuncTransformer.TransformTree(tree_1); } tree_1 = binarizer.TransformTree(tree_1); binaryTrainTrees.Add(tree_1); } if (op.testOptions.verbose) { binarizer.DumpStats(); } IList <Tree> binaryTestTrees = new List <Tree>(); foreach (Tree tree_2 in testTreebank) { if (op.trainOptions.collinsPunc) { tree_2 = collinsPuncTransformer.TransformTree(tree_2); } tree_2 = binarizer.TransformTree(tree_2); binaryTestTrees.Add(tree_2); } Timing.Tick("done."); // binarization BinaryGrammar bg = null; UnaryGrammar ug = null; IDependencyGrammar dg = null; // DependencyGrammar dgBLIPP = null; ILexicon lex = null; IIndex <string> stateIndex = new HashIndex <string>(); // extract grammars IExtractor <Pair <UnaryGrammar, BinaryGrammar> > bgExtractor = new BinaryGrammarExtractor(op, stateIndex); //Extractor bgExtractor = new SmoothedBinaryGrammarExtractor();//new BinaryGrammarExtractor(); // Extractor lexExtractor = new LexiconExtractor(); //Extractor dgExtractor = new DependencyMemGrammarExtractor(); if (op.doPCFG) { log.Info("Extracting PCFG..."); Pair <UnaryGrammar, BinaryGrammar> bgug = null; if (op.trainOptions.cheatPCFG) { IList <Tree> allTrees = new List <Tree>(binaryTrainTrees); Sharpen.Collections.AddAll(allTrees, binaryTestTrees); bgug = bgExtractor.Extract(allTrees); } else { bgug = bgExtractor.Extract(binaryTrainTrees); } bg = bgug.second; bg.SplitRules(); ug = bgug.first; ug.PurgeRules(); Timing.Tick("done."); } log.Info("Extracting Lexicon..."); IIndex <string> wordIndex = new HashIndex <string>(); IIndex <string> tagIndex = new HashIndex <string>(); lex = op.tlpParams.Lex(op, wordIndex, tagIndex); lex.InitializeTraining(binaryTrainTrees.Count); lex.Train(binaryTrainTrees); lex.FinishTraining(); Timing.Tick("done."); if (op.doDep) { log.Info("Extracting Dependencies..."); binaryTrainTrees.Clear(); IExtractor <IDependencyGrammar> dgExtractor = new MLEDependencyGrammarExtractor(op, wordIndex, tagIndex); // dgBLIPP = (DependencyGrammar) dgExtractor.extract(new 
ConcatenationIterator(trainTreebank.iterator(),blippTreebank.iterator()),new TransformTreeDependency(tlpParams,true)); // DependencyGrammar dg1 = dgExtractor.extract(trainTreebank.iterator(), new TransformTreeDependency(op.tlpParams, true)); //dgBLIPP=(DependencyGrammar)dgExtractor.extract(blippTreebank.iterator(),new TransformTreeDependency(tlpParams)); //dg = (DependencyGrammar) dgExtractor.extract(new ConcatenationIterator(trainTreebank.iterator(),blippTreebank.iterator()),new TransformTreeDependency(tlpParams)); // dg=new DependencyGrammarCombination(dg1,dgBLIPP,2); dg = dgExtractor.Extract(binaryTrainTrees); //uses information whether the words are known or not, discards unknown words Timing.Tick("done."); //System.out.print("Extracting Unknown Word Model..."); //UnknownWordModel uwm = (UnknownWordModel)uwmExtractor.extract(binaryTrainTrees); //Timing.tick("done."); System.Console.Out.Write("Tuning Dependency Model..."); dg.Tune(binaryTestTrees); //System.out.println("TUNE DEPS: "+tuneDeps); Timing.Tick("done."); } BinaryGrammar boundBG = bg; UnaryGrammar boundUG = ug; IGrammarProjection gp = new NullGrammarProjection(bg, ug); // serialization if (serializeFile != null) { log.Info("Serializing parser..."); LexicalizedParser parser = new LexicalizedParser(lex, bg, ug, dg, stateIndex, wordIndex, tagIndex, op); parser.SaveParserToSerialized(serializeFile); Timing.Tick("done."); } // test: pcfg-parse and output ExhaustivePCFGParser parser_1 = null; if (op.doPCFG) { parser_1 = new ExhaustivePCFGParser(boundBG, boundUG, lex, op, stateIndex, wordIndex, tagIndex); } ExhaustiveDependencyParser dparser = ((op.doDep && !op.testOptions.useFastFactored) ? new ExhaustiveDependencyParser(dg, lex, op, wordIndex, tagIndex) : null); IScorer scorer = (op.doPCFG ? new TwinScorer(new ProjectionScorer(parser_1, gp, op), dparser) : null); //Scorer scorer = parser; BiLexPCFGParser bparser = null; if (op.doPCFG && op.doDep) { bparser = (op.testOptions.useN5) ? 
new BiLexPCFGParser.N5BiLexPCFGParser(scorer, parser_1, dparser, bg, ug, dg, lex, op, gp, stateIndex, wordIndex, tagIndex) : new BiLexPCFGParser(scorer, parser_1, dparser, bg, ug, dg, lex, op, gp, stateIndex , wordIndex, tagIndex); } Evalb pcfgPE = new Evalb("pcfg PE", true); Evalb comboPE = new Evalb("combo PE", true); AbstractEval pcfgCB = new Evalb.CBEval("pcfg CB", true); AbstractEval pcfgTE = new TaggingEval("pcfg TE"); AbstractEval comboTE = new TaggingEval("combo TE"); AbstractEval pcfgTEnoPunct = new TaggingEval("pcfg nopunct TE"); AbstractEval comboTEnoPunct = new TaggingEval("combo nopunct TE"); AbstractEval depTE = new TaggingEval("depnd TE"); AbstractEval depDE = new UnlabeledAttachmentEval("depnd DE", true, null, tlp.PunctuationWordRejectFilter()); AbstractEval comboDE = new UnlabeledAttachmentEval("combo DE", true, null, tlp.PunctuationWordRejectFilter()); if (op.testOptions.evalb) { EvalbFormatWriter.InitEVALBfiles(op.tlpParams); } // int[] countByLength = new int[op.testOptions.maxLength+1]; // Use a reflection ruse, so one can run this without needing the // tagger. Using a function rather than a MaxentTagger means we // can distribute a version of the parser that doesn't include the // entire tagger. 
IFunction <IList <IHasWord>, List <TaggedWord> > tagger = null; if (op.testOptions.preTag) { try { Type[] argsClass = new Type[] { typeof(string) }; object[] arguments = new object[] { op.testOptions.taggerSerializedFile }; tagger = (IFunction <IList <IHasWord>, List <TaggedWord> >)Sharpen.Runtime.GetType("edu.stanford.nlp.tagger.maxent.MaxentTagger").GetConstructor(argsClass).NewInstance(arguments); } catch (Exception e) { log.Info(e); log.Info("Warning: No pretagging of sentences will be done."); } } for (int tNum = 0; tNum < ttSize; tNum++) { Tree tree = testTreebank[tNum]; int testTreeLen = tree_2.Yield().Count; if (testTreeLen > op.testOptions.maxLength) { continue; } Tree binaryTree = binaryTestTrees[tNum]; // countByLength[testTreeLen]++; System.Console.Out.WriteLine("-------------------------------------"); System.Console.Out.WriteLine("Number: " + (tNum + 1)); System.Console.Out.WriteLine("Length: " + testTreeLen); //tree.pennPrint(pw); // System.out.println("XXXX The binary tree is"); // binaryTree.pennPrint(pw); //System.out.println("Here are the tags in the lexicon:"); //System.out.println(lex.showTags()); //System.out.println("Here's the tagnumberer:"); //System.out.println(Numberer.getGlobalNumberer("tags").toString()); long timeMil1 = Runtime.CurrentTimeMillis(); Timing.Tick("Starting parse."); if (op.doPCFG) { //log.info(op.testOptions.forceTags); if (op.testOptions.forceTags) { if (tagger != null) { //System.out.println("Using a tagger to set tags"); //System.out.println("Tagged sentence as: " + tagger.processSentence(cutLast(wordify(binaryTree.yield()))).toString(false)); parser_1.Parse(AddLast(tagger.Apply(CutLast(Wordify(binaryTree.Yield()))))); } else { //System.out.println("Forcing tags to match input."); parser_1.Parse(CleanTags(binaryTree.TaggedYield(), tlp)); } } else { // System.out.println("XXXX Parsing " + binaryTree.yield()); parser_1.Parse(binaryTree.YieldHasWord()); } } //Timing.tick("Done with pcfg phase."); if (op.doDep) { 
dparser.Parse(binaryTree.YieldHasWord()); } //Timing.tick("Done with dependency phase."); bool bothPassed = false; if (op.doPCFG && op.doDep) { bothPassed = bparser.Parse(binaryTree.YieldHasWord()); } //Timing.tick("Done with combination phase."); long timeMil2 = Runtime.CurrentTimeMillis(); long elapsed = timeMil2 - timeMil1; log.Info("Time: " + ((int)(elapsed / 100)) / 10.00 + " sec."); //System.out.println("PCFG Best Parse:"); Tree tree2b = null; Tree tree2 = null; //System.out.println("Got full best parse..."); if (op.doPCFG) { tree2b = parser_1.GetBestParse(); tree2 = debinarizer.TransformTree(tree2b); } //System.out.println("Debinarized parse..."); //tree2.pennPrint(); //System.out.println("DepG Best Parse:"); Tree tree3 = null; Tree tree3db = null; if (op.doDep) { tree3 = dparser.GetBestParse(); // was: but wrong Tree tree3db = debinarizer.transformTree(tree2); tree3db = debinarizer.TransformTree(tree3); tree3.PennPrint(pw); } //tree.pennPrint(); //((Tree)binaryTrainTrees.get(tNum)).pennPrint(); //System.out.println("Combo Best Parse:"); Tree tree4 = null; if (op.doPCFG && op.doDep) { try { tree4 = bparser.GetBestParse(); if (tree4 == null) { tree4 = tree2b; } } catch (ArgumentNullException) { log.Info("Blocked, using PCFG parse!"); tree4 = tree2b; } } if (op.doPCFG && !bothPassed) { tree4 = tree2b; } //tree4.pennPrint(); if (op.doDep) { depDE.Evaluate(tree3, binaryTree, pw); depTE.Evaluate(tree3db, tree_2, pw); } ITreeTransformer tc = op.tlpParams.Collinizer(); ITreeTransformer tcEvalb = op.tlpParams.CollinizerEvalb(); if (op.doPCFG) { // System.out.println("XXXX Best PCFG was: "); // tree2.pennPrint(); // System.out.println("XXXX Transformed best PCFG is: "); // tc.transformTree(tree2).pennPrint(); //System.out.println("True Best Parse:"); //tree.pennPrint(); //tc.transformTree(tree).pennPrint(); pcfgPE.Evaluate(tc.TransformTree(tree2), tc.TransformTree(tree_2), pw); pcfgCB.Evaluate(tc.TransformTree(tree2), tc.TransformTree(tree_2), pw); Tree tree4b = 
null; if (op.doDep) { comboDE.Evaluate((bothPassed ? tree4 : tree3), binaryTree, pw); tree4b = tree4; tree4 = debinarizer.TransformTree(tree4); if (op.nodePrune) { NodePruner np = new NodePruner(parser_1, debinarizer); tree4 = np.Prune(tree4); } //tree4.pennPrint(); comboPE.Evaluate(tc.TransformTree(tree4), tc.TransformTree(tree_2), pw); } //pcfgTE.evaluate(tree2, tree); pcfgTE.Evaluate(tcEvalb.TransformTree(tree2), tcEvalb.TransformTree(tree_2), pw); pcfgTEnoPunct.Evaluate(tc.TransformTree(tree2), tc.TransformTree(tree_2), pw); if (op.doDep) { comboTE.Evaluate(tcEvalb.TransformTree(tree4), tcEvalb.TransformTree(tree_2), pw); comboTEnoPunct.Evaluate(tc.TransformTree(tree4), tc.TransformTree(tree_2), pw); } System.Console.Out.WriteLine("PCFG only: " + parser_1.ScoreBinarizedTree(tree2b, 0)); //tc.transformTree(tree2).pennPrint(); tree2.PennPrint(pw); if (op.doDep) { System.Console.Out.WriteLine("Combo: " + parser_1.ScoreBinarizedTree(tree4b, 0)); // tc.transformTree(tree4).pennPrint(pw); tree4.PennPrint(pw); } System.Console.Out.WriteLine("Correct:" + parser_1.ScoreBinarizedTree(binaryTree, 0)); /* * if (parser.scoreBinarizedTree(tree2b,true) < parser.scoreBinarizedTree(binaryTree,true)) { * System.out.println("SCORE INVERSION"); * parser.validateBinarizedTree(binaryTree,0); * } */ tree_2.PennPrint(pw); } // end if doPCFG if (op.testOptions.evalb) { if (op.doPCFG && op.doDep) { EvalbFormatWriter.WriteEVALBline(tcEvalb.TransformTree(tree_2), tcEvalb.TransformTree(tree4)); } else { if (op.doPCFG) { EvalbFormatWriter.WriteEVALBline(tcEvalb.TransformTree(tree_2), tcEvalb.TransformTree(tree2)); } else { if (op.doDep) { EvalbFormatWriter.WriteEVALBline(tcEvalb.TransformTree(tree_2), tcEvalb.TransformTree(tree3db)); } } } } } // end for each tree in test treebank if (op.testOptions.evalb) { EvalbFormatWriter.CloseEVALBfiles(); } // op.testOptions.display(); if (op.doPCFG) { pcfgPE.Display(false, pw); System.Console.Out.WriteLine("Grammar size: " + stateIndex.Size()); 
pcfgCB.Display(false, pw); if (op.doDep) { comboPE.Display(false, pw); } pcfgTE.Display(false, pw); pcfgTEnoPunct.Display(false, pw); if (op.doDep) { comboTE.Display(false, pw); comboTEnoPunct.Display(false, pw); } } if (op.doDep) { depTE.Display(false, pw); depDE.Display(false, pw); } if (op.doPCFG && op.doDep) { comboDE.Display(false, pw); } }
/// <summary>Generates the full hash list and writes it to "./FullHashlist".</summary>
public void execute() => HashIndex.GenerateHashList("./FullHashlist");
/// <summary>
/// Looks up the packaged file for <paramref name="path"/> and extracts it, dispatching to a
/// registered per-extension processor when one exists, otherwise writing the raw file.
/// For ".object" files, the companion "model" and "cooked_physics" files sharing the same
/// base path are extracted first, when present in the package.
/// </summary>
/// <param name="path">Virtual (in-package) path of the file to process.</param>
private void ProcessFile(string path)
{
    Idstring p_ids = HashIndex.Get(Path.GetFileNameWithoutExtension(path));
    var t_ids = new Tuple<Idstring, Idstring, Idstring>(p_ids, new Idstring(0), HashIndex.Get(Path.GetExtension(path)));
    if (!this._browser.RawFiles.ContainsKey(t_ids))
    {
        this.error_output.Write(string.Format("File with path {0} does not exist!\n", path));
        this.error_output.Flush();
        return;
    }
    FileEntry file = this._browser.RawFiles[t_ids];
    // Skip entries with no bundle backing and files that were already extracted.
    if (file.BundleEntries.Count == 0 || this.ExtractedPaths.Contains(p_ids))
    {
        return;
    }
    try
    {
        if (Path.GetExtension(path) == ".object")
        {
            // The companion files share the object's base path (directory plus filename
            // without extension, forward-slash separated) — compute it once; the original
            // computed the identical string twice.
            string base_path = Path.Combine(Path.GetDirectoryName(path), Path.GetFileNameWithoutExtension(path)).Replace("\\", "/");
            //error_output.WriteLine(string.Format("Attempt to output model file {0}", base_path));
            this.WriteCompanionFile(base_path, "model");
            //error_output.WriteLine(string.Format("Attempt to output cooked_physics file {0}", base_path));
            this.WriteCompanionFile(base_path, "cooked_physics");
        }
        if (this.FileProcessors.ContainsKey(file._extension.ToString()))
        {
            this.FileProcessors[file._extension.ToString()].Invoke(file);
        }
        else
        {
            this.WriteFile(file);
        }
    }
    catch (Exception exc)
    {
        this.error_output.Write("Exception occured on file: {0}\n", file.Path);
        this.error_output.Write(exc.Message + "\n");
        this.error_output.Write(exc.StackTrace + "\n");
        this.error_output.Flush();
    }
}

/// <summary>
/// Writes the packaged file identified by <paramref name="basePath"/> and
/// <paramref name="extension"/>, if it exists in the browser's raw file table.
/// </summary>
/// <param name="basePath">Base virtual path (no extension, forward-slash separated).</param>
/// <param name="extension">Extension name without the leading dot (e.g. "model").</param>
private void WriteCompanionFile(string basePath, string extension)
{
    var key = new Tuple<Idstring, Idstring, Idstring>(HashIndex.Get(basePath), new Idstring(0), HashIndex.Get(extension));
    if (this._browser.RawFiles.ContainsKey(key))
    {
        this.WriteFile(this._browser.RawFiles[key]);
    }
}
/// <summary>
/// Provides some testing and opportunities for exploration of the
/// probabilities of a BaseLexicon.
/// </summary>
/// <remarks>
/// Provides some testing and opportunities for exploration of the
/// probabilities of a BaseLexicon. What's here currently probably
/// only works for the English Penn Treebank, as it uses default
/// constructors. Of the words given to test on,
/// the first is treated as sentence initial, and the rest as not
/// sentence initial.
/// </remarks>
/// <param name="args">
/// The command line arguments:
/// java BaseLexicon treebankPath fileRange unknownWordModel words
/// </param>
public static void Main(string[] args)
{
    if (args.Length < 3)
    {
        log.Info("java BaseLexicon treebankPath fileRange unknownWordModel words*");
        return;
    }
    // Train a lexicon from the treebank at args[0], restricted to the file range args[1].
    System.Console.Out.Write("Training BaseLexicon from " + args[0] + ' ' + args[1] + " ... ");
    Treebank tb = new DiskTreebank();
    tb.LoadPath(args[0], new NumberRangesFileFilter(args[1], true));
    // TODO: change this interface so the lexicon creates its own indices?
    IIndex<string> wordIndex = new HashIndex<string>();
    IIndex<string> tagIndex = new HashIndex<string>();
    Options op = new Options();
    // args[2] selects the unknown-word signature model level.
    op.lexOptions.useUnknownWordSignatures = System.Convert.ToInt32(args[2]);
    Edu.Stanford.Nlp.Parser.Lexparser.BaseLexicon lex = new Edu.Stanford.Nlp.Parser.Lexparser.BaseLexicon(op, wordIndex, tagIndex);
    lex.InitializeTraining(tb.Count);
    lex.Train(tb);
    lex.FinishTraining();
    System.Console.Out.WriteLine("done.");
    System.Console.Out.WriteLine();
    NumberFormat nf = NumberFormat.GetNumberInstance();
    nf.SetMaximumFractionDigits(4);
    IList<string> impos = new List<string>();
    // Score each remaining argument word. Position i - 3 is passed as the word's
    // sentence position, so the first test word (i == 3) is treated as sentence-initial.
    for (int i = 3; i < args.Length; i++)
    {
        if (lex.IsKnown(args[i]))
        {
            // Known word: print log P(w|t) for every tagging the lexicon allows.
            System.Console.Out.WriteLine(args[i] + " is a known word. Log probabilities [log P(w|t)] for its taggings are:");
            for (IEnumerator<IntTaggedWord> it = lex.RuleIteratorByWord(wordIndex.AddToIndex(args[i]), i - 3, null); it.MoveNext();)
            {
                IntTaggedWord iTW = it.Current;
                System.Console.Out.WriteLine(StringUtils.Pad(iTW, 24) + nf.Format(lex.Score(iTW, i - 3, wordIndex.Get(iTW.word), null)));
            }
        }
        else
        {
            // Unknown word: report its signature under the configured unknown-word model,
            // then try every tag seen in training.
            string sig = lex.GetUnknownWordModel().GetSignature(args[i], i - 3);
            System.Console.Out.WriteLine(args[i] + " is an unknown word. Signature with uwm " + lex.GetUnknownWordModel().GetUnknownLevel() + ((i == 3) ? " init" : "non-init") + " is: " + sig);
            impos.Clear();
            IList<string> lis = new List<string>(tagIndex.ObjectsList());
            lis.Sort();
            foreach (string tStr in lis)
            {
                IntTaggedWord iTW = new IntTaggedWord(args[i], tStr, wordIndex, tagIndex);
                // NOTE(review): position is hard-coded to 1 (non-initial) here, unlike the
                // i - 3 used above — confirm this is intended.
                double score = lex.Score(iTW, 1, args[i], null);
                if (score == float.NegativeInfinity)
                {
                    // Collect impossible taggings and report them together below.
                    impos.Add(tStr);
                }
                else
                {
                    System.Console.Out.WriteLine(StringUtils.Pad(iTW, 24) + nf.Format(score));
                }
            }
            if (impos.Count > 0)
            {
                System.Console.Out.WriteLine(args[i] + " impossible tags: " + impos);
            }
        }
        System.Console.Out.WriteLine();
    }
}
/// <summary>
/// Generates the full hash list and writes it to "./FullHashlist".
/// The supplied <paramref name="browser"/> is not used.
/// </summary>
public void execute(PackageBrowser browser) => HashIndex.GenerateHashList("./FullHashlist");
/// <summary>
/// Builds a display path for a hashed file entry. Folder and file names are resolved
/// through <c>HashList</c> when known; unresolved parts fall back to their hex hash,
/// with a "[FullHash=...]" tag appended when the folder hash is unknown. For recognized
/// binary formats the file name embedded in the data header is used instead.
/// </summary>
private string GetString(ulong fullHash, HashIndex fileHashIndex, byte[] Data)
{
    var folderHash = fileHashIndex.Parent.hash;
    var fileHash = fileHashIndex.hash;

    // Resolve the folder portion; fall back to the hex hash when it is not in the list.
    bool hasFolderHash = HashList.ContainsKey(folderHash);
    string folder = hasFolderHash
        ? $"{HashList[folderHash]}/"
        : $"{folderHash.ToString("X")}/";

    string ext = FindMatch(Data);
    if (ext == ".bntx" || ext == ".bfres" || ext == ".bnsh" || ext == ".bfsha")
    {
        // These formats carry their own name in the binary header.
        string fileName = GetBinaryHeaderName(Data);
        if (ext == ".bnsh")
        {
            // A .bnsh may be a fragment or vertex shader; disambiguate via the file hash.
            if (FNV64A1.Calculate($"{fileName}.bnsh_fsh") == fileHash)
            {
                fileName = $"{fileName}.bnsh_fsh";
            }
            else if (FNV64A1.Calculate($"{fileName}.bnsh_vsh") == fileHash)
            {
                fileName = $"{fileName}.bnsh_vsh";
            }
        }
        else
        {
            fileName = $"{fileName}{ext}";
        }
        return hasFolderHash
            ? $"{folder}{fileName}"
            : $"{folder}{fileName}[FullHash={fullHash.ToString("X")}]{ext}";
    }

    if (HashList.ContainsKey(fileHash))
    {
        return hasFolderHash
            ? $"{folder}{HashList[fileHash]}"
            : $"{folder}{HashList[fileHash]}[FullHash={fullHash.ToString("X")}]{ext}";
    }
    return $"{folder}{fileHash.ToString("X")}[FullHash={fullHash.ToString("X")}]{ext}";
}