/// <summary>Applies a feature count threshold to the RVFDataset.</summary>
/// <remarks>
/// Applies a feature count threshold to the RVFDataset. All features that
/// occur fewer than <i>k</i> times are expunged.
/// </remarks>
public override void ApplyFeatureCountThreshold(int k)
{
    float[] featureCounts = GetFeatureCounts();
    // Build a compacted index of surviving features, recording for every
    // old feature id either its new id or -1 (expunged).
    HashIndex<F> keptIndex = new HashIndex<F>();
    int[] remap = new int[featureIndex.Size()];
    for (int old = 0; old < remap.Length; old++)
    {
        F feature = featureIndex.Get(old);
        if (featureCounts[old] >= k)
        {
            remap[old] = keptIndex.Count;
            keptIndex.Add(feature);
        }
        else
        {
            remap[old] = -1;
        }
    }
    featureIndex = keptIndex;
    // Rewrite every datum's feature/value arrays in terms of the new ids,
    // sizing the result arrays exactly before filling them.
    for (int d = 0; d < size; d++)
    {
        int kept = 0;
        for (int j = 0; j < data[d].Length; j++)
        {
            if (remap[data[d][j]] >= 0)
            {
                kept++;
            }
        }
        int[] newData = new int[kept];
        double[] newValues = new double[kept];
        int w = 0;
        for (int j = 0; j < data[d].Length; j++)
        {
            int mapped = remap[data[d][j]];
            if (mapped >= 0)
            {
                newData[w] = mapped;
                newValues[w] = values[d][j];
                w++;
            }
        }
        data[d] = newData;
        values[d] = newValues;
    }
}
/// <summary>
/// Reads a NUL-separated hashlist blob out of a package file at the entry's
/// address and loads the resulting path strings into the global HashIndex.
/// </summary>
/// <param name="file">Path of the package file containing the hashlist.</param>
/// <param name="be">Entry describing the blob's address/length; Length == -1 means "read to end of file".</param>
public static void ReadHashlistAndLoad(string file, PackageFileEntry be)
{
    using FileStream fs = new FileStream(file, FileMode.Open, FileAccess.Read);
    using BinaryReader br = new BinaryReader(fs);
    fs.Position = be.Address;
    byte[] data = be.Length == -1
        ? br.ReadBytes((int)(fs.Length - fs.Position))
        : br.ReadBytes((int)be.Length);
    // Each byte is widened to a char (effectively a Latin-1 decode); entries
    // in the blob are separated by NUL characters.
    StringBuilder sb = new StringBuilder(data.Length);
    foreach (byte read in data)
    {
        sb.Append((char)read);
    }
    HashSet<string> new_paths = new HashSet<string>(sb.ToString().Split('\0'));
    // Well-known names that must always be present in the lookup.
    new_paths.Add("idstring_lookup");
    new_paths.Add("existing_banks");
    new_paths.Add("engine-package");
    HashIndex.Load(ref new_paths);
    new_paths.Clear();
    // FIX: removed the explicit br.Close() — the using declarations already
    // dispose both the reader and the stream; closing twice was redundant.
}
/// <summary>
/// Parses a GFLXPACK archive: validates the signature, reads the header,
/// then loads the hash table, hash-index records, and file entries.
/// </summary>
/// <param name="reader">Positioned reader over the archive data.</param>
/// <exception cref="Exception">Thrown when the 8-byte signature is not "GFLXPACK".</exception>
public void Read(FileReader reader)
{
    string magic = reader.ReadString(8, Encoding.ASCII);
    if (magic != "GFLXPACK")
    {
        throw new Exception($"Invalid signature {magic}! Expected GFLXPACK.");
    }
    // Fixed-layout header fields.
    version = reader.ReadInt32();
    _ = reader.ReadUInt32(); // padding dword; read only to advance the stream
    uint fileCount = reader.ReadUInt32();
    FolderCount = reader.ReadInt32();
    ulong fileInfoOffset = reader.ReadUInt64();
    ulong hashArrayOffset = reader.ReadUInt64();
    ulong hashArrayIndexOffset = reader.ReadUInt64();
    // Hash table: one 64-bit hash per file.
    reader.Seek((long)hashArrayOffset, SeekOrigin.Begin);
    for (int i = 0; i < fileCount; i++)
    {
        hashes.Add(reader.ReadUInt64());
    }
    // Hash index records, one per file.
    reader.Seek((long)hashArrayIndexOffset, SeekOrigin.Begin);
    for (int i = 0; i < fileCount; i++)
    {
        HashIndex entry = new HashIndex();
        entry.Read(reader);
        hashIndices.Add(entry);
    }
    // File entries; each tree node is labelled with its corresponding hash.
    reader.Seek((long)fileInfoOffset, SeekOrigin.Begin);
    for (int i = 0; i < fileCount; i++)
    {
        FileEntry fileEntry = new FileEntry();
        fileEntry.Read(reader);
        fileEntry.Text = hashes[i].ToString();
        Nodes.Add(fileEntry);
        files.Add(fileEntry);
    }
}
/// <summary>
/// Runs the heist extraction: locates the world file in the browser's raw
/// files, extracts it (and everything it references), then writes an add.xml
/// manifest of all extracted paths. Progress and errors go to heist_extractor.log.
/// </summary>
/// <param name="browser">Package browser holding the raw file table.</param>
public void execute(PackageBrowser browser)
{
    ExtractedPaths = new HashSet<Idstring>();
    error_output = new StreamWriter("./heist_extractor.log");
    _browser = browser;
    var clock = new System.Diagnostics.Stopwatch();
    clock.Start();
    error_output.Write("Heist Extractor executed\n");
    error_output.Flush();
    // The world file is keyed by (path-hash, language 0, extension-hash).
    Idstring worldName = HashIndex.Get(heist_world);
    Idstring worldExt = HashIndex.Get("world");
    var key = new Tuple<Idstring, Idstring, Idstring>(worldName, new Idstring(0), worldExt);
    if (browser.RawFiles.ContainsKey(key))
    {
        ProcessWorld(browser.RawFiles[key]);
    }
    else
    {
        Console.WriteLine("World File does not exist");
    }
    // Emit an add.xml manifest: one <ext path="..." force="true"/> per path.
    using (StreamWriter str = new StreamWriter(new FileStream(Path.Combine(OutputPath, "add.xml"), FileMode.Create, FileAccess.Write)))
    {
        str.Write("<table>\n");
        foreach (Idstring extracted in ExtractedPaths)
        {
            string[] split = extracted.ToString().Split('.');
            str.Write(String.Format("\t<{0} path=\"{1}\" force=\"true\"/>\n", split[1], split[0]));
        }
        str.Write("</table>\n");
    }
    clock.Stop();
    error_output.Write("Scrape operation took {0} seconds\n", clock.Elapsed.TotalSeconds.ToString());
    error_output.Close();
}
// Application entry ctor: wires crash handling, attaches a console,
// and loads the local hashlist before converters.
public App()
{
#if !DEBUG
    // Release builds: route unhandled dispatcher exceptions to OnException,
    // and only attach a console when a marker file named "debug" exists.
    // (In DEBUG builds both lines below are compiled out, so AllocConsole()
    // runs unconditionally.)
    Dispatcher.UnhandledException += OnException;
    if (File.Exists("debug"))
#endif
    AllocConsole();
    Console.WriteLine("Loading local hashlist");
    if (File.Exists("Data/hashlist"))
    {
        HashIndex.LoadParallel("Data/hashlist");
    }
    else
    {
        Console.WriteLine("Local hashlist is missing!");
    }
    LoadConverters();
}
/// <summary>
/// Builds the tag-to-bin mapping: each tag is projected (via tagProjection,
/// or used verbatim when no projection is set) and assigned a compact bin id.
/// Fills <c>tagBin</c> and sets <c>numTagBins</c>.
/// </summary>
protected internal virtual void InitTagBins()
{
    IIndex<string> binIndex = new HashIndex<string>();
    tagBin = new int[tagIndex.Size()];
    for (int t = 0; t < tagBin.Length; t++)
    {
        string tag = tagIndex.Get(t);
        string bin = tagProjection == null ? tag : tagProjection.Project(tag);
        tagBin[t] = binIndex.AddToIndex(bin);
    }
    numTagBins = binIndex.Size();
}
/// <summary>
/// Trains a complete LexicalizedParser from the given treebank: binarizes the
/// training trees, extracts the PCFG (unary + binary grammars), trains the
/// lexicon, and — when op.doDep is set — extracts a dependency grammar.
/// </summary>
/// <param name="trainTreebank">Treebank of training trees.</param>
/// <returns>A parser bundling lexicon, grammars, dependency grammar (may be null), and the shared indices.</returns>
public virtual LexicalizedParser GetParserDataFromTreebank(Treebank trainTreebank)
{
    log.Info("Binarizing training trees...");
    IList<Tree> binaryTrainTrees = GetAnnotatedBinaryTreebankFromTreebank(trainTreebank);
    Timing.Tick("done.");
    // Shared state index is filled in by the grammar extractor.
    IIndex<string> stateIndex = new HashIndex<string>();
    log.Info("Extracting PCFG...");
    IExtractor<Pair<UnaryGrammar, BinaryGrammar>> bgExtractor = new BinaryGrammarExtractor(op, stateIndex);
    Pair<UnaryGrammar, BinaryGrammar> bgug = bgExtractor.Extract(binaryTrainTrees);
    BinaryGrammar bg = bgug.second;
    bg.SplitRules();
    UnaryGrammar ug = bgug.first;
    ug.PurgeRules();
    Timing.Tick("done.");
    log.Info("Extracting Lexicon...");
    // Word/tag indices are shared between the lexicon and the dependency extractor.
    IIndex<string> wordIndex = new HashIndex<string>();
    IIndex<string> tagIndex = new HashIndex<string>();
    ILexicon lex = op.tlpParams.Lex(op, wordIndex, tagIndex);
    lex.InitializeTraining(binaryTrainTrees.Count);
    lex.Train(binaryTrainTrees);
    lex.FinishTraining();
    Timing.Tick("done.");
    IExtractor<IDependencyGrammar> dgExtractor = op.tlpParams.DependencyGrammarExtractor(op, wordIndex, tagIndex);
    IDependencyGrammar dg = null;
    if (op.doDep)
    {
        log.Info("Extracting Dependencies...");
        dg = dgExtractor.Extract(binaryTrainTrees);
        dg.SetLexicon(lex);
        Timing.Tick("done.");
    }
    log.Info("Done extracting grammars and lexicon.");
    return(new LexicalizedParser(lex, bg, ug, dg, stateIndex, wordIndex, tagIndex, op));
}
/// <summary>Retains the given features in the Dataset.</summary>
/// <remarks>
/// Retains the given features in the Dataset. All features that
/// do not occur in features are expunged.
/// </remarks>
public virtual void RetainFeatures(ICollection<F> features)
{
    // Map each old feature id to its compacted id, or -1 when dropped.
    IIndex<F> keptIndex = new HashIndex<F>();
    int[] remap = new int[featureIndex.Size()];
    for (int old = 0; old < remap.Length; old++)
    {
        F feature = featureIndex.Get(old);
        if (features.Contains(feature))
        {
            remap[old] = keptIndex.Size();
            keptIndex.Add(feature);
        }
        else
        {
            remap[old] = -1;
        }
    }
    featureIndex = keptIndex;
    // Compact every datum's feature array, keeping only retained features.
    for (int d = 0; d < size; d++)
    {
        int kept = 0;
        foreach (int f in data[d])
        {
            if (remap[f] >= 0)
            {
                kept++;
            }
        }
        int[] compacted = new int[kept];
        int w = 0;
        foreach (int f in data[d])
        {
            if (remap[f] >= 0)
            {
                compacted[w++] = remap[f];
            }
        }
        data[d] = compacted;
    }
}
/// <summary>
/// Populates the details dialog from the mod's metadata and builds the
/// replacement-files grid. Items whose bundle path or extension cannot be
/// unhashed are skipped.
/// </summary>
private void ModDetails_Load(object sender, EventArgs e)
{
    Text = "Mod Details - " + _mod.Name;
    ModNameText.Text = _mod.Name;
    AuthorText.Text = _mod.Author;
    DescriptionText.Text = _mod.Description;
    foreach (BundleRewriteItem item in _mod.ItemQueue)
    {
        string path = HashIndex.GetUnhashed(item.BundlePath);
        string extension = HashIndex.GetUnhashed(item.BundleExtension);
        // Only list items we can resolve back to readable names.
        if (string.IsNullOrEmpty(path) || string.IsNullOrEmpty(extension))
        {
            continue;
        }
        BundleRewriteItem row = new BundleRewriteItem();
        row.ReplacementFile = Path.GetFileName(_mod.file) + "/" + item.ReplacementFile;
        row.SourceFile = item.IsLanguageSpecific
            ? path + "." + item.BundleLanguage + "." + extension
            : path + "." + extension;
        _items.Add(row);
    }
    ReplacementFilesGridView.DataSource = _items;
    ReplacementFilesGridView.Update();
}
/// <summary>
/// Writes one extracted file to the output directory and records its Idstring
/// so it is not extracted twice. Skips entries with no bundle backing.
/// </summary>
/// <param name="entry">The file entry to extract.</param>
/// <param name="byt">Optional pre-computed bytes; falls back to entry.FileBytes(), then to empty.</param>
private void WriteFile(FileEntry entry, byte[] byt = null)
{
    Idstring ids = HashIndex.Get(entry.Path);
    // Nothing to write, or already written on an earlier pass.
    if (entry.BundleEntries.Count == 0 || ExtractedPaths.Contains(ids))
    {
        return;
    }
    string outPath = Path.Combine(OutputPath, entry.Path);
    string dir = Path.GetDirectoryName(outPath);
    if (!Directory.Exists(dir))
    {
        Directory.CreateDirectory(dir);
    }
    File.WriteAllBytes(outPath, byt ?? entry.FileBytes() ?? new byte[0]);
    ExtractedPaths.Add(ids);
}
/// <summary>
/// Parses one row of column-separated values into an RVFDatum: the class
/// column becomes the label, skipped columns are ignored, and every other
/// column becomes a (feature-number, value-index) count, where value indices
/// are interned per-feature in <paramref name="indices"/>.
/// </summary>
/// <param name="values">The row's column values.</param>
/// <param name="classColumn">Index of the column holding the class label.</param>
/// <param name="skip">Column indices to ignore entirely.</param>
/// <param name="indices">Per-feature value indices, created on demand.</param>
/// <returns>The populated datum.</returns>
internal static RVFDatum<string, int> ReadDatum(string[] values, int classColumn, ICollection<int> skip, IDictionary<int, IIndex<string>> indices)
{
    ClassicCounter<int> c = new ClassicCounter<int>();
    RVFDatum<string, int> d = new RVFDatum<string, int>(c);
    int attrNo = 0;
    for (int index = 0; index < values.Length; index++)
    {
        if (index == classColumn)
        {
            d.SetLabel(values[index]);
            continue;
        }
        // FIX: was skip.Contains(int.Parse(index)) — int.Parse applied to an
        // int is a leftover from Java's Integer.valueOf and does not compile.
        if (skip.Contains(index))
        {
            continue;
        }
        // FIX: was int.Parse(attrNo); attrNo is already an int.
        int featKey = attrNo;
        // NOTE(review): in Java, map.get returned null for a missing key; the
        // C# indexer throws on BCL dictionaries — verify the dictionary shim
        // used here returns null for absent keys.
        IIndex<string> ind = indices[featKey];
        if (ind == null)
        {
            ind = new HashIndex<string>();
            indices[featKey] = ind;
        }
        // MG: condition on isLocked is useless, since add(E) contains such a condition.
        ind.Add(values[index]);
        int valInd = ind.IndexOf(values[index]);
        if (valInd == -1)
        {
            // Unknown value on a locked index: fall back to index 0 and log.
            valInd = 0;
            logger.Info("unknown attribute value " + values[index] + " of attribute " + attrNo);
        }
        c.IncrementCount(featKey, valInd);
        attrNo++;
    }
    return(d);
}
/// <summary>Removes all features from the dataset that are not in featureSet.</summary>
/// <param name="featureSet"/>
public virtual void SelectFeaturesFromSet(ICollection<F> featureSet)
{
    // remap[oldId] is the feature's id in the new index, or -1 when dropped.
    HashIndex<F> keptIndex = new HashIndex<F>();
    int[] remap = new int[featureIndex.Size()];
    for (int i = 0; i < remap.Length; i++)
    {
        remap[i] = -1;
    }
    foreach (F feature in featureSet)
    {
        int oldID = featureIndex.IndexOf(feature);
        if (oldID >= 0)
        {
            // Valid feature in the current index: keep it.
            remap[oldID] = keptIndex.AddToIndex(feature);
        }
    }
    featureIndex = keptIndex;
    // Rewrite each datum's parallel feature/value arrays with the new ids.
    for (int d = 0; d < size; d++)
    {
        int kept = 0;
        for (int j = 0; j < data[d].Length; j++)
        {
            if (remap[data[d][j]] >= 0)
            {
                kept++;
            }
        }
        int[] newData = new int[kept];
        double[] newValues = new double[kept];
        int w = 0;
        for (int j = 0; j < data[d].Length; j++)
        {
            int mapped = remap[data[d][j]];
            if (mapped >= 0)
            {
                newData[w] = mapped;
                newValues[w] = values[d][j];
                w++;
            }
        }
        data[d] = newData;
        values[d] = newValues;
    }
}
/// <summary>Generic method to select features based on the feature scores vector provided as an argument.</summary>
/// <param name="numFeatures">number of features to be selected.</param>
/// <param name="scores">a vector of size total number of features in the data.</param>
public virtual void SelectFeatures(int numFeatures, double[] scores)
{
    // Rank all features by score, descending.
    IList<ScoredObject<F>> scoredFeatures = new List<ScoredObject<F>>();
    for (int i = 0; i < scores.Length; i++)
    {
        scoredFeatures.Add(new ScoredObject<F>(featureIndex.Get(i), scores[i]));
    }
    scoredFeatures.Sort(ScoredComparator.DescendingComparator);
    // Keep only the top numFeatures features in a fresh index.
    IIndex<F> newFeatureIndex = new HashIndex<F>();
    for (int i = 0; i < scoredFeatures.Count && i < numFeatures; i++)
    {
        newFeatureIndex.Add(scoredFeatures[i].Object());
    }
    // Re-map every datum onto the reduced feature index, dropping features
    // that did not make the cut.
    for (int d = 0; d < size; d++)
    {
        int[] newData = new int[data[d].Length];
        int curIndex = 0;
        for (int j = 0; j < data[d].Length; j++)
        {
            int index;
            if ((index = newFeatureIndex.IndexOf(featureIndex.Get(data[d][j]))) != -1)
            {
                newData[curIndex++] = index;
            }
        }
        int[] newDataTrimmed = new int[curIndex];
        // FIX: removed the lock (typeof(Runtime)) that wrapped this copy.
        // Locking on a Type object is an anti-pattern, and both arrays are
        // method-local, so the lock provided no synchronization benefit.
        System.Array.Copy(newData, 0, newDataTrimmed, 0, curIndex);
        data[d] = newDataTrimmed;
    }
    featureIndex = newFeatureIndex;
}
/// <summary>
/// Extracts a heist world: the world file and its siblings, referenced
/// environments, then the continents index (and each *.continent it lists)
/// and the mission index (and each *.mission it lists). Conversion or XML
/// errors are logged to error_output and abort the remaining processing.
/// </summary>
/// <param name="file">The world file entry.</param>
private void ProcessWorld(FileEntry file)
{
    // Dump every file that sits next to the world file.
    foreach (KeyValuePair<string, IChild> child in file.Parent.Children)
    {
        if (child.Value is FileEntry)
        {
            this.WriteFile(child.Value as FileEntry);
        }
    }
    this.WriteFile(file);
    // Pull environment references out of the world's scriptdata.
    this.ProcessScriptData(file, new List<XMLTagLookup>
    {
        new XMLTagLookup
        {
            node_name = "environment_values",
            value = new[] { "environment" },
            Converter = (hash) => { return (hash + ".environment"); }
        }
    });
    // --- continents ---
    string continents_file = Path.Combine(Path.GetDirectoryName(file.Path), "continents").Replace("\\", "/");
    Idstring ids = HashIndex.Get(continents_file);
    var t_ids = new Tuple<Idstring, Idstring, Idstring>(ids, new Idstring(0), HashIndex.Get("continents"));
    if (this._browser.RawFiles.ContainsKey(t_ids))
    {
        FileEntry c_file = this._browser.RawFiles[t_ids];
        this.WriteFile(c_file);
        string xml = ScriptActions.GetConverter("scriptdata", "script_cxml").export(c_file.FileStream(), true);
        XmlDocument doc = new XmlDocument();
        try
        {
            doc.LoadXml(xml);
            // Each child names a continent; process its <name>/<name>.continent file.
            foreach (XmlNode child in doc.ChildNodes[0])
            {
                this.ProcessFile(Path.Combine(Path.GetDirectoryName(file.Path), string.Format("{0}/{0}.continent", child.Attributes.GetNamedItem("name").Value)).Replace("\\", "/"));
            }
        }
        catch (Exception exc)
        {
            this.error_output.Write("Exception occured on file: {0}\n", c_file.Path);
            if (xml != null)
            {
                this.error_output.Write(xml + "\n");
            }
            this.error_output.Write(exc.Message + "\n");
            this.error_output.Write(exc.StackTrace + "\n");
            this.error_output.Flush();
            return;
        }
    }
    else
    {
        this.error_output.Write("Continents file {0} does not exist!\n", continents_file);
    }
    // --- mission ---
    string mission_file = Path.Combine(Path.GetDirectoryName(file.Path), "mission").Replace("\\", "/");
    Idstring m_ids = HashIndex.Get(mission_file);
    var t_m_ids = new Tuple<Idstring, Idstring, Idstring>(m_ids, new Idstring(0), HashIndex.Get("mission"));
    if (this._browser.RawFiles.ContainsKey(t_m_ids))
    {
        FileEntry m_file = this._browser.RawFiles[t_m_ids];
        this.WriteFile(m_file);
        string xml = ScriptActions.GetConverter("scriptdata", "script_cxml").export(m_file.FileStream(), true);
        XmlDocument doc = new XmlDocument();
        try
        {
            doc.LoadXml(xml);
            // Each child references a mission script; process <file>.mission.
            foreach (XmlNode child in doc.ChildNodes[0])
            {
                this.ProcessFile(Path.Combine(Path.GetDirectoryName(file.Path), string.Format("{0}.mission", child.Attributes.GetNamedItem("file").Value)).Replace("\\", "/"));
            }
        }
        catch (Exception exc)
        {
            this.error_output.Write("Exception occured on file: {0}\n", m_file.Path);
            if (xml != null)
            {
                this.error_output.Write(xml + "\n");
            }
            this.error_output.Write(exc.Message + "\n");
            this.error_output.Write(exc.StackTrace + "\n");
            this.error_output.Flush();
            return;
        }
    }
    else
    {
        // FIX: this message previously logged continents_file; the missing
        // file here is the mission index, so log mission_file instead.
        this.error_output.Write("Mission file {0} does not exist!\n", mission_file);
    }
    this.error_output.Flush();
}
/// <summary>
/// Loads the hashlist entry from the given package file, then writes the
/// combined hashlist out under the working path.
/// </summary>
/// <param name="workingPath">Directory to receive the generated hashlist.</param>
/// <param name="file">Package file containing the hashlist blob.</param>
/// <param name="be">Entry locating the blob inside the package.</param>
public static void GenerateHashlist(string workingPath, string file, PackageFileEntry be)
{
    ReadHashlistAndLoad(file, be);
    string destination = Path.Combine(workingPath, HashlistFile);
    HashIndex.GenerateHashList(destination);
}
/// <summary>
/// Command-line tool: loads a treebank, trains a lexicon over it, and — for
/// words first seen in the last 50% of trees and with corpus count &lt; 2 —
/// tallies their unknown-word signatures, printing the signature histogram
/// in descending order. Flags: -l LANGUAGE, -e ENCODING, then treebank paths.
/// </summary>
public static void Main(string[] args)
{
    if (args.Length < minArgs)
    {
        System.Console.Out.WriteLine(usage.ToString());
        System.Environment.Exit(-1);
    }
    ITreebankLangParserParams tlpp = new EnglishTreebankParserParams();
    DiskTreebank tb = null;
    string encoding = "UTF-8";
    Language lang = Language.English;
    for (int i = 0; i < args.Length; i++)
    {
        if (args[i].StartsWith("-"))
        {
            switch (args[i])
            {
                case "-l":
                {
                    lang = Language.ValueOf(args[++i].Trim());
                    tlpp = lang.@params;
                    break;
                }

                case "-e":
                {
                    encoding = args[++i];
                    break;
                }

                default:
                {
                    System.Console.Out.WriteLine(usage.ToString());
                    System.Environment.Exit(-1);
                    break;
                }
            }
        }
        else
        {
            // First non-flag argument: lazily create the treebank, then load
            // every path argument into it.
            if (tb == null)
            {
                if (tlpp == null)
                {
                    System.Console.Out.WriteLine(usage.ToString());
                    System.Environment.Exit(-1);
                }
                else
                {
                    tlpp.SetInputEncoding(encoding);
                    tlpp.SetOutputEncoding(encoding);
                    tb = tlpp.DiskTreebank();
                }
            }
            tb.LoadPath(args[i]);
        }
    }
    PrintWriter pw = tlpp.Pw();
    Options op = new Options();
    Options.LexOptions lexOptions = op.lexOptions;
    // Language-specific unknown-word model settings.
    if (lang == Language.French)
    {
        lexOptions.useUnknownWordSignatures = 1;
        lexOptions.smartMutation = false;
        lexOptions.unknownSuffixSize = 2;
        lexOptions.unknownPrefixSize = 1;
    }
    else
    {
        if (lang == Language.Arabic)
        {
            lexOptions.smartMutation = false;
            lexOptions.useUnknownWordSignatures = 9;
            lexOptions.unknownPrefixSize = 1;
            lexOptions.unknownSuffixSize = 1;
        }
    }
    IIndex<string> wordIndex = new HashIndex<string>();
    IIndex<string> tagIndex = new HashIndex<string>();
    ILexicon lex = tlpp.Lex(op, wordIndex, tagIndex);
    // Only start counting unknowns after the first half of the treebank.
    int computeAfter = (int)(0.50 * tb.Count);
    ICounter<string> vocab = new ClassicCounter<string>();
    ICounter<string> unkCounter = new ClassicCounter<string>();
    int treeId = 0;
    foreach (Tree t in tb)
    {
        IList<ILabel> yield = t.Yield();
        int posId = 0;
        foreach (ILabel word in yield)
        {
            vocab.IncrementCount(word.Value());
            if (treeId > computeAfter && vocab.GetCount(word.Value()) < 2.0)
            {
                // NOTE(review): posId only advances for rare words, mirroring
                // the original (and its commented-out variant below) — verify
                // the signature position argument is intended to work this way.
                // if(lex.getUnknownWordModel().getSignature(word.value(), posId++).equals("UNK"))
                //   pw.println(word.value());
                unkCounter.IncrementCount(lex.GetUnknownWordModel().GetSignature(word.Value(), posId++));
            }
        }
        treeId++;
    }
    IList<string> biggestKeys = new List<string>(unkCounter.KeySet());
    biggestKeys.Sort(Counters.ToComparatorDescending(unkCounter));
    foreach (string wordType in biggestKeys)
    {
        pw.Printf("%s\t%d%n", wordType, (int)unkCounter.GetCount(wordType));
    }
    // FIX: pw.Close() was called twice; a single close suffices.
    pw.Close();
}
/// <summary>
/// Loads the classifier's text-format model on top of the base classifier
/// state: the node/edge feature index maps, then either the second-order
/// non-linear edge weight matrices or the linear weights, followed by the
/// input/output layer weights.
/// </summary>
/// <exception cref="System.Exception"/>
protected internal override void LoadTextClassifier(BufferedReader br)
{
    base.LoadTextClassifier(br);
    // The two index maps share a format; only the header token and the
    // header-mismatch message differ (messages preserved from the original).
    nodeFeatureIndicesMap = ReadIndexMap(br, "nodeFeatureIndicesMap.size()=", "format error in nodeFeatureIndicesMap");
    edgeFeatureIndicesMap = ReadIndexMap(br, "edgeFeatureIndicesMap.size()=", "format error");
    if (flags.secondOrderNonLinear)
    {
        inputLayerWeights4Edge = ReadWeightMatrix(br, "inputLayerWeights4Edge.length=");
        outputLayerWeights4Edge = ReadWeightMatrix(br, "outputLayerWeights4Edge.length=");
    }
    else
    {
        linearWeights = ReadWeightMatrix(br, "linearWeights.length=");
    }
    inputLayerWeights = ReadWeightMatrix(br, "inputLayerWeights.length=");
    outputLayerWeights = ReadWeightMatrix(br, "outputLayerWeights.length=");
}

/// <summary>
/// Reads one "header=\tN" line followed by N "index\tvalue" lines into a
/// HashIndex, verifying that indices are sequential from 0.
/// </summary>
/// <param name="br">Reader positioned at the size header line.</param>
/// <param name="sizeHeader">Expected header token, e.g. "nodeFeatureIndicesMap.size()=".</param>
/// <param name="headerError">Exception message when the header does not match.</param>
private static HashIndex<int> ReadIndexMap(BufferedReader br, string sizeHeader, string headerError)
{
    // NOTE(review): line.Split("\\t") follows the Sharpen-converted Java idiom
    // (regex split on a tab) throughout this class — do not "fix" locally.
    string line = br.ReadLine();
    string[] toks = line.Split("\\t");
    if (!toks[0].Equals(sizeHeader))
    {
        throw new Exception(headerError);
    }
    int mapSize = System.Convert.ToInt32(toks[1]);
    HashIndex<int> index = new HashIndex<int>();
    int count = 0;
    while (count < mapSize)
    {
        line = br.ReadLine();
        toks = line.Split("\\t");
        int idx = System.Convert.ToInt32(toks[0]);
        if (count != idx)
        {
            throw new Exception("format error");
        }
        index.Add(System.Convert.ToInt32(toks[1]));
        count++;
    }
    return index;
}

/// <summary>
/// Reads one "header=\tN" line followed by N rows of the form
/// "rowLength\tv0 v1 v2 ..." into a jagged double matrix.
/// This replaces five copy-pasted inline readers in the original.
/// </summary>
/// <param name="br">Reader positioned at the length header line.</param>
/// <param name="lengthHeader">Expected header token, e.g. "linearWeights.length=".</param>
private static double[][] ReadWeightMatrix(BufferedReader br, string lengthHeader)
{
    string line = br.ReadLine();
    string[] toks = line.Split("\\t");
    if (!toks[0].Equals(lengthHeader))
    {
        throw new Exception("format error");
    }
    int rows = System.Convert.ToInt32(toks[1]);
    double[][] weights = new double[rows][];
    int count = 0;
    while (count < rows)
    {
        line = br.ReadLine();
        toks = line.Split("\\t");
        int weights2Length = System.Convert.ToInt32(toks[0]);
        weights[count] = new double[weights2Length];
        string[] weightsValue = toks[1].Split(" ");
        if (weights2Length != weightsValue.Length)
        {
            throw new Exception("weights format error");
        }
        for (int i2 = 0; i2 < weights2Length; i2++)
        {
            weights[count][i2] = double.ParseDouble(weightsValue[i2]);
        }
        count++;
    }
    return weights;
}
/// <summary>
/// Command-line driver for the Chinese character-based lexicon: supports
/// corpus stats (-stats), parser training/loading (-parser), lexicon
/// training/loading (-lex), and evaluation over a test treebank (-test),
/// with optional word segmentation and bracket-based scoring (-eval).
/// </summary>
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    // Map each flag to its number of arguments for the argument parser.
    // NOTE(review): int.Parse(3) on an int literal is a Sharpen conversion
    // artifact of Java's Integer.parseInt/valueOf — relies on a shim; verify.
    IDictionary<string, int> flagsToNumArgs = Generics.NewHashMap();
    flagsToNumArgs["-parser"] = int.Parse(3);
    flagsToNumArgs["-lex"] = int.Parse(3);
    flagsToNumArgs["-test"] = int.Parse(2);
    flagsToNumArgs["-out"] = int.Parse(1);
    flagsToNumArgs["-lengthPenalty"] = int.Parse(1);
    flagsToNumArgs["-penaltyType"] = int.Parse(1);
    flagsToNumArgs["-maxLength"] = int.Parse(1);
    flagsToNumArgs["-stats"] = int.Parse(2);
    IDictionary<string, string[]> argMap = StringUtils.ArgsToMap(args, flagsToNumArgs);
    bool eval = argMap.Contains("-eval");
    PrintWriter pw = null;
    if (argMap.Contains("-out"))
    {
        // Output file is written in the GB18030 encoding.
        pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream((argMap["-out"])[0]), "GB18030"), true);
    }
    log.Info("ChineseCharacterBasedLexicon called with args:");
    ChineseTreebankParserParams ctpp = new ChineseTreebankParserParams();
    for (int i = 0; i < args.Length; i++)
    {
        ctpp.SetOptionFlag(args, i);
        log.Info(" " + args[i]);
    }
    log.Info();
    Options op = new Options(ctpp);
    // --- -stats: print treebank statistics and exit ---
    if (argMap.Contains("-stats"))
    {
        string[] statArgs = (argMap["-stats"]);
        MemoryTreebank rawTrainTreebank = op.tlpParams.MemoryTreebank();
        IFileFilter trainFilt = new NumberRangesFileFilter(statArgs[1], false);
        rawTrainTreebank.LoadPath(new File(statArgs[0]), trainFilt);
        log.Info("Done reading trees.");
        MemoryTreebank trainTreebank;
        if (argMap.Contains("-annotate"))
        {
            trainTreebank = new MemoryTreebank();
            TreeAnnotator annotator = new TreeAnnotator(ctpp.HeadFinder(), ctpp, op);
            foreach (Tree tree in rawTrainTreebank)
            {
                trainTreebank.Add(annotator.TransformTree(tree));
            }
            log.Info("Done annotating trees.");
        }
        else
        {
            trainTreebank = rawTrainTreebank;
        }
        PrintStats(trainTreebank, pw);
        System.Environment.Exit(0);
    }
    // maxLength only governs skipping long test sentences below.
    int maxLength = 1000000;
    // Test.verbose = true;
    if (argMap.Contains("-norm"))
    {
        op.testOptions.lengthNormalization = true;
    }
    if (argMap.Contains("-maxLength"))
    {
        maxLength = System.Convert.ToInt32((argMap["-maxLength"])[0]);
    }
    // NOTE(review): op.testOptions.maxLength is hard-coded to 120 regardless
    // of the -maxLength flag (which only affects the local skip threshold) —
    // verify this is intentional.
    op.testOptions.maxLength = 120;
    bool combo = argMap.Contains("-combo");
    if (combo)
    {
        ctpp.useCharacterBasedLexicon = true;
        op.testOptions.maxSpanForTags = 10;
        op.doDep = false;
        op.dcTags = false;
    }
    LexicalizedParser lp = null;
    ILexicon lex = null;
    // --- -parser: train from a treebank (2-3 args) or load a model (1 arg) ---
    if (argMap.Contains("-parser"))
    {
        string[] parserArgs = (argMap["-parser"]);
        if (parserArgs.Length > 1)
        {
            IFileFilter trainFilt = new NumberRangesFileFilter(parserArgs[1], false);
            lp = LexicalizedParser.TrainFromTreebank(parserArgs[0], trainFilt, op);
            if (parserArgs.Length == 3)
            {
                // Optional third argument: serialize the trained parser.
                string filename = parserArgs[2];
                log.Info("Writing parser in serialized format to file " + filename + " ");
                System.Console.Error.Flush();
                ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
                @out.WriteObject(lp);
                @out.Close();
                log.Info("done.");
            }
        }
        else
        {
            string parserFile = parserArgs[0];
            lp = LexicalizedParser.LoadModel(parserFile, op);
        }
        // Adopt the loaded/trained parser's lexicon and options.
        lex = lp.GetLexicon();
        op = lp.GetOp();
        ctpp = (ChineseTreebankParserParams)op.tlpParams;
    }
    if (argMap.Contains("-rad"))
    {
        ctpp.useUnknownCharacterModel = true;
    }
    if (argMap.Contains("-lengthPenalty"))
    {
        ctpp.lengthPenalty = double.Parse((argMap["-lengthPenalty"])[0]);
    }
    if (argMap.Contains("-penaltyType"))
    {
        ctpp.penaltyType = System.Convert.ToInt32((argMap["-penaltyType"])[0]);
    }
    // --- -lex: train a lexicon (2-3 args) or deserialize one (1 arg) ---
    if (argMap.Contains("-lex"))
    {
        string[] lexArgs = (argMap["-lex"]);
        if (lexArgs.Length > 1)
        {
            IIndex<string> wordIndex = new HashIndex<string>();
            IIndex<string> tagIndex = new HashIndex<string>();
            lex = ctpp.Lex(op, wordIndex, tagIndex);
            MemoryTreebank rawTrainTreebank = op.tlpParams.MemoryTreebank();
            IFileFilter trainFilt = new NumberRangesFileFilter(lexArgs[1], false);
            rawTrainTreebank.LoadPath(new File(lexArgs[0]), trainFilt);
            log.Info("Done reading trees.");
            MemoryTreebank trainTreebank;
            if (argMap.Contains("-annotate"))
            {
                trainTreebank = new MemoryTreebank();
                TreeAnnotator annotator = new TreeAnnotator(ctpp.HeadFinder(), ctpp, op);
                foreach (Tree tree in rawTrainTreebank)
                {
                    tree = annotator.TransformTree(tree);
                    trainTreebank.Add(tree);
                }
                log.Info("Done annotating trees.");
            }
            else
            {
                trainTreebank = rawTrainTreebank;
            }
            lex.InitializeTraining(trainTreebank.Count);
            lex.Train(trainTreebank);
            lex.FinishTraining();
            log.Info("Done training lexicon.");
            if (lexArgs.Length == 3)
            {
                // Optional third argument: serialize the trained lexicon.
                string filename = lexArgs.Length == 3 ? lexArgs[2] : "parsers/chineseCharLex.ser.gz";
                log.Info("Writing lexicon in serialized format to file " + filename + " ");
                System.Console.Error.Flush();
                ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
                @out.WriteObject(lex);
                @out.Close();
                log.Info("done.");
            }
        }
        else
        {
            string lexFile = lexArgs.Length == 1 ? lexArgs[0] : "parsers/chineseCharLex.ser.gz";
            log.Info("Reading Lexicon from file " + lexFile);
            ObjectInputStream @in = IOUtils.ReadStreamFromString(lexFile);
            try
            {
                lex = (ILexicon)@in.ReadObject();
            }
            catch (TypeLoadException)
            {
                throw new Exception("Bad serialized file: " + lexFile);
            }
            @in.Close();
        }
    }
    // --- -test: segment and/or parse the test treebank, optionally scoring ---
    if (argMap.Contains("-test"))
    {
        bool segmentWords = ctpp.segment;
        bool parse = lp != null;
        System.Diagnostics.Debug.Assert((parse || segmentWords));
        // WordCatConstituent.collinizeWords = argMap.containsKey("-collinizeWords");
        // WordCatConstituent.collinizeTags = argMap.containsKey("-collinizeTags");
        IWordSegmenter seg = null;
        if (segmentWords)
        {
            seg = (IWordSegmenter)lex;
        }
        string[] testArgs = (argMap["-test"]);
        MemoryTreebank testTreebank = op.tlpParams.MemoryTreebank();
        IFileFilter testFilt = new NumberRangesFileFilter(testArgs[1], false);
        testTreebank.LoadPath(new File(testArgs[0]), testFilt);
        ITreeTransformer subcategoryStripper = op.tlpParams.SubcategoryStripper();
        ITreeTransformer collinizer = ctpp.Collinizer();
        WordCatEquivalenceClasser eqclass = new WordCatEquivalenceClasser();
        WordCatEqualityChecker eqcheck = new WordCatEqualityChecker();
        EquivalenceClassEval basicEval = new EquivalenceClassEval(eqclass, eqcheck, "basic");
        EquivalenceClassEval collinsEval = new EquivalenceClassEval(eqclass, eqcheck, "collinized");
        // Decide which bracket types participate in evaluation.
        IList<string> evalTypes = new List<string>(3);
        bool goodPOS = false;
        if (segmentWords)
        {
            evalTypes.Add(WordCatConstituent.wordType);
            if (ctpp.segmentMarkov && !parse)
            {
                evalTypes.Add(WordCatConstituent.tagType);
                goodPOS = true;
            }
        }
        if (parse)
        {
            evalTypes.Add(WordCatConstituent.tagType);
            evalTypes.Add(WordCatConstituent.catType);
            if (combo)
            {
                evalTypes.Add(WordCatConstituent.wordType);
                goodPOS = true;
            }
        }
        TreeToBracketProcessor proc = new TreeToBracketProcessor(evalTypes);
        log.Info("Testing...");
        foreach (Tree goldTop in testTreebank)
        {
            Tree gold = goldTop.FirstChild();
            IList<IHasWord> goldSentence = gold.YieldHasWord();
            if (goldSentence.Count > maxLength)
            {
                log.Info("Skipping sentence; too long: " + goldSentence.Count);
                continue;
            }
            else
            {
                log.Info("Processing sentence; length: " + goldSentence.Count);
            }
            IList<IHasWord> s;
            if (segmentWords)
            {
                // Re-segment the gold character sequence from scratch.
                StringBuilder goldCharBuf = new StringBuilder();
                foreach (IHasWord aGoldSentence in goldSentence)
                {
                    StringLabel word = (StringLabel)aGoldSentence;
                    goldCharBuf.Append(word.Value());
                }
                string goldChars = goldCharBuf.ToString();
                s = seg.Segment(goldChars);
            }
            else
            {
                s = goldSentence;
            }
            Tree tree;
            if (parse)
            {
                tree = lp.ParseTree(s);
                if (tree == null)
                {
                    throw new Exception("PARSER RETURNED NULL!!!");
                }
            }
            else
            {
                tree = Edu.Stanford.Nlp.Trees.Trees.ToFlatTree(s);
                tree = subcategoryStripper.TransformTree(tree);
            }
            if (pw != null)
            {
                if (parse)
                {
                    tree.PennPrint(pw);
                }
                else
                {
                    // NOTE(review): sentIter.Current is read before the first
                    // MoveNext(), which is undefined for BCL enumerators —
                    // looks like a Java Iterator.next() conversion artifact;
                    // verify against the enumerator implementation in use.
                    IEnumerator sentIter = s.GetEnumerator();
                    for (; ;)
                    {
                        Word word = (Word)sentIter.Current;
                        pw.Print(word.Word());
                        if (sentIter.MoveNext())
                        {
                            pw.Print(" ");
                        }
                        else
                        {
                            break;
                        }
                    }
                }
                pw.Println();
            }
            if (eval)
            {
                // Score raw brackets, then Collins-normalized brackets.
                ICollection ourBrackets;
                ICollection goldBrackets;
                ourBrackets = proc.AllBrackets(tree);
                goldBrackets = proc.AllBrackets(gold);
                if (goodPOS)
                {
                    Sharpen.Collections.AddAll(ourBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(tree, gold));
                    Sharpen.Collections.AddAll(goldBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(gold, tree));
                }
                basicEval.Eval(ourBrackets, goldBrackets);
                System.Console.Out.WriteLine("\nScores:");
                basicEval.DisplayLast();
                Tree collinsTree = collinizer.TransformTree(tree);
                Tree collinsGold = collinizer.TransformTree(gold);
                ourBrackets = proc.AllBrackets(collinsTree);
                goldBrackets = proc.AllBrackets(collinsGold);
                if (goodPOS)
                {
                    Sharpen.Collections.AddAll(ourBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(collinsTree, collinsGold));
                    Sharpen.Collections.AddAll(goldBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(collinsGold, collinsTree));
                }
                collinsEval.Eval(ourBrackets, goldBrackets);
                System.Console.Out.WriteLine("\nCollinized scores:");
                collinsEval.DisplayLast();
                System.Console.Out.WriteLine();
            }
        }
        if (eval)
        {
            basicEval.Display();
            System.Console.Out.WriteLine();
            collinsEval.Display();
        }
    }
}
/// <summary>
/// This method lets you train and test a segmenter relative to a
/// Treebank.
/// </summary>
/// <remarks>
/// This method lets you train and test a segmenter relative to a
/// Treebank.
/// <p>
/// <i>Implementation note:</i> This method is largely cloned from
/// LexicalizedParser's main method. Should we try to have it be able
/// to train segmenters to stop things going out of sync?
/// </remarks>
public static void Main(string[] args)
{
    // Mode flags and file locations gathered from the command line.
    bool train = false;
    bool saveToSerializedFile = false;
    bool saveToTextFile = false;
    string serializedInputFileOrUrl = null;
    string textInputFileOrUrl = null;
    string serializedOutputFileOrUrl = null;
    string textOutputFileOrUrl = null;
    string treebankPath = null;
    Treebank testTreebank = null;
    // Treebank tuneTreebank = null;
    string testPath = null;
    IFileFilter testFilter = null;
    IFileFilter trainFilter = null;
    string encoding = null;
    // variables needed to process the files to be parsed
    ITokenizerFactory <Word> tokenizerFactory = null;
    // DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor();
    bool tokenized = false;
    // whether or not the input file has already been tokenized
    IFunction <IList <IHasWord>, IList <IHasWord> > escaper = new ChineseEscaper();
    // int tagDelimiter = -1;
    // String sentenceDelimiter = "\n";
    // boolean fromXML = false;
    int argIndex = 0;
    if (args.Length < 1)
    {
        log.Info("usage: java edu.stanford.nlp.parser.lexparser." + "LexicalizedParser parserFileOrUrl filename*");
        return;
    }
    Options op = new Options();
    op.tlpParams = new ChineseTreebankParserParams();
    // while loop through option arguments
    while (argIndex < args.Length && args[argIndex][0] == '-')
    {
        if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-train"))
        {
            // -train treebankPath [range | low high]: train a new segmenter
            // and (implicitly) serialize it afterwards.
            train = true;
            saveToSerializedFile = true;
            int numSubArgs = NumSubArgs(args, argIndex);
            argIndex++;
            if (numSubArgs > 1)
            {
                treebankPath = args[argIndex];
                argIndex++;
            }
            else
            {
                throw new Exception("Error: -train option must have treebankPath as first argument.");
            }
            if (numSubArgs == 2)
            {
                trainFilter = new NumberRangesFileFilter(args[argIndex++], true);
            }
            else
            {
                if (numSubArgs >= 3)
                {
                    // two extra args: either a numeric low/high pair...
                    try
                    {
                        int low = System.Convert.ToInt32(args[argIndex]);
                        int high = System.Convert.ToInt32(args[argIndex + 1]);
                        trainFilter = new NumberRangeFileFilter(low, high, true);
                        argIndex += 2;
                    }
                    catch (NumberFormatException)
                    {
                        // maybe it's a ranges expression?
                        trainFilter = new NumberRangesFileFilter(args[argIndex], true);
                        argIndex++;
                    }
                }
            }
        }
        else
        {
            if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-encoding"))
            {
                // sets encoding for TreebankLangParserParams
                encoding = args[argIndex + 1];
                op.tlpParams.SetInputEncoding(encoding);
                op.tlpParams.SetOutputEncoding(encoding);
                argIndex += 2;
            }
            else
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-loadFromSerializedFile"))
                {
                    // load the parser from a binary serialized file
                    // the next argument must be the path to the parser file
                    serializedInputFileOrUrl = args[argIndex + 1];
                    argIndex += 2;
                }
                else
                {
                    // doesn't make sense to load from TextFile -pichuan
                    // } else if (args[argIndex].equalsIgnoreCase("-loadFromTextFile")) {
                    //   // load the parser from declarative text file
                    //   // the next argument must be the path to the parser file
                    //   textInputFileOrUrl = args[argIndex + 1];
                    //   argIndex += 2;
                    if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-saveToSerializedFile"))
                    {
                        saveToSerializedFile = true;
                        serializedOutputFileOrUrl = args[argIndex + 1];
                        argIndex += 2;
                    }
                    else
                    {
                        if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-saveToTextFile"))
                        {
                            // save the parser to declarative text file
                            saveToTextFile = true;
                            textOutputFileOrUrl = args[argIndex + 1];
                            argIndex += 2;
                        }
                        else
                        {
                            if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-treebank"))
                            {
                                // the next argument is the treebank path and range for testing
                                int numSubArgs = NumSubArgs(args, argIndex);
                                argIndex++;
                                if (numSubArgs == 1)
                                {
                                    testFilter = new NumberRangesFileFilter(args[argIndex++], true);
                                }
                                else
                                {
                                    if (numSubArgs > 1)
                                    {
                                        testPath = args[argIndex++];
                                        if (numSubArgs == 2)
                                        {
                                            testFilter = new NumberRangesFileFilter(args[argIndex++], true);
                                        }
                                        else
                                        {
                                            if (numSubArgs >= 3)
                                            {
                                                try
                                                {
                                                    int low = System.Convert.ToInt32(args[argIndex]);
                                                    int high = System.Convert.ToInt32(args[argIndex + 1]);
                                                    testFilter = new NumberRangeFileFilter(low, high, true);
                                                    argIndex += 2;
                                                }
                                                catch (NumberFormatException)
                                                {
                                                    // maybe it's a ranges expression?
                                                    testFilter = new NumberRangesFileFilter(args[argIndex++], true);
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                            else
                            {
                                // unrecognized here: let the language pack try to consume it
                                int j = op.tlpParams.SetOptionFlag(args, argIndex);
                                if (j == argIndex)
                                {
                                    log.Info("Unknown option ignored: " + args[argIndex]);
                                    j++;
                                }
                                argIndex = j;
                            }
                        }
                    }
                }
            }
        }
    }
    // end while loop through arguments
    ITreebankLangParserParams tlpParams = op.tlpParams;
    // all other arguments are order dependent and
    // are processed in order below
    Edu.Stanford.Nlp.Parser.Lexparser.ChineseLexiconAndWordSegmenter cs = null;
    if (!train && op.testOptions.verbose)
    {
        System.Console.Out.WriteLine("Currently " + new DateTime());
        PrintArgs(args, System.Console.Out);
    }
    if (train)
    {
        PrintArgs(args, System.Console.Out);
        // so we train a parser using the treebank
        if (treebankPath == null)
        {
            // the next arg must be the treebank path, since it wasn't give earlier
            treebankPath = args[argIndex];
            argIndex++;
            if (args.Length > argIndex + 1)
            {
                try
                {
                    // the next two args might be the range
                    int low = System.Convert.ToInt32(args[argIndex]);
                    int high = System.Convert.ToInt32(args[argIndex + 1]);
                    trainFilter = new NumberRangeFileFilter(low, high, true);
                    argIndex += 2;
                }
                catch (NumberFormatException)
                {
                    // maybe it's a ranges expression?
                    trainFilter = new NumberRangesFileFilter(args[argIndex], true);
                    argIndex++;
                }
            }
        }
        Treebank trainTreebank = MakeTreebank(treebankPath, op, trainFilter);
        IIndex <string> wordIndex = new HashIndex <string>();
        IIndex <string> tagIndex = new HashIndex <string>();
        cs = new Edu.Stanford.Nlp.Parser.Lexparser.ChineseLexiconAndWordSegmenter(trainTreebank, op, wordIndex, tagIndex);
    }
    else
    {
        if (textInputFileOrUrl != null)
        {
        }
        else
        {
            // so we load the segmenter from a text grammar file
            // XXXXX fix later -pichuan
            //cs = new LexicalizedParser(textInputFileOrUrl, true, op);
            // so we load a serialized segmenter
            if (serializedInputFileOrUrl == null)
            {
                // the next argument must be the path to the serialized parser
                serializedInputFileOrUrl = args[argIndex];
                argIndex++;
            }
            try
            {
                cs = new Edu.Stanford.Nlp.Parser.Lexparser.ChineseLexiconAndWordSegmenter(serializedInputFileOrUrl, op);
            }
            catch (ArgumentException)
            {
                log.Info("Error loading segmenter, exiting...");
                System.Environment.Exit(0);
            }
        }
    }
    // the following has to go after reading parser to make sure
    // op and tlpParams are the same for train and test
    TreePrint treePrint = op.testOptions.TreePrint(tlpParams);
    if (testFilter != null)
    {
        if (testPath == null)
        {
            if (treebankPath == null)
            {
                throw new Exception("No test treebank path specified...");
            }
            else
            {
                log.Info("No test treebank path specified. Using train path: \"" + treebankPath + "\"");
                testPath = treebankPath;
            }
        }
        testTreebank = tlpParams.TestMemoryTreebank();
        testTreebank.LoadPath(testPath, testFilter);
    }
    op.trainOptions.sisterSplitters = Generics.NewHashSet(Arrays.AsList(tlpParams.SisterSplitters()));
    // at this point we should be sure that op.tlpParams is
    // set appropriately (from command line, or from grammar file),
    // and will never change again. We also set the tlpParams of the
    // LexicalizedParser instance to be the same object. This is
    // redundancy that we probably should take out eventually.
    //
    // -- Roger
    if (op.testOptions.verbose)
    {
        log.Info("Lexicon is " + cs.GetType().FullName);
    }
    PrintWriter pwOut = tlpParams.Pw();
    PrintWriter pwErr = tlpParams.Pw(System.Console.Error);
    // Now what do we do with the parser we've made
    if (saveToTextFile)
    {
        // save the parser to textGrammar format
        if (textOutputFileOrUrl != null)
        {
            SaveSegmenterDataToText(cs, textOutputFileOrUrl);
        }
        else
        {
            log.Info("Usage: must specify a text segmenter data output path");
        }
    }
    if (saveToSerializedFile)
    {
        if (serializedOutputFileOrUrl == null && argIndex < args.Length)
        {
            // the next argument must be the path to serialize to
            serializedOutputFileOrUrl = args[argIndex];
            argIndex++;
        }
        if (serializedOutputFileOrUrl != null)
        {
            SaveSegmenterDataToSerialized(cs, serializedOutputFileOrUrl);
        }
        else
        {
            if (textOutputFileOrUrl == null && testTreebank == null)
            {
                // no saving/parsing request has been specified
                log.Info("usage: " + "java edu.stanford.nlp.parser.lexparser.ChineseLexiconAndWordSegmenter" + "-train trainFilesPath [start stop] serializedParserFilename");
            }
        }
    }
    /* --------------------- Testing part!!!! ----------------------- */
    if (op.testOptions.verbose)
    {
    }
    // printOptions(false, op);
    if (testTreebank != null || (argIndex < args.Length && Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-treebank")))
    {
        // test parser on treebank
        if (testTreebank == null)
        {
            // the next argument is the treebank path and range for testing
            testTreebank = tlpParams.TestMemoryTreebank();
            if (args.Length < argIndex + 4)
            {
                testTreebank.LoadPath(args[argIndex + 1]);
            }
            else
            {
                int testlow = System.Convert.ToInt32(args[argIndex + 2]);
                int testhigh = System.Convert.ToInt32(args[argIndex + 3]);
                testTreebank.LoadPath(args[argIndex + 1], new NumberRangeFileFilter(testlow, testhigh, true));
            }
        }
    }
}
protected void ReadFile(BinaryReader _br) { br = _br; uint bnk_count = br.ReadUInt32(); //Skip second count br.BaseStream.Position += 4; uint bnk_offset = br.ReadUInt32(); uint section_pointer = br.ReadUInt32(); uint unknown1 = br.ReadUInt32(); uint sound_count = br.ReadUInt32(); //Skip second count br.BaseStream.Position += 4; uint sound_offset = br.ReadUInt32(); //Skips section pointer, unknown1, unknown2 br.BaseStream.Position += 12; uint u_count = br.ReadUInt32(); //Skip second count br.BaseStream.Position += 4; uint u_offset = br.ReadUInt32(); br.BaseStream.Position = bnk_offset; for (int i = 0; i < bnk_count; i++) { br.BaseStream.Position += 4; uint position = br.ReadUInt32(); this.SeekPush(); br.BaseStream.Position = position; Soundbanks.Add(this.ReadString()); this.SeekPop(); } br.BaseStream.Position = sound_offset; Dictionary <ulong, uint> sound_lookups = new Dictionary <ulong, uint>(); for (int i = 0; i < sound_count; i++) { uint id = (uint)br.ReadUInt64(); ulong hash = br.ReadUInt64(); if (sound_lookups.ContainsKey(hash)) { uint other_id = sound_lookups[hash]; continue; } sound_lookups.Add(hash, id); } br.BaseStream.Position = u_offset; for (int i = 0; i < u_count; i++) { ulong hash = br.ReadUInt64(); br.BaseStream.Position += 4; uint string_pos = br.ReadUInt32(); this.SeekPush(); br.BaseStream.Position = string_pos; string str = this.ReadString(); this.SeekPop(); if (!sound_lookups.ContainsKey(hash)) { continue; } uint id = sound_lookups[hash]; if (SoundLookups.ContainsKey(id)) { continue; } Idstring ids = HashIndex.Get(hash); SoundLookups.Add(id, new Tuple <string, Idstring>(id.ToString() != str ? str : null, ids)); } }
private static async Task CreateIndex(DocumentClient client, string collectionName) { Console.WriteLine("Set up Indexes"); DocumentCollection collection = await client.ReadDocumentCollectionAsync(UriFactory.CreateDocumentCollectionUri(databaseId, collectionName)); /* * Range over /prop/? (or /*) can be used to serve the following queries efficiently: * SELECT * FROM collection c WHERE c.prop = "value" * SELECT * FROM collection c WHERE c.prop > 5 * SELECT * FROM collection c ORDER BY c.prop */ Index indexNum = new RangeIndex(DataType.Number); collection.IndexingPolicy.IncludedPaths.Add(new IncludedPath() { Indexes = new Collection <Index>() { indexNum }, Path = @"/FamilyId/?" }); /* * Hash over /prop/? (or /*) can be used to serve the following queries efficiently: * SELECT * FROM collection c WHERE c.prop = "value" */ Index indexArray = new HashIndex(DataType.String); collection.IndexingPolicy.IncludedPaths.Add(new IncludedPath() { Indexes = new Collection <Index>() { indexArray }, Path = @"/Address/*" }); /* * Hash over /props/[]/? (or /* or /props/*) can be used to serve the following queries efficiently: * SELECT tag FROM collection c JOIN tag IN c.props WHERE tag = 5 */ Index indexArr = new HashIndex(DataType.String); collection.IndexingPolicy.IncludedPaths.Add(new IncludedPath() { Indexes = new Collection <Index>() { indexArr }, Path = @"/Children/[]/?" }); /* exclude from index Parents */ collection.IndexingPolicy.ExcludedPaths.Add(new ExcludedPath() { Path = @"/Parents/*" }); await client.ReplaceDocumentCollectionAsync(collection); }
/// <summary>
/// Scans all known mod locations and (re)builds the mods list:
/// *.pdmod files in the mods directory, "mod_overrides" folders under the
/// assets folder (matched against known local mods), and BLT hook mods under
/// the game's "mods" directory.
/// </summary>
/// <param name="overrride">When true, the existing in-memory mods list is cleared before scanning.
/// (Parameter name kept, including its typo, for call-site compatibility.)</param>
public void LoadMods(bool overrride = false)
{
    var watch = Stopwatch.StartNew();

    // --- Phase 1: *.pdmod files. ---
    if (Directory.Exists(modsDirectory))
    {
        watch.Restart();
        if (overrride)
            this._modsList.Clear();
        // Anything left in this list after scanning no longer exists on disk.
        List<string> leftovers = this.modsList.Keys.ToList();
        string[] pdmods = Directory.GetFiles(modsDirectory, "*.pdmod");
        foreach (string file in pdmods)
        {
            // BUG FIX: was `return;` (a leftover from a Parallel.ForEach lambda,
            // where return == continue), which aborted the entire scan when a
            // file disappeared between GetFiles and this check.
            if (!File.Exists(file))
                continue;
            if (LoadSingleMod(file))
            {
                leftovers.Remove(file);
            }
        }
        watch.Stop();
        Console.WriteLine("LoadLocalMods.pdmods - " + watch.ElapsedMilliseconds + " ms");
        watch.Restart();
        foreach (string left in leftovers)
            RemoveModsList(left);
        watch.Stop();
        Console.WriteLine("LoadLocalMods.pdmods.leftovers - " + watch.ElapsedMilliseconds + " ms");
    }
    else
    {
        if (this.CanCreateDirectory(modsDirectory))
        {
            Directory.CreateDirectory(modsDirectory);
        }
    }

    watch.Restart();
    // --- Phase 2: "mod_overrides" folders. ---
    if (Directory.Exists(Path.Combine(StaticStorage.settings.AssetsFolder, "mod_overrides")))
    {
        List<BundleMod> mod_overrides_mods = new List<BundleMod>();
        string[] mod_overrides = Directory.EnumerateDirectories(Path.Combine(StaticStorage.settings.AssetsFolder, "mod_overrides")).ToArray();
        foreach (string mo in mod_overrides)
        {
            if (!Directory.Exists(mo))
                continue;
            if (new DirectoryInfo(mo).Name == "Bundle_Modder_Shared")
                continue;
            string[] allfiles = System.IO.Directory.GetFiles(mo, "*.*", System.IO.SearchOption.AllDirectories);
            // Defaults assume an unrecognized, manually installed override mod;
            // mod.txt (if present and parseable) upgrades it below.
            BundleMod mo_mod = new BundleMod();
            mo_mod.Name = new DirectoryInfo(mo).Name;
            mo_mod.Author = "<UNKNOWN>";
            mo_mod.Description = "This mod is installed in \"mod_overrides\" folder. No description for this mod is availiable. This mod was not matched with any local mods. You can only uninstall this mod.";
            mo_mod.file = mo;
            mo_mod.status = BundleMod.ModStatus.Unrecognized;
            mo_mod.type = BundleMod.ModType.mod_override;
            mo_mod.actionStatus = BundleMod.ModActionStatus.Missing;
            mo_mod.UtilizesOverride = true;
            if (File.Exists(Path.Combine(mo, "mod.txt")))
            {
                try
                {
                    OverrideMod overrideModInformation = OverrideMod.Deserialize(File.ReadAllText(Path.Combine(mo, "mod.txt")));
                    if (!String.IsNullOrWhiteSpace(overrideModInformation.Name))
                        mo_mod.Name = overrideModInformation.Name;
                    if (!String.IsNullOrWhiteSpace(overrideModInformation.Author))
                        mo_mod.Author = overrideModInformation.Author;
                    if (!String.IsNullOrWhiteSpace(overrideModInformation.Description))
                        mo_mod.Description = overrideModInformation.Description;
                    mo_mod.status = BundleMod.ModStatus.Installed;
                    mo_mod.actionStatus = BundleMod.ModActionStatus.None;
                }
                catch (Exception)
                {
                    // Deliberate best-effort: a malformed mod.txt just leaves the
                    // folder-derived defaults in place.
                }
            }
            // Build the item queue from the files in the override folder.
            // Paths look like "bundle/path.extension" or "bundle/path.lang.extension".
            foreach (string mo_entry in allfiles)
            {
                if (mo_entry.EndsWith("mod.txt"))
                    continue;
                BundleRewriteItem mo_bri = new BundleRewriteItem();
                string filepath = mo_entry.Substring(mo.Length + 1).Replace('\\', '/');
                string[] pathelements = filepath.Split('.');
                if (pathelements.Length > 3)
                    continue;
                if (pathelements.Length == 2)
                {
                    mo_bri.BundlePath = Hash64.HashString(pathelements[0]);
                    mo_bri.BundleExtension = Hash64.HashString(pathelements[1]);
                }
                else if (pathelements.Length == 3)
                {
                    mo_bri.BundlePath = Hash64.HashString(pathelements[0]);
                    UInt32 lang = 0;
                    if (UInt32.TryParse(pathelements[1], out lang))
                        mo_bri.BundleLanguage = lang;
                    mo_bri.BundleExtension = Hash64.HashString(pathelements[2]);
                    mo_bri.IsLanguageSpecific = true;
                }
                else
                    continue;
                mo_bri.ModName = mo_mod.Name;
                mo_bri.ModAuthor = mo_mod.Author;
                mo_bri.ModDescription = mo_mod.Description;
                mo_bri.ReplacementFile = "";
                if (mo_bri.isOverrideable())
                {
                    // Overrideable entries whose path/extension hashes are not in the
                    // hashlist cannot be addressed — drop them.
                    if (string.IsNullOrEmpty(HashIndex.GetUnhashed(mo_bri.BundlePath)) || string.IsNullOrEmpty(HashIndex.GetUnhashed(mo_bri.BundleExtension)))
                    {
                        continue;
                    }
                }
                mo_mod.ItemQueue.Add(mo_bri);
            }
            mod_overrides_mods.Add(mo_mod);
        }
        // Match every override folder against the known local mods by name.
        Dictionary<string, BundleMod> temporarylocalModsList_master = this.modsList;
        foreach (BundleMod mo_bm in mod_overrides_mods)
        {
            bool modMatch = false;
            List<BundleMod> matched_mods = temporarylocalModsList_master.Values.Where(mod => mod.getEscapedName().Equals(mo_bm.Name) || mod.Name.Equals(mo_bm.Name)).ToList();
            foreach (BundleMod bm in matched_mods)
            {
                modMatch = true;
                if (mo_bm.ItemQueue.Count > bm.ItemQueue.Count)
                {
                    // The override folder contains more files than the local mod:
                    // not the same mod — keep it as unrecognized.
                    mo_bm.canInstall = false;
                    mo_bm.canUninstall = true;
                    mo_bm.actionStatus = BundleMod.ModActionStatus.Missing;
                    mo_bm.status = BundleMod.ModStatus.Unrecognized;
                    AddModsList(mo_bm.file, mo_bm);
                }
                else
                {
                    // Check that every override item appears in the local mod's queue.
                    bool[] mo_checklist = new bool[mo_bm.ItemQueue.Count];
                    int checklist_i = 0;
                    bool mo_onlyfolder = !(bm.ItemQueue.Any(x => !x.isOverrideable()));
                    foreach (BundleRewriteItem mo_bri in mo_bm.ItemQueue)
                    {
                        if (bm.ItemQueue.Any(x => x.BundlePath == mo_bri.BundlePath && x.BundleExtension == mo_bri.BundleExtension))
                            mo_checklist[checklist_i] = true;
                        checklist_i++;
                    }
                    bool mo_equal = !mo_checklist.Any(x => !x);
                    if (!mo_equal)
                    {
                        mo_bm.canInstall = false;
                        mo_bm.canUninstall = true;
                        mo_bm.actionStatus = BundleMod.ModActionStatus.Missing;
                        mo_bm.status = BundleMod.ModStatus.Unrecognized;
                        AddModsList(mo_bm.file, mo_bm);
                    }
                    else
                    {
                        if (mo_onlyfolder || InstalledModsListContains(bm) > -1)
                        {
                            bm.status = BundleMod.ModStatus.Installed;
                        }
                        else
                        {
                            // Matched but bundle part not installed: force a reinstall.
                            bm.actionStatus = BundleMod.ModActionStatus.ForcedReinstall;
                            bm.status = BundleMod.ModStatus.ParticallyInstalled;
                            foreach (BundleRewriteItem bri in bm.ItemQueue)
                                bri.toReinstall = true;
                        }
                    }
                }
            }
            if (!modMatch)
            {
                mo_bm.canInstall = false;
                mo_bm.canUninstall = true;
                AddModsList(mo_bm.file, mo_bm);
            }
        }
    }
    watch.Stop();
    Console.WriteLine("LoadLocalMods.overrides - " + watch.ElapsedMilliseconds + " ms");

    watch.Restart();
    // --- Phase 3: BLT hook mods (only when the BLT "base" mod is present). ---
    if (Directory.Exists(Path.Combine(StaticStorage.settings.AssetsFolder, "..", "mods")))
    {
        if (Directory.Exists(Path.Combine(StaticStorage.settings.AssetsFolder, "..", "mods", "base")))
        {
            List<string> bltmods = Directory.EnumerateDirectories(Path.Combine(StaticStorage.settings.AssetsFolder, "..", "mods")).ToList();
            foreach (string bltmod in bltmods)
            {
                if (!Directory.Exists(bltmod))
                    continue;
                if (Path.GetFileNameWithoutExtension(bltmod).Equals("log") || Path.GetFileNameWithoutExtension(bltmod).Equals("base"))
                    continue;
                if (!File.Exists(Path.Combine(bltmod, "mod.txt")))
                    continue;
                BundleMod blt_mod = new BundleMod();
                blt_mod.Name = new DirectoryInfo(bltmod).Name;
                blt_mod.Author = "<UNKNOWN>";
                blt_mod.Description = "This is a BLT Hook mod. No description for this mod is availiable. This mod doesn't have a proper description. You can enable/disable this mod as well as uninstall it.";
                blt_mod.file = bltmod;
                blt_mod.status = BundleMod.ModStatus.Installed;
                blt_mod.type = BundleMod.ModType.lua;
                blt_mod.actionStatus = BundleMod.ModActionStatus.None;
                blt_mod.UtilizesOverride = false;
                blt_mod.UtilizesBundles = false;
                blt_mod.enabled = true;
                try
                {
                    // The StreamReader's using-block also disposes the underlying stream.
                    FileStream bltModfs = new FileStream(Path.Combine(bltmod, "mod.txt"), FileMode.Open);
                    using (StreamReader bltModsr = new StreamReader(bltModfs))
                    {
                        try
                        {
                            dynamic jsonDe = JsonConvert.DeserializeObject(bltModsr.ReadToEnd());
                            if (jsonDe != null)
                            {
                                if (jsonDe.name != null)
                                {
                                    blt_mod.Name = jsonDe.name;
                                }
                                if (jsonDe.author != null)
                                {
                                    blt_mod.Author = jsonDe.author;
                                }
                                if (jsonDe.description != null)
                                {
                                    blt_mod.Description = jsonDe.description;
                                }
                            }
                        }
                        catch (Exception exc)
                        {
                            blt_mod.Description += " Failed parsing mods.txt of " + Path.GetFileNameWithoutExtension(bltmod) + ", Message: " + exc.Message;
                        }
                    }
                }
                catch (Exception e)
                {
                    blt_mod.Description += " Failed parsing mods.txt of " + Path.GetFileNameWithoutExtension(bltmod) + ", Message: " + e.Message;
                }
                AddModsList(bltmod, blt_mod, true);
            }
        }
        LoadBLTModManagement();
    }
    watch.Stop();
    Console.WriteLine("LoadLocalMods.blt_mods - " + watch.ElapsedMilliseconds + " ms");
}
protected internal virtual void AddGuess(LabeledChunkIdentifier.LabelTagType guess, LabeledChunkIdentifier.LabelTagType correct, bool addUnknownLabels) { if (addUnknownLabels) { if (labelIndex == null) { labelIndex = new HashIndex <string>(); } labelIndex.Add(GetTypeLabel(guess)); labelIndex.Add(GetTypeLabel(correct)); } if (inCorrect) { bool prevCorrectEnded = LabeledChunkIdentifier.IsEndOfChunk(prevCorrect, correct); bool prevGuessEnded = LabeledChunkIdentifier.IsEndOfChunk(prevGuess, guess); if (prevCorrectEnded && prevGuessEnded && prevGuess.TypeMatches(prevCorrect)) { inCorrect = false; correctGuesses.IncrementCount(GetTypeLabel(prevCorrect)); } else { if (prevCorrectEnded != prevGuessEnded || !guess.TypeMatches(correct)) { inCorrect = false; } } } bool correctStarted = LabeledChunkIdentifier.IsStartOfChunk(prevCorrect, correct); bool guessStarted = LabeledChunkIdentifier.IsStartOfChunk(prevGuess, guess); if (correctStarted && guessStarted && guess.TypeMatches(correct)) { inCorrect = true; } if (correctStarted) { foundCorrect.IncrementCount(GetTypeLabel(correct)); } if (guessStarted) { foundGuessed.IncrementCount(GetTypeLabel(guess)); } if (chunker.IsIgnoreProvidedTag()) { if (guess.TypeMatches(correct)) { tokensCorrect++; } } else { if (guess.label.Equals(correct.label)) { tokensCorrect++; } } tokensCount++; prevGuess = guess; prevCorrect = correct; }
/// <summary>
/// Trains a FactoredLexicon on a treebank and reports tagging accuracy on a
/// dev set. Usage: language features train_file dev_file, where features is a
/// comma-separated list of MorphoFeatureType names. Only Arabic and French
/// are supported.
/// </summary>
/// <param name="args"/>
public static void Main(string[] args)
{
    if (args.Length != 4)
    {
        System.Console.Error.Printf("Usage: java %s language features train_file dev_file%n", typeof(Edu.Stanford.Nlp.Parser.Lexparser.FactoredLexicon).FullName);
        System.Environment.Exit(-1);
    }
    // Command line options
    Language language = Language.ValueOf(args[0]);
    ITreebankLangParserParams tlpp = language.@params;
    Treebank trainTreebank = tlpp.DiskTreebank();
    trainTreebank.LoadPath(args[2]);
    Treebank devTreebank = tlpp.DiskTreebank();
    devTreebank.LoadPath(args[3]);
    MorphoFeatureSpecification morphoSpec;
    Options options = GetOptions(language);
    // Language-specific morphology setup; anything other than Arabic/French
    // is rejected.
    if (language.Equals(Language.Arabic))
    {
        morphoSpec = new ArabicMorphoFeatureSpecification();
        string[] languageOptions = new string[] { "-arabicFactored" };
        tlpp.SetOptionFlag(languageOptions, 0);
    }
    else
    {
        if (language.Equals(Language.French))
        {
            morphoSpec = new FrenchMorphoFeatureSpecification();
            string[] languageOptions = new string[] { "-frenchFactored" };
            tlpp.SetOptionFlag(languageOptions, 0);
        }
        else
        {
            throw new NotSupportedException();
        }
    }
    // Activate each requested morphological feature by name.
    string featureList = args[1];
    string[] features = featureList.Trim().Split(",");
    foreach (string feature in features)
    {
        morphoSpec.Activate(MorphoFeatureSpecification.MorphoFeatureType.ValueOf(feature));
    }
    System.Console.Out.WriteLine("Language: " + language.ToString());
    System.Console.Out.WriteLine("Features: " + args[1]);
    // Create word and tag indices
    // Save trees in a collection since the interface requires that....
    System.Console.Out.Write("Loading training trees...");
    IList <Tree> trainTrees = new List <Tree>(19000);
    IIndex <string> wordIndex = new HashIndex <string>();
    IIndex <string> tagIndex = new HashIndex <string>();
    foreach (Tree tree in trainTreebank)
    {
        // Transform all non-leaf nodes in place before collecting the tree.
        foreach (Tree subTree in tree)
        {
            if (!subTree.IsLeaf())
            {
                tlpp.TransformTree(subTree, tree);
            }
        }
        trainTrees.Add(tree);
    }
    System.Console.Out.Printf("Done! (%d trees)%n", trainTrees.Count);
    // Setup and train the lexicon.
    System.Console.Out.Write("Collecting sufficient statistics for lexicon...");
    Edu.Stanford.Nlp.Parser.Lexparser.FactoredLexicon lexicon = new Edu.Stanford.Nlp.Parser.Lexparser.FactoredLexicon(options, morphoSpec, wordIndex, tagIndex);
    lexicon.InitializeTraining(trainTrees.Count);
    lexicon.Train(trainTrees, null);
    lexicon.FinishTraining();
    System.Console.Out.WriteLine("Done!");
    // Release the training trees before scoring the tuning set.
    trainTrees = null;
    // Load the tuning set
    System.Console.Out.Write("Loading tuning set...");
    IList <FactoredLexiconEvent> tuningSet = GetTuningSet(devTreebank, lexicon, tlpp);
    System.Console.Out.Printf("...Done! (%d events)%n", tuningSet.Count);
    // Print the probabilities that we obtain
    // TODO(spenceg): Implement tagging accuracy with FactLex
    int nCorrect = 0;
    ICounter <string> errors = new ClassicCounter <string>();
    foreach (FactoredLexiconEvent @event in tuningSet)
    {
        // Score every tag the lexicon proposes for this word/context.
        IEnumerator <IntTaggedWord> itr = lexicon.RuleIteratorByWord(@event.Word(), @event.GetLoc(), @event.FeatureStr());
        ICounter <int> logScores = new ClassicCounter <int>();
        bool noRules = true;
        int goldTagId = -1;
        while (itr.MoveNext())
        {
            noRules = false;
            IntTaggedWord iTW = itr.Current;
            if (iTW.Tag() == @event.TagId())
            {
                log.Info("GOLD-");
                goldTagId = iTW.Tag();
            }
            float tagScore = lexicon.Score(iTW, @event.GetLoc(), @event.Word(), @event.FeatureStr());
            logScores.IncrementCount(iTW.Tag(), tagScore);
        }
        if (noRules)
        {
            System.Console.Error.Printf("NO TAGGINGS: %s %s%n", @event.Word(), @event.FeatureStr());
        }
        else
        {
            // Score the tagging: the argmax tag is the hypothesis.
            int hypTagId = Counters.Argmax(logScores);
            if (hypTagId == goldTagId)
            {
                ++nCorrect;
            }
            else
            {
                // goldTagId stays -1 when the gold tag was never proposed.
                string goldTag = goldTagId < 0 ? "UNSEEN" : lexicon.tagIndex.Get(goldTagId);
                errors.IncrementCount(goldTag);
            }
        }
        log.Info();
    }
    // Output accuracy
    double acc = (double)nCorrect / (double)tuningSet.Count;
    System.Console.Error.Printf("%n%nACCURACY: %.2f%n%n", acc * 100.0);
    log.Info("% of errors by type:");
    // Report error mass per gold tag, largest first.
    IList <string> biggestKeys = new List <string>(errors.KeySet());
    biggestKeys.Sort(Counters.ToComparator(errors, false, true));
    Counters.Normalize(errors);
    foreach (string key in biggestKeys)
    {
        System.Console.Error.Printf("%s\t%.2f%n", key, errors.GetCount(key) * 100.0);
    }
}
/* some documentation for Roger's convenience * {pcfg,dep,combo}{PE,DE,TE} are precision/dep/tagging evals for the models * * parser is the PCFG parser * dparser is the dependency parser * bparser is the combining parser * * during testing: * tree is the test tree (gold tree) * binaryTree is the gold tree binarized * tree2b is the best PCFG paser, binarized * tree2 is the best PCFG parse (debinarized) * tree3 is the dependency parse, binarized * tree3db is the dependency parser, debinarized * tree4 is the best combo parse, binarized and then debinarized * tree4b is the best combo parse, binarized */ public static void Main(string[] args) { Options op = new Options(new EnglishTreebankParserParams()); // op.tlpParams may be changed to something else later, so don't use it till // after options are parsed. StringUtils.LogInvocationString(log, args); string path = "/u/nlp/stuff/corpora/Treebank3/parsed/mrg/wsj"; int trainLow = 200; int trainHigh = 2199; int testLow = 2200; int testHigh = 2219; string serializeFile = null; int i = 0; while (i < args.Length && args[i].StartsWith("-")) { if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-path") && (i + 1 < args.Length)) { path = args[i + 1]; i += 2; } else { if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-train") && (i + 2 < args.Length)) { trainLow = System.Convert.ToInt32(args[i + 1]); trainHigh = System.Convert.ToInt32(args[i + 2]); i += 3; } else { if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-test") && (i + 2 < args.Length)) { testLow = System.Convert.ToInt32(args[i + 1]); testHigh = System.Convert.ToInt32(args[i + 2]); i += 3; } else { if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-serialize") && (i + 1 < args.Length)) { serializeFile = args[i + 1]; i += 2; } else { if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-tLPP") && (i + 1 < args.Length)) { try { op.tlpParams = (ITreebankLangParserParams)System.Activator.CreateInstance(Sharpen.Runtime.GetType(args[i + 1])); } catch (TypeLoadException e) { log.Info("Class not 
found: " + args[i + 1]); throw new Exception(e); } catch (InstantiationException e) { log.Info("Couldn't instantiate: " + args[i + 1] + ": " + e.ToString()); throw new Exception(e); } catch (MemberAccessException e) { log.Info("illegal access" + e); throw new Exception(e); } i += 2; } else { if (args[i].Equals("-encoding")) { // sets encoding for TreebankLangParserParams op.tlpParams.SetInputEncoding(args[i + 1]); op.tlpParams.SetOutputEncoding(args[i + 1]); i += 2; } else { i = op.SetOptionOrWarn(args, i); } } } } } } } // System.out.println(tlpParams.getClass()); ITreebankLanguagePack tlp = op.tlpParams.TreebankLanguagePack(); op.trainOptions.sisterSplitters = Generics.NewHashSet(Arrays.AsList(op.tlpParams.SisterSplitters())); // BinarizerFactory.TreeAnnotator.setTreebankLang(tlpParams); PrintWriter pw = op.tlpParams.Pw(); op.testOptions.Display(); op.trainOptions.Display(); op.Display(); op.tlpParams.Display(); // setup tree transforms Treebank trainTreebank = op.tlpParams.MemoryTreebank(); MemoryTreebank testTreebank = op.tlpParams.TestMemoryTreebank(); // Treebank blippTreebank = ((EnglishTreebankParserParams) tlpParams).diskTreebank(); // String blippPath = "/afs/ir.stanford.edu/data/linguistic-data/BLLIP-WSJ/"; // blippTreebank.loadPath(blippPath, "", true); Timing.StartTime(); log.Info("Reading trees..."); testTreebank.LoadPath(path, new NumberRangeFileFilter(testLow, testHigh, true)); if (op.testOptions.increasingLength) { testTreebank.Sort(new TreeLengthComparator()); } trainTreebank.LoadPath(path, new NumberRangeFileFilter(trainLow, trainHigh, true)); Timing.Tick("done."); log.Info("Binarizing trees..."); TreeAnnotatorAndBinarizer binarizer; if (!op.trainOptions.leftToRight) { binarizer = new TreeAnnotatorAndBinarizer(op.tlpParams, op.forceCNF, !op.trainOptions.OutsideFactor(), true, op); } else { binarizer = new TreeAnnotatorAndBinarizer(op.tlpParams.HeadFinder(), new LeftHeadFinder(), op.tlpParams, op.forceCNF, !op.trainOptions.OutsideFactor(), true, 
op); } CollinsPuncTransformer collinsPuncTransformer = null; if (op.trainOptions.collinsPunc) { collinsPuncTransformer = new CollinsPuncTransformer(tlp); } ITreeTransformer debinarizer = new Debinarizer(op.forceCNF); IList <Tree> binaryTrainTrees = new List <Tree>(); if (op.trainOptions.selectiveSplit) { op.trainOptions.splitters = ParentAnnotationStats.GetSplitCategories(trainTreebank, op.trainOptions.tagSelectiveSplit, 0, op.trainOptions.selectiveSplitCutOff, op.trainOptions.tagSelectiveSplitCutOff, op.tlpParams.TreebankLanguagePack()); if (op.trainOptions.deleteSplitters != null) { IList <string> deleted = new List <string>(); foreach (string del in op.trainOptions.deleteSplitters) { string baseDel = tlp.BasicCategory(del); bool checkBasic = del.Equals(baseDel); for (IEnumerator <string> it = op.trainOptions.splitters.GetEnumerator(); it.MoveNext();) { string elem = it.Current; string baseElem = tlp.BasicCategory(elem); bool delStr = checkBasic && baseElem.Equals(baseDel) || elem.Equals(del); if (delStr) { it.Remove(); deleted.Add(elem); } } } log.Info("Removed from vertical splitters: " + deleted); } } if (op.trainOptions.selectivePostSplit) { ITreeTransformer myTransformer = new TreeAnnotator(op.tlpParams.HeadFinder(), op.tlpParams, op); Treebank annotatedTB = trainTreebank.Transform(myTransformer); op.trainOptions.postSplitters = ParentAnnotationStats.GetSplitCategories(annotatedTB, true, 0, op.trainOptions.selectivePostSplitCutOff, op.trainOptions.tagSelectivePostSplitCutOff, op.tlpParams.TreebankLanguagePack()); } if (op.trainOptions.hSelSplit) { binarizer.SetDoSelectiveSplit(false); foreach (Tree tree in trainTreebank) { if (op.trainOptions.collinsPunc) { tree = collinsPuncTransformer.TransformTree(tree); } //tree.pennPrint(tlpParams.pw()); tree = binarizer.TransformTree(tree); } //binaryTrainTrees.add(tree); binarizer.SetDoSelectiveSplit(true); } foreach (Tree tree_1 in trainTreebank) { if (op.trainOptions.collinsPunc) { tree_1 = 
collinsPuncTransformer.TransformTree(tree_1); } tree_1 = binarizer.TransformTree(tree_1); binaryTrainTrees.Add(tree_1); } if (op.testOptions.verbose) { binarizer.DumpStats(); } IList <Tree> binaryTestTrees = new List <Tree>(); foreach (Tree tree_2 in testTreebank) { if (op.trainOptions.collinsPunc) { tree_2 = collinsPuncTransformer.TransformTree(tree_2); } tree_2 = binarizer.TransformTree(tree_2); binaryTestTrees.Add(tree_2); } Timing.Tick("done."); // binarization BinaryGrammar bg = null; UnaryGrammar ug = null; IDependencyGrammar dg = null; // DependencyGrammar dgBLIPP = null; ILexicon lex = null; IIndex <string> stateIndex = new HashIndex <string>(); // extract grammars IExtractor <Pair <UnaryGrammar, BinaryGrammar> > bgExtractor = new BinaryGrammarExtractor(op, stateIndex); //Extractor bgExtractor = new SmoothedBinaryGrammarExtractor();//new BinaryGrammarExtractor(); // Extractor lexExtractor = new LexiconExtractor(); //Extractor dgExtractor = new DependencyMemGrammarExtractor(); if (op.doPCFG) { log.Info("Extracting PCFG..."); Pair <UnaryGrammar, BinaryGrammar> bgug = null; if (op.trainOptions.cheatPCFG) { IList <Tree> allTrees = new List <Tree>(binaryTrainTrees); Sharpen.Collections.AddAll(allTrees, binaryTestTrees); bgug = bgExtractor.Extract(allTrees); } else { bgug = bgExtractor.Extract(binaryTrainTrees); } bg = bgug.second; bg.SplitRules(); ug = bgug.first; ug.PurgeRules(); Timing.Tick("done."); } log.Info("Extracting Lexicon..."); IIndex <string> wordIndex = new HashIndex <string>(); IIndex <string> tagIndex = new HashIndex <string>(); lex = op.tlpParams.Lex(op, wordIndex, tagIndex); lex.InitializeTraining(binaryTrainTrees.Count); lex.Train(binaryTrainTrees); lex.FinishTraining(); Timing.Tick("done."); if (op.doDep) { log.Info("Extracting Dependencies..."); binaryTrainTrees.Clear(); IExtractor <IDependencyGrammar> dgExtractor = new MLEDependencyGrammarExtractor(op, wordIndex, tagIndex); // dgBLIPP = (DependencyGrammar) dgExtractor.extract(new 
ConcatenationIterator(trainTreebank.iterator(),blippTreebank.iterator()),new TransformTreeDependency(tlpParams,true)); // DependencyGrammar dg1 = dgExtractor.extract(trainTreebank.iterator(), new TransformTreeDependency(op.tlpParams, true)); //dgBLIPP=(DependencyGrammar)dgExtractor.extract(blippTreebank.iterator(),new TransformTreeDependency(tlpParams)); //dg = (DependencyGrammar) dgExtractor.extract(new ConcatenationIterator(trainTreebank.iterator(),blippTreebank.iterator()),new TransformTreeDependency(tlpParams)); // dg=new DependencyGrammarCombination(dg1,dgBLIPP,2); dg = dgExtractor.Extract(binaryTrainTrees); //uses information whether the words are known or not, discards unknown words Timing.Tick("done."); //System.out.print("Extracting Unknown Word Model..."); //UnknownWordModel uwm = (UnknownWordModel)uwmExtractor.extract(binaryTrainTrees); //Timing.tick("done."); System.Console.Out.Write("Tuning Dependency Model..."); dg.Tune(binaryTestTrees); //System.out.println("TUNE DEPS: "+tuneDeps); Timing.Tick("done."); } BinaryGrammar boundBG = bg; UnaryGrammar boundUG = ug; IGrammarProjection gp = new NullGrammarProjection(bg, ug); // serialization if (serializeFile != null) { log.Info("Serializing parser..."); LexicalizedParser parser = new LexicalizedParser(lex, bg, ug, dg, stateIndex, wordIndex, tagIndex, op); parser.SaveParserToSerialized(serializeFile); Timing.Tick("done."); } // test: pcfg-parse and output ExhaustivePCFGParser parser_1 = null; if (op.doPCFG) { parser_1 = new ExhaustivePCFGParser(boundBG, boundUG, lex, op, stateIndex, wordIndex, tagIndex); } ExhaustiveDependencyParser dparser = ((op.doDep && !op.testOptions.useFastFactored) ? new ExhaustiveDependencyParser(dg, lex, op, wordIndex, tagIndex) : null); IScorer scorer = (op.doPCFG ? new TwinScorer(new ProjectionScorer(parser_1, gp, op), dparser) : null); //Scorer scorer = parser; BiLexPCFGParser bparser = null; if (op.doPCFG && op.doDep) { bparser = (op.testOptions.useN5) ? 
new BiLexPCFGParser.N5BiLexPCFGParser(scorer, parser_1, dparser, bg, ug, dg, lex, op, gp, stateIndex, wordIndex, tagIndex) : new BiLexPCFGParser(scorer, parser_1, dparser, bg, ug, dg, lex, op, gp, stateIndex , wordIndex, tagIndex); } Evalb pcfgPE = new Evalb("pcfg PE", true); Evalb comboPE = new Evalb("combo PE", true); AbstractEval pcfgCB = new Evalb.CBEval("pcfg CB", true); AbstractEval pcfgTE = new TaggingEval("pcfg TE"); AbstractEval comboTE = new TaggingEval("combo TE"); AbstractEval pcfgTEnoPunct = new TaggingEval("pcfg nopunct TE"); AbstractEval comboTEnoPunct = new TaggingEval("combo nopunct TE"); AbstractEval depTE = new TaggingEval("depnd TE"); AbstractEval depDE = new UnlabeledAttachmentEval("depnd DE", true, null, tlp.PunctuationWordRejectFilter()); AbstractEval comboDE = new UnlabeledAttachmentEval("combo DE", true, null, tlp.PunctuationWordRejectFilter()); if (op.testOptions.evalb) { EvalbFormatWriter.InitEVALBfiles(op.tlpParams); } // int[] countByLength = new int[op.testOptions.maxLength+1]; // Use a reflection ruse, so one can run this without needing the // tagger. Using a function rather than a MaxentTagger means we // can distribute a version of the parser that doesn't include the // entire tagger. 
IFunction <IList <IHasWord>, List <TaggedWord> > tagger = null; if (op.testOptions.preTag) { try { Type[] argsClass = new Type[] { typeof(string) }; object[] arguments = new object[] { op.testOptions.taggerSerializedFile }; tagger = (IFunction <IList <IHasWord>, List <TaggedWord> >)Sharpen.Runtime.GetType("edu.stanford.nlp.tagger.maxent.MaxentTagger").GetConstructor(argsClass).NewInstance(arguments); } catch (Exception e) { log.Info(e); log.Info("Warning: No pretagging of sentences will be done."); } } for (int tNum = 0; tNum < ttSize; tNum++) { Tree tree = testTreebank[tNum]; int testTreeLen = tree_2.Yield().Count; if (testTreeLen > op.testOptions.maxLength) { continue; } Tree binaryTree = binaryTestTrees[tNum]; // countByLength[testTreeLen]++; System.Console.Out.WriteLine("-------------------------------------"); System.Console.Out.WriteLine("Number: " + (tNum + 1)); System.Console.Out.WriteLine("Length: " + testTreeLen); //tree.pennPrint(pw); // System.out.println("XXXX The binary tree is"); // binaryTree.pennPrint(pw); //System.out.println("Here are the tags in the lexicon:"); //System.out.println(lex.showTags()); //System.out.println("Here's the tagnumberer:"); //System.out.println(Numberer.getGlobalNumberer("tags").toString()); long timeMil1 = Runtime.CurrentTimeMillis(); Timing.Tick("Starting parse."); if (op.doPCFG) { //log.info(op.testOptions.forceTags); if (op.testOptions.forceTags) { if (tagger != null) { //System.out.println("Using a tagger to set tags"); //System.out.println("Tagged sentence as: " + tagger.processSentence(cutLast(wordify(binaryTree.yield()))).toString(false)); parser_1.Parse(AddLast(tagger.Apply(CutLast(Wordify(binaryTree.Yield()))))); } else { //System.out.println("Forcing tags to match input."); parser_1.Parse(CleanTags(binaryTree.TaggedYield(), tlp)); } } else { // System.out.println("XXXX Parsing " + binaryTree.yield()); parser_1.Parse(binaryTree.YieldHasWord()); } } //Timing.tick("Done with pcfg phase."); if (op.doDep) { 
dparser.Parse(binaryTree.YieldHasWord()); } //Timing.tick("Done with dependency phase."); bool bothPassed = false; if (op.doPCFG && op.doDep) { bothPassed = bparser.Parse(binaryTree.YieldHasWord()); } //Timing.tick("Done with combination phase."); long timeMil2 = Runtime.CurrentTimeMillis(); long elapsed = timeMil2 - timeMil1; log.Info("Time: " + ((int)(elapsed / 100)) / 10.00 + " sec."); //System.out.println("PCFG Best Parse:"); Tree tree2b = null; Tree tree2 = null; //System.out.println("Got full best parse..."); if (op.doPCFG) { tree2b = parser_1.GetBestParse(); tree2 = debinarizer.TransformTree(tree2b); } //System.out.println("Debinarized parse..."); //tree2.pennPrint(); //System.out.println("DepG Best Parse:"); Tree tree3 = null; Tree tree3db = null; if (op.doDep) { tree3 = dparser.GetBestParse(); // was: but wrong Tree tree3db = debinarizer.transformTree(tree2); tree3db = debinarizer.TransformTree(tree3); tree3.PennPrint(pw); } //tree.pennPrint(); //((Tree)binaryTrainTrees.get(tNum)).pennPrint(); //System.out.println("Combo Best Parse:"); Tree tree4 = null; if (op.doPCFG && op.doDep) { try { tree4 = bparser.GetBestParse(); if (tree4 == null) { tree4 = tree2b; } } catch (ArgumentNullException) { log.Info("Blocked, using PCFG parse!"); tree4 = tree2b; } } if (op.doPCFG && !bothPassed) { tree4 = tree2b; } //tree4.pennPrint(); if (op.doDep) { depDE.Evaluate(tree3, binaryTree, pw); depTE.Evaluate(tree3db, tree_2, pw); } ITreeTransformer tc = op.tlpParams.Collinizer(); ITreeTransformer tcEvalb = op.tlpParams.CollinizerEvalb(); if (op.doPCFG) { // System.out.println("XXXX Best PCFG was: "); // tree2.pennPrint(); // System.out.println("XXXX Transformed best PCFG is: "); // tc.transformTree(tree2).pennPrint(); //System.out.println("True Best Parse:"); //tree.pennPrint(); //tc.transformTree(tree).pennPrint(); pcfgPE.Evaluate(tc.TransformTree(tree2), tc.TransformTree(tree_2), pw); pcfgCB.Evaluate(tc.TransformTree(tree2), tc.TransformTree(tree_2), pw); Tree tree4b = 
null; if (op.doDep) { comboDE.Evaluate((bothPassed ? tree4 : tree3), binaryTree, pw); tree4b = tree4; tree4 = debinarizer.TransformTree(tree4); if (op.nodePrune) { NodePruner np = new NodePruner(parser_1, debinarizer); tree4 = np.Prune(tree4); } //tree4.pennPrint(); comboPE.Evaluate(tc.TransformTree(tree4), tc.TransformTree(tree_2), pw); } //pcfgTE.evaluate(tree2, tree); pcfgTE.Evaluate(tcEvalb.TransformTree(tree2), tcEvalb.TransformTree(tree_2), pw); pcfgTEnoPunct.Evaluate(tc.TransformTree(tree2), tc.TransformTree(tree_2), pw); if (op.doDep) { comboTE.Evaluate(tcEvalb.TransformTree(tree4), tcEvalb.TransformTree(tree_2), pw); comboTEnoPunct.Evaluate(tc.TransformTree(tree4), tc.TransformTree(tree_2), pw); } System.Console.Out.WriteLine("PCFG only: " + parser_1.ScoreBinarizedTree(tree2b, 0)); //tc.transformTree(tree2).pennPrint(); tree2.PennPrint(pw); if (op.doDep) { System.Console.Out.WriteLine("Combo: " + parser_1.ScoreBinarizedTree(tree4b, 0)); // tc.transformTree(tree4).pennPrint(pw); tree4.PennPrint(pw); } System.Console.Out.WriteLine("Correct:" + parser_1.ScoreBinarizedTree(binaryTree, 0)); /* * if (parser.scoreBinarizedTree(tree2b,true) < parser.scoreBinarizedTree(binaryTree,true)) { * System.out.println("SCORE INVERSION"); * parser.validateBinarizedTree(binaryTree,0); * } */ tree_2.PennPrint(pw); } // end if doPCFG if (op.testOptions.evalb) { if (op.doPCFG && op.doDep) { EvalbFormatWriter.WriteEVALBline(tcEvalb.TransformTree(tree_2), tcEvalb.TransformTree(tree4)); } else { if (op.doPCFG) { EvalbFormatWriter.WriteEVALBline(tcEvalb.TransformTree(tree_2), tcEvalb.TransformTree(tree2)); } else { if (op.doDep) { EvalbFormatWriter.WriteEVALBline(tcEvalb.TransformTree(tree_2), tcEvalb.TransformTree(tree3db)); } } } } } // end for each tree in test treebank if (op.testOptions.evalb) { EvalbFormatWriter.CloseEVALBfiles(); } // op.testOptions.display(); if (op.doPCFG) { pcfgPE.Display(false, pw); System.Console.Out.WriteLine("Grammar size: " + stateIndex.Size()); 
pcfgCB.Display(false, pw); if (op.doDep) { comboPE.Display(false, pw); } pcfgTE.Display(false, pw); pcfgTEnoPunct.Display(false, pw); if (op.doDep) { comboTE.Display(false, pw); comboTEnoPunct.Display(false, pw); } } if (op.doDep) { depTE.Display(false, pw); depDE.Display(false, pw); } if (op.doPCFG && op.doDep) { comboDE.Display(false, pw); } }
/// <summary>Generates the full hash list and writes it to "./FullHashlist".</summary>
public void execute() => HashIndex.GenerateHashList("./FullHashlist");
/// <summary>
/// Looks up the packaged file for <paramref name="path"/> and extracts it, dispatching to a
/// registered per-extension processor when one exists, otherwise writing the raw file.
/// For ".object" files, the companion "model" and "cooked_physics" files sharing the same
/// base path are extracted first, when present in the package.
/// </summary>
/// <param name="path">Virtual (in-package) path of the file to process.</param>
private void ProcessFile(string path)
{
    Idstring p_ids = HashIndex.Get(Path.GetFileNameWithoutExtension(path));
    var t_ids = new Tuple<Idstring, Idstring, Idstring>(p_ids, new Idstring(0), HashIndex.Get(Path.GetExtension(path)));
    if (!this._browser.RawFiles.ContainsKey(t_ids))
    {
        this.error_output.Write(string.Format("File with path {0} does not exist!\n", path));
        this.error_output.Flush();
        return;
    }
    FileEntry file = this._browser.RawFiles[t_ids];
    // Skip entries with no bundle backing and files that were already extracted.
    if (file.BundleEntries.Count == 0 || this.ExtractedPaths.Contains(p_ids))
    {
        return;
    }
    try
    {
        if (Path.GetExtension(path) == ".object")
        {
            // The companion files share the object's base path (directory plus filename
            // without extension, forward-slash separated) — compute it once; the original
            // computed the identical string twice.
            string base_path = Path.Combine(Path.GetDirectoryName(path), Path.GetFileNameWithoutExtension(path)).Replace("\\", "/");
            //error_output.WriteLine(string.Format("Attempt to output model file {0}", base_path));
            this.WriteCompanionFile(base_path, "model");
            //error_output.WriteLine(string.Format("Attempt to output cooked_physics file {0}", base_path));
            this.WriteCompanionFile(base_path, "cooked_physics");
        }
        if (this.FileProcessors.ContainsKey(file._extension.ToString()))
        {
            this.FileProcessors[file._extension.ToString()].Invoke(file);
        }
        else
        {
            this.WriteFile(file);
        }
    }
    catch (Exception exc)
    {
        this.error_output.Write("Exception occured on file: {0}\n", file.Path);
        this.error_output.Write(exc.Message + "\n");
        this.error_output.Write(exc.StackTrace + "\n");
        this.error_output.Flush();
    }
}

/// <summary>
/// Writes the packaged file identified by <paramref name="basePath"/> and
/// <paramref name="extension"/>, if it exists in the browser's raw file table.
/// </summary>
/// <param name="basePath">Base virtual path (no extension, forward-slash separated).</param>
/// <param name="extension">Extension name without the leading dot (e.g. "model").</param>
private void WriteCompanionFile(string basePath, string extension)
{
    var key = new Tuple<Idstring, Idstring, Idstring>(HashIndex.Get(basePath), new Idstring(0), HashIndex.Get(extension));
    if (this._browser.RawFiles.ContainsKey(key))
    {
        this.WriteFile(this._browser.RawFiles[key]);
    }
}
/// <summary>
/// Provides some testing and opportunities for exploration of the
/// probabilities of a BaseLexicon.
/// </summary>
/// <remarks>
/// Provides some testing and opportunities for exploration of the
/// probabilities of a BaseLexicon. What's here currently probably
/// only works for the English Penn Treebank, as it uses default
/// constructors. Of the words given to test on,
/// the first is treated as sentence initial, and the rest as not
/// sentence initial.
/// </remarks>
/// <param name="args">
/// The command line arguments:
/// java BaseLexicon treebankPath fileRange unknownWordModel words
/// </param>
public static void Main(string[] args)
{
    if (args.Length < 3)
    {
        log.Info("java BaseLexicon treebankPath fileRange unknownWordModel words*");
        return;
    }
    // Train a lexicon from the treebank at args[0], restricted to the file range args[1].
    System.Console.Out.Write("Training BaseLexicon from " + args[0] + ' ' + args[1] + " ... ");
    Treebank tb = new DiskTreebank();
    tb.LoadPath(args[0], new NumberRangesFileFilter(args[1], true));
    // TODO: change this interface so the lexicon creates its own indices?
    IIndex<string> wordIndex = new HashIndex<string>();
    IIndex<string> tagIndex = new HashIndex<string>();
    Options op = new Options();
    // args[2] selects the unknown-word signature model level.
    op.lexOptions.useUnknownWordSignatures = System.Convert.ToInt32(args[2]);
    Edu.Stanford.Nlp.Parser.Lexparser.BaseLexicon lex = new Edu.Stanford.Nlp.Parser.Lexparser.BaseLexicon(op, wordIndex, tagIndex);
    lex.InitializeTraining(tb.Count);
    lex.Train(tb);
    lex.FinishTraining();
    System.Console.Out.WriteLine("done.");
    System.Console.Out.WriteLine();
    NumberFormat nf = NumberFormat.GetNumberInstance();
    nf.SetMaximumFractionDigits(4);
    IList<string> impos = new List<string>();
    // Score each remaining argument word. Position i - 3 is passed as the word's
    // sentence position, so the first test word (i == 3) is treated as sentence-initial.
    for (int i = 3; i < args.Length; i++)
    {
        if (lex.IsKnown(args[i]))
        {
            // Known word: print log P(w|t) for every tagging the lexicon allows.
            System.Console.Out.WriteLine(args[i] + " is a known word. Log probabilities [log P(w|t)] for its taggings are:");
            for (IEnumerator<IntTaggedWord> it = lex.RuleIteratorByWord(wordIndex.AddToIndex(args[i]), i - 3, null); it.MoveNext();)
            {
                IntTaggedWord iTW = it.Current;
                System.Console.Out.WriteLine(StringUtils.Pad(iTW, 24) + nf.Format(lex.Score(iTW, i - 3, wordIndex.Get(iTW.word), null)));
            }
        }
        else
        {
            // Unknown word: report its signature under the configured unknown-word model,
            // then try every tag seen in training.
            string sig = lex.GetUnknownWordModel().GetSignature(args[i], i - 3);
            System.Console.Out.WriteLine(args[i] + " is an unknown word. Signature with uwm " + lex.GetUnknownWordModel().GetUnknownLevel() + ((i == 3) ? " init" : "non-init") + " is: " + sig);
            impos.Clear();
            IList<string> lis = new List<string>(tagIndex.ObjectsList());
            lis.Sort();
            foreach (string tStr in lis)
            {
                IntTaggedWord iTW = new IntTaggedWord(args[i], tStr, wordIndex, tagIndex);
                // NOTE(review): position is hard-coded to 1 (non-initial) here, unlike the
                // i - 3 used above — confirm this is intended.
                double score = lex.Score(iTW, 1, args[i], null);
                if (score == float.NegativeInfinity)
                {
                    // Collect impossible taggings and report them together below.
                    impos.Add(tStr);
                }
                else
                {
                    System.Console.Out.WriteLine(StringUtils.Pad(iTW, 24) + nf.Format(score));
                }
            }
            if (impos.Count > 0)
            {
                System.Console.Out.WriteLine(args[i] + " impossible tags: " + impos);
            }
        }
        System.Console.Out.WriteLine();
    }
}
/// <summary>
/// Generates the full hash list and writes it to "./FullHashlist".
/// The supplied <paramref name="browser"/> is not used.
/// </summary>
public void execute(PackageBrowser browser) => HashIndex.GenerateHashList("./FullHashlist");
/// <summary>
/// Builds a display path for a hashed file entry. Folder and file names are resolved
/// through <c>HashList</c> when known; unresolved parts fall back to their hex hash,
/// with a "[FullHash=...]" tag appended when the folder hash is unknown. For recognized
/// binary formats the file name embedded in the data header is used instead.
/// </summary>
private string GetString(ulong fullHash, HashIndex fileHashIndex, byte[] Data)
{
    var folderHash = fileHashIndex.Parent.hash;
    var fileHash = fileHashIndex.hash;

    // Resolve the folder portion; fall back to the hex hash when it is not in the list.
    bool hasFolderHash = HashList.ContainsKey(folderHash);
    string folder = hasFolderHash
        ? $"{HashList[folderHash]}/"
        : $"{folderHash.ToString("X")}/";

    string ext = FindMatch(Data);
    if (ext == ".bntx" || ext == ".bfres" || ext == ".bnsh" || ext == ".bfsha")
    {
        // These formats carry their own name in the binary header.
        string fileName = GetBinaryHeaderName(Data);
        if (ext == ".bnsh")
        {
            // A .bnsh may be a fragment or vertex shader; disambiguate via the file hash.
            if (FNV64A1.Calculate($"{fileName}.bnsh_fsh") == fileHash)
            {
                fileName = $"{fileName}.bnsh_fsh";
            }
            else if (FNV64A1.Calculate($"{fileName}.bnsh_vsh") == fileHash)
            {
                fileName = $"{fileName}.bnsh_vsh";
            }
        }
        else
        {
            fileName = $"{fileName}{ext}";
        }
        return hasFolderHash
            ? $"{folder}{fileName}"
            : $"{folder}{fileName}[FullHash={fullHash.ToString("X")}]{ext}";
    }

    if (HashList.ContainsKey(fileHash))
    {
        return hasFolderHash
            ? $"{folder}{HashList[fileHash]}"
            : $"{folder}{HashList[fileHash]}[FullHash={fullHash.ToString("X")}]{ext}";
    }
    return $"{folder}{fileHash.ToString("X")}[FullHash={fullHash.ToString("X")}]{ext}";
}