/// <summary>
/// Loads all proximity classes from the hard disk. The WORDS map must be
/// created before this method is called.
/// </summary>
/// <remarks>
/// Each line that tokenizes to at least one token defines one class: the
/// first token, mapped through <c>Words.Get</c>, is the key, and the first
/// <c>ProximityClassSize</c> tokens of the line (the key token included) are
/// mapped through <c>Words.Get</c> to form the value list stored in
/// <c>ProxClasses</c>. If the file cannot be opened, a warning is logged and
/// the method returns without loading anything.
/// </remarks>
/// <param name="proxFileName">Path of the proximity class file to read.</param>
/// <exception cref="System.IO.IOException"/>
public static void LoadProximityClasses(string proxFileName)
{
    log.Info("Loading proximity classes...");
    BufferedReader @in = null;
    try
    {
        @in = new BufferedReader(new FileReader(proxFileName));
    }
    catch (IOException)
    {
        // A missing database is tolerated: the caller simply gets no proximity classes.
        log.Info("Warning: no proximity database found.");
        return;
    }

    try
    {
        string line;
        while ((line = @in.ReadLine()) != null)
        {
            List<string> tokens = SimpleTokenize.Tokenize(line);
            if (tokens.Count > 0)
            {
                int key = Words.Get(tokens[0]);
                List<int> value = new List<int>();
                // Starts at 0 on purpose: the key word is also a member of its own class.
                for (int i = 0; i < tokens.Count && i < ProximityClassSize; i++)
                {
                    int word = Words.Get(tokens[i]);
                    value.Add(word);
                }
                ProxClasses[key] = value;
            }
        }
    }
    finally
    {
        // Release the file handle even when reading or parsing fails part-way through.
        @in.Close();
    }

    log.Info("Finished loading proximity classes.");
}
/// <summary>Loads one dictionary from disk.</summary>
/// <remarks>
/// For every line that tokenizes to at least one token, the lower-cased first
/// token becomes the key. The value is the second token, or the string
/// <c>"true"</c> when the line has only one token. Entries are assigned through
/// the indexer, so a later line with the same key replaces the earlier value.
/// </remarks>
/// <param name="dict">Dictionary that receives the entries.</param>
/// <param name="file">Path of the dictionary file to read.</param>
/// <exception cref="Java.IO.FileNotFoundException"/>
/// <exception cref="System.IO.IOException"/>
private static void LoadDictionary(IDictionary<string, string> dict, string file)
{
    BufferedReader @in = new BufferedReader(new FileReader(file));
    try
    {
        string line;
        while ((line = @in.ReadLine()) != null)
        {
            List<string> tokens = SimpleTokenize.Tokenize(line);
            if (tokens.Count > 0)
            {
                string lower = tokens[0].ToLower();
                if (tokens.Count == 1)
                {
                    // No explicit value on the line: record the word as simply present.
                    dict[lower] = "true";
                }
                else
                {
                    dict[lower] = tokens[1];
                }
            }
        }
    }
    finally
    {
        // The reader was previously never closed; release the file handle on every path.
        @in.Close();
    }
}
/// <summary>
/// Reads predicted entity boundary labels (Massi's B-ENT, I-ENT, or O labels)
/// and creates one entity per labelled span of tokens.
/// </summary>
/// <remarks>
/// The first token of every line that tokenizes to at least one token is taken
/// as a label; the number of labels is asserted to equal <c>mTokens.Count</c>.
/// An entity label has the form <c>B-TYPE-SUBTYPE</c> or <c>I-TYPE-SUBTYPE</c>.
/// An entity starts at a <c>B-</c> or <c>I-</c> label and extends over the
/// following consecutive <c>I-</c> labels. Entity ids are assigned sequentially
/// starting at 1. The reader is not closed by this method.
/// </remarks>
/// <param name="is">Reader positioned at the start of the label data.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception">
/// An entity label does not contain a non-empty type followed by a dash and a
/// non-empty subtype; the exception message is the offending label.
/// </exception>
private void ReadPredictedEntityBoundaries(BufferedReader @is)
{
    //
    // read Massi's B-ENT, I-ENT, or O labels
    //
    List<string> labels = new List<string>();
    string line;
    while ((line = @is.ReadLine()) != null)
    {
        List<string> tokens = SimpleTokenize.Tokenize(line);
        if (tokens.Count > 0)
        {
            labels.Add(tokens[0]);
        }
    }
    System.Diagnostics.Debug.Assert((labels.Count == mTokens.Count));

    int entityId = 1;
    //
    // traverse the label array and create entities as needed
    //
    for (int i = 0; i < labels.Count; i++)
    {
        // Massi's entities may start with I-ENT, so both prefixes open an entity.
        // Ordinal comparison: labels are machine-generated identifiers, not linguistic text.
        if (labels[i].StartsWith("B-", System.StringComparison.Ordinal)
            || labels[i].StartsWith("I-", System.StringComparison.Ordinal))
        {
            int startToken = i;
            int endToken = i + 1;
            while (endToken < labels.Count
                && labels[endToken].StartsWith("I-", System.StringComparison.Ordinal))
            {
                endToken++;
            }

            //
            // Set the type/subtype to whatever Massi predicted
            // This is not directly used in this system. It is needed only
            // to generate the APF files with Massi info, which are needed
            // by Edgar. Otherwise type/subtype could be safely set to "none".
            //
            string label = labels[startToken];
            // The char overload of IndexOf is an ordinal search; it looks for the
            // dash that separates the type from the subtype, after the "B-"/"I-" prefix.
            int dash = label.IndexOf('-', 2);
            // dash <= 2 covers both "no second dash" (-1) and an empty type (dash == 2);
            // dash >= Length - 1 rejects a trailing dash, i.e. an empty subtype.
            if (dash <= 2 || dash >= label.Length - 1)
            {
                throw new Exception(label);
            }
            string type = Sharpen.Runtime.Substring(label, 2, dash);
            string subtype = Sharpen.Runtime.Substring(label, dash + 1);

            // create a new entity between [startToken, endToken)
            MakeEntity(startToken, endToken, entityId, type, subtype);

            // skip over this entity (the loop increment moves on to endToken)
            i = endToken - 1;
            entityId++;
        }
        else
        {
            System.Diagnostics.Debug.Assert((labels[i].Equals("O")));
        }
    }
}