Пример #1
0
 /// <summary>
 /// Fetches the index of the given string, optionally creating the entry.
 /// </summary>
 /// <remarks>
 /// When <c>mCreate</c> is true, a missing entry is created with the current
 /// dictionary size as its index, and the entry's count is incremented on
 /// every call. When <c>mCreate</c> is false the dictionary is not modified.
 /// </remarks>
 /// <param name="s">The string to look up.</param>
 /// <param name="shouldThrow">
 /// If true, an unknown entry raises an exception; if false, -1 is returned.
 /// </param>
 /// <returns>
 /// The index stored for <paramref name="s"/>, or -1 when the entry is
 /// unknown and <paramref name="shouldThrow"/> is false.
 /// </returns>
 /// <exception cref="System.Collections.Generic.KeyNotFoundException">
 /// The entry is unknown and <paramref name="shouldThrow"/> is true.
 /// </exception>
 public virtual int Get(string s, bool shouldThrow)
 {
     // The standard dictionary indexer throws on a missing key, so look the
     // entry up with TryGetValue, which leaves ic null when s is unknown.
     StringDictionary.IndexAndCount ic;
     mDict.TryGetValue(s, out ic);

     if (mCreate)
     {
         if (ic == null)
         {
             // New entries receive the next free index (current size).
             ic       = new StringDictionary.IndexAndCount(mDict.Count, 0);
             mDict[s] = ic;
             mInverse[ic.mIndex] = s;
         }
         ic.mCount++;
     }

     if (ic != null)
     {
         return ic.mIndex;
     }

     if (shouldThrow)
     {
         throw new System.Collections.Generic.KeyNotFoundException(
             "Unknown entry \"" + s + "\" in dictionary \"" + mName + "\"!");
     }

     return -1;
 }
Пример #2
0
        /// <summary>Loads all saved dictionary entries from disk.</summary>
        /// <remarks>
        /// Reads the file named <c>path</c> + separator + <c>prefix</c> + "." +
        /// <c>mName</c> line by line. Each line must tokenize into exactly three
        /// tokens: the entry string, its index (non-negative) and its count
        /// (strictly positive). Entries already present are not cleared; a
        /// loaded entry overwrites any existing entry with the same key.
        /// </remarks>
        /// <param name="path">Directory that contains the dictionary file.</param>
        /// <param name="prefix">File name prefix placed before "." and the dictionary name.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.FormatException">
        /// A line does not have three tokens, or its index/count is not a valid
        /// number in the accepted range.
        /// </exception>
        public virtual void Load(string path, string prefix)
        {
            string fileName = path + System.IO.Path.DirectorySeparatorChar + prefix + "." + mName;

            using (BufferedReader @is = IOUtils.ReaderFromString(fileName))
            {
                for (string line; (line = @is.ReadLine()) != null;)
                {
                    List <string> tokens = SimpleTokenize.Tokenize(line);
                    if (tokens.Count != 3)
                    {
                        throw new System.FormatException("Invalid dictionary line: " + line);
                    }

                    // The file is machine-written, so parse culture-independently.
                    int index = System.Convert.ToInt32(tokens[1], System.Globalization.CultureInfo.InvariantCulture);
                    int count = System.Convert.ToInt32(tokens[2], System.Globalization.CultureInfo.InvariantCulture);
                    if (index < 0 || count <= 0)
                    {
                        throw new System.FormatException("Invalid dictionary line: " + line);
                    }

                    StringDictionary.IndexAndCount ic = new StringDictionary.IndexAndCount(index, count);
                    mDict[tokens[0]] = ic;
                    mInverse[index]  = tokens[0];
                }
                log.Info("Loaded " + mDict.Count + " entries for dictionary \"" + mName + "\".");
            }
        }
Пример #3
0
 /// <summary>
 /// Fetches the index/count record of the given string, optionally creating it.
 /// </summary>
 /// <remarks>
 /// When <c>mCreate</c> is true, a missing entry is created with the current
 /// dictionary size as its index, and the entry's count is incremented on
 /// every call. When <c>mCreate</c> is false the dictionary is not modified.
 /// </remarks>
 /// <param name="s">The string to look up.</param>
 /// <returns>
 /// The record stored for <paramref name="s"/>, or null when the entry is
 /// unknown and <c>mCreate</c> is false.
 /// </returns>
 public virtual StringDictionary.IndexAndCount GetIndexAndCount(string s)
 {
     // The standard dictionary indexer throws on a missing key, so look the
     // entry up with TryGetValue, which leaves ic null when s is unknown.
     StringDictionary.IndexAndCount ic;
     mDict.TryGetValue(s, out ic);

     if (mCreate)
     {
         if (ic == null)
         {
             // New entries receive the next free index (current size).
             ic       = new StringDictionary.IndexAndCount(mDict.Count, 0);
             mDict[s] = ic;
             mInverse[ic.mIndex] = s;
         }
         ic.mCount++;
     }

     return ic;
 }
Пример #4
0
        /// <summary>
        /// Saves all dictionary entries whose count is strictly greater than
        /// <paramref name="threshold"/>.
        /// </summary>
        /// <remarks>
        /// The indices written to the file are renumbered to contiguous values
        /// starting at 0, in the iteration order of the dictionary; the
        /// in-memory entries are left unchanged. Contiguous indices are needed to
        /// minimize the memory allocated for the expanded feature vectors
        /// (average perceptron). Each line holds the entry string, its new index
        /// and its count, separated by single spaces. The target file is
        /// <c>path</c> + separator + <c>prefix</c> + "." + <c>mName</c>.
        /// </remarks>
        /// <param name="path">Directory in which the dictionary file is written.</param>
        /// <param name="prefix">File name prefix placed before "." and the dictionary name.</param>
        /// <param name="threshold">Entries with a count at or below this value are skipped.</param>
        /// <exception cref="System.IO.IOException"/>
        public virtual void Save(string path, string prefix, int threshold)
        {
            string fileName = path + System.IO.Path.DirectorySeparatorChar + prefix + "." + mName;
            int    index    = 0;

            // The using block closes the file even if writing fails part-way.
            using (System.IO.StreamWriter os = new System.IO.StreamWriter(fileName))
            {
                foreach (KeyValuePair <string, StringDictionary.IndexAndCount> entry in mDict)
                {
                    StringDictionary.IndexAndCount ic = entry.Value;
                    if (ic.mCount > threshold)
                    {
                        os.WriteLine(entry.Key + ' ' + index + ' ' + ic.mCount);
                        index++;
                    }
                }
            }

            log.Info("Saved " + index + "/" + mDict.Count + " entries for dictionary \"" + mName + "\".");
        }