Dictionary represented by a text file.

Format allowed: 1 entry per line:

An entry can be:

- suggestion
- suggestion fieldDelimiter weight
- suggestion fieldDelimiter weight fieldDelimiter payload

where the default fieldDelimiter is DEFAULT_FIELD_DELIMITER (a tab).

NOTE:

- In order to have payloads enabled, the first entry has to have a payload.
- If the weight for an entry is not specified, then a value of 1 is used.
- A payload cannot be specified without having the weight specified for an entry.
- If the payload for an entry is not specified (assuming payloads are enabled), then an empty payload is returned.
- An entry cannot have more than two fieldDelimiters.

Example (one entry per line):

word1 word2 TAB 100 TAB payload1
word3 TAB 101
word4 word3 TAB 102
Inheritance: IDictionary
示例#1
0
        /// <summary>
        /// Builds a <c>FileDictionary</c> over generated input that uses <c>" , "</c> as the
        /// field delimiter and checks that the iterator reports payloads and returns every
        /// generated entry, in order, with the expected term, weight and payload.
        /// </summary>
        public void TestFileWithDifferentDelimiter()
        {
            KeyValuePair<List<List<string>>, string> generated = generateFileInput(AtLeast(100), " , ", true, true);
            List<List<string>> expectedEntries = generated.Key;
            Stream source = new MemoryStream(generated.Value.getBytes(Encoding.UTF8));
            FileDictionary fileDictionary = new FileDictionary(source, " , ");
            IInputIterator iterator = fileDictionary.GetEntryIterator();

            assertTrue(iterator.HasPayloads);

            int seen = 0;
            for (BytesRef current = iterator.Next(); current != null; current = iterator.Next())
            {
                // The iterator must never produce more entries than were generated.
                assertTrue(expectedEntries.size() > seen);
                List<string> expected = expectedEntries[seen];
                assertTrue(expected.size() >= 2); // at least term and weight
                assertEquals(expected[0], current.Utf8ToString());
                assertEquals(long.Parse(expected[1], CultureInfo.InvariantCulture), iterator.Weight);
                if (expected.size() != 3)
                {
                    // No payload was generated for this entry, so an empty payload is expected.
                    assertEquals(iterator.Payload.Length, 0);
                }
                else
                {
                    assertEquals(expected[2], iterator.Payload.Utf8ToString());
                }
                seen++;
            }

            // Every generated entry must have been visited.
            assertEquals(seen, expectedEntries.size());
        }
示例#2
0
 /// <summary>
 /// Creates the iterator and immediately reads the first line of the dictionary's input.
 /// If there is no first line, the dictionary is marked as done and its reader is closed;
 /// otherwise the line is split on the dictionary's field delimiter and the first entry's
 /// term, weight and (when present) payload are loaded.
 /// </summary>
 /// <param name="outerInstance">The <c>FileDictionary</c> whose input is being iterated.</param>
 /// <exception cref="System.ArgumentException">The first line has more than three fields.</exception>
 internal FileIterator(FileDictionary outerInstance)
 {
     this.outerInstance = outerInstance;
     // Read the first line up front; it decides whether payloads are enabled.
     outerInstance.line = outerInstance.@in.ReadLine();
     if (outerInstance.line == null)
     {
         // Empty input: nothing to iterate, so release the reader right away.
         outerInstance.done = true;
         IOUtils.Close(outerInstance.@in);
     }
     else
     {
         // NOTE(review): with RemoveEmptyEntries a blank (or delimiter-only) first line
         // yields zero fields, in which case fields[0] in the last branch would throw -
         // confirm whether such input can occur.
         string[] fields = outerInstance.line.Split(new string[] { outerInstance.fieldDelimiter }, StringSplitOptions.RemoveEmptyEntries);
         if (fields.Length > 3)
         {
             throw new System.ArgumentException("More than 3 fields in one line");
         }
         else if (fields.Length == 3)
         {
             // term, weight, payload
             hasPayloads = true;
             spare.CopyChars(fields[0]);
             ReadWeight(fields[1]);
             curPayload.CopyChars(fields[2]);
         }
         else if (fields.Length == 2)
         {
             // term, weight
             spare.CopyChars(fields[0]);
             ReadWeight(fields[1]);
         }
         else
         {
             // only term: the weight falls back to 1
             spare.CopyChars(fields[0]);
             curWeight = 1;
         }
     }
 }
示例#3
0
        /// <summary>
        /// Builds a <c>FileDictionary</c> over generated input requested with a count of 1 and the
        /// default field delimiter, then checks that the enumerator reports payloads and
        /// returns the generated entries with the expected term, weight and payload.
        /// </summary>
        public void TestFileWithOneEntry()
        {
            KeyValuePair<IList<IList<string>>, string> generated = generateFileInput(1, FileDictionary.DEFAULT_FIELD_DELIMITER, true, true);
            IList<IList<string>> expectedEntries = generated.Key;
            Stream source = new MemoryStream(generated.Value.getBytes(Encoding.UTF8));
            FileDictionary fileDictionary = new FileDictionary(source);
            IInputEnumerator enumerator = fileDictionary.GetEntryEnumerator();

            assertTrue(enumerator.HasPayloads);

            int seen = 0;
            for (; enumerator.MoveNext(); seen++)
            {
                // The enumerator must never produce more entries than were generated.
                assertTrue(expectedEntries.size() > seen);
                IList<string> expected = expectedEntries[seen];
                assertTrue(expected.size() >= 2); // at least term and weight
                assertEquals(expected[0], enumerator.Current.Utf8ToString());
                assertEquals(long.Parse(expected[1], CultureInfo.InvariantCulture), enumerator.Weight);
                if (expected.size() != 3)
                {
                    // No payload was generated for this entry, so an empty payload is expected.
                    assertEquals(enumerator.Payload.Length, 0);
                }
                else
                {
                    assertEquals(expected[2], enumerator.Payload.Utf8ToString());
                }
            }

            // Every generated entry must have been visited.
            assertEquals(seen, expectedEntries.size());
        }
 /// <summary>
 /// Builds a <c>FileDictionary</c> over generated input using the default field delimiter
 /// and checks that the iterator reports no payloads and that every entry comes back,
 /// in order, with its term, a weight of 1 and a null payload.
 /// </summary>
 public void TestFileWithTerm()
 {
     KeyValuePair<List<List<string>>, string> generated = generateFileInput(AtLeast(100), FileDictionary.DEFAULT_FIELD_DELIMITER, false, false);
     List<List<string>> expectedEntries = generated.Key;
     Stream source = new MemoryStream(generated.Value.getBytes(Encoding.UTF8));
     FileDictionary fileDictionary = new FileDictionary(source);
     IInputIterator iterator = fileDictionary.EntryIterator;

     assertFalse(iterator.HasPayloads);

     int seen = 0;
     for (BytesRef current = iterator.Next(); current != null; current = iterator.Next())
     {
         // The iterator must never produce more entries than were generated.
         assertTrue(expectedEntries.size() > seen);
         List<string> expected = expectedEntries[seen];
         assertTrue(expected.size() >= 1); // at least a term
         assertEquals(expected[0], current.Utf8ToString());
         assertEquals(1, iterator.Weight);
         assertNull(iterator.Payload);
         seen++;
     }

     // Every generated entry must have been visited.
     assertEquals(seen, expectedEntries.size());
 }
示例#5
0
        /// <summary>
        /// Builds a <c>FileDictionary</c> over generated input using the default field delimiter
        /// and checks that the enumerator reports no payloads and that every entry comes back,
        /// in order, with its term, a weight of 1 and a null payload.
        /// </summary>
        public void TestFileWithTerm()
        {
            KeyValuePair<IList<IList<string>>, string> generated = generateFileInput(AtLeast(100), FileDictionary.DEFAULT_FIELD_DELIMITER, false, false);
            IList<IList<string>> expectedEntries = generated.Key;
            Stream source = new MemoryStream(generated.Value.getBytes(Encoding.UTF8));
            FileDictionary fileDictionary = new FileDictionary(source);
            IInputEnumerator enumerator = fileDictionary.GetEntryEnumerator();

            assertFalse(enumerator.HasPayloads);

            int seen = 0;
            for (; enumerator.MoveNext(); seen++)
            {
                // The enumerator must never produce more entries than were generated.
                assertTrue(expectedEntries.size() > seen);
                IList<string> expected = expectedEntries[seen];
                assertTrue(expected.size() >= 1); // at least a term
                assertEquals(expected[0], enumerator.Current.Utf8ToString());
                assertEquals(1, enumerator.Weight);
                assertNull(enumerator.Payload);
            }

            // Every generated entry must have been visited.
            assertEquals(seen, expectedEntries.size());
        }
示例#6
0
        /// <summary>
        /// Builds a <c>FileDictionary</c> over generated input using the default field delimiter
        /// and checks that the iterator reports no payloads and that every entry comes back,
        /// in order, with its term, its weight (1 when the entry has no weight field) and a
        /// null payload.
        /// </summary>
        public void TestFileWithWeight()
        {
            KeyValuePair<List<List<string>>, string> generated = generateFileInput(AtLeast(100), FileDictionary.DEFAULT_FIELD_DELIMITER, true, false);
            List<List<string>> expectedEntries = generated.Key;
            Stream source = new MemoryStream(generated.Value.getBytes(Encoding.UTF8));
            FileDictionary fileDictionary = new FileDictionary(source);
            IInputIterator iterator = fileDictionary.GetEntryIterator();

            assertFalse(iterator.HasPayloads);

            int seen = 0;
            for (BytesRef current = iterator.Next(); current != null; current = iterator.Next())
            {
                // The iterator must never produce more entries than were generated.
                assertTrue(expectedEntries.size() > seen);
                List<string> expected = expectedEntries[seen];
                assertTrue(expected.size() >= 1); // at least a term
                assertEquals(expected[0], current.Utf8ToString());

                // An entry without an explicit weight field is expected to carry weight 1.
                long expectedWeight = 1;
                if (expected.size() == 2)
                {
                    expectedWeight = long.Parse(expected[1], CultureInfo.InvariantCulture);
                }
                assertEquals(expectedWeight, iterator.Weight);
                assertNull(iterator.Payload);
                seen++;
            }

            // Every generated entry must have been visited.
            assertEquals(seen, expectedEntries.size());
        }
示例#7
0
 /// <summary>
 /// Creates the iterator and immediately reads the first line of the dictionary's input.
 /// If there is no first line, the dictionary is marked as done and its reader is closed;
 /// otherwise the line is split on the dictionary's field delimiter and the first entry's
 /// term, weight and (when present) payload are loaded.
 /// </summary>
 /// <param name="outerInstance">The <c>FileDictionary</c> whose input is being iterated.</param>
 /// <exception cref="System.ArgumentException">The first line has more than three fields.</exception>
 internal FileIterator(FileDictionary outerInstance)
 {
     this.outerInstance = outerInstance;
     // Read the first line up front; it decides whether payloads are enabled.
     outerInstance.line = outerInstance.@in.ReadLine();
     if (outerInstance.line == null)
     {
         // Empty input: nothing to iterate, so release the reader right away.
         outerInstance.done = true;
         IOUtils.Close(outerInstance.@in);
     }
     else
     {
         // NOTE(review): Split(fieldDelimiter, true) does not appear to match a standard
         // string.Split overload; presumably a project-provided helper - confirm what the
         // boolean argument controls.
         string[] fields = outerInstance.line.Split(outerInstance.fieldDelimiter, true);
         if (fields.Length > 3)
         {
             throw new System.ArgumentException("More than 3 fields in one line");
         }
         else if (fields.Length == 3)
         {
             // term, weight, payload
             hasPayloads = true;
             spare.CopyChars(fields[0]);
             ReadWeight(fields[1]);
             curPayload.CopyChars(fields[2]);
         }
         else if (fields.Length == 2)
         {
             // term, weight
             spare.CopyChars(fields[0]);
             ReadWeight(fields[1]);
         }
         else
         {
             // only term: the weight falls back to 1
             spare.CopyChars(fields[0]);
             curWeight = 1;
         }
     }
 }
 /// <summary>
 /// Builds a <c>FileDictionary</c> over generated input that uses <c>" , "</c> as the
 /// field delimiter and checks that the iterator reports payloads and returns every
 /// generated entry, in order, with the expected term, weight and payload.
 /// </summary>
 public void TestFileWithDifferentDelimiter()
 {
     KeyValuePair<List<List<string>>, string> fileInput = generateFileInput(AtLeast(100), " , ", true, true);
     Stream inputReader = new MemoryStream(fileInput.Value.getBytes(Encoding.UTF8));
     FileDictionary dictionary = new FileDictionary(inputReader, " , ");
     List<List<string>> entries = fileInput.Key;
     IInputIterator inputIter = dictionary.EntryIterator;
     assertTrue(inputIter.HasPayloads);
     BytesRef term;
     int count = 0;
     while ((term = inputIter.Next()) != null)
     {
         // The iterator must never produce more entries than were generated.
         assertTrue(entries.size() > count);
         List<string> entry = entries[count];
         assertTrue(entry.size() >= 2); // at least term and weight
         assertEquals(entry[0], term.Utf8ToString());
         // Parse with the invariant culture so the expected weight does not depend on
         // the current culture of the machine running the test.
         assertEquals(long.Parse(entry[1], System.Globalization.CultureInfo.InvariantCulture), inputIter.Weight);
         if (entry.size() == 3)
         {
             assertEquals(entry[2], inputIter.Payload.Utf8ToString());
         }
         else
         {
             // No payload was generated for this entry, so an empty payload is expected.
             assertEquals(inputIter.Payload.Length, 0);
         }
         count++;
     }
     // Every generated entry must have been visited.
     assertEquals(count, entries.size());
 }