// NOTE: Currently this code is the same as the IPADIC dictionary entry parser,
        // which is okay for all the dictionaries supported so far...
        /// <summary>
        /// Parses one dictionary line into a <see cref="GenericDictionaryEntry"/>.
        /// </summary>
        /// <remarks>
        /// Field layout as consumed by this method: field 0 is the surface form, fields 1-3 are
        /// the left id, right id and word cost, fields 4-9 are the six part-of-speech features,
        /// and every remaining field (index 10 onwards) is kept as an "other" feature.
        /// </remarks>
        /// <param name="entry">Raw dictionary line; it is split into fields by <c>ParseLine</c>.</param>
        /// <returns>The entry built from the parsed fields.</returns>
        /// <exception cref="ArgumentException">The line contains fewer than 10 fields.</exception>
        /// <exception cref="FormatException">An id or cost field is not a valid integer.</exception>
        /// <exception cref="OverflowException">An id or cost field does not fit in a <c>short</c>.</exception>
        public GenericDictionaryEntry parse(String entry)
        {
            const int PosFeatureStart   = 4;
            const int PosFeatureCount   = 6;
            const int OtherFeatureStart = PosFeatureStart + PosFeatureCount;

            string[] fields = ParseLine(entry);

            // Fail early with a descriptive message instead of an index/array-copy error further down.
            if (fields.Length < OtherFeatureStart)
            {
                throw new ArgumentException(
                    "Dictionary entry must have at least " + OtherFeatureStart +
                    " fields but has " + fields.Length + ": " + entry,
                    "entry");
            }

            // Dictionary files are machine-readable, so numbers are parsed culture-independently.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            string surface  = fields[0];
            short  leftId   = short.Parse(fields[1], invariant);
            short  rightId  = short.Parse(fields[2], invariant);
            short  wordCost = short.Parse(fields[3], invariant);

            string[] pos = new string[PosFeatureCount];
            Array.Copy(fields, PosFeatureStart, pos, 0, pos.Length);

            // May be empty when the line carries nothing beyond the part-of-speech columns.
            string[] features = new string[fields.Length - OtherFeatureStart];
            Array.Copy(fields, OtherFeatureStart, features, 0, features.Length);

            GenericDictionaryEntry.Builder builder = new GenericDictionaryEntry.Builder()
            {
                Surface              = surface,
                LeftId               = leftId,
                RightId              = rightId,
                WordCost             = wordCost,
                PartOfSpeechFeatures = pos,
                OtherFeatures        = features
            };

            return new GenericDictionaryEntry(builder);
        }
示例#2
0
        /// <summary>
        /// Reads every line of <paramref name="input"/> from the beginning, converts it to a
        /// <see cref="GenericDictionaryEntry"/> and records it in this compiler's buffers.
        /// </summary>
        /// <remarks>
        /// For each line the method appends one <c>BufferEntry</c> to <c>bufferEntries</c>, the
        /// entry's surface to <c>surfaces</c> and, when <c>dictionaryEntries</c> is not null, the
        /// entry itself to <c>dictionaryEntries</c>.
        /// </remarks>
        /// <param name="input">
        /// Seekable stream holding the dictionary lines; its position is reset to 0 before reading.
        /// </param>
        /// <exception cref="IOException">
        /// Reading failed; the original exception is available as the inner exception.
        /// </exception>
        public void ReadTokenInfo(Stream input)
        {
            try
            {
                input.Position = 0;

                // NOTE(review): the reader is not disposed here; disposing a StreamReader also
                // closes the underlying stream, which is supplied by the caller.
                var reader     = new StreamReader(input, Encoding.GetEncoding(encoding));
                int entryCount = posInfo.GetEntryCount();

                while (!reader.EndOfStream)
                {
                    T entry = Parse(reader.ReadLine().RemapCharIfNeeded());

                    GenericDictionaryEntry dictionaryEntry = MakeGenericDictionaryEntry(entry);

                    short leftId   = dictionaryEntry.GetLeftId();
                    short rightId  = dictionaryEntry.GetRightId();
                    short wordCost = dictionaryEntry.GetWordCost();

                    string[] allPosFeatures = dictionaryEntry.GetPartOfSpeechFeatures();

                    List<int> posFeatureIds = posInfo.MapFeatures(allPosFeatures);

                    string[]  featureList     = dictionaryEntry.GetOtherFeatures();
                    List<int> otherFeatureIds = otherInfo.MapFeatures(featureList);

                    BufferEntry bufferEntry = new BufferEntry();
                    bufferEntry.TokenInfo.Add(leftId);
                    bufferEntry.TokenInfo.Add(rightId);
                    bufferEntry.TokenInfo.Add(wordCost);

                    if (EntriesFitInAByte(entryCount))
                    {
                        // Part-of-speech ids go into the separate PosInfo byte list.
                        List<Byte> posFeatureIdBytes = CreatePosFeatureIds(posFeatureIds);
                        bufferEntry.PosInfo.AddRange(posFeatureIdBytes);
                    }
                    else
                    {
                        // Otherwise the ids are appended to TokenInfo, each narrowed to a short.
                        foreach (int posFeatureId in posFeatureIds)
                        {
                            bufferEntry.TokenInfo.Add((short)posFeatureId);
                        }
                    }

                    bufferEntry.Features.AddRange(otherFeatureIds);

                    bufferEntries.Add(bufferEntry);
                    surfaces.Add(dictionaryEntry.GetSurface());

                    if (dictionaryEntries != null)
                    {
                        dictionaryEntries.Add(dictionaryEntry);
                    }
                }
            }
            catch (IOException ex)
            {
                // Name the method that actually failed and keep the original exception (and its
                // stack trace) as the inner exception.
                throw new IOException("TokenInfoDictionaryCompilerBase.ReadTokenInfo: " + ex.Message, ex);
            }
        }
示例#3
0
        /// <summary>
        /// Builds the cost table for the entries held in <c>dictionaryEntries</c>.
        /// </summary>
        /// <returns>
        /// One row per entry, in entry order; each row is { left id, right id, word cost }.
        /// </returns>
        public int[][] MakeCosts()
        {
            int[][] table = new int[dictionaryEntries.Count][];

            int row = 0;
            while (row < dictionaryEntries.Count)
            {
                GenericDictionaryEntry current = dictionaryEntries[row];

                int[] triple = new int[3];
                triple[0] = current.GetLeftId();
                triple[1] = current.GetRightId();
                triple[2] = current.GetWordCost();

                table[row] = triple;
                row++;
            }

            return table;
        }
示例#4
0
        /// <summary>
        /// Finds the positions of all entries in <c>dictionaryEntries</c> whose surface equals
        /// <paramref name="surface"/>.
        /// </summary>
        /// <param name="surface">Surface form to look for.</param>
        /// <returns>The matching indices in ascending order; empty when nothing matches.</returns>
        public int[] GetEntryIndices(String surface)
        {
            List<int> matches = new List<int>();

            for (int position = 0; position < dictionaryEntries.Count; position++)
            {
                bool sameSurface = dictionaryEntries[position].GetSurface().Equals(surface);

                if (!sameSurface)
                {
                    continue;
                }

                matches.Add(position);
            }

            return ToArray(matches);
        }
示例#5
0
        /// <summary>
        /// Builds the feature table for the entries held in <c>dictionaryEntries</c>.
        /// </summary>
        /// <returns>
        /// One row per entry, in entry order; each row holds the entry's part-of-speech features
        /// followed by its other features.
        /// </returns>
        public string[][] MakeFeatures()
        {
            int total = dictionaryEntries.Count;
            string[][] table = new String[total][];

            int row = 0;
            while (row < dictionaryEntries.Count)
            {
                GenericDictionaryEntry current = dictionaryEntries[row];

                // Part-of-speech features first, then the remaining features appended behind them.
                List<string> combined = new List<string>(current.GetPartOfSpeechFeatures());
                combined.AddRange(current.GetOtherFeatures());

                table[row] = combined.ToArray();
                row++;
            }

            return table;
        }
示例#6
0
        /// <summary>
        /// Reads every line of <paramref name="input"/> from the beginning and passes each entry's
        /// part-of-speech features to <c>posInfo.MapFeatures</c>, discarding the returned ids.
        /// </summary>
        /// <remarks>
        /// NOTE(review): presumably a first pass that registers all part-of-speech features with
        /// <c>posInfo</c> before the entries are read for real; confirm against the callers.
        /// </remarks>
        /// <param name="input">
        /// Seekable stream holding the dictionary lines; its position is reset to 0 before reading.
        /// </param>
        /// <exception cref="IOException">
        /// Reading failed; the original exception is available as the inner exception.
        /// </exception>
        public void AnalyzeTokenInfo(Stream input)
        {
            try
            {
                input.Position = 0;

                // NOTE(review): the reader is not disposed here; disposing a StreamReader also
                // closes the underlying stream, which is supplied by the caller.
                var reader = new StreamReader(input, Encoding.GetEncoding(encoding));

                while (!reader.EndOfStream)
                {
                    string line  = reader.ReadLine().RemapCharIfNeeded();
                    T      entry = Parse(line);

                    GenericDictionaryEntry dictionaryEntry = MakeGenericDictionaryEntry(entry);
                    posInfo.MapFeatures(dictionaryEntry.GetPartOfSpeechFeatures());
                }
            }
            catch (IOException ex)
            {
                // Keep the original exception (and its stack trace) as the inner exception.
                throw new IOException("TokenInfoDictionaryCompilerBase.AnalyzeTokenInfo: " + ex.Message, ex);
            }
        }
示例#7
0
        /// <summary>
        /// Reads every line of <paramref name="input"/> from the beginning, parses it with an
        /// <c>UnknownDictionaryEntryParser</c> and appends the result to <c>dictionaryEntries</c>.
        /// </summary>
        /// <param name="input">
        /// Seekable stream holding the definition lines; its position is reset to 0 before
        /// reading. The stream is closed when the reader is disposed at the end of this method.
        /// </param>
        /// <param name="encoding">Name of the text encoding, resolved via <c>Encoding.GetEncoding</c>.</param>
        /// <exception cref="IOException">
        /// Reading failed; the original exception is available as the inner exception.
        /// </exception>
        public void ReadUnknownDefinition(Stream input, String encoding)
        {
            try
            {
                input.Position = 0;
                using (var reader = new StreamReader(input, Encoding.GetEncoding(encoding)))
                {
                    UnknownDictionaryEntryParser parser = new UnknownDictionaryEntryParser();

                    while (!reader.EndOfStream)
                    {
                        GenericDictionaryEntry entry = parser.parse(reader.ReadLine().RemapCharIfNeeded());

                        dictionaryEntries.Add(entry);
                    }
                }
            }
            catch (IOException ex)
            {
                // Keep the original exception (and its stack trace) as the inner exception.
                throw new IOException("UnknownDictionaryCompiler.ReadUnknownDefinition: " + ex.Message, ex);
            }
        }