示例#1
0
        /// <summary>
        /// Builds a diagnostic description of this instance: the base text, the normal form,
        /// the part of speech and the list of morpho-form endings separated by ", ".
        /// </summary>
        /// <returns>The formatted description.</returns>
        public override string ToString()
        {
            // Collect the endings; the separator is written only when the buffer already holds text.
            var endings = new StringBuilder();
            foreach (var ending in MorphoFormEndings)
            {
                if (endings.Length > 0)
                {
                    endings.Append(", ");
                }
                endings.Append(StringsHelper.ToString(ending));
            }

            var baseText       = StringsHelper.ToString(Base);
            var normalFormText = GetNormalForm();

            return string.Format(
                "[base: '{0}', normal-form: '{1}', pos: '{2}', {{morpho-form-endings: '{3}'}}]",
                baseText,
                normalFormText,
                PartOfSpeech,
                endings.ToString());
        }
示例#2
0
 /// <summary>
 /// Converts a dictionary entry into an <c>MModelRecord</c>: the key is turned into the n-gram text
 /// and the value is wrapped in a <c>WeighByLanguageEnumerator</c>.
 /// </summary>
 /// <param name="pair">The entry to convert.</param>
 /// <returns>The record built from <paramref name="pair"/>.</returns>
 private static MModelRecord ToModelRecord(this KeyValuePair <IntPtr, BucketValue> pair)
 {
     var ngram   = StringsHelper.ToString(pair.Key);
     var weights = new WeighByLanguageEnumerator(pair.Value);

     return new MModelRecord() { Ngram = ngram, WeighByLanguages = weights };
 }
示例#3
0
        /// <summary>
        /// Builds a <c>ModelRecord</c> from the data at <paramref name="baseIntPtr"/>.
        /// </summary>
        /// <param name="baseIntPtr">Pointer passed to <c>StringsHelper.ToString</c> to obtain the n-gram text.</param>
        /// <param name="m">Receives the record holding the n-gram and its probability.</param>
        private static void ToModelRecord(IntPtr baseIntPtr, out ModelRecord m)
        {
            var ngram = StringsHelper.ToString(baseIntPtr);

            // ToProbability receives (text length + 1) - presumably the offset just behind the
            // string terminator; confirm against ToProbability.
            var probability = ToProbability(baseIntPtr, ngram.Length + 1);

            m = new ModelRecord() { Ngram = ngram, Probability = probability };
        }
示例#4
0
        /// <summary>
        /// Builds a <c>ModelRecord</c> from the data at <paramref name="baseIntPtr"/>.
        /// </summary>
        /// <param name="baseIntPtr">Pointer passed to <c>StringsHelper.ToString</c> to obtain the n-gram text.</param>
        /// <returns>The record holding the n-gram and its probability.</returns>
        public static ModelRecord ToModelRecord(IntPtr baseIntPtr)
        {
            var ngram = StringsHelper.ToString(baseIntPtr);

            // ToProbability receives (text length + 1) - presumably the offset just behind the
            // string terminator; confirm against ToProbability.
            var probability = ToProbability(baseIntPtr, ngram.Length + 1);

            return new ModelRecord() { Ngram = ngram, Probability = probability };
        }
示例#5
0
        /// <summary>
        /// Renders the key of every tuple in <paramref name="array"/> as text, one key per line.
        /// </summary>
        /// <param name="array">The tuples whose keys are printed.</param>
        /// <returns>All keys, each followed by <see cref="Environment.NewLine"/>.</returns>
        private static string ToString(SortedListIntPtrKey <Pair[]> .Tuple[] array)
        {
            var buffer = new System.Text.StringBuilder();

            for (var i = 0; i < array.Length; i++)
            {
                buffer.AppendLine(StringsHelper.ToString(array[i].Key));
            }

            return buffer.ToString();
        }
示例#6
0
 /// <summary>
 /// Stores a morpho-type in the morpho-types dictionary.
 /// </summary>
 /// <param name="morphoTypePair">The name / morpho-type pair to register.</param>
 private void AddMorphoType2Dictionary(ref MorphoTypeNative_pair_t morphoTypePair)
 {
     var name = morphoTypePair.Name;

     if (!_MorphoTypesDictionary.ContainsKey(name))
     {
         _MorphoTypesDictionary.Add(name, morphoTypePair.MorphoType);
         return;
     }

     // The name is already registered: report the duplicate through the error callback
     // (no exception is thrown) and keep the existing entry.
     _ModelLoadingErrorCallback("Duplicated morpho-type", StringsHelper.ToString(name));
 }
示例#7
0
        /// <summary>
        /// Lazily enumerates the dictionary and yields one <c>MModelRecord</c> per entry.
        /// </summary>
        /// <returns>A deferred sequence of records; each entry is converted when it is requested.</returns>
        public IEnumerable <MModelRecord> GetAllRecords()
        {
            foreach (var entry in _Dictionary)
            {
                var ngram   = StringsHelper.ToString(entry.Key);
                var weights = ToArrayOfWeighByLanguage(entry.Value);

                yield return new MModelRecord() { Ngram = ngram, WeighByLanguages = weights };
            }
        }
示例#8
0
            /// <summary>
            /// Creates a morpho-type pair from a text line.
            /// </summary>
            /// <param name="lineBase">Pointer to the first character of the line.</param>
            /// <param name="lineLength">Length of the line in characters.</param>
            /// <returns>
            /// The created pair, or <c>null</c> when the comma or the morpho-type marker is missing
            /// or when the part-of-speech text is unknown (the latter is reported through the error callback).
            /// </returns>
            private MorphoTypeNative_pair_t?CreateMorphoTypePair(char *lineBase, int lineLength)
            {
                // The text in front of the first comma is the part-of-speech.
                var commaIndex = IndexOf(lineBase, COMMA);
                if (commaIndex == -1)
                {
                    return null;
                }

                var markerIndex = IndexAfter_MORPHO_TYPE(lineBase + commaIndex + 1);
                if (markerIndex == -1)
                {
                    return null;
                }

                var partOfSpeech = default(PartOfSpeechEnum);
                if (!_EnumParserPartOfSpeech.TryParse(lineBase, commaIndex, ref partOfSpeech))
                {
                    var pos = StringsHelper.ToString(lineBase, commaIndex);
                    _ModelLoadingErrorCallback("Unknown part-of-speech: '" + pos + '\'', StringsHelper.ToString(lineBase));
                    return null;
                }

                // The remainder of the line, starting at nameStart, is copied into unmanaged memory as the name.
                var    nameStart = commaIndex + 1 + markerIndex + 1;
                IntPtr namePtr;
                AllocHGlobalAndCopy(lineBase + nameStart, lineLength - nameStart, out namePtr);

                var morphoType = new MorphoTypeNative(_PartOfSpeechList.GetPartOfSpeech(partOfSpeech));

                return new MorphoTypeNative_pair_t()
                {
                    Name       = namePtr,
                    MorphoType = morphoType,
                };
            }
 /// <summary>
 /// Formats the ending and its morpho-attribute pairs as "[ending, {pair,pair,...}]".
 /// </summary>
 public override string ToString()
 {
     var ending = StringsHelper.ToString(Ending);
     var pairs  = string.Join(",", (IEnumerable <MorphoAttributePair>)MorphoAttributePairs);

     return "[" + ending + ", {" + pairs + "}]";
 }
示例#10
0
 /// <summary>
 /// Returns the <c>Length</c> characters starting at <c>BasePtr + StartIndex</c>, wrapped in single quotes.
 /// </summary>
 public override string ToString()
 {
     var text = StringsHelper.ToString(BasePtr + StartIndex, Length);
     return "'" + text + "'";
 }
示例#11
0
 /// <summary>
 /// Returns the text made of <c>Length</c> characters starting at <c>Start</c>.
 /// </summary>
 public override string ToString()
 {
     var text = StringsHelper.ToString(Start, Length);
     return text;
 }
示例#12
0
 /// <summary>
 /// Formats the morpho-type name and the part of speech for diagnostics.
 /// </summary>
 public override string ToString()
 {
     var morphoTypeName = StringsHelper.ToString(MorphoTypeName);
     var partOfSpeech   = StringsHelper.ToString(PartOfSpeech);

     return "MorphoTypeName: '" + morphoTypeName + "', PartOfSpeech: '" + partOfSpeech + '\'';
 }
示例#13
0
 /// <summary>
 /// Formats the ending and its morpho-attribute pairs as "[ending, {pair,pair,...}]".
 /// </summary>
 public override string ToString()
 {
     var ending = StringsHelper.ToString(Ending);
     var pairs  = string.Join(",", MorphoAttributePairs);

     return "[" + ending + ", {" + pairs + "}]";
 }
示例#14
0
 /// <summary>
 /// Returns the text made of <c>Length</c> characters starting at <c>Start</c>.
 /// </summary>
 public override string ToString()
 {
     return StringsHelper.ToString(Start, Length);
 }
示例#15
0
            /// <summary>
            /// Reads the file <c>ModelFilename</c> line by line and hands every "text&lt;TAB&gt;weight" row
            /// to <paramref name="callbackAction"/>.
            /// </summary>
            /// <remarks>
            /// Lines starting with '#' at the beginning of the file are skipped. For each following line the part
            /// in front of the first tab (trailing white-space trimmed) is passed through
            /// <c>StringsHelper.ToUpperInvariantInPlace</c> and stored in <c>pair.Text</c>; the part behind the tab
            /// (leading white-space skipped) is parsed as <see cref="float"/> into <c>pair.Weight</c>.
            /// The same <c>pair</c> variable is passed by reference on every call.
            /// NOTE(review): <c>lineCount</c> starts at the first non-comment line, so the line numbers in
            /// error messages do not include the skipped header comments - confirm this is intended.
            /// </remarks>
            /// <param name="callbackAction">Invoked once per parsed line.</param>
            /// <exception cref="InvalidDataException">
            /// A line has no tab in front of its last character, has an empty text field,
            /// or its weight cannot be parsed.
            /// </exception>
            private void LoadModelFilenameContent(LoadModelFilenameContentCallback callbackAction)
            {
                using (var emmf = EnumeratorMMF.Create(ModelFilename))
                {
                    var lineCount = 0;
                    var text      = default(string);
                    var weight    = default(float);
                    var pair      = new Pair()
                    {
                        Language = this.Language
                    };

                    #region [.read first line.]
                    if (!emmf.MoveNext())
                    {
                        return;
                    }
                    #endregion

                    #region [.skip beginning comments.]
                    for ( ; ;)
                    {
                        #region [.check on comment.]
                        if (*emmf.Current.Start != '#')
                        {
                            break;
                        }
                        #endregion

                        #region [.move to next line.]
                        if (!emmf.MoveNext())
                        {
                            return;
                        }
                        #endregion
                    }
                    #endregion

                    #region [.read all lines.]
                    for ( ; ;)
                    {
                        lineCount++;

                        var ns = emmf.Current;

                        #region [.first-value in string.]
                        int startIndex_1  = 0;
                        int finishIndex_2 = ns.Length - 1;

                        //search '\t'
                        int startIndex_2  = 0;
                        int finishIndex_1 = 0;
                        for ( ; ;)
                        {
                            if (ns.Start[finishIndex_1] == '\t')
                            {
                                startIndex_2 = finishIndex_1 + 1;
                                finishIndex_1--;
                                break;
                            }
                            //not found '\t'
                            if (finishIndex_2 <= ++finishIndex_1)
                            {
                                throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
                            }
                        }

                        // The tab is the very first character, so the text field is empty. Reject the line here:
                        // the trimming loop below would otherwise read ns.Start[-1], i.e. memory in front of the line.
                        if (finishIndex_1 < startIndex_1)
                        {
                            throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
                        }

                        //skip ends white-spaces
                        for ( ; ;)
                        {
                            if (((_CTM[ns.Start[finishIndex_1]] & CharType.IsWhiteSpace) != CharType.IsWhiteSpace) ||
                                (--finishIndex_1 <= startIndex_1)
                                )
                            {
                                break;
                            }
                        }

                        // Nothing is left of the text field after trimming.
                        if (finishIndex_1 < startIndex_1)
                        {
                            throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
                        }
                        #endregion

                        #region [.second-value in string.]
                        //skip starts white-spaces
                        for ( ; ;)
                        {
                            if (((_CTM[ns.Start[startIndex_2]] & CharType.IsWhiteSpace) != CharType.IsWhiteSpace) ||
                                (finishIndex_2 <= ++startIndex_2)
                                )
                            {
                                break;
                            }
                        }
                        #endregion

                        #region [.fill 'Pair_v1' & calling 'callbackAction()'.]
                        // The weight is parsed first, then 'text' is reused for the n-gram text itself.
                        var len = (finishIndex_2 - startIndex_2) + 1;
                        text = StringsHelper.ToString(ns.Start + startIndex_2, len);

                        if (!float.TryParse(text, NS, NFI, out weight))
                        {
                            throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
                        }

                        len  = (finishIndex_1 - startIndex_1) + 1;
                        text = StringsHelper.ToString(ns.Start + startIndex_1, len);
                        StringsHelper.ToUpperInvariantInPlace(text);

                        pair.Text   = text;
                        pair.Weight = weight;
                        callbackAction(ref pair);
                        #endregion

                        #region [.move to next line.]
                        if (!emmf.MoveNext())
                        {
                            break;
                        }
                        #endregion
                    }
                    #endregion
                }
            }
示例#16
0
 /// <summary>
 /// Formats the row as "text, {w1; w2; ...}".
 /// </summary>
 public override string ToString()
 {
     var text    = StringsHelper.ToString(TextPtr);
     var weights = string.Join("; ", WeightClasses);

     return text + ", {" + weights + '}';
 }
示例#17
0
            /// <summary>
            /// Reads <paramref name="modelFilename"/> line by line and hands every
            /// "text&lt;TAB&gt;weight weight ..." row to <paramref name="callbackAction"/>.
            /// </summary>
            /// <remarks>
            /// Lines starting with '#' are skipped. The text in front of the first tab (trailing white-space
            /// trimmed) is terminated with '\0' and passed through <c>StringsHelper.ToUpperInvariantInPlace</c>
            /// directly inside the line buffer; <c>row.TextPtr</c> points into that buffer. The white-space
            /// separated values behind the tab are parsed as <see cref="float"/>. The first data line defines the
            /// expected number of weights; later lines with a different number are logged through
            /// <see cref="Debug.WriteLine(string)"/> and skipped. The same <c>row</c> variable and the same
            /// weight list are reused for every call, and the list is cleared after each callback.
            /// </remarks>
            /// <param name="modelFilename">Path of the model file.</param>
            /// <param name="callbackAction">Invoked once per accepted line.</param>
            /// <exception cref="InvalidDataException">
            /// A line has no tab in front of its last character, has an empty text field, contains a weight that
            /// cannot be parsed, or the first data line has no weights at all.
            /// </exception>
            private static void LoadModelFilenameContentMMF(string modelFilename, LoadModelFilenameContentMMFCallback callbackAction)
            {
                using (var emmf = EnumeratorMMF.Create(modelFilename))
                {
                    var lineCount        = 0;
                    var text             = default(string);
                    var weight           = default(float);
                    var row              = new ModelRow();
                    var weightClasses    = new List <float>(100);
                    var weightClassesLen = -1;

                    #region [.move to first line.]
                    if (!emmf.MoveNext())
                    {
                        return;
                    }
                    #endregion

                    #region [.skip beginning comments.]
                    for ( ; ;)
                    {
                        #region [.check on comment.]
                        if (*emmf.Current.Start != '#')
                        {
                            break;
                        }
                        #endregion

                        // Count only the comment lines that are skipped here. The first data line is counted
                        // by the read loop below; counting it here as well made every reported number one too high.
                        lineCount++;

                        #region [.move to next line.]
                        if (!emmf.MoveNext())
                        {
                            return;
                        }
                        #endregion
                    }
                    #endregion

                    #region [.read all lines.]
                    for ( ; ;)
                    {
                        lineCount++;

                        var ns = emmf.Current;

                        #region [.skip comment.]
                        if (*ns.Start == '#')
                        {
                            #region [.move to next line.]
                            if (!emmf.MoveNext())
                            {
                                break;
                            }
                            #endregion
                            continue;
                        }
                        #endregion

                        #region [.first-value in string.]
                        int startIndex_1  = 0;
                        int finishIndex_2 = ns.Length - 1;

                        //search '\t'
                        int startIndex_2  = 0;
                        int finishIndex_1 = 0;
                        for ( ; ;)
                        {
                            if (ns.Start[finishIndex_1] == TABULATION)
                            {
                                startIndex_2 = finishIndex_1 + 1;
                                finishIndex_1--;
                                break;
                            }
                            //not found '\t'
                            if (finishIndex_2 <= ++finishIndex_1)
                            {
                                throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, modelFilename, lineCount, ns.ToString())));
                            }
                        }

                        // The tab is the very first character, so the text field is empty. Reject the line here:
                        // the trimming loop below would otherwise read ns.Start[-1], i.e. memory in front of the line.
                        if (finishIndex_1 < startIndex_1)
                        {
                            throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, modelFilename, lineCount, ns.ToString())));
                        }

                        //skip ends white-spaces
                        for ( ; ;)
                        {
                            if (((_CTM[ns.Start[finishIndex_1]] & CharType.IsWhiteSpace) != CharType.IsWhiteSpace) ||
                                (--finishIndex_1 <= startIndex_1)
                                )
                            {
                                break;
                            }
                        }

                        // Nothing is left of the text field after trimming.
                        if (finishIndex_1 < startIndex_1)
                        {
                            throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, modelFilename, lineCount, ns.ToString())));
                        }
                        #endregion

                        #region [.second-value in string.]
                        //tokenize weight-of-classes: every maximal run of non-white-space characters is one weight
                        while (startIndex_2 <= finishIndex_2)
                        {
                            //skip starts white-spaces
                            if ((_CTM[ns.Start[startIndex_2]] & CharType.IsWhiteSpace) == CharType.IsWhiteSpace)
                            {
                                startIndex_2++;
                                continue;
                            }

                            //search end of weight-value
                            var tokenStart = startIndex_2;
                            while ((startIndex_2 <= finishIndex_2) &&
                                   ((_CTM[ns.Start[startIndex_2]] & CharType.IsWhiteSpace) != CharType.IsWhiteSpace))
                            {
                                startIndex_2++;
                            }

                            //try parse weight-value
                            text = StringsHelper.ToString(ns.Start + tokenStart, startIndex_2 - tokenStart);

                            if (!float.TryParse(text, NS, NFI, out weight))
                            {
                                throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, modelFilename, lineCount, ns.ToString())));
                            }
                            weightClasses.Add(weight);
                        }
                        #endregion

                        #region [.fill 'ModelRow' & calling 'callbackAction()'.]
                        if (weightClassesLen == -1)
                        {
                            if (weightClasses.Count == 0)
                            {
                                throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, modelFilename, lineCount, ns.ToString()) + " => classes weightes not found"));
                            }
                            weightClassesLen = weightClasses.Count;
                        }
                        else if (weightClassesLen != weightClasses.Count)
                        {
                            Debug.WriteLine(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, modelFilename, lineCount, ns.ToString()) + " => different count of classes weightes");

                            // Skip the malformed row: drop its weights and advance to the next line.
                            // Without this the same line was parsed again and again while the list kept growing.
                            weightClasses.Clear();
                            if (!emmf.MoveNext())
                            {
                                break;
                            }
                            continue;
                        }

                        row.TextLength = (finishIndex_1 - startIndex_1) + 1;
                        var textPtr = ns.Start + startIndex_1;
                        // Terminate the text inside the line buffer; the weights behind it are already parsed.
                        textPtr[row.TextLength] = '\0';
                        StringsHelper.ToUpperInvariantInPlace(textPtr, row.TextLength);

                        row.TextPtr       = textPtr;
                        row.WeightClasses = weightClasses;

                        callbackAction(ref row);

                        //clear weight-classes temp-buffer
                        weightClasses.Clear();
                        #endregion

                        #region [.move to next line.]
                        if (!emmf.MoveNext())
                        {
                            break;
                        }
                        #endregion
                    }
                    #endregion
                }
            }
示例#18
0
            /// <summary>
            /// Reads the file <c>ModelFilename</c> line by line and hands every "text&lt;TAB&gt;probability" row
            /// to <paramref name="callbackAction"/>.
            /// </summary>
            /// <remarks>
            /// Lines starting with '#' at the beginning of the file are skipped. For each following line the part
            /// behind the first tab (leading white-space skipped) is parsed as <see cref="double"/> into
            /// <c>pair.Probability</c>. The part in front of the tab (trailing white-space trimmed) is terminated
            /// with '\0' and passed through <c>StringsHelper.ToUpperInvariantInPlace</c> directly inside the line
            /// buffer; <c>pair.TextPtr</c> points into that buffer. The same <c>pair</c> variable is passed by
            /// reference on every call.
            /// NOTE(review): <c>lineCount</c> starts at the first non-comment line, so the line numbers in
            /// error messages do not include the skipped header comments - confirm this is intended.
            /// </remarks>
            /// <param name="callbackAction">Invoked once per parsed line.</param>
            /// <exception cref="InvalidDataException">
            /// A line has no tab in front of its last character, has an empty text field,
            /// or its probability cannot be parsed.
            /// </exception>
            private void LoadModelFilenameContent(LoadModelFileContentCallback callbackAction)
            {
                using (var emmf = EnumeratorMMF.Create(ModelFilename))
                {
                    var lineCount   = 0;
                    var text        = default(string);
                    var probability = default(double);
                    var pair        = new Pair();

                    #region [.read first line.]
                    if (!emmf.MoveNext())
                    {
                        return;
                    }
                    #endregion

                    #region [.skip beginning comments.]
                    for ( ; ;)
                    {
                        #region [.check on comment.]
                        if (*emmf.Current.Start != '#')
                        {
                            break;
                        }
                        #endregion

                        #region [.move to next line.]
                        if (!emmf.MoveNext())
                        {
                            return;
                        }
                        #endregion
                    }
                    #endregion

                    #region [.read all lines.]
                    for ( ; ;)
                    {
                        lineCount++;

                        var ns = emmf.Current;

                        #region [.first-value in string.]
                        int startIndex_1  = 0;
                        int finishIndex_2 = ns.Length - 1;

                        //search '\t'
                        int startIndex_2  = 0;
                        int finishIndex_1 = 0;
                        for ( ; ;)
                        {
                            if (ns.Start[finishIndex_1] == '\t')
                            {
                                startIndex_2 = finishIndex_1 + 1;
                                finishIndex_1--;
                                break;
                            }
                            //not found '\t'
                            if (finishIndex_2 <= ++finishIndex_1)
                            {
                                throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
                            }
                        }

                        // The tab is the very first character, so the text field is empty. Reject the line here:
                        // the trimming loop below would otherwise read ns.Start[-1], i.e. memory in front of the line.
                        if (finishIndex_1 < startIndex_1)
                        {
                            throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
                        }

                        //skip ends white-spaces
                        for ( ; ;)
                        {
                            if (((_CTM[ns.Start[finishIndex_1]] & CharType.IsWhiteSpace) != CharType.IsWhiteSpace) ||
                                (--finishIndex_1 <= startIndex_1)
                                )
                            {
                                break;
                            }
                        }

                        // Nothing is left of the text field after trimming.
                        if (finishIndex_1 < startIndex_1)
                        {
                            throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
                        }
                        #endregion

                        #region [.second-value in string.]
                        //skip starts white-spaces
                        for ( ; ;)
                        {
                            if (((_CTM[ns.Start[startIndex_2]] & CharType.IsWhiteSpace) != CharType.IsWhiteSpace) ||
                                (finishIndex_2 <= ++startIndex_2)
                                )
                            {
                                break;
                            }
                        }
                        #endregion

                        #region [.fill 'Pair_v2' & calling 'callbackAction()'.]
                        // The probability must be parsed before the text is terminated in place below,
                        // because the terminator is written into the same line buffer.
                        var len = (finishIndex_2 - startIndex_2) + 1;
                        text = StringsHelper.ToString(ns.Start + startIndex_2, len);

                        if (!double.TryParse(text, NS, NFI, out probability))
                        {
                            throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
                        }

                        pair.TextLength = (finishIndex_1 - startIndex_1) + 1;
                        var textPtr = ns.Start + startIndex_1;
                        textPtr[pair.TextLength] = '\0';
                        StringsHelper.ToUpperInvariantInPlace(textPtr, pair.TextLength);

                        pair.TextPtr     = (IntPtr)textPtr;
                        pair.Probability = probability;

                        callbackAction(ref pair);
                        #endregion

                        #region [.move to next line.]
                        if (!emmf.MoveNext())
                        {
                            break;
                        }
                        #endregion
                    }
                    #endregion
                }
            }
示例#19
0
 /// <summary>
 /// Formats the pair as "'text': probability".
 /// </summary>
 public override string ToString()
 {
     var text = StringsHelper.ToString(TextPtr);
     return string.Format("'{0}': {1}", text, Probability);
 }
示例#20
0
 /// <summary>
 /// Converts a dictionary entry into a <c>ModelRecord</c>: the key becomes the n-gram text,
 /// the value becomes the probability.
 /// </summary>
 /// <param name="p">The entry to convert.</param>
 /// <returns>The record built from <paramref name="p"/>.</returns>
 private static ModelRecord ToModelRecord(this KeyValuePair <IntPtr, double> p)
 {
     var ngram = StringsHelper.ToString(p.Key);

     return new ModelRecord() { Ngram = ngram, Probability = p.Value };
 }
示例#21
0
            /// <summary>
            /// Creates a morpho-form from a text line.
            /// </summary>
            /// <remarks>
            /// The part of the line in front of COLON is the ending; the part behind it is scanned for
            /// morpho-attribute names. Problems are reported through <c>_ModelLoadingErrorCallback</c>.
            /// The method works on the shared buffers <c>_ENDING_LOWER_BUFFER</c>, <c>_ENDING_UPPER_BUFFER</c>
            /// and <c>_MorphoAttributePairs_Buffer</c>.
            /// </remarks>
            /// <param name="morphoType">Morpho-type whose <c>MorphoAttributeGroup</c> is used to resolve attribute pairs.</param>
            /// <param name="lineBase">Pointer to the first character of the '\0'-terminated line.</param>
            /// <returns>
            /// The created morpho-form, or <c>null</c> when COLON is not found or its index is not below ENDING_BUFFER_SIZE.
            /// </returns>
            private MorphoFormNative?CreateMorphoForm(MorphoTypeNative morphoType, char *lineBase)
            {
                #region find index-of-COLON & check on length
                var index = IndexOf(lineBase, COLON);
                // The ending must fit into the ending buffers, so the colon index has to be below ENDING_BUFFER_SIZE.
                if ((index == -1) || (ENDING_BUFFER_SIZE <= index))
                {
                    _ModelLoadingErrorCallback("Index of COLON is undefined or length the line is too long", StringsHelper.ToString(lineBase));
                    return(null);
                }
                #endregion

                #region fill '_ENDING_LOWER_BUFFER'
                // Copy the characters in front of the colon through _LOWER_INVARIANT_MAP, stopping early at the
                // first white-space; afterwards 'i' is the number of characters written.
                var i = 0;
                for (char *ptr = lineBase; i < index; ptr++, i++)
                {
                    var ch = *ptr;
                    if ((_CHARTYPE_MAP[ch] & CharType.IsWhiteSpace) == CharType.IsWhiteSpace)
                    {
                        break;
                    }
                    _ENDING_LOWER_BUFFER[i] = _LOWER_INVARIANT_MAP[ch];
                }
                _ENDING_LOWER_BUFFER[i] = '\0';
                #endregion

                #region fill '_MorphoAttributePairs_Buffer'
                _MorphoAttributePairs_Buffer.Clear();
                // Scan the text behind the colon for tokens that start with a letter and continue with letters or digits.
                for (char *ptr = lineBase + index + 1; ; ptr++)
                {
                    var ch = *ptr;
                    if (ch == '\0')
                    {
                        break;
                    }

                    // Skip everything that cannot start a token.
                    if ((_CHARTYPE_MAP[ch] & CharType.IsLetter) != CharType.IsLetter)
                    {
                        continue;
                    }

                    // Measure the token; on exit 'ptr' points at the first character behind it and 'ch' holds that character.
                    var len = 0;
                    for (; ; ptr++)
                    {
                        ch = *ptr;
                        if (ch == '\0')
                        {
                            break;
                        }
                        var ct = _CHARTYPE_MAP[ch];
                        if ((ct & CharType.IsLetter) != CharType.IsLetter &&
                            (ct & CharType.IsDigit) != CharType.IsDigit)
                        {
                            break;
                        }
                        len++;
                    }
                    if (len != 0)
                    {
                        var morphoAttribute = default(MorphoAttributeEnum);
                        // The token starts 'len' characters in front of the current position.
                        if (_EnumParserMorphoAttribute.TryParse(ptr - len, len, ref morphoAttribute))
                        {
                            var map = _MorphoAttributeList.TryGetMorphoAttributePair(morphoType.MorphoAttributeGroup, morphoAttribute);
                            if (map.HasValue)
                            {
                                _MorphoAttributePairs_Buffer.Add(map.Value);
                            }
                            // In non-DEBUG builds an attribute without a pair for this group is ignored silently;
                            // the 'else' below exists only in DEBUG builds.
#if DEBUG
                            //*
                            //TOO MANY ERRORS AFTER last (2016.12.28) getting morpho-dcitionaries from 'LangAnalyzerStd-[ilook]'
                            else
                            {
                                var attr = new string( ptr - len, 0, len );
                                _ModelLoadingErrorCallback("Error in morpho-attribute: '" + attr + '\'', StringsHelper.ToString(lineBase));
                            }
                            //*/
#endif
                        }
                        else
                        {
                            var attr = new string(ptr - len, 0, len);
                            _ModelLoadingErrorCallback("Unknown morpho-attribute: '" + attr + '\'', StringsHelper.ToString(lineBase));
                        }
                    }

                    // The token ran up to the end of the line: stop before the outer 'ptr++' steps over the terminator.
                    if (ch == '\0')
                    {
                        break;
                    }
                }
                #endregion

                #region Allocate native-memory for endingOfWord
                //*
                IntPtr endingPtr;
                IntPtr endingUpperPtr;
                // An ending that consists of the single UNDERLINE character is mapped to the shared _EMPTY_STRING pointer.
                if ((i == 1) && (_ENDING_LOWER_BUFFER[0] == UNDERLINE))
                {
                    endingPtr      = _EMPTY_STRING;
                    endingUpperPtr = _EMPTY_STRING;
                }
                else
                {
                    // NOTE(review): despite the region name, the buffer used here holds the ending after _LOWER_INVARIANT_MAP.
                    #region ending-in-original-case
                    endingPtr = new IntPtr(_ENDING_LOWER_BUFFER);

                    // Reuse an already allocated copy of the same ending when _EndingDictionary has one.
                    if (_EndingDictionary.TryGetValue(endingPtr, out IntPtr existsPtr))
                    {
                        endingPtr = existsPtr;
                    }
                    else
                    {
                        // NOTE(review): 'index' characters are copied although only 'i' were written when a
                        // white-space stopped the fill loop early - confirm that 'index' (not 'i') is intended.
                        AllocHGlobalAndCopy(_ENDING_LOWER_BUFFER, index, out endingPtr);
                        _EndingDictionary.Add(endingPtr, endingPtr);
                    }
                    #endregion

                    #region ending-in-upper-case
                    StringsHelper.ToUpperInvariant(_ENDING_LOWER_BUFFER, _ENDING_UPPER_BUFFER);

                    endingUpperPtr = new IntPtr(_ENDING_UPPER_BUFFER);

                    // Same lookup-or-allocate step for the upper-case variant.
                    if (_EndingDictionary.TryGetValue(endingUpperPtr, out existsPtr))
                    {
                        endingUpperPtr = existsPtr;
                    }
                    else
                    {
                        AllocHGlobalAndCopy(_ENDING_UPPER_BUFFER, index, out endingUpperPtr);
                        _EndingDictionary.Add(endingUpperPtr, endingUpperPtr);
                    }
                    #endregion
                }
                #endregion

                var morphoForm = new MorphoFormNative((char *)endingPtr, (char *)endingUpperPtr, _MorphoAttributePairs_Buffer);
                return(morphoForm);
            }
 /// <summary>
 /// Formats the upper-case ending and the morpho-attributes as "[ending, {attr,attr,...}]".
 /// </summary>
 public override string ToString()
 {
     var ending     = StringsHelper.ToString(EndingUpper);
     var attributes = string.Join(",", MorphoAttributes);

     return "[" + ending + ", {" + attributes + "}]";
 }
示例#23
0
 /// <summary>
 /// Formats the pair as "text, weight, language".
 /// </summary>
 public override string ToString()
 {
     var text = StringsHelper.ToString(TextPtr);

     return text + ", " + Weight + ", " + Language;
 }