예제 #1
0
        /// <summary>
        /// Searches free text for the name of a known company and returns the first valid match.
        /// </summary>
        /// <remarks>
        /// The text is normalized (block text, lower case, diacritics removed) and split into words
        /// on spaces, commas and semicolons. For every starting word, each contiguous run of words is
        /// tried, longest run first. The run is passed through <c>Firma.JmenoBezKoncovkyFull</c> and
        /// the resulting name is looked up in <c>StaticData.FirmyNazvyOnlyAscii</c>. A candidate
        /// company is returned when it is valid and either has no legal-form suffix, or its suffix is
        /// within a Levenshtein distance below 2 of the last word of the run or of the word that
        /// immediately follows the run.
        /// </remarks>
        /// <param name="text">Text to search; may be null or empty.</param>
        /// <returns>The first matching company, or <c>null</c> when nothing matches.</returns>
        public static Lib.Data.Firma FirmaInText(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return null;
            }

            // Placeholder for spaces inside a suffix, so a multi-word suffix survives the split as one word.
            const char nonBreakingSpace = (char)160;

            string value = Devmasters.TextUtil.RemoveDiacritics(TextUtil.NormalizeToBlockText(text).ToLower());

            // Suffixes are processed longest first.
            foreach (var k in Firma.Koncovky.Select(m => m.ToLower()).OrderByDescending(m => m.Length))
            {
                string glued = k.Replace(' ', nonBreakingSpace);

                if (value.Contains(k))
                {
                    value = value.Replace(k, glued);
                }
                else if (k.EndsWith(".", StringComparison.Ordinal))
                {
                    // The text may end with the suffix written without its final dot.
                    string stem = k.Substring(0, k.Length - 1);
                    if (value.EndsWith(stem, StringComparison.Ordinal))
                    {
                        value = value.Replace(stem, glued);
                    }
                }
            }

            // Split into words, then turn the placeholders back into ordinary spaces.
            string[] words = value
                .Split(new char[] { ' ', ',', ';' }, StringSplitOptions.RemoveEmptyEntries)
                .Select(m => m.Replace(nonBreakingSpace, ' '))
                .ToArray();

            for (int firstWord = 0; firstWord < words.Length; firstWord++)
            {
                // Start with all remaining words and drop one word from the end on each pass.
                for (int wordCount = words.Length - firstWord; wordCount > 0; wordCount--)
                {
                    string wordCombination = string.Join(" ", words, firstWord, wordCount);
                    string koncovka;
                    string firmaBezKoncovky = Lib.Data.Firma.JmenoBezKoncovkyFull(wordCombination, out koncovka);
                    string simpleName = Devmasters.TextUtil.RemoveDiacritics(firmaBezKoncovky).ToLower().Trim();

                    // Names of three characters or fewer are never looked up.
                    if (firmaBezKoncovky.Length <= 3)
                    {
                        continue;
                    }

                    var knownNames = StaticData.FirmyNazvyOnlyAscii.Get();
                    if (!knownNames.ContainsKey(simpleName))
                    {
                        continue;
                    }

                    string lastWord = words[firstWord + wordCount - 1];

                    // Word right after the run; empty when the run reaches the end of the text.
                    int nextIndex = firstWord + wordCount;
                    string nextWord = nextIndex < words.Length ? words[nextIndex] : "";

                    foreach (var ico in knownNames[simpleName])
                    {
                        Firma f = Firmy.Get(ico); //TODO StaticData.FirmyNazvyAscii.Get()[simpleName]);
                        if (f != null && f.Valid)
                        {
                            var fKoncovka = f.KoncovkaFirmy();

                            if (string.IsNullOrEmpty(fKoncovka)
                                || LevenshteinDistanceCompute(lastWord, fKoncovka) < 2
                                || LevenshteinDistanceCompute(nextWord, fKoncovka) < 2)
                            {
                                return f;
                            }
                        }
                    }
                    // No candidate accepted for this run; keep trying other runs.
                }
            }

            return null;
        }