Ejemplo n.º 1
0
        /// <summary>
        /// Debugging aid: constructs a <c>StatsBasedParser</c> over <paramref name="trgNameList"/>
        /// and, when a debugger is attached, breaks into it.
        /// </summary>
        /// <param name="srcNameList">Source names; not read by the currently active code.</param>
        /// <param name="trgNameList">Target names handed to the parser's constructor.</param>
        public static void TestParser(string[] srcNameList, List<string> trgNameList)
        {
            // Only the construction is exercised here. A loop that dumped
            // ClosestMatches(srcName) for every source name via Debug.WriteLine
            // used to follow and is currently disabled.
            var parser = new StatsBasedParser(trgNameList);

            if (!Debugger.IsAttached)
            {
                return;
            }

            // Stop here so the freshly built parser can be inspected.
            Debugger.Break();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Finds the entries of the target list that best match <paramref name="src"/>.
        /// Strategies are tried from strictest to loosest and the result of the first
        /// one that yields anything is returned.
        /// </summary>
        /// <remarks>
        /// Strategies, in order:
        /// 0) a target equal to <paramref name="src"/>, ignoring case;
        /// 1) targets containing a source word whose <c>Usage</c> is 1;
        /// 2) targets accepted by <c>ContainsAllWordsOpt</c> for all source words;
        /// 3) targets accepted by <c>ContainsAllWordsOpt</c> for the i lowest-usage
        ///    source words, with i shrinking from the full count down to 1.
        /// As a side effect, <c>MatchBaseCsv</c> is set to the text the successful
        /// strategy matched on; it is left untouched when nothing matches.
        /// </remarks>
        /// <param name="src">Source name to look up in the target list.</param>
        /// <returns>The matching target names; an empty list when no strategy matched.</returns>
        public List<string> ClosestMatches(string src)
        {
            var matches = new List<string>();

            // 0: exact match, ignoring case. A single ordinal lookup keeps the
            //    membership test and the retrieval on the same comparison rules.
            //    TODO: remove ToLower from word stats view.
            var exactMatch = _trgList.FirstOrDefault(r => string.Equals(r, src, StringComparison.OrdinalIgnoreCase));
            if (exactMatch != null)
            {
                MatchBaseCsv = exactMatch;
                matches.Add(exactMatch);
                return matches;
            }

            var wa = src.Split(OlpPresets.DelimAll, StringSplitOptions.RemoveEmptyEntries);

            // Materialise once: the stats are consulted by several strategies below.
            var srcWordsByFoU = GetWordStats(wa, _wordStatCollection).ToList();

            // 1: source words with Usage == 1. Each such word is resolved to the first
            //    target that contains it, so the result holds target names (like every
            //    other strategy) rather than the source words themselves.
            foreach (var word in srcWordsByFoU.Where(r => r.Usage == 1).Select(r => r.Word).ToList())
            {
                var trgMatch = _trgList.FirstOrDefault(r => r.IndexOf(word, StringComparison.OrdinalIgnoreCase) >= 0);

                // A word with no containing target is skipped instead of throwing,
                // which lets the looser strategies below still have a go.
                if (trgMatch != null && !matches.Contains(trgMatch, StringComparer.OrdinalIgnoreCase))
                {
                    matches.Add(trgMatch);
                }
            }

            if (matches.Count > 0)
            {
                // Several unique-usage words pointing at different targets is ambiguous.
                Debug.Assert(matches.Count == 1, "Need more wits to find a better way to tell which one matches");
                MatchBaseCsv = string.Join("·", srcWordsByFoU.Select(r => r.Word));
                return matches;
            }

            // 2: targets that contain all the words of the source name, in any order.
            var matchByAllWords = _trgList.Where(r => StatsBasedParser.ContainsAllWordsOpt(r, wa)).ToList();
            if (matchByAllWords.Count > 0)
            {
                MatchBaseCsv = string.Join("·", wa);
                return matchByAllWords;
            }

            // 3: progressively drop the highest-usage words and retry with the rest.
            if (srcWordsByFoU.Count > 1)
            {
                // The ordering does not depend on i, so compute it once.
                var wordsByUsage = srcWordsByFoU.OrderBy(r => r.Usage).Select(r => r.Word).ToArray();

                for (int i = wordsByUsage.Length; i > 0; i--)
                {
                    var notAllOrdered = wordsByUsage.Take(i).ToArray();

                    var matchBy1stWords = _trgList.Where(r => StatsBasedParser.ContainsAllWordsOpt(r, notAllOrdered)).ToList();
                    if (matchBy1stWords.Count > 0)
                    {
                        MatchBaseCsv = string.Join("·", notAllOrdered);
                        return matchBy1stWords;
                    }
                }
            }

            return matches;
        }