Пример #1
0
            /// <summary>
            /// Default "parasite" substrings written to MoreByThisAuthorParasites.txt when that
            /// file does not exist yet. Entries are removed from the lower-cased query in the
            /// order listed; leading/trailing spaces inside an entry are significant.
            /// Commented-out entries are disabled candidates.
            /// </summary>
            /// <remarks>
            /// NOTE(review): because removal happens in list order, a short entry listed before a
            /// longer entry that contains it (e.g. "editor" before "editora", "s.c." before
            /// "s.c.l.") leaves a residue such as "a" or "l.". The order is kept unchanged here;
            /// confirm whether that is intended before reordering.
            /// </remarks>
            private static readonly string[] DefaultParasites =
            {
                "(compilador)",
                //"a.",
                //"and",
                //"and others",
                "annotated",
                "artist",
                "artistic",
                "associate",
                "authors",
                "author:",
                "author",
                //"b.",
                //"br",
                //"br.",
                "bishop",
                " by ",
                //"c.",
                "c.c.v.i.",
                "c.h.s.",
                "c.i.c.m.",
                "c.n.d.",
                "c.p.",
                "c.pp.s.",
                "c.s.c.",
                "c.s.j.",
                "c.s.sp.",
                "c.ss.r.",
                "ccvi",
                //"chs",
                //"cicm",
                //"cnd",
                "commentary author:",
                "commentary",
                "compiladors",
                "compilador",
                "compiler",
                "contributors",
                "contributor",
                //"cp",
                "cpps",
                //"csc",
                "csj",
                "cssp",
                "cssr",
                " csv,",
                ", csv",
                //"d.",
                "designers",
                "designer",
                "directors",
                "director",
                //"dr",
                //"dr.",
                //"e.",
                "edited",
                "editors",
                "editor asociado",
                "editor",
                "editora",
                //"f.",
                "forewords",
                " foreword ",
                "foreword",
                //"fr",
                //"fr.",
                "galatians",
                "general",
                "guide",
                //"h.",
                //"i.",
                "i.h.m.",
                "ihm",
                "iii",
                "illustrators",
                "illuminator",
                "illustrator",
                "introduced",
                "introduction",
                //"j.",
                //"jr",
                ", jr.",
                //"k.",
                //"l.",
                "lecturers",
                "lecturer",
                //"m.",
                //"m.s.",
                //"map",
                //"ms",
                //"n.",
                "notes",
                "o carm",
                //"ocarm",
                //"o.",
                "o.c.s.o.",
                "o.carm.",
                "o.f.m.",
                "o.f.m.cap.",
                "o.f.m.conv.",
                "o.m.i.",
                "o.p.",
                "o.s.a.",
                "o.s.b.",
                "o.s.c.",
                "o.s.f.",
                "o.s.u.",
                "obl.s.b.",
                "oblsb",
                "ocso ",
                " ocso",
                "ocso",
                "ofm cap",
                "ofm conv",
                //"ofm",
                " omi ",
                " omi",
                ", op",
                "organizadoras",
                " osa",
                "osa ",
                ", osb",
                " osb",
                "osb ",
                //"osc",
                "osf ",
                " osf",
                //"osu",
                //"p.",
                "phd",
                "ph d",
                "ph.d.",
                //"phd",
                "preface",
                //"q.",
                //"r.",
                "r.s.c.j.",
                "revised",
                "romans",
                "rscj",
                //"rsm",
                //"s.",
                "s.a.",
                " s.c.",
                "s.c. ",
                "s.c.",
                "s.c.l.",
                "s.c.n.",
                "s.d.b.",
                "s.g.s.",
                "s.j.",
                "s.m.",
                "s.m.a.",
                "s.n.d.de n.",
                "s.n.j.m.",
                "s.p.",
                "s.s.",
                "s.s.c.",
                "s.s.s.",
                "s.t.d.",
                //"sa",
                ", sc",
                " sc ",
                "sch p",
                "sch.p.",
                //"scl",
                //"scn",
                //"sdb",
                //"sgs",
                " sj,",
                ", sj",
                //"sm",
                //"sma",
                "snd de n",
                "sndden",
                "snjm",
                //"sp",
                //"sr",
                //"sr.",
                ", ss",
                "ssc",
                "sss",
                "std",
                "study",
                "study guide author:",
                //"t.",
                "text edited",
                " the ",
                "traductora",
                "translated by",
                "translated",
                "translation",
                "translators",
                "translator",
                //"u.",
                //"v.",
                //"w.",
                " with an ",
                " with a ",
                " with ",
                "written"
                //"x.",
                //"y",
                //"y.",
                //"z."
            };

            /// <summary>
            /// Default separators written to MoreByThisAuthorSplitter.txt when that file does
            /// not exist yet; the query is split on each of them.
            /// </summary>
            private static readonly string[] DefaultSplitters = { ",", " and ", ";", ":", "<br>", "<br/>", "<br />" };

            /// <summary>
            /// Reads all lines of <paramref name="fileName"/> inside <paramref name="folder"/>,
            /// first creating the file from <paramref name="defaults"/> when it is missing.
            /// </summary>
            private static string[] ReadOrSeedLines(string folder, string fileName, string[] defaults)
            {
                string path = Path.Combine(folder, fileName);
                if (!File.Exists(path))
                {
                    // File.WriteAllLines does not create missing directories, so make sure the
                    // target folder exists before seeding the file.
                    if (!String.IsNullOrEmpty(folder))
                    {
                        Directory.CreateDirectory(folder);
                    }

                    File.WriteAllLines(path, defaults);
                }

                return File.ReadAllLines(path);
            }

            /// <summary>
            /// Turns a raw query string into zero or more assembled queries.
            /// The query is lower-cased, every substring listed in MoreByThisAuthorParasites.txt
            /// is removed, the remainder is split on the separators listed in
            /// MoreByThisAuthorSplitter.txt, and each non-blank piece is passed through a
            /// QueryAssembler. Both files live under the common application data folder combined
            /// with the "DataFolder" app setting and are created with built-in defaults if absent.
            /// </summary>
            /// <param name="query">Raw query text; <c>null</c> is treated as an empty string.</param>
            /// <returns>One assembled query per non-blank piece; an empty array when nothing remains.</returns>
            public static string[] Prepare(string query)
            {
                string dataFolder = Path.Combine(
                    Environment.GetFolderPath(Environment.SpecialFolder.CommonApplicationData),
                    ConfigurationManager.AppSettings["DataFolder"]);

                var parasites = ReadOrSeedLines(dataFolder, "MoreByThisAuthorParasites.txt", DefaultParasites);

                query = (query ?? String.Empty).ToLowerInvariant();
                foreach (var parasite in parasites)
                {
                    // String.Replace throws ArgumentException for an empty search string, so a
                    // blank line in the (user-editable) file must be skipped.
                    if (parasite.Length == 0)
                    {
                        continue;
                    }

                    query = query.Replace(parasite, String.Empty);
                }

                var splitter = ReadOrSeedLines(dataFolder, "MoreByThisAuthorSplitter.txt", DefaultSplitters);

                // The query is already lower-case here; removing substrings cannot change that.
                var queryChunks = query.Split(splitter, StringSplitOptions.RemoveEmptyEntries);

                var preparedQueries = new List<string>();
                foreach (var chunk in queryChunks)
                {
                    // Replace "<char>." with a space when <char> is outside the Ll/Lu/Lt/Lo/Nd/Pc
                    // Unicode categories, then turn ". " into a single space and trim spaces.
                    var sanitizedChunk = Regex.Replace(chunk, @"[^\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Nd}\p{Pc}]\.", " ").Replace(". ", " ").Trim(' ');
                    if (String.IsNullOrWhiteSpace(sanitizedChunk))
                    {
                        continue;
                    }

                    var queryAssembler = new QueryAssembler();
                    queryAssembler.Append(sanitizedChunk);
                    preparedQueries.Add(queryAssembler.Assemble());
                }

                return preparedQueries.ToArray();
            }
Пример #2
0
            /// <summary>
            /// Splits a query on spaces and feeds the pieces to a QueryAssembler. Pieces between
            /// a word starting with a double quote and a word ending with a double quote are
            /// joined with single spaces and appended as one term; all other words are appended
            /// individually.
            /// </summary>
            /// <param name="query">Raw query text; <c>null</c> is treated as an empty string.</param>
            /// <returns>The value produced by QueryAssembler.Assemble() after all terms were appended.</returns>
            public static string Prepare(string query)
            {
                var queryChunks = (query ?? String.Empty).Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                var queryAssembler = new QueryAssembler();

                bool isExactQueryChunk = false;
                string exactQueryChunks = String.Empty;
                foreach (var item in queryChunks)
                {
                    if (!isExactQueryChunk && !item.StartsWith("\"", StringComparison.Ordinal))
                    {
                        // Plain word outside of any quoted phrase.
                        queryAssembler.Append(item);
                        continue;
                    }

                    // Accumulate the phrase. The "{0} {1}" format yields a leading space on the
                    // first word; that is kept so balanced phrases are passed on unchanged.
                    exactQueryChunks = String.Format("{0} {1}", exactQueryChunks, item);

                    if (item.EndsWith("\"", StringComparison.Ordinal))
                    {
                        queryAssembler.Append(exactQueryChunks);
                        exactQueryChunks = String.Empty;
                        isExactQueryChunk = false;
                    }
                    else
                    {
                        isExactQueryChunk = true;
                    }
                }

                // An opening quote that was never closed used to drop every word after it.
                // Close the phrase so it has the same shape as a balanced one and keep the terms.
                if (isExactQueryChunk)
                {
                    queryAssembler.Append(exactQueryChunks + "\"");
                }

                return queryAssembler.Assemble();
            }