/// <summary>
/// Turns a raw author/contributor string into one prepared query per remaining text chunk.
/// </summary>
/// <remarks>
/// The text is lowercased, every "parasite" fragment listed in
/// <c>MoreByThisAuthorParasites.txt</c> is removed, the remainder is split on the
/// separators listed in <c>MoreByThisAuthorSplitter.txt</c>, and each non-blank chunk is
/// cleaned up and run through its own <c>QueryAssembler</c>.
/// Both files live under the common application data folder, inside the sub-folder named
/// by the <c>DataFolder</c> app setting. A file that does not exist yet is created from
/// the built-in defaults, so the lists can be edited without recompiling.
/// </remarks>
/// <param name="query">Raw text to prepare. A null value is treated as an empty string.</param>
/// <returns>
/// The assembled queries in the order their chunks occur in the text; an empty array when
/// nothing is left after cleaning.
/// </returns>
public static string[] Prepare(string query)
{
    string dataFolder = Path.Combine(
        Environment.GetFolderPath(Environment.SpecialFolder.CommonApplicationData),
        ConfigurationManager.AppSettings["DataFolder"]);

    string parasitesFile = Path.Combine(dataFolder, "MoreByThisAuthorParasites.txt");
    if (!File.Exists(parasitesFile))
    {
        WriteSeedFile(parasitesFile, GetDefaultAuthorParasites());
    }

    // Matching is done on the lowercased text, so the entries in the file are expected to be lowercase.
    query = (query ?? String.Empty).ToLowerInvariant();
    foreach (var parasite in File.ReadAllLines(parasitesFile))
    {
        // String.Replace throws ArgumentException for an empty search string, so a blank
        // line in the hand-editable file must never reach it.
        if (parasite.Length == 0)
        {
            continue;
        }

        query = query.Replace(parasite, String.Empty);
    }

    string splitterFile = Path.Combine(dataFolder, "MoreByThisAuthorSplitter.txt");
    if (!File.Exists(splitterFile))
    {
        WriteSeedFile(splitterFile, new[] { ",", " and ", ";", ":", "<br>", "<br/>", "<br />" });
    }

    var separators = File.ReadAllLines(splitterFile);

    var preparedQueries = new List<string>();
    foreach (var chunk in query.Split(separators, StringSplitOptions.RemoveEmptyEntries))
    {
        // Blank out every "<character outside the listed Unicode classes>." pair, then every
        // ". " pair, and trim the surrounding spaces.
        var sanitizedChunk = Regex.Replace(chunk, @"[^\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Nd}\p{Pc}]\.", " ")
            .Replace(". ", " ")
            .Trim(' ');
        if (String.IsNullOrWhiteSpace(sanitizedChunk))
        {
            continue;
        }

        // One assembler per chunk: every chunk becomes an independent query.
        var queryAssembler = new QueryAssembler();
        queryAssembler.Append(sanitizedChunk);
        preparedQueries.Add(queryAssembler.Assemble());
    }

    return preparedQueries.ToArray();
}

/// <summary>
/// Writes a default list to <paramref name="path"/>, one entry per line, creating the
/// containing directory first so that a first run on a clean machine does not fail.
/// </summary>
private static void WriteSeedFile(string path, string[] lines)
{
    string directory = Path.GetDirectoryName(path);
    if (!String.IsNullOrEmpty(directory))
    {
        Directory.CreateDirectory(directory);
    }

    File.WriteAllLines(path, lines);
}

/// <summary>
/// Built-in fragments (roles, titles, religious-order suffixes, filler words) that are
/// removed from the text before it is split. Only used to seed the parasites file.
/// </summary>
/// <remarks>
/// Order matters: <see cref="Prepare"/> removes the fragments one after another in list
/// order. Entries on "disabled" comment lines are commented out of the seed list.
/// </remarks>
private static string[] GetDefaultAuthorParasites()
{
    return new[]
    {
        "(compilador)",
        // disabled: "a.", "and", "and others"
        "annotated",
        "artist",
        "artistic",
        "associate",
        "authors",
        "author:",
        "author",
        // disabled: "b.", "br", "br."
        "bishop",
        " by ",
        // disabled: "c."
        "c.c.v.i.",
        "c.h.s.",
        "c.i.c.m.",
        "c.n.d.",
        "c.p.",
        "c.pp.s.",
        "c.s.c.",
        "c.s.j.",
        "c.s.sp.",
        "c.ss.r.",
        "ccvi",
        // disabled: "chs", "cicm", "cnd"
        "commentary author:",
        "commentary",
        "compiladors",
        "compilador",
        "compiler",
        "contributors",
        "contributor",
        // disabled: "cp"
        "cpps",
        // disabled: "csc"
        "csj",
        "cssp",
        "cssr",
        " csv,",
        ", csv",
        // disabled: "d."
        "designers",
        "designer",
        "directors",
        "director",
        // disabled: "dr", "dr.", "e."
        "edited",
        "editors",
        "editor asociado",
        "editor",
        "editora",
        // disabled: "f."
        "forewords",
        " foreword ",
        "foreword",
        // disabled: "fr", "fr."
        "galatians",
        "general",
        "guide",
        // disabled: "h.", "i."
        "i.h.m.",
        "ihm",
        "iii",
        "illustrators",
        "illuminator",
        "illustrator",
        "introduced",
        "introduction",
        // disabled: "j.", "jr"
        ", jr.",
        // disabled: "k.", "l."
        "lecturers",
        "lecturer",
        // disabled: "m.", "m.s.", "map", "ms", "n."
        "notes",
        "o carm",
        // disabled: "ocarm", "o."
        "o.c.s.o.",
        "o.carm.",
        "o.f.m.",
        "o.f.m.cap.",
        "o.f.m.conv.",
        "o.m.i.",
        "o.p.",
        "o.s.a.",
        "o.s.b.",
        "o.s.c.",
        "o.s.f.",
        "o.s.u.",
        "obl.s.b.",
        "oblsb",
        "ocso ",
        " ocso",
        "ocso",
        "ofm cap",
        "ofm conv",
        // disabled: "ofm"
        " omi ",
        " omi",
        ", op",
        "organizadoras",
        " osa",
        "osa ",
        ", osb",
        " osb",
        "osb ",
        // disabled: "osc"
        "osf ",
        " osf",
        // disabled: "osu", "p."
        "phd",
        "ph d",
        "ph.d.",
        // disabled: "phd"
        "preface",
        // disabled: "q.", "r."
        "r.s.c.j.",
        "revised",
        "romans",
        "rscj",
        // disabled: "rsm", "s."
        "s.a.",
        " s.c.",
        // NOTE(review): this literal previously carried a stray line-break/control character
        // after the space; normalized to a plain trailing space - confirm.
        "s.c. ",
        "s.c.",
        "s.c.l.",
        "s.c.n.",
        "s.d.b.",
        "s.g.s.",
        "s.j.",
        "s.m.",
        "s.m.a.",
        "s.n.d.de n.",
        "s.n.j.m.",
        "s.p.",
        "s.s.",
        "s.s.c.",
        "s.s.s.",
        "s.t.d.",
        // disabled: "sa"
        ", sc",
        " sc ",
        "sch p",
        "sch.p.",
        // disabled: "scl", "scn", "sdb", "sgs"
        " sj,",
        ", sj",
        // disabled: "sm", "sma"
        "snd de n",
        "sndden",
        "snjm",
        // disabled: "sp", "sr", "sr."
        ", ss",
        "ssc",
        "sss",
        "std",
        "study",
        "study guide author:",
        // disabled: "t."
        "text edited",
        " the ",
        "traductora",
        "translated by",
        "translated",
        "translation",
        "translators",
        "translator",
        // disabled: "u.", "v.", "w."
        " with an ",
        " with a ",
        " with ",
        "written"
        // disabled: "x.", "y", "y.", "z."
    };
}
/// <summary>
/// Builds a single assembled query from free-text input, keeping double-quoted phrases together.
/// </summary>
/// <remarks>
/// The input is split on spaces. A word outside a phrase is appended to the
/// <c>QueryAssembler</c> on its own. A word that starts with a double quote opens a phrase;
/// the phrase collects every following word up to and including the first word that ends
/// with a double quote, and is then appended as one unit. A phrase that is still open at the
/// end of the input is closed there and appended, rather than being dropped.
/// </remarks>
/// <param name="query">Raw search text. A null value is treated as an empty string.</param>
/// <returns>The result of <c>QueryAssembler.Assemble()</c> after all terms were appended.</returns>
public static string Prepare(string query)
{
    var tokens = (query ?? String.Empty).Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    var queryAssembler = new QueryAssembler();

    // Null while outside a quoted phrase; otherwise the words collected so far.
    string pendingPhrase = null;
    foreach (var token in tokens)
    {
        bool insidePhrase = pendingPhrase != null;
        if (!insidePhrase && !token.StartsWith("\"", StringComparison.Ordinal))
        {
            queryAssembler.Append(token);
            continue;
        }

        // Every word is prefixed with a space, so the phrase handed to the assembler keeps
        // the leading space it has always had.
        pendingPhrase = (pendingPhrase ?? String.Empty) + " " + token;
        if (token.EndsWith("\"", StringComparison.Ordinal))
        {
            queryAssembler.Append(pendingPhrase);
            pendingPhrase = null;
        }
    }

    // Unterminated quote: close the phrase at the end of the input so its words still
    // reach the assembler in the same quoted form as a properly closed phrase.
    if (pendingPhrase != null)
    {
        queryAssembler.Append(pendingPhrase + "\"");
    }

    return queryAssembler.Assemble();
}