/// <summary>
/// Detects the FormatType of a clippings .TXT file from a short preview and stores it in
/// <c>options.SelectedFormat</c>.
/// </summary>
/// <remarks>
/// Only the second line of the preview is inspected. That line is split on single spaces and the
/// words at positions 1 and 2 are each wrapped, together with <paramref name="language"/>, in a
/// <c>FormatType.KeyPositionLang</c> and looked up through <c>FormatTypeStorage.GetFormat</c>.
/// A match reported as not safe (a base type, defined by omission) is stored but the search goes on
/// to the next position; a match reported as safe (a subtype) is stored and ends the search.
/// If <paramref name="preview"/> is null or empty, has no second line, or that line has no word at
/// the inspected positions, the method returns without touching <c>options.SelectedFormat</c>,
/// which is the same outcome as when no keyword is recognised.
/// </remarks>
/// <param name="preview">Text sample holding a small number of clippings, lines separated by "\n".</param>
/// <param name="language">Language attached to every keyword/position lookup key.</param>
public void PickFormatType(string preview, string language)
{
    if (string.IsNullOrEmpty(preview))
    {
        return;
    }

    using (var lineReader = new StringReader(preview))
    {
        // Skip the first line; the critical keywords live on the second one.
        lineReader.ReadLine();

        string line = lineReader.ReadLine();
        if (line == null)
        {
            // Preview has fewer than two lines: nothing to detect from.
            return;
        }

        string[] words = line.Split(' ');

        // Keyword positions probed on the line, in order. The bound check on words.Length
        // prevents indexing past the end of short lines.
        const int firstKeywordPosition = 1;
        const int lastKeywordPosition = 2;

        for (int position = firstKeywordPosition;
             position <= lastKeywordPosition && position < words.Length;
             position++)
        {
            var keyPosition = new FormatType.KeyPositionLang(words[position], position, language);

            bool isSafe = false;
            FormatType format = FormatTypeStorage.GetFormat(keyPosition, out isSafe);
            if (format == null)
            {
                continue;
            }

            // Every match is recorded; only a safe match stops the search, so a later
            // safe subtype can still replace an earlier unsafe base type.
            options.SelectedFormat = format;
            if (isSafe)
            {
                break;
            }
        }

        /* IMPORTANT: In its current state, the program just checks the second line and infers the
         * FormatType from there. The code is easy to extend so that, if this first recognition
         * attempt fails, the second line of the next clipping (or successive lines) is read.
         * See the use of line++, separator and ReadLine() in the parser for inspiration. */
    }
}
/// <summary>
/// Sets up the controller: picks up the English and Spanish parser instances, creates fresh
/// options and clipping storage, and registers both parsers' format lists with
/// <c>FormatTypeStorage</c> before asking it to generate its storage.
/// </summary>
public ParserController()
{
    // Parser instances exposed through each parser type's static accessor.
    parserENG = ParserENG.MyParserENG;
    parserSPA = ParserSPA.MyParserSPA;

    // Per-controller state.
    options = new ParserOptions();
    ClippingStorage = new ClippingStorage();

    // Feed both languages' format lists to the storage, then let it generate the
    // FormatType dictionary used for later lookups.
    FormatTypeStorage.PopulateFormatList(parserENG.engFormats);
    FormatTypeStorage.PopulateFormatList(parserSPA.spaFormats);
    FormatTypeStorage.GenerateFormatTypeStorage();
}