/// <summary>
/// Detects the clipping format of a .TXT preview and stores it in <c>options.SelectedFormat</c>.
/// The preview is a text sample holding a small number of clippings separated by "\n". Only the
/// second line of the preview is inspected: it is split on spaces and the words at index 1 and
/// index 2 are looked up, together with <paramref name="language"/>, through
/// <c>FormatTypeStorage.GetFormat</c>.
/// A hit on the word at index 1 is reported as a base (unsafe) format; a hit on the word at
/// index 2 is reported as a subtype (safe) format and ends the search. An unsafe hit is still
/// stored, so that it remains selected if no safe hit follows.
/// When the preview is null/empty, has fewer than two lines, or no keyword is recognised,
/// <c>options.SelectedFormat</c> is left untouched.
/// </summary>
/// <param name="preview">Text sample of the clippings file, lines separated by "\n".</param>
/// <param name="language">Language tag attached to every keyword lookup.</param>
public void PickFormatType(string preview, string language)
{
    // Nothing to inspect: keep whatever format is currently selected instead of crashing.
    if (string.IsNullOrEmpty(preview))
    {
        return;
    }

    using (var lineReader = new StringReader(preview))
    {
        // Skip the first line; the critical keywords live on the second one.
        lineReader.ReadLine();

        string line = lineReader.ReadLine();
        if (line == null)
        {
            // The preview has a single line, so there are no keywords to read.
            return;
        }

        string[] words = line.Split(' ');

        // Probe position 1 first (base format), then position 2 (subtype). Positions that do
        // not exist on a short line are simply not probed.
        for (int position = 1; position <= 2 && position < words.Length; position++)
        {
            var keyPosition = new FormatType.KeyPositionLang(words[position], position, language);

            bool isSafe = false;
            FormatType format = FormatTypeStorage.GetFormat(keyPosition, out isSafe);
            if (format == null)
            {
                continue;
            }

            options.SelectedFormat = format;
            if (isSafe)
            {
                // A safe (subtype) match is definitive; stop looking.
                break;
            }
        }

        /* IMPORTANT: In its current state, the program only checks the second line and infers the
         * FormatType from there. The code is easy to extend so that, if this first recognition
         * attempt fails, the second line of the next clipping (or successive lines) is read.
         * See the use of line++, separator and ReadLine() in the parser for inspiration. */
    }
}
/// <summary>
/// Looks up the <c>FormatType</c> registered in <c>FormatDictionary</c> under a key whose keyword,
/// position and language all equal those of <paramref name="KeyPosition"/>.
/// Keywords in position 1 identify base formats (reported as not safe); keywords in position 2
/// identify subtypes (reported as safe). Any other position never produces a match.
/// </summary>
/// <param name="KeyPosition">Keyword, position and language read from the input.</param>
/// <param name="isSafe">
/// Set to true only when a position-2 key matched; false for a position-1 match or no match.
/// </param>
/// <returns>
/// The dictionary value of the first matching position-2 key, otherwise the value of the last
/// matching position-1 key, otherwise null.
/// </returns>
public static FormatType GetFormat(FormatType.KeyPositionLang KeyPosition, out bool isSafe)
{
    var wantedKeyword = KeyPosition.Keyword;
    var wantedPosition = KeyPosition.Position;
    var wantedLanguage = KeyPosition.Language;

    FormatType baseCandidate = null;

    foreach (var candidateKey in FormatDictionary.Keys)
    {
        bool sameKey = (wantedKeyword == candidateKey.Keyword)
            && (wantedPosition == candidateKey.Position)
            && wantedLanguage == candidateKey.Language;
        if (!sameKey)
        {
            continue;
        }

        if (wantedPosition == 2)
        {
            // Subtype keyword: the first hit is definitive.
            isSafe = true;
            return FormatDictionary[candidateKey];
        }

        if (wantedPosition == 1)
        {
            // Base-format keyword: remember it and keep scanning, so the last hit is returned.
            baseCandidate = FormatDictionary[candidateKey];
        }
    }

    isSafe = false;
    return baseCandidate;
}