Exemplo n.º 1
0
        /// <summary>
        /// Detects the format of a clippings .TXT file from a short text preview and stores it in
        /// <c>options.SelectedFormat</c>.
        /// </summary>
        /// <remarks>
        /// Only the second line of the preview is inspected. The line is split on single spaces and the
        /// words at index 1 and index 2 are each wrapped, together with their position and the given
        /// language, in a <c>FormatType.KeyPositionLang</c> and passed to <c>FormatTypeStorage.GetFormat</c>.
        /// A match reported as not safe is stored but the search continues; a match reported as safe is
        /// stored and ends the search. Per the original author's notes, position 1 keywords identify base
        /// formats and position 2 keywords identify subtypes.
        ///
        /// If the preview is null or empty, has fewer than two lines, or the second line has no word at
        /// the required index, the corresponding lookup is skipped and <c>options.SelectedFormat</c> is
        /// left as it was.
        ///
        /// IMPORTANT: in its current state the program only checks the second line and infers the format
        /// from there. The code could be extended so that, when this first attempt fails, the second line
        /// of the next clipping (or successive lines) is read as well.
        /// </remarks>
        /// <param name="preview">Text sample containing a few clippings, with lines separated by "\n".</param>
        /// <param name="language">Language passed along with each keyword when looking up the format.</param>
        public void PickFormatType(string preview, string language)
        {
            if (string.IsNullOrEmpty(preview))
            {
                return; // Nothing to inspect.
            }

            using (var lineReader = new StringReader(preview))
            {
                lineReader.ReadLine(); // Skip the first line; the critical keywords are on the second one.

                string line = lineReader.ReadLine();
                if (line == null)
                {
                    return; // Preview has a single line, so there are no keywords to read.
                }

                string[] split = line.Split(' ');

                // Try the word at index 1 first, then the word at index 2. The bound on split.Length
                // skips positions the line does not have instead of throwing on a short line.
                for (int position = 1; position <= 2 && position < split.Length; position++)
                {
                    var keyPosition = new FormatType.KeyPositionLang(split[position], position, language);

                    bool isSafe;
                    FormatType format = FormatTypeStorage.GetFormat(keyPosition, out isSafe);

                    if (format == null)
                    {
                        continue;
                    }

                    options.SelectedFormat = format;

                    if (isSafe)
                    {
                        break; // A safe match is final; no need to look further.
                    }
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Looks up the format registered in <c>FormatDictionary</c> under a key whose keyword, position
        /// and language all equal those of <paramref name="KeyPosition"/>.
        /// </summary>
        /// <param name="KeyPosition">Keyword, position and language to search for.</param>
        /// <param name="isSafe">
        /// Set to true only when the match was found for position 2; false for a position 1 match or
        /// when nothing matched.
        /// </param>
        /// <returns>
        /// For position 2, the value of the first matching key. For position 1, the value of the last
        /// matching key. For any other position, or when no key matches, null.
        /// </returns>
        public static FormatType GetFormat(FormatType.KeyPositionLang KeyPosition, out bool isSafe)
        {
            var wantedKeyword  = KeyPosition.Keyword;
            var wantedPosition = KeyPosition.Position;
            var wantedLanguage = KeyPosition.Language;

            // Keywords in position 1 catch base formats, while position 2 keywords are subtypes (safe).
            FormatType baseCandidate = null;

            foreach (var candidateKey in FormatDictionary.Keys)
            {
                var candidateKeyword  = candidateKey.Keyword;
                var candidatePosition = candidateKey.Position;
                var candidateLanguage = candidateKey.Language;

                bool sameKey = (wantedKeyword == candidateKeyword) &&
                               (wantedPosition == candidatePosition) &&
                               wantedLanguage == candidateLanguage;

                if (!sameKey)
                {
                    continue;
                }

                if (wantedPosition == 2)
                {
                    // A subtype match is definitive: report it immediately.
                    isSafe = true;
                    return FormatDictionary[candidateKey];
                }

                if (wantedPosition == 1)
                {
                    // Remember the base format but keep scanning; a later match overrides it.
                    baseCandidate = FormatDictionary[candidateKey];
                }
            }

            isSafe = false;
            return baseCandidate;
        }