コード例 #1
0
        /// <summary>
        /// Detects the clipping format of a .TXT preview and stores it in <c>options.SelectedFormat</c>.
        /// The first line of the preview is skipped; the second line is split on single spaces and the
        /// words at positions 1 and 2 are each wrapped in a <c>FormatType.KeyPositionLang</c> (word,
        /// position, language) and looked up through <c>FormatTypeStorage.GetFormat</c>.
        /// A match flagged as safe is selected and ends the search. A match not flagged as safe is
        /// selected provisionally and may be overridden by a safe match at the next position.
        /// Per the original author's notes, unsafe matches are base types identified by the word at
        /// position 1 and safe matches are subtypes identified by the word at position 2.
        /// If the preview is null/empty, has fewer than two lines, or no lookup succeeds, the currently
        /// selected format is left untouched.
        /// </summary>
        /// <param name="preview">Text sample of the clippings file, with lines separated by "\n".</param>
        /// <param name="language">Language value forwarded into every lookup key.</param>
        public void PickFormatType(string preview, string language)
        {
            // Positions 1 and 2 of the second line are the only ones probed.
            const int lastKeywordPosition = 2;

            if (string.IsNullOrEmpty(preview))
            {
                // Nothing to inspect; keep whatever format is already selected.
                return;
            }

            using (var lineReader = new StringReader(preview))
            {
                // Skip the first line; the keywords are read from the second one.
                lineReader.ReadLine();

                string line = lineReader.ReadLine();
                if (line == null)
                {
                    // The preview has fewer than two lines, so there is nothing to match against.
                    return;
                }

                string[] words = line.Split(' ');

                // The bound on words.Length protects against short lines that lack one or both keywords.
                for (int position = 1; position <= lastKeywordPosition && position < words.Length; position++)
                {
                    var keyPosition = new FormatType.KeyPositionLang(words[position], position, language);

                    bool isSafe;
                    FormatType format = FormatTypeStorage.GetFormat(keyPosition, out isSafe);

                    if (format == null)
                    {
                        continue;
                    }

                    options.SelectedFormat = format;

                    if (isSafe)
                    {
                        // A safe match is final; stop probing further positions.
                        break;
                    }
                }

                // NOTE: only the second line of the preview is inspected. If recognition fails, this could
                // be extended to try the corresponding line of the following clippings instead.
            }
        }
コード例 #2
0
        /// <summary>
        /// Sets up the controller: assigns the English and Spanish parsers, creates new options and
        /// clipping storage objects, and loads both parsers' format lists into FormatTypeStorage
        /// before generating the storage.
        /// </summary>
        public ParserController()
        {
            // Parsers are taken from ParserENG.MyParserENG and ParserSPA.MyParserSPA.
            parserENG = ParserENG.MyParserENG;
            parserSPA = ParserSPA.MyParserSPA;

            // New options and clipping storage objects.
            options = new ParserOptions();
            ClippingStorage = new ClippingStorage();

            // Feed each language's formats into FormatTypeStorage (English first, then Spanish),
            // then build the FormatType storage from the populated list.
            var englishFormats = parserENG.engFormats;
            FormatTypeStorage.PopulateFormatList(englishFormats);

            var spanishFormats = parserSPA.spaFormats;
            FormatTypeStorage.PopulateFormatList(spanishFormats);

            FormatTypeStorage.GenerateFormatTypeStorage();
        }