Esempio n. 1
0
        /// <summary>
        /// Draws random entries from the index reader that matches <typeparamref name="T"/> and
        /// <paramref name="language"/> until one is found that is usable for training.
        /// </summary>
        /// <typeparam name="T">
        /// Entity type to return; must be <c>Document</c>, <c>Section</c>, <c>Paragraph</c> or <c>Sentence</c>.
        /// </typeparam>
        /// <param name="language">Key used to look up the per-language index reader.</param>
        /// <returns>
        /// A randomly chosen entity with at least 5 tokens and a recognition quality of at least 0.10.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// <typeparamref name="T"/> is not one of the supported entity types, or the selected index is empty.
        /// </exception>
        /// <remarks>
        /// Sampling repeats until a qualifying entity is drawn, so this call does not return when the
        /// selected index contains no entity that meets both thresholds.
        /// </remarks>
        public T GetRandomEntity <T>(string language) where T : class, IMarcellEntity
        {
            // The reader and the conversion depend only on T and language, so they are resolved once
            // up front instead of on every sampling attempt.
            IndexReader selectedReader;
            Func<LuceneNet.Documents.Document, T> convert;

            if (typeof(T) == typeof(Document))
            {
                selectedReader = m_documentReader[language];
                convert = doc => doc.ToDocument() as T;
            }
            else if (typeof(T) == typeof(Section))
            {
                selectedReader = m_sectionReader[language];
                convert = doc => doc.ToSection() as T;
            }
            else if (typeof(T) == typeof(Paragraph))
            {
                selectedReader = m_paragraphReader[language];
                convert = doc => doc.ToParagraph() as T;
            }
            else if (typeof(T) == typeof(Sentence))
            {
                selectedReader = m_sentenceReader[language];
                convert = doc => doc.ToSentence() as T;
            }
            else
            {
                throw new InvalidOperationException("Unsupported request received!");
            }

            // Reused across attempts; GetBytes overwrites all four bytes each time.
            byte[] rndBytes = new byte[4];
            T result;

            do
            {
                int maxDoc = selectedReader.MaxDoc;
                if (maxDoc <= 0)
                {
                    // Without this guard the reader would be asked for a document number that does not exist.
                    throw new InvalidOperationException("Cannot pick a random entity from an empty index!");
                }

                m_randomGenerator.GetBytes(rndBytes);
                uint sample = BitConverter.ToUInt32(rndBytes, 0);

                // Map the 32-bit sample onto [0, maxDoc - 1] using exact integer arithmetic:
                // sample / 2^32 lies in [0, 1), so every document number (including the last one)
                // is reachable and the result can never equal maxDoc. The product fits in 64 bits
                // because sample < 2^32 and maxDoc < 2^31.
                int docNo = (int)(((ulong)sample * (ulong)maxDoc) >> 32);

                result = convert(selectedReader.Document(docNo));
            } while (result.TokenCount < 5 || result.RecognitionQuality < 0.10); // Only use sufficiently large text chunks with enough recognized tokens for training

            return result;
        }