Esempio n. 1
0
 /// <summary>
 /// Builds a database record from a text summary and its accompanying structures.
 /// A fresh <c>ObjectId</c> is generated for <c>_id</c>; every other field is copied
 /// from <paramref name="textAndStruct"/>.
 /// </summary>
 /// <param name="textAndStruct">
 /// Wrapper holding the summary (copied field by field) together with the
 /// lengths and frequency structures (read directly from the wrapper).
 /// </param>
 public DBTextSummary(TextSummaryAndStructures textAndStruct)
 {
     // The record key is generated before the input is touched.
     _id = ObjectId.GenerateNewId();

     var source = textAndStruct.summary;

     // Identity and bookkeeping.
     id = source.id;
     meta = source.meta;
     idType = source.idType;

     // Counters and timing.
     wordsCount = source.wordsCount;
     uniqueWordsCount = source.uniqueWordsCount;
     summaryDurationSec = source.summaryDurationSec;
     createdAt = source.createdAt;

     // These two live on the wrapper, not on the summary.
     lengthsStructure = textAndStruct.lengthsStructure;
     frequencyStructure = textAndStruct.frequencyStructure;

     // Extremes reported by the summary.
     mostFrequentWord = source.mostFrequentWord;
     leastFrequentWord = source.leastFrequentWord;
     longestWord = source.longestWord;
     shortestWord = source.shortestWord;
 }
Esempio n. 2
0
        /// <summary>
        /// Runs the inventory pipeline on a piece of text or on the document behind a link.
        ///
        /// When <paramref name="textSrc"/> consists solely of an http(s) or "www." address,
        /// the document is fetched through the text preprocessor; otherwise the input itself
        /// is sanitized. The resulting text is handed to the inventory algorithm, its output
        /// structures are serialized, and everything is packed together with meta data
        /// into the returned wrapper.
        /// </summary>
        /// <param name="textSrc">The text to be processed, or an http link to a document that must be processed.</param>
        /// <returns>
        /// Awaitable object that eventually resolves into a data structure containing the summary
        /// produced by the inventory algorithm plus the serialized frequency and lengths structures.
        /// </returns>
        public async Task <TextSummaryAndStructures> Handle(string textSrc)
        {
            // The hash of the raw input is the summary id (original author's note: it is
            // used as the id in the db to cache the result of processing this input).
            string hashId = GetHashString(textSrc);

            // The whole input must be a single whitespace-free token starting with
            // http://, https:// or www. to be treated as a link.
            bool isLink = Regex.IsMatch(textSrc, @"^(((https?:\/\/)|(www\.))[^\s]+)$", RegexOptions.IgnoreCase);

            string corpus;
            string meta;

            if (!isLink)
            {
                // Plain text: sanitize it and keep at most its first 50 characters as meta.
                corpus = textPreprocessor.Sanitize(textSrc);

                int previewLength = textSrc.Length;
                if (previewLength > 50)
                {
                    previewLength = 50;
                }

                meta = textSrc.Substring(0, previewLength);
            }
            else
            {
                // Link: fetch the document and keep the link itself as meta.
                corpus = await textPreprocessor.FetchCorpus(textSrc, true);
                meta   = textSrc;
            }

            // Outputs of the inventory algorithm.
            IDictionary <string, int> frequencyByToken;
            IDictionary <int, LinkedList <string> > tokensByLength;
            IInventoryItem mostFrequent;
            IInventoryItem longest;
            IInventoryItem leastFrequent;
            IInventoryItem shortest;
            int            tokenTotal;

            double elapsedMs = textInventorier.Process(
                corpus,
                out frequencyByToken,
                out tokensByLength,
                out mostFrequent,
                out longest,
                out leastFrequent,
                out shortest,
                out tokenTotal);

            // String form of both structures for the db.
            string frequencyPayload;
            string lengthsPayload;

            textInventorier.Serialize(frequencyByToken, tokensByLength, out frequencyPayload, out lengthsPayload);

            // The summary carries the duration in seconds and an empty list of query results.
            double elapsedSec = elapsedMs / 1000;

            TextSummary summary = new TextSummary(
                hashId,
                meta,
                tokenTotal,
                frequencyByToken.Count,
                mostFrequent,
                longest,
                leastFrequent,
                shortest,
                elapsedSec,
                new List <IQueryResult>());

            // Wrap the summary together with the serialized structures.
            TextSummaryAndStructures result = new TextSummaryAndStructures
            {
                frequencyStructure = frequencyPayload,
                lengthsStructure   = lengthsPayload,
                summary            = summary
            };

            return result;
        }