/// <summary>
/// Initializes a new instance from a text summary and its accompanying structures.
/// A fresh <c>ObjectId</c> is generated for <c>_id</c>; every other member is copied
/// from <paramref name="textAndStruct"/>.
/// </summary>
/// <param name="textAndStruct">Wrapper providing the summary plus the lengths and frequency structures to copy.</param>
public DBTextSummary(TextSummaryAndStructures textAndStruct)
{
    // Each instance gets its own newly generated object id.
    _id = ObjectId.GenerateNewId();

    var src = textAndStruct.summary;

    // Identification and meta data.
    id = src.id;
    meta = src.meta;
    idType = src.idType;

    // Counters and timing.
    wordsCount = src.wordsCount;
    uniqueWordsCount = src.uniqueWordsCount;
    summaryDurationSec = src.summaryDurationSec;
    createdAt = src.createdAt;

    // The structures are taken from the wrapper itself, not from the summary.
    lengthsStructure = textAndStruct.lengthsStructure;
    frequencyStructure = textAndStruct.frequencyStructure;

    // Extreme words found in the text.
    mostFrequentWord = src.mostFrequentWord;
    leastFrequentWord = src.leastFrequentWord;
    longestWord = src.longestWord;
    shortestWord = src.shortestWord;
}
/// <summary>
/// Produces the inventory summary for a piece of text, or for the document behind a link.
///
/// When <paramref name="textSrc"/> consists of a single http(s)/www link, the corpus is obtained
/// through the text pre-processor's <c>FetchCorpus</c>; otherwise the input itself is sanitized.
/// The resulting text is submitted to the inventory algorithm, and its output is packed,
/// together with meta data and the serialized frequency/lengths structures, into the returned wrapper.
/// </summary>
/// <param name="textSrc">The text to be processed, or a http link to a document that must be processed</param>
/// <returns>Awaitable object that eventually resolves into a data structure that contains the summary of the inventory algorithm</returns>
public async Task<TextSummaryAndStructures> Handle(string textSrc)
{
    const int metaPreviewLength = 50;
    const string linkPattern = @"^(((https?:\/\/)|(www\.))[^\s]+)$";

    // The hash is used as id in the db to cache the result of processing the input text.
    string inputHash = GetHashString(textSrc);

    // Obtain the sanitized corpus and a short meta label describing where it came from.
    string corpus;
    string metaLabel;
    bool isLink = Regex.IsMatch(textSrc, linkPattern, RegexOptions.IgnoreCase);
    if (!isLink)
    {
        corpus = textPreprocessor.Sanitize(textSrc);

        // For raw text the meta label is at most the first 50 characters of the input.
        int previewLength = metaPreviewLength;
        if (textSrc.Length < metaPreviewLength)
        {
            previewLength = textSrc.Length;
        }
        metaLabel = textSrc.Substring(0, previewLength);
    }
    else
    {
        corpus = await textPreprocessor.FetchCorpus(textSrc, true);

        // For a link the meta label is the link itself.
        metaLabel = textSrc;
    }

    // Run the inventory algorithm; all results come back through out parameters.
    IDictionary<string, int> frequencies;
    IDictionary<int, LinkedList<string>> wordsByLength;
    IInventoryItem mostFrequent;
    IInventoryItem longest;
    IInventoryItem leastFrequent;
    IInventoryItem shortest;
    int corpusLength;
    double elapsedMs = textInventorier.Process(
        corpus,
        out frequencies,
        out wordsByLength,
        out mostFrequent,
        out longest,
        out leastFrequent,
        out shortest,
        out corpusLength);

    // Serialize both structures for the db.
    string serializedFrequencies;
    string serializedLengths;
    textInventorier.Serialize(frequencies, wordsByLength, out serializedFrequencies, out serializedLengths);

    // Assemble the summary; the duration is passed on divided by 1000 (milliseconds to seconds).
    TextSummary summary = new TextSummary(
        inputHash,
        metaLabel,
        corpusLength,
        frequencies.Count,
        mostFrequent,
        longest,
        leastFrequent,
        shortest,
        elapsedMs / 1000,
        new List<IQueryResult>() // results
    );

    // Wrap the summary together with the serialized structures.
    return new TextSummaryAndStructures
    {
        frequencyStructure = serializedFrequencies,
        lengthsStructure = serializedLengths,
        summary = summary
    };
}