Example #1
0
        /// <summary>
        /// Issues one search query per shingle concurrently, waits for all of them, and groups the
        /// returned URLs with the initial-document word indexes of the shingles that produced them.
        /// </summary>
        /// <param name="initialDocIndexes">
        /// Word indexes in the initial document; for shingle <c>i</c> the elements
        /// <c>[i .. i + Shingle.Lenght - 1]</c> are recorded.
        /// NOTE(review): presumably parallel to <paramref name="simplifiedWords"/> - confirm against the caller.
        /// </param>
        /// <param name="simplifiedWords">Words handed to <c>Shingle.QueryFromWords</c> to build the query for each shingle.</param>
        /// <returns>
        /// A map from URL to the sorted set of initial-document word indexes covered by every shingle
        /// whose search returned that URL. Empty when the input is too short to form a single shingle.
        /// </returns>
        /// <exception cref="AggregateException">Thrown by <c>Task.WaitAll</c> when any search task fails.</exception>
        private static Dictionary<string, SortedSet<int>> GetUrlToInitialDocWordsIndexesFromGoogleAPIParallel(int[] initialDocIndexes, string[] simplifiedWords)
        {
            var urlToInitialDocWordsIndexes = new Dictionary<string, SortedSet<int>>();

            // Number of shingle start positions. Without this guard a short input made the
            // array size below negative and the allocation threw OverflowException.
            int shingleTotal = initialDocIndexes.Length - Shingle.Lenght + 1;
            if (shingleTotal <= 0)
            {
                return urlToInitialDocWordsIndexes;
            }

            var searches = new Task<List<string>>[shingleTotal];
            for (int start = 0; start < shingleTotal; start++)
            {
                // Build the query outside the lambda so each task captures its own value.
                var query = Shingle.QueryFromWords(simplifiedWords, start);
                searches[start] = Task<List<string>>.Factory.StartNew(() => GoogleAPIManager.GetGoogleSearchResultsUrls(query));
            }

            Task.WaitAll(searches);

            for (int start = 0; start < shingleTotal; start++)
            {
                var urlsForShingle = searches[start].Result;

                // Initial-document indexes covered by this shingle.
                var shingleDocIndexes = new SortedSet<int>();
                for (int offset = 0; offset < Shingle.Lenght; offset++)
                {
                    shingleDocIndexes.Add(initialDocIndexes[start + offset]);
                }

                foreach (var url in urlsForShingle)
                {
                    if (urlToInitialDocWordsIndexes.TryGetValue(url, out SortedSet<int> knownIndexes))
                    {
                        knownIndexes.UnionWith(shingleDocIndexes);
                    }
                    else
                    {
                        // Copy so that later unions for this URL do not mutate the per-shingle set.
                        urlToInitialDocWordsIndexes.Add(url, new SortedSet<int>(shingleDocIndexes));
                    }
                }
            }

            return urlToInitialDocWordsIndexes;
        }
Example #2
0
        /// <summary>
        /// Builds the list of shingle texts for the given shingle start positions. A shingle that
        /// starts fewer than <c>Shingle.Lenght</c> words after the previous one is merged into the
        /// previous text by appending only its not-yet-covered words, separated by spaces.
        /// </summary>
        /// <param name="words">Source words; indexed by the values in <paramref name="wordsIndexes"/>.</param>
        /// <param name="wordsIndexes">
        /// Start positions of shingles within <paramref name="words"/>.
        /// NOTE(review): the merge logic assumes ascending order - confirm against the caller.
        /// </param>
        /// <returns>The shingle texts; empty when <paramref name="wordsIndexes"/> is empty.</returns>
        public static List<string> WordsIndexesToShingleTexts(string[] words, List<int> wordsIndexes)
        {
            var shingleTexts = new List<string>();

            // No start positions means no texts; previously wordsIndexes[0] threw
            // ArgumentOutOfRangeException here.
            if (wordsIndexes.Count == 0)
            {
                return shingleTexts;
            }

            shingleTexts.Add(Shingle.QueryFromWords(words, wordsIndexes[0]));

            for (int i = 1; i < wordsIndexes.Count; i++)
            {
                int start = wordsIndexes[i];

                // Number of words this shingle shares with the one starting at wordsIndexes[i - 1].
                int overlap = Shingle.Lenght - (start - wordsIndexes[i - 1]);

                if (overlap <= 0)
                {
                    // No shared words: start a new text.
                    shingleTexts.Add(Shingle.QueryFromWords(words, start));
                    continue;
                }

                // Shared words are already in the last text; append only the remaining tail.
                int last = shingleTexts.Count - 1;
                for (int j = overlap; j < Shingle.Lenght; j++)
                {
                    shingleTexts[last] += " " + words[start + j];
                }
            }

            return shingleTexts;
        }