/// <summary>
/// Invokes a proxy method on the given connection, optionally falling back to a retry method.
/// </summary>
/// <remarks>
/// Candidate selection:
/// when <paramref name="retryMethod"/> is null only <paramref name="initialMethod"/> is tried;
/// when <paramref name="retryMethod"/> is supplied and <c>connection.CanUseKatmaiMethods</c> is false
/// only <paramref name="retryMethod"/> is tried;
/// otherwise <paramref name="initialMethod"/> is tried first and <paramref name="retryMethod"/> second.
/// </remarks>
/// <typeparam name="TReturn">Type returned by the proxy method.</typeparam>
/// <param name="connection">Connection the call is made on; must not be null.</param>
/// <param name="initialMethod">Preferred method; must not be null.</param>
/// <param name="retryMethod">Optional fallback method; may be null.</param>
/// <returns>The value returned by the first candidate that completes without throwing.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="connection"/> or <paramref name="initialMethod"/> is null.
/// </exception>
/// <exception cref="MissingEndpointException">
/// A candidate threw an <see cref="InvalidOperationException"/> (it is wrapped as the inner exception).
/// </exception>
internal static TReturn Execute <TReturn>(RSExecutionConnection connection, ProxyMethod <TReturn> initialMethod, ProxyMethod <TReturn> retryMethod)
 {
     // Validate first: the monitored scope below reads initialMethod.MethodName, so checking
     // afterwards would surface a NullReferenceException instead of ArgumentNullException.
     if (connection == null)
     {
         throw new ArgumentNullException("connection");
     }
     if (initialMethod == null)
     {
         throw new ArgumentNullException("initialMethod");
     }

     using (MonitoredScope.NewConcat("ProxyMethodInvocation.Execute - Method : ", initialMethod.MethodName))
     {
         ProxyMethod <TReturn>[] candidates;
         if (retryMethod == null)
         {
             candidates = new ProxyMethod <TReturn>[] { initialMethod };
         }
         else if (!connection.CanUseKatmaiMethods)
         {
             candidates = new ProxyMethod <TReturn>[] { retryMethod };
         }
         else
         {
             candidates = new ProxyMethod <TReturn>[] { initialMethod, retryMethod };
         }

         for (int i = 0; i < candidates.Length; i++)
         {
             ProxyMethod <TReturn> candidate = candidates[i];
             try
             {
                 if (!string.IsNullOrEmpty(candidate.MethodName))
                 {
                     connection.SetConnectionSSLForMethod(candidate.MethodName);
                 }
                 return candidate.Method();
             }
             catch (FaultException e)
             {
                 // Only move on to the next candidate when one exists and the connection
                 // classifies this fault as eligible for a down-level retry.
                 bool hasFallback = i < candidates.Length - 1;
                 if (hasFallback && connection.CheckForDownlevelRetry(e))
                 {
                     connection.MarkAsFailedUsingKatmai();
                     continue;
                 }
                 connection.OnSoapException(e);
                 throw;
             }
             catch (WebException e2)
             {
                 // WebException derives from InvalidOperationException, so this clause must
                 // stay ahead of the generic InvalidOperationException handler.
                 MissingEndpointException.ThrowIfEndpointMissing(e2);
                 throw;
             }
             catch (InvalidOperationException inner)
             {
                 throw new MissingEndpointException(inner);
             }
         }

         // Every loop iteration returns, throws, or continues to a later candidate, so this
         // line only exists to satisfy definite-return analysis.
         throw new InvalidOperationException("Failed to execute method");
     }
 }
Пример #2
0
        /// <summary>
        /// Lazily yields the string content of "text" elements read from the Wikipedia XML dump,
        /// skipping the first <c>programArgs.WikipediaStartArticle</c> elements and stopping after
        /// element number <c>programArgs.WikipediaEndArticle</c> or at the end of the file.
        /// </summary>
        /// <param name="programArgs">Supplies the start and end article indexes.</param>
        /// <returns>One string per "text" element in the requested range.</returns>
        public override IEnumerable <string> Filter(ProgramArguments programArgs)
        {
            string wikipediaPath       = @"C:\Users\haabu\Downloads\enwiki-latest-pages-articles.xml\enwiki-latest-pages-articles.xml";
            int    totalArticlesToRead = programArgs.WikipediaEndArticle - programArgs.WikipediaStartArticle;

            // XmlReader.Create(Stream) does not close the stream when the reader is disposed
            // (XmlReaderSettings.CloseInput defaults to false), so the stream gets its own using.
            // The dump is only read, hence FileAccess.Read instead of the default read/write access.
            using (FileStream dumpStream = new FileStream(wikipediaPath, FileMode.Open, FileAccess.Read))
            using (XmlReader sr = XmlReader.Create(dumpStream))
            {
                using (new MonitoredScope("Skipping wikipedia articles"))
                {
                    for (int skipped = 0; skipped < programArgs.WikipediaStartArticle; skipped++)
                    {
                        if (!sr.ReadToFollowing("text"))
                        {
                            break;
                        }
                    }
                }

                for (int i = programArgs.WikipediaStartArticle; i < programArgs.WikipediaEndArticle; i++)
                {
                    if (!sr.ReadToFollowing("text"))
                    {
                        // End of input: further ReadToFollowing calls would also return false.
                        break;
                    }

                    // Step from the element's start tag onto its content, then read it as text.
                    sr.ReadStartElement();
                    string pageContents = sr.ReadContentAsString();

                    Logger.Log("Read article " + (i - programArgs.WikipediaStartArticle + 1).ToString() + "/" + totalArticlesToRead.ToString());

                    yield return pageContents;
                }
            }
        }
 /// <summary>
 /// Invokes one of up to three version-specific proxy methods on the given connection,
 /// moving on to the next candidate when the connection reports that a down-level retry applies.
 /// </summary>
 /// <remarks>
 /// Candidate selection:
 /// when <paramref name="yukonMethod"/> is supplied and both <c>connection.CanUseKatmaiMethods</c>
 /// and <c>connection.CanUseSql16Methods</c> are false, only <paramref name="yukonMethod"/> is tried;
 /// otherwise, when <paramref name="yukonMethod"/> is supplied or <c>connection.CanUseSql16Methods</c>
 /// is true, the order is <paramref name="sql16Method"/>, <paramref name="katmaiMethod"/>,
 /// <paramref name="yukonMethod"/>;
 /// otherwise only <paramref name="katmaiMethod"/> is tried.
 /// </remarks>
 /// <typeparam name="TReturn">Type returned by the proxy method.</typeparam>
 /// <param name="connection">Connection the call is made on; must not be null.</param>
 /// <param name="sql16Method">First candidate of the three-method sequence.</param>
 /// <param name="katmaiMethod">Second candidate of the three-method sequence; must not be null.</param>
 /// <param name="yukonMethod">Last candidate; may be null.</param>
 /// <returns>The value returned by the first candidate that completes without throwing.</returns>
 /// <exception cref="ArgumentNullException">
 /// <paramref name="connection"/> or <paramref name="katmaiMethod"/> is null.
 /// </exception>
 /// <exception cref="MissingEndpointException">
 /// A candidate threw an <see cref="InvalidOperationException"/> (it is wrapped as the inner exception).
 /// </exception>
 internal static TReturn Execute <TReturn>(RSExecutionConnection connection, ProxyMethod <TReturn> sql16Method, ProxyMethod <TReturn> katmaiMethod, ProxyMethod <TReturn> yukonMethod)
 {
     // Validate first: the monitored scope below reads katmaiMethod.MethodName, so checking
     // afterwards would surface a NullReferenceException instead of ArgumentNullException.
     if (connection == null)
     {
         throw new ArgumentNullException("connection");
     }
     if (katmaiMethod == null)
     {
         // Report the real parameter name; this method has no "initialMethod" parameter.
         throw new ArgumentNullException("katmaiMethod");
     }

     using (MonitoredScope.NewConcat("ProxyMethodInvocation.Execute - Method : ", katmaiMethod.MethodName))
     {
         bool hasYukonMethod = yukonMethod != null;

         // NOTE(review): sql16Method is never null-checked, and the three-entry list can also
         // carry a null yukonMethod when CanUseSql16Methods is true - confirm callers never
         // rely on reaching a null entry.
         ProxyMethod <TReturn>[] candidates;
         if (hasYukonMethod && !connection.CanUseKatmaiMethods && !connection.CanUseSql16Methods)
         {
             candidates = new ProxyMethod <TReturn>[] { yukonMethod };
         }
         else if (hasYukonMethod || connection.CanUseSql16Methods)
         {
             candidates = new ProxyMethod <TReturn>[] { sql16Method, katmaiMethod, yukonMethod };
         }
         else
         {
             candidates = new ProxyMethod <TReturn>[] { katmaiMethod };
         }

         for (int i = 0; i < candidates.Length; i++)
         {
             ProxyMethod <TReturn> candidate = candidates[i];
             try
             {
                 if (!string.IsNullOrEmpty(candidate.MethodName))
                 {
                     connection.SetConnectionSSLForMethod(candidate.MethodName);
                 }
                 return candidate.Method();
             }
             catch (FaultException e)
             {
                 // Only move on to the next candidate when one exists and the connection
                 // classifies this fault as eligible for a down-level retry.
                 bool hasFallback = i < candidates.Length - 1;
                 if (hasFallback && connection.CheckForDownlevelRetry(e))
                 {
                     // Record the failure against whichever endpoint version the connection
                     // currently reports before trying the next candidate.
                     if (connection.m_endpointVersion == EndpointVersion.Katmai)
                     {
                         connection.MarkAsFailedUsingKatmai();
                     }
                     else if (connection.m_endpointVersion == EndpointVersion.Sql16)
                     {
                         connection.MarkAsFailedUsingSql16();
                     }
                     continue;
                 }
                 connection.OnSoapException(e);
                 throw;
             }
             catch (WebException e2)
             {
                 // WebException derives from InvalidOperationException, so this clause must
                 // stay ahead of the generic InvalidOperationException handler.
                 MissingEndpointException.ThrowIfEndpointMissing(e2);
                 throw;
             }
             catch (InvalidOperationException inner)
             {
                 throw new MissingEndpointException(inner);
             }
         }

         // Every loop iteration returns, throws, or continues to a later candidate, so this
         // line only exists to satisfy definite-return analysis.
         throw new InvalidOperationException("Failed to execute method");
     }
 }
Пример #4
0
        /// <summary>
        /// Feeds every document into <paramref name="existingMatrix"/> in parallel: each document is
        /// broken into words, stemmed, stripped of stop words, and added to the matrix.
        /// The matrix and the monitored-scope statistics are then written to two new files,
        /// even when processing fails, because the save runs in a finally block.
        /// </summary>
        /// <param name="existingMatrix">Matrix that is updated in place.</param>
        /// <param name="documents">Document texts to learn from.</param>
        /// <returns>The same <paramref name="existingMatrix"/> instance.</returns>
        public CorrelationMatrix UpdateCorrelationMatrix(CorrelationMatrix existingMatrix, IEnumerable <string> documents)
        {
            StopWordRemover stopwordRemover = new StopWordRemover();
            SentenceBreaker sentenceBreaker = SentenceBreaker.Instance;

            // Starts at zero so the first Interlocked.Increment reports document number 1.
            int documentsStarted = 0;

            try
            {
                // NOTE(review): existingMatrix, stopwordRemover and sentenceBreaker are shared by
                // all worker threads - confirm these types are safe for concurrent use.
                Parallel.ForEach(documents, (documentContents, loopState) =>
                {
                    int documentNumber = Interlocked.Increment(ref documentsStarted);
                    using (new MonitoredScope("Learning from a document No. " + documentNumber.ToString()))
                    {
                        SStemmer stemmer = new SStemmer();

                        string[] words = sentenceBreaker.BreakIntoWords(documentContents);
                        words = stemmer.StemWords(words);
                        words = stopwordRemover.RemoveStopWords(words);

                        existingMatrix.Add(words);
                    }

                    Logger.Log("Finished document number: " + documentNumber.ToString());

                    // Stop scheduling further documents once the vocabulary exceeds 100000 words.
                    if (existingMatrix.Words.Count > 100000)
                    {
                        loopState.Break();
                    }
                });
            }
            finally
            {
                Logger.Log("Unique words: " + existingMatrix.WordsMetadata.Count + ", Pairs: " + existingMatrix.Matrix.Count);

                string filename = "autorss_" + Guid.NewGuid().ToString();
                using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
                {
                    new CorrelationMatrixBinarySerializer().Serialize(fs, existingMatrix);
                }

                Logger.Log("Correlation Matrix saved to file: " + filename);

                filename = "autorss_Scopes_" + Guid.NewGuid().ToString();
                using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
                {
                    MonitoredScope.SerializeStatistics(fs);
                }

                Logger.Log("MonitoredScopes saved to file: " + filename);
            }

            return existingMatrix;
        }
Пример #5
0
        /// <summary>
        /// Reads "text" elements from the Wikipedia XML dump for the article range given in
        /// <paramref name="programArgs"/>, runs each through word breaking, stop-word removal and
        /// stemming, accumulates the result in a new correlation matrix, and finally writes the
        /// matrix and the monitored-scope statistics to two new files.
        /// </summary>
        /// <param name="programArgs">Supplies the start and end article indexes.</param>
        private static void CalculateCorrelationFromWikipediaDB(ProgramArguments programArgs)
        {
            WordBreaker       wordBreaker       = new WordBreaker();
            StopWordRemover   stopwordRemover   = new StopWordRemover();
            SStemmer          stemmer           = new SStemmer();
            CorrelationMatrix correlationMatrix = new CorrelationMatrix();

            string wikipediaPath = @"C:\Users\haabu\Downloads\enwiki-latest-pages-articles.xml\enwiki-latest-pages-articles.xml";

            // XmlReader.Create(Stream) does not close the stream when the reader is disposed
            // (XmlReaderSettings.CloseInput defaults to false), so the stream gets its own using.
            // The dump is only read, hence FileAccess.Read instead of the default read/write access.
            using (FileStream dumpStream = new FileStream(wikipediaPath, FileMode.Open, FileAccess.Read))
            using (XmlReader sr = XmlReader.Create(dumpStream))
            {
                for (int skipped = 0; skipped < programArgs.WikipediaStartArticle; skipped++)
                {
                    if (!sr.ReadToFollowing("text"))
                    {
                        break;
                    }
                }

                for (int i = programArgs.WikipediaStartArticle; i < programArgs.WikipediaEndArticle; i++)
                {
                    if (!sr.ReadToFollowing("text"))
                    {
                        // End of input: further ReadToFollowing calls would also return false.
                        break;
                    }

                    // Step from the element's start tag onto its content, then read it as text.
                    sr.ReadStartElement();
                    string pageContents = sr.ReadContentAsString();

                    string[] words = wordBreaker.BreakParagraph(pageContents);
                    words = stopwordRemover.RemoveStopWords(words);
                    words = stemmer.StemWords(words);

                    correlationMatrix.Add(words);

                    Logger.Log("Finished document number: " + (i + 1).ToString());
                }
            }

            string filename = "autorss_" + Guid.NewGuid().ToString();

            using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
            {
                // NOTE(review): BinaryFormatter is obsolete and unsafe for deserializing untrusted
                // data; kept here so the output file format does not change. Consider migrating
                // to a dedicated serializer.
                BinaryFormatter formatter = new BinaryFormatter();
                formatter.Serialize(fs, correlationMatrix);
            }

            Logger.Log("Saved to file: " + filename);

            filename = "autorss_Scopes_" + Guid.NewGuid().ToString();
            using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
            {
                MonitoredScope.SerializeStatistics(fs);
            }

            Logger.Log("Saved to file: " + filename);
        }