Inheritance: System.Data.Linq.DataContext
コード例 #1
0
ファイル: Engine.cs プロジェクト: selcukgun/bcherry
 /// <summary>
 /// Empties the engine's storage: every row of the Terms table and every row
 /// of the Scores table is queued for deletion and both are committed with a
 /// single SubmitChanges call.
 /// </summary>
 public static void ClearAll()
 {
     using (var context = new EngineDBDataContext())
     {
         var termTable  = context.Terms;
         var scoreTable = context.Scores;

         // Passing a table to its own DeleteAllOnSubmit marks all of its rows.
         termTable.DeleteAllOnSubmit(termTable);
         scoreTable.DeleteAllOnSubmit(scoreTable);

         context.SubmitChanges();
     }
 }
コード例 #2
0
ファイル: Engine.cs プロジェクト: selcukgun/bcherry
        /// <summary>
        /// Deletes everything stored for one story: its Term rows and every
        /// Score row in which it appears as either Story1ID or Story2ID.
        /// </summary>
        /// <param name="storyID">Identifier of the story to remove.</param>
        public static void RemoveStory(int storyID)
        {
            using (var context = new EngineDBDataContext())
            {
                var ownTerms = from t in context.Terms
                               where t.StoryID == storyID
                               select t;

                var touchingScores = from s in context.Scores
                                     where s.Story1ID == storyID || s.Story2ID == storyID
                                     select s;

                context.Terms.DeleteAllOnSubmit(ownTerms);
                context.Scores.DeleteAllOnSubmit(touchingScores);

                // Both deletions are committed together.
                context.SubmitChanges();
            }
        }
コード例 #3
0
ファイル: Engine.cs プロジェクト: selcukgun/bcherry
        /// <summary>
        /// Get one page of the IDs of the stories most similar to
        /// <paramref name="storyID"/>, best match first.
        /// </summary>
        /// <param name="storyID">Story whose neighbours are requested.</param>
        /// <param name="pageSize">Maximum number of IDs to return.</param>
        /// <param name="currentPageIndex">
        /// Zero-based page index; <c>currentPageIndex * pageSize</c> rows are skipped.
        /// </param>
        /// <returns>
        /// For each Score row that involves <paramref name="storyID"/>, the ID on the
        /// other side of the pair, ordered by descending score value.
        /// </returns>
        public static IList <int> GetSimilarStories(int storyID, int pageSize, int currentPageIndex)
        {
            using (var _db = new EngineDBDataContext())
            {
                // A score row may hold storyID in either column, so project the
                // opposite column. (Returning the matching column would always
                // hand back storyID itself.)
                var q = from score in _db.Scores
                        where score.Story1ID == storyID || score.Story2ID == storyID
                        orderby score.Value descending
                        select score.Story1ID == storyID ? score.Story2ID : score.Story1ID;

                return(q.Skip(currentPageIndex * pageSize).Take(pageSize).ToList());
            }
        }
コード例 #4
0
ファイル: Engine.cs プロジェクト: selcukgun/bcherry
        /// <summary>
        /// Rank stories against a set of search terms: Term rows whose text is
        /// one of <paramref name="searchTerms"/> are grouped per story, and the
        /// stories are ordered by the descending sum of the <c>tf</c> values of
        /// those matching rows. One page of story IDs is returned.
        /// </summary>
        /// <param name="searchTerms">Term texts to look for.</param>
        /// <param name="pageSize">Maximum number of IDs to return.</param>
        /// <param name="currentPageIndex">
        /// Zero-based page index; <c>currentPageIndex * pageSize</c> results are skipped.
        /// </param>
        /// <returns>IDs of the matching stories for the requested page.</returns>
        public static IList <int> SearchStories(string[] searchTerms, int pageSize, int currentPageIndex)
        {
            using (var context = new EngineDBDataContext())
            {
                var rankedStoryIds = context.Terms
                                            .Where(term => searchTerms.Contains(term.Text))
                                            .GroupBy(term => term.StoryID)
                                            .OrderByDescending(hits => hits.Sum(t => t.tf))
                                            .Select(hits => hits.Key);

                int offset = currentPageIndex * pageSize;
                var page   = rankedStoryIds.Skip(offset).Take(pageSize);

                return page.ToList();
            }
        }
コード例 #5
0
ファイル: Engine.cs プロジェクト: selcukgun/bcherry
        /// <summary>
        /// Runs ScoreStory once for every distinct StoryID that has at least
        /// one row in the Terms table.
        /// </summary>
        public static void RescoreAll()
        {
            using (var context = new EngineDBDataContext())
            {
                // Grouping by StoryID yields each story ID exactly once.
                var storyIds = context.Terms
                                      .GroupBy(term => term.StoryID)
                                      .Select(perStory => perStory.Key);

                foreach (int storyId in storyIds)
                {
                    ScoreStory(storyId);
                }
            }
        }
コード例 #6
0
ファイル: Engine.cs プロジェクト: selcukgun/bcherry
        /// <summary>
        /// Store a new story or replace the stored terms of an existing one,
        /// then rescore the story against the rest of the corpus.
        /// </summary>
        /// <param name="storyID">Identifier the terms are stored under.</param>
        /// <param name="text">Story text; handed to TermSet to obtain its terms.</param>
        public static void StoreStory(int storyID, string text)
        {
            using (var context = new EngineDBDataContext())
            {
                IList <Term> storyTerms = TermSet(text);

                // For each of this story's term texts: how many Term rows of OTHER
                // stories carry that text. The query is deferred, so it is sent to
                // the database each time it is consulted in the loop below.
                var documentFrequencies = context.Terms
                                                 .Where(row => row.StoryID != storyID)
                                                 .GroupBy(row => row.Text)
                                                 .Where(grp => storyTerms.Select(t => t.Text).Contains(grp.Key))
                                                 .Select(grp => new { TermVal = grp.Key, Count = grp.Count() });

                // Number of other stories that have terms, plus one for this story.
                double totalStories = (from row in context.Terms
                                       where row.StoryID != storyID
                                       group row by row.StoryID
                                       into perStory
                                       select perStory).Count() + 1;

                foreach (Term current in storyTerms)
                {
                    current.StoryID = storyID;

                    // Separate local so the lambda captures this iteration's term.
                    Term probe = current;

                    // need to refactor this because it's doing a massive SELECT * FROM tbl WHERE x IN ...
                    var hit = documentFrequencies.SingleOrDefault(
                        entry => entry.TermVal.ToLower() == probe.Text.ToLower());

                    // A term no other story contains falls back to a frequency of 1.
                    int documentFrequency = hit != null ? hit.Count : 1;

                    current.tfidf = current.tf * Math.Log(totalStories / documentFrequency);
                }

                // Replace whatever was stored for this story before.
                var previousTerms = context.Terms.Where(row => row.StoryID == storyID);
                context.Terms.DeleteAllOnSubmit(previousTerms);
                context.Terms.InsertAllOnSubmit(storyTerms);

                context.SubmitChanges();
            }

            // Scoring opens its own data context, so it runs after this one is disposed.
            ScoreStory(storyID);
        }
コード例 #7
0
ファイル: Engine.cs プロジェクト: selcukgun/bcherry
        /// <summary>
        /// Calculate the cosine similarity of one story versus every other story
        /// that has stored terms, replacing all existing scores that involve it.
        /// </summary>
        /// <remarks>
        /// cosine(a, b) = dot(a, b) / (|a| * |b|), where |v| is the square root of
        /// the sum of the SQUARED tfidf values. Each new Score row is written with
        /// <paramref name="storyID"/> as Story1ID and the other story as Story2ID.
        /// </remarks>
        /// <param name="storyID">Story to (re)score.</param>
        private static void ScoreStory(int storyID)
        {
            // consider passing data context as an argument to the function
            using (var _db = new EngineDBDataContext())
            {
                // delete all existing scores for storyID
                _db.Scores.DeleteAllOnSubmit(from score in _db.Scores
                                             where score.Story1ID == storyID || score.Story2ID == storyID
                                             select score);
                _db.SubmitChanges();

                // Load the principal story's terms/tfidfs once. Materialising them
                // avoids re-running this query for every other story, and lets the
                // sums below run in memory, where an empty list sums to 0 instead
                // of failing on a NULL aggregate from the database.
                var terms = (from term in _db.Terms
                             where term.StoryID == storyID
                             select new { Text = term.Text, tfidf = term.tfidf }).ToList();

                // magnitude of the story's vector: sqrt of the sum of squares
                var mag1 = Math.Sqrt(terms.Sum(t => t.tfidf * t.tfidf));

                // ids of stories to score against
                var ids = (from term in _db.Terms
                           where term.StoryID != storyID
                           group term by term.StoryID
                           into g
                           select g.Key).ToList();

                // score against each story
                foreach (int id in ids)
                {
                    var id1 = id;

                    // grab the other story's terms/tfidfs in a single query
                    var terms2 = (from term in _db.Terms
                                  where term.StoryID == id1
                                  select new { Text = term.Text, tfidf = term.tfidf }).ToList();

                    var mag2 = Math.Sqrt(terms2.Sum(t => t.tfidf * t.tfidf));

                    // calculate the dot product over the terms both stories share
                    var dot_product = 0.0;
                    foreach (var term in terms)
                    {
                        // Case-insensitive match done in memory; SingleOrDefault still
                        // throws if the other story holds the same term more than once.
                        var term2 = terms2.SingleOrDefault(
                            t => string.Equals(t.Text, term.Text, StringComparison.OrdinalIgnoreCase));
                        if (term2 != null)
                        {
                            dot_product += term.tfidf * term2.tfidf;
                        }
                    }

                    var magnitude_product = mag1 * mag2;
                    var cosine            = dot_product / magnitude_product;

                    // 0/0 when either story has a zero-length vector: treat as "no similarity"
                    if (double.IsNaN(cosine))
                    {
                        cosine = 0;
                    }

                    // Existing rows involving storyID were deleted above, so this
                    // insert cannot collide with an older score for the same pair.
                    _db.Scores.InsertOnSubmit(new Score
                    {
                        Story1ID = storyID,
                        Story2ID = id,
                        Value    = cosine
                    });
                }

                _db.SubmitChanges();
            }
        }