Example #1
0
        /// <summary>
        /// Scans every page of <paramref name="doc"/> for the primary and the dependency search
        /// terms and stores a context snippet for each hit in <paramref name="qr"/>.
        /// </summary>
        /// <remarks>
        /// The page text is split on single spaces; a word counts as a hit when it starts with
        /// the term (case-insensitive). Each hit yields a snippet of up to 11 words on either
        /// side of it. A dependency hit is kept only when its snippet also mentions one of the
        /// qualifier terms. Every stored snippet gets a GUID and an empty comment, and entries
        /// that end up without any snippet are removed from <paramref name="qr"/>.
        /// </remarks>
        /// <param name="doc">Document whose <c>DocBodyDic</c> maps page numbers to page text.</param>
        /// <param name="qr">Query result whose <c>Entries</c> are updated in place; the reference itself is never reassigned.</param>
        protected void ExtractQueriesFromDoc(Documents doc, ref QueryResult qr)
        {
            // Loop-invariant: a dependency hit only counts when one of these appears in its snippet.
            string[] termsDepends = "marijuana;marihuana;cannabis;Dispensary;Dispensaries;provisioning;Cultivat".Split(';');

            foreach (int page in doc.DocBodyDic.Keys)
            {
                // A missing page body is treated as empty so that empty entries for the page are still pruned.
                string   text      = doc.DocBodyDic[page] ?? string.Empty;
                string[] bodyWords = text.Split(' ');

                foreach (string searchterm in searchterms)
                {
                    QueryResult.KeywordEntry entry = GetOrAddKeywordEntry(qr, searchterm, page);

                    if (text.IndexOf(searchterm, StringComparison.OrdinalIgnoreCase) >= 0)
                    {
                        // Work with word positions directly. Looking each matched word up again by
                        // value skipped a hit at position 0 and could resolve to the wrong occurrence.
                        List<int>    hitIndexes = FindWordIndexes(bodyWords, searchterm);
                        List<string> entryLines = entry.Contents;

                        foreach (int hitIndex in hitIndexes)
                        {
                            int    wordCount;
                            string line = BuildContextLine(bodyWords, hitIndex, out wordCount);

                            // A short window means the hit sits close to the start or end of the page.
                            if (wordCount < 20)
                            {
                                Console.WriteLine("Please check!");
                            }

                            if (!entryLines.Exists(t => t.Contains(line)))
                            {
                                entryLines.Add(line);
                            }
                        }

                        Console.ForegroundColor = ConsoleColor.Green;
                        Console.WriteLine("search term {0} appears {1} times in document {2}...", searchterm, hitIndexes.Count, doc.DocLocalPath);
                        Console.ResetColor();
                        entry.Contents = entryLines;

                        EnsureContentMetadata(entry);
                    }

                    if (entry.Contents.Count == 0)
                    {
                        qr.Entries.Remove(entry);
                    }
                }

                foreach (string searchTermD in searchTermsDependency)
                {
                    QueryResult.KeywordEntry entry = GetOrAddKeywordEntry(qr, searchTermD, page);

                    Console.WriteLine("Search {0}...", searchTermD);
                    if (text.IndexOf(searchTermD, StringComparison.OrdinalIgnoreCase) >= 0)
                    {
                        Console.ForegroundColor = ConsoleColor.Yellow;
                        Console.WriteLine("Dependency search term {0} found...", searchTermD);
                        Console.ResetColor();

                        List<int> hitIndexes = FindWordIndexes(bodyWords, searchTermD);

                        if (hitIndexes.Count != 0)
                        {
                            List<string> entryLines = entry.Contents;

                            foreach (int hitIndex in hitIndexes)
                            {
                                int    wordCount;
                                string line = BuildContextLine(bodyWords, hitIndex, out wordCount);

                                bool mentionsQualifier = Array.Exists(termsDepends, t => line.IndexOf(t, StringComparison.OrdinalIgnoreCase) >= 0);

                                // Store the snippet unless an existing one already contains it. The previous
                                // check was inverted and therefore never stored the first snippet.
                                if (mentionsQualifier && !entryLines.Exists(t => t.Contains(line)))
                                {
                                    entryLines.Add(line);
                                }
                            }

                            Console.ForegroundColor = ConsoleColor.Green;
                            Console.WriteLine("Search term {0} appears {1} times...", searchTermD, entryLines.Count);
                            Console.ResetColor();
                            entry.Contents = entryLines;

                            EnsureContentMetadata(entry);
                        }
                    }

                    if (entry.Contents.Count == 0)
                    {
                        qr.Entries.Remove(entry);
                    }
                }
            }
        }

        /// <summary>
        /// Returns the entry for the keyword/page pair, creating and registering it when absent,
        /// and makes sure its GUID and comment dictionaries exist.
        /// </summary>
        private static QueryResult.KeywordEntry GetOrAddKeywordEntry(QueryResult qr, string keyword, int page)
        {
            QueryResult.KeywordEntry entry = qr.Entries.FirstOrDefault(t => t.Keyword == keyword && t.PageNumber == page);

            if (entry == null)
            {
                entry            = new QueryResult.KeywordEntry();
                entry.Keyword    = keyword;
                entry.PageNumber = page;
                qr.Entries.Add(entry);
            }

            // Both search loops add to these dictionaries, so neither may be left null.
            if (entry.GuidDic == null)
            {
                entry.GuidDic = new Dictionary<string, string>();
            }

            if (entry.CommentDic == null)
            {
                entry.CommentDic = new Dictionary<string, string>();
            }

            return entry;
        }

        /// <summary>
        /// Returns the positions of all words that start with <paramref name="term"/>, ignoring case.
        /// </summary>
        private static List<int> FindWordIndexes(string[] bodyWords, string term)
        {
            List<int> indexes = new List<int>();

            for (int i = 0; i < bodyWords.Length; i++)
            {
                if (bodyWords[i].StartsWith(term, StringComparison.OrdinalIgnoreCase))
                {
                    indexes.Add(i);
                }
            }

            return indexes;
        }

        /// <summary>
        /// Joins the words within 11 positions of <paramref name="hitIndex"/> (clamped to the array
        /// bounds) into one line, dropping carriage returns and turning line feeds into spaces.
        /// </summary>
        private static string BuildContextLine(string[] bodyWords, int hitIndex, out int wordCount)
        {
            int rangeStart = Math.Max(hitIndex - 11, 0);
            int rangeEnd   = Math.Min(hitIndex + 11, bodyWords.Length - 1);

            List<string> words = new List<string>();

            for (int i = rangeStart; i <= rangeEnd; i++)
            {
                words.Add(bodyWords[i].Replace("\r", string.Empty).Replace("\n", " "));
            }

            wordCount = words.Count;
            return string.Join(" ", words.ToArray());
        }

        /// <summary>
        /// Gives every snippet of the entry a GUID and an empty comment unless it already has one.
        /// </summary>
        private static void EnsureContentMetadata(QueryResult.KeywordEntry entry)
        {
            foreach (string content in entry.Contents)
            {
                if (!entry.GuidDic.ContainsKey(content))
                {
                    entry.GuidDic.Add(content, Guid.NewGuid().ToString());
                }

                if (!entry.CommentDic.ContainsKey(content))
                {
                    entry.CommentDic.Add(content, string.Empty);
                }
            }
        }
Example #2
0
        /// <summary>
        /// Loads the stored queries, together with their keyword entries, for the documents
        /// whose <c>CITY_NM</c> matches <c>this.cityEntity.CityId</c>.
        /// </summary>
        /// <remarks>
        /// Connects with the connection string named "local" and runs one batch of two
        /// SELECT statements: the first result set ("Table") holds the queries, the second
        /// ("Table1") their entries.
        /// </remarks>
        /// <returns>
        /// One <see cref="QueryResult"/> per query row. A query without entry rows is still
        /// returned, but its id, dates and document id are left at their defaults.
        /// </returns>
        public List <QueryResult> LoadQueriesDoneSQL()
        {
            List<QueryResult> queries = new List<QueryResult>();

            string connectionString = ConfigurationManager.ConnectionStrings["local"].ConnectionString;

            // The city is passed as a parameter instead of being formatted into the SQL text.
            const string sql = @"SELECT q.[QUERY_GUID]
                    ,q.[DOC_GUID]
                    ,[MEETING_DATE]
                    ,[SEARCH_DATE]
                    ,[MEETING_TITLE]
                    ,[MEETING_LOCATION]
                FROM [MunicipalityPublicMeetingDB].[dbo].[QUERY] q
                JOIN DOCUMENT d on q.DOC_GUID = d.DOC_GUID
                WHERE d.CITY_NM = @cityName;
                SELECT [ENTRY_GUID]
                    ,[QUERY_GUID]
                    ,[PAGE_NUMBER]
                    ,[KEYWORD]
                    ,[COMMENT]
                    ,[CONTENT]
                FROM [MunicipalityPublicMeetingDB].[dbo].[QUERY_ENTRY]
                WHERE QUERY_GUID in (SELECT [QUERY_GUID]
                FROM [MunicipalityPublicMeetingDB].[dbo].[QUERY] q JOIN DOCUMENT d on
                q.DOC_GUID = d.DOC_GUID
                WHERE d.CITY_NM = @cityName)";

            // Same text that used to be embedded between the quotes of the SQL literal.
            string cityName = string.Format("{0}", this.cityEntity.CityId);

            DataSet queriesDataSet = new DataSet();

            // Fill opens and closes the connection itself; the using blocks release both objects
            // even when the query throws.
            using (SqlConnection localConnection = new SqlConnection(connectionString))
            using (SqlDataAdapter queryAdapter = new SqlDataAdapter(sql, localConnection))
            {
                queryAdapter.SelectCommand.Parameters.AddWithValue("@cityName", cityName);
                queryAdapter.Fill(queriesDataSet);
            }

            DataTable entryTable = queriesDataSet.Tables["Table1"];

            foreach (DataRow queryRow in queriesDataSet.Tables["Table"].Rows)
            {
                QueryResult qr = new QueryResult();

                string queryId = queryRow["QUERY_GUID"].ToString();
                queryId = string.IsNullOrEmpty(queryId) ? Guid.NewGuid().ToString() : queryId;

                // DataTable.Select returns an empty array, never null, when nothing matches.
                DataRow[] entryRows = entryTable.Select(string.Format("QUERY_GUID='{0}'", queryId));

                if (entryRows.Length > 0)
                {
                    // Query-level values are identical for every entry row, so set them once.
                    qr.QueryId     = queryId;
                    qr.MeetingDate = DateTime.Parse(queryRow["MEETING_DATE"].ToString());
                    qr.SearchTime  = DateTime.Parse(queryRow["SEARCH_DATE"].ToString());
                    qr.DocId       = queryRow["DOC_GUID"].ToString();
                }

                foreach (DataRow entryRow in entryRows)
                {
                    // Read once per row instead of re-parsing inside the lookup predicate.
                    string keyword    = entryRow["KEYWORD"].ToString();
                    int    pageNumber = int.Parse(entryRow["PAGE_NUMBER"].ToString());

                    QueryResult.KeywordEntry ke = qr.Entries.FirstOrDefault(t => t.Keyword == keyword && t.PageNumber == pageNumber);

                    if (ke == null)
                    {
                        ke            = new QueryResult.KeywordEntry();
                        ke.Keyword    = keyword;
                        ke.PageNumber = pageNumber;
                        ke.CommentDic = new Dictionary<string, string>();
                        ke.GuidDic    = new Dictionary<string, string>();
                        qr.Entries.Add(ke);
                    }

                    string content = entryRow["CONTENT"].ToString();
                    string comment = entryRow["COMMENT"].ToString();
                    string entryId = entryRow["ENTRY_GUID"].ToString();

                    // Only the first row for a given content is kept.
                    if (!ke.CommentDic.ContainsKey(content))
                    {
                        // The comment dictionary must hold the COMMENT column; it used to receive the entry GUID.
                        ke.CommentDic.Add(content, comment);
                        ke.GuidDic.Add(content, entryId);
                    }
                }

                queries.Add(qr);
            }

            return queries;
        }