예제 #1
0
 /// <summary>
 /// Writes the Boolean Model banner to the console, followed by a numbered
 /// list (starting at 1) of the names of the documents held by the model.
 /// </summary>
 /// <param name="documentMatrixModel">Model whose DocumentList is enumerated.</param>
 public void DisplayHead(TermDocumentMatrixModel documentMatrixModel)
 {
     Console.WriteLine("==[ Boolean Model ]==");

     var documents = documentMatrixModel.DocumentList;
     Console.WriteLine("\nInformation: We use these {0} articles from Medium.com for demonstrating the Boolean Model", documents.Count);

     // Number the documents the way a reader counts them: from 1.
     int position = 0;
     foreach (var document in documents)
     {
         position++;
         Console.WriteLine("({0}) {1}", position, document.Name);
     }

     Console.WriteLine("The result will show the retrieved document and similarity of Boolean Model");
     Console.WriteLine("\n------------------------------\n");
 }
예제 #2
0
 /// <summary>
 /// Evaluates a NOT over one operand and records the outcome on NotValuesStack.
 /// </summary>
 /// <param name="value">
 /// Operand marker. An index term pushes the names of the documents that do not
 /// contain it; the marker "~" pushes nothing.
 /// </param>
 /// <param name="termdocumentmatrix">Model whose TermDocumentMatrixDic maps a term to (document name, present?) pairs.</param>
 /// <returns>Always the marker "~".</returns>
 public string ComputeExpressionOfNot(string value, TermDocumentMatrixModel termdocumentmatrix)
 {
     if (value != "~")
     {
         Dictionary<string, bool> presenceByDocument;
         if (termdocumentmatrix.TermDocumentMatrixDic.TryGetValue(value, out presenceByDocument))
         {
             NotValuesStack.Push(presenceByDocument.Where(e => !e.Value).Select(e => e.Key).ToList());
         }
         else
         {
             // Unknown term: push an empty list instead of null. A null entry makes the
             // later Intersect/Union on the popped list throw ArgumentNullException.
             // An empty list matches what BooleanExpressionStackProcessing returns for
             // a negated unknown term in its single-word path.
             NotValuesStack.Push(new List<string>());
         }
     }
     return "~";
 }
예제 #3
0
        /// <summary>
        /// Prints the retrieved-document table (or a "not found" notice when the
        /// result list is empty) followed by the similarity table for the query.
        /// </summary>
        /// <param name="documentMatrix">Model passed through to GetDataTableOfSimilarity.</param>
        /// <param name="result">Names of the retrieved documents.</param>
        /// <param name="query">Query text passed through to GetDataTableOfSimilarity.</param>
        public void DisplayBodyResult(TermDocumentMatrixModel documentMatrix, List <string> result, string query)
        {
            Console.WriteLine("\n\n=[ Retrieved Document ]=");

            if (result.Count == 0)
            {
                Console.WriteLine("*** Result Not Found ***\n");
            }
            else
            {
                var retrievedTable = GetDataTableOfDocumentList(result);
                ConsoleTableBuilder.From(retrievedTable)
                .WithFormat(ConsoleTableBuilderFormat.Alternative)
                .ExportAndWriteLine();
            }

            // The similarity section is printed whether or not anything was retrieved.
            Console.WriteLine("=[ Similarity ]=");

            var similarityTable = GetDataTableOfSimilarity(result, documentMatrix, query);
            ConsoleTableBuilder.From(similarityTable)
            .WithFormat(ConsoleTableBuilderFormat.Alternative)
            .ExportAndWriteLine();

            Console.WriteLine("------------------------------");
        }
예제 #4
0
        /// <summary>
        /// Builds a one-row table: the first cell holds the query, and each following
        /// cell ("Doc1", "Doc2", ...) holds "1" when the corresponding document's name
        /// appears in <paramref name="documentList"/> and "0" otherwise.
        /// </summary>
        /// <param name="documentList">Names of the retrieved documents.</param>
        /// <param name="documentMatrix">Model whose DocumentList defines the columns and their order.</param>
        /// <param name="query">Text placed in the "Query" column.</param>
        /// <returns>The populated table.</returns>
        public DataTable GetDataTableOfSimilarity(List <string> documentList, TermDocumentMatrixModel documentMatrix, string query)
        {
            var similarity = new DataTable();
            similarity.Columns.Add("Query", typeof(string));

            int documentCount = documentMatrix.DocumentList.Count;

            // Cell 0 is the query; cells 1..documentCount are the per-document flags.
            var row = new string[documentCount + 1];
            row[0] = query;

            for (int column = 1; column <= documentCount; column++)
            {
                similarity.Columns.Add(string.Format("Doc{0}", column), typeof(string));

                bool retrieved = documentList.Contains(documentMatrix.DocumentList[column - 1].Name);
                row[column] = retrieved ? "1" : "0";
            }

            similarity.Rows.Add(row);
            return similarity;
        }
예제 #5
0
        /// <summary>
        /// Builds the term-document matrix: for every term in IndexTermList, a map from
        /// each document's name to whether that document's IndexTermDocuments contains
        /// the term. The matrix is stored in TermDocumentMatrixDic.
        /// </summary>
        /// <param name="documentMatrix">Model supplying IndexTermList and DocumentList; it is updated in place.</param>
        /// <returns>The same model instance that was passed in.</returns>
        public TermDocumentMatrixModel SettingTermDocumentMatrix(TermDocumentMatrixModel documentMatrix)
        {
            var matrix = new Dictionary<string, Dictionary<string, bool>>();

            foreach (var term in documentMatrix.IndexTermList)
            {
                var presenceByDocument = new Dictionary<string, bool>();

                foreach (var document in documentMatrix.DocumentList)
                {
                    bool containsTerm = document.IndexTermDocuments.Contains(term);
                    presenceByDocument.Add(document.Name, containsTerm);
                }

                matrix.Add(term, presenceByDocument);
            }

            documentMatrix.TermDocumentMatrixDic = matrix;
            return documentMatrix;
        }
예제 #6
0
        /// <summary>
        /// Evaluates a Boolean query (terms, '~', '&amp;', '|', parentheses) against the
        /// term-document matrix and returns the names of the matching documents.
        /// </summary>
        /// <remarks>
        /// A query with no space in it is treated as a single term, optionally negated
        /// by '~'. Any other query is evaluated with an operand stack and an operator
        /// stack. Operands on the stack are markers: either a term, or "~" (result is on
        /// NotValuesStack), or an AND/OR symbol (result is on TempValuesStack).
        /// </remarks>
        /// <param name="termDocumentMatrix">Model whose TermDocumentMatrixDic maps a term to (document name, present?) pairs.</param>
        /// <param name="query">The query text.</param>
        /// <returns>Names of the matching documents; empty when nothing matches.</returns>
        public List <string> BooleanExpressionStackProcessing(TermDocumentMatrixModel termDocumentMatrix, string query)
        {
            // Case 1: single word (no space in the query), optionally negated with '~'.
            if (query.Split(' ').Length == 1)
            {
                bool   negated = query.IndexOf('~') >= 0;
                string term    = negated ? query.Replace("~", string.Empty) : query;
                return SelectDocumentsByPresence(termDocumentMatrix, term, !negated);
            }

            // Case 2: multi-token expression.
            char[]         tokens    = query.ToCharArray();
            Stack <string> values    = new Stack <string>();
            Stack <string> operation = new Stack <string>();
            Regex          regex     = new Regex(@"^[a-zA-Z0-9_-]*$");

            for (int i = 0; i < tokens.Length; i++)
            {
                if (tokens[i] == ' ')
                {
                    continue;
                }

                if (regex.IsMatch(tokens[i].ToString()))
                {
                    // Consume the whole run of term characters as one operand.
                    StringBuilder sbuf = new StringBuilder();
                    while (i < tokens.Length && regex.IsMatch(tokens[i].ToString()))
                    {
                        sbuf.Append(tokens[i++]);
                    }
                    if (!String.IsNullOrEmpty(sbuf.ToString()))
                    {
                        values.Push(sbuf.ToString().Trim());
                    }
                    // The inner loop stopped one past the term; step back so the
                    // for-loop increment does not skip the next character.
                    i--;
                }
                else if (tokens[i] == '(')
                {
                    operation.Push(tokens[i].ToString());
                }
                else if (tokens[i] == ')')
                {
                    // Guard the Peek: an unbalanced ')' used to throw on an empty stack,
                    // while an unbalanced '(' is already tolerated in the drain loop below.
                    while (operation.Count > 0 && operation.Peek() != "(")
                    {
                        ApplyBooleanOperator(operation.Pop(), values, termDocumentMatrix);
                    }
                    if (operation.Count > 0)
                    {
                        operation.Pop(); // discard the matching "("
                    }
                }
                else if (tokens[i] == '&' || tokens[i] == '|' || tokens[i] == '~')
                {
                    while (operation.Count > 0 && hasPrecedence(tokens[i], operation.Peek()))
                    {
                        ApplyBooleanOperator(operation.Pop(), values, termDocumentMatrix);
                    }
                    operation.Push(tokens[i].ToString());
                }
            }

            // Drain the remaining operators; a leftover "(" is skipped.
            while (operation.Count > 0)
            {
                string pending = operation.Pop();
                if (pending != "(")
                {
                    ApplyBooleanOperator(pending, values, termDocumentMatrix);
                }
            }

            // Resolve the final operand marker. Previously TempValuesStack was popped
            // unconditionally, which fails (empty stack, or a stale entry) whenever the
            // expression contained no AND/OR, e.g. "~ term" or "( term )".
            if (values.Count == 0)
            {
                return new List<string>();
            }

            string marker = values.Pop();
            if (marker == "&" || marker == "|")
            {
                return TempValuesStack.Pop();
            }
            if (marker == "~")
            {
                // Tolerate a null entry so an unknown negated term yields no documents.
                return NotValuesStack.Pop() ?? new List<string>();
            }
            return SelectDocumentsByPresence(termDocumentMatrix, marker, true);
        }

        // Pops the operand marker(s) for one operator, evaluates it, and pushes the result marker.
        private void ApplyBooleanOperator(string operationPop, Stack <string> values, TermDocumentMatrixModel termDocumentMatrix)
        {
            if (operationPop == "~")
            {
                values.Push(ComputeExpressionOfNot(values.Pop(), termDocumentMatrix));
            }
            else
            {
                // First Pop is the most recently pushed operand (value1), second is the one before it (value2).
                values.Push(ComputeExpression(operationPop, values.Pop(), values.Pop(), termDocumentMatrix));
            }
        }

        // Returns the names of the documents whose presence flag for the term equals 'present'; empty for an unknown term.
        private static List <string> SelectDocumentsByPresence(TermDocumentMatrixModel termDocumentMatrix, string term, bool present)
        {
            Dictionary<string, bool> presenceByDocument;
            if (!termDocumentMatrix.TermDocumentMatrixDic.TryGetValue(term, out presenceByDocument))
            {
                return new List<string>();
            }
            return presenceByDocument.Where(e => e.Value == present).Select(e => e.Key).ToList();
        }
예제 #7
0
        /// <summary>
        /// Applies a binary Boolean operator to two operands and pushes the resulting
        /// document-name list onto TempValuesStack.
        /// </summary>
        /// <param name="operation">"&amp;" (intersection) or "|" (union). Any other value does nothing.</param>
        /// <param name="value1">
        /// First operand marker: an index term, "~" (take the list from NotValuesStack),
        /// or an AND/OR symbol (take the list from TempValuesStack). It is resolved
        /// before <paramref name="value2"/>.
        /// </param>
        /// <param name="value2">Second operand marker, same forms as <paramref name="value1"/>.</param>
        /// <param name="termDocumentMatrix">Model whose TermDocumentMatrixDic maps a term to (document name, present?) pairs.</param>
        /// <returns>The operator symbol when it was applied; otherwise "yes".</returns>
        public string ComputeExpression(string operation, string value1, string value2, TermDocumentMatrixModel termDocumentMatrix)
        {
            if (operation != "&" && operation != "|")
            {
                return "yes";
            }

            // Resolution order matters: each operand may pop a shared stack.
            List<string> firstOperand  = ResolveOperand(value1, termDocumentMatrix);
            List<string> secondOperand = ResolveOperand(value2, termDocumentMatrix);

            List<string> combined = operation == "&"
                ? firstOperand.Intersect(secondOperand).ToList()
                : firstOperand.Union(secondOperand).ToList();

            TempValuesStack.Push(combined);
            return operation;
        }

        // Turns an operand marker into a document-name list; never returns null.
        private List <string> ResolveOperand(string value, TermDocumentMatrixModel termDocumentMatrix)
        {
            if (value == "~")
            {
                // A null entry on NotValuesStack would make Intersect/Union throw
                // ArgumentNullException, so treat it as "no documents".
                return NotValuesStack.Pop() ?? new List<string>();
            }

            Dictionary<string, bool> presenceByDocument;
            if (termDocumentMatrix.TermDocumentMatrixDic.TryGetValue(value, out presenceByDocument))
            {
                return presenceByDocument.Where(e => e.Value).Select(e => e.Key).ToList();
            }

            if (value == "&" || value == "|")
            {
                return TempValuesStack.Pop();
            }

            // Unknown term: it matches no documents.
            return new List<string>();
        }
예제 #8
0
        /// <summary>
        /// Runs a query by delegating to BooleanExpressionStackProcessing.
        /// </summary>
        /// <param name="termDocumentMatrix">Model forwarded unchanged to the evaluator.</param>
        /// <param name="query">Query text forwarded unchanged to the evaluator.</param>
        /// <returns>Whatever list the evaluator returns.</returns>
        public List <string> ProcessingQuery(TermDocumentMatrixModel termDocumentMatrix, string query)
        {
            return BooleanExpressionStackProcessing(termDocumentMatrix, query);
        }