C# (CSharp) DocumentClassification SgmlParser示例

编程语言: C# (CSharp)

命名空间/包名称: DocumentClassification

类/类型: SgmlParser

hotexamples.com的示例: 2

C# (CSharp) DocumentClassification SgmlParser - 已找到2个示例。这些是从开源项目中提取的、最受好评的 DocumentClassification.SgmlParser 真实 C# (CSharp) 示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示 / 隐藏

Close(2)

Next(1)

NextParagraph(1)

示例#1

显示文件

文件： DocumentWindow.xaml.cs 项目： njss/Document-Classification

        /// <summary>
        /// Builds the detail window for one document: re-reads the document's
        /// paragraphs from the corpus, pairs every feature name with its weight
        /// from the document vector, and binds the result to the list view.
        /// </summary>
        /// <param name="Document">Labeled document whose details are displayed.</param>
        /// <param name="CorpusPath">Path used to construct the <c>SgmlParser</c>.</param>
        /// <param name="Features">
        /// Feature names; entry <c>i</c> is paired with <c>Document.Document.Vector[i]</c>.
        /// </param>
        public DocumentWindow( LabeledDocumentVector Document, String CorpusPath, List<String> Features )
        {
            InitializeComponent();

            StringBuilder builder = new StringBuilder();

            Parser = new SgmlParser(CorpusPath);
            try
            {
                // Start reading at the location recorded for this document.
                Parser.FilePosition = Document.Document.Location;

                this.Features = Features;
                HeadLine = Document.Document.HeadLine;
                DateLine = Document.Document.DateLine;
                Id = Document.Document.Id;

                String Value;

                while ( (Value = Parser.NextParagraph()) != null )
                {
                    // Stop as soon as the parser reports a different document id.
                    if (Parser.DocID != Id)
                    {
                        break;
                    }

                    builder.Append(Value);
                }
            }
            finally
            {
                // Close the parser even when reading throws, so it is never left open.
                Parser.Close();
            }

            DocumentContent = builder.ToString();

            FeatureWeights = new Dictionary<string, double>();

            // NOTE(review): assumes Vector has at least Features.Count entries and that
            // feature names are unique (Dictionary.Add throws on a duplicate key) - confirm.
            for (int i = 0; i < Features.Count; i++)
            {
                FeatureWeights.Add(Features[i], Document.Document.Vector[i]);
            }

            VectorDataListView.ItemsSource = FeatureWeights;

            this.Title = Document.Document.Id + " Details";
        }

示例#2

显示文件

文件： DocumentClassifier.cs 项目： njss/Document-Classification

        /// <summary>
        /// Parses the given corpus and places it in the inverted index.
        /// Resets <c>InvertedIndex</c> and <c>invertedIndexWatch</c>, then reads the
        /// corpus token by token. Each token that is neither a stop word nor a
        /// number is stemmed and added to the index. A <c>DocumentVector</c> is
        /// registered the first time a document id is seen, and
        /// <c>ParseIteration</c> is raised whenever the document id changes.
        /// </summary>
        /// <param name="DocumentPath">Path used to construct the <c>SgmlParser</c>.</param>
        private void Parse( String DocumentPath )
        {
            InvertedIndex = new SuffixNode(' ');
            invertedIndexWatch.Reset();

            String prevDocId = "";

            Parser = new SgmlParser(DocumentPath);
            try
            {
                String value;

                while ((value = Parser.Next()) != null)
                {
                    if (!StopWordsIndex.HasWord(value) && !isNumber(value))
                    {
                        value = Stem(value);

                        // Guard against a stem that is null or empty: indexing value[0]
                        // would throw, and there is nothing to add to the index.
                        if (!String.IsNullOrEmpty(value))
                        {
                            // Terms starting with a capital letter get double weight.
                            int weight = isCapital(value[0]) ? 2 : 1;

                            // The stopwatch is only running around the index insertion.
                            invertedIndexWatch.Start();
                            InvertedIndex.Add(value, new DocumentIndex(Parser.DocID, 0), weight);
                            invertedIndexWatch.Stop();
                        }
                    }

                    if (!Documents.Contains(Parser.DocID))
                    {
                        Documents[Parser.DocID] = new DocumentVector(Parser.DocID, Parser.HeadLine, Parser.DateLine, Parser.DocumentPosition);
                        DocumentLengths[Parser.DocID] = 0;
                    }

                    // The length counts every token read, including stop words and numbers.
                    DocumentLengths[Parser.DocID] = ((int)DocumentLengths[Parser.DocID]) + 1;

                    // Fire an event out to any attached methods when a new document starts.
                    if (prevDocId != Parser.DocID && ParseIteration != null)
                    {
                        ParseIteration(this);
                    }

                    prevDocId = Parser.DocID;
                }
            }
            finally
            {
                // Close the parser even when indexing throws, so it is never left open.
                Parser.Close();
            }
        }