-
Notifications
You must be signed in to change notification settings - Fork 0
/
Semi.cs
471 lines (422 loc) · 13.9 KB
/
Semi.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
/////////////////////////////////////////////////////////////////////////
// Semi.cs - Builds semiExpressions //
// ver 2.0 //
// Language: C#, Visual Studio 10.0, .Net Framework 4.0 //
// Platform: Dell Precision T7400 , Win 7, SP 1 //
// Application: Pr#2 Help, CSE681, Fall 2011 //
// Author: Jim Fawcett, CST 2-187, Syracuse University //
// (315) 443-3948, jfawcett@twcny.rr.com //
/////////////////////////////////////////////////////////////////////////
/*
* Module Operations
* =================
* Semi provides, via class CSemiExp, facilities to extract semiExpressions.
* A semiExpression is a sequence of tokens that is just the right amount
* of information to parse for code analysis. SemiExpressions are token
* sequences that end in "{" or "}" or ";"
*
* CSemiExp works with a private CToker object attached to a specified file.
* It provides a getSemi() function that extracts semiExpressions from the
* file. Comments and quotes arrive from the tokenizer as single tokens and
* are kept in the semiExpression.
*
* Public Interface
* ================
* CSemiExp semi = new CSemiExp(); // constructs CSemiExp object
* if(semi.open(fileName)) ... // attaches semi to specified file
* semi.close(); // closes file stream
* if(semi.Equals(se)) ... // do these semiExps have same tokens?
* int hc = semi.GetHashCode() // returns hashcode
* if(semi.getSemi()) ... // extracts and stores next semiExp
* int len = semi.count; // length property
* semi.verbose = true; // verbose property - shows tokens
* string tok = semi[2]; // access a semi token
* string tok = semi[1]; // extract token
* semi.flush(); // removes all tokens
* semi.initialize(); // adds ";" to empty semi-expression
* semi.insert(2,tok); // inserts token as third element
* semi.Add(tok); // appends token
* semi.Add(tokArray); // appends array of tokens
* semi.display(); // sends tokens to Console
* string show = semi.displayStr(); // returns tokens as single string
* semi.returnNewLines = false; // property defines newline handling
* // default is true
*/
//
/*
* Build Process
* =============
* Required Files:
* Semi.cs Toker.cs
*
* Compiler Command:
* csc /target:exe /define:TEST_SEMI Semi.cs Toker.cs
*
* Maintenance History
* ===================
* ver 2.0 : 05 Sep 11
* - Converted to new C# property syntax
* - Converted from untyped ArrayList to generic List<string>
* - Simplified display() and displayStr()
* - Added new tests in test stub
* ver 1.9 : 27 Sep 08
* - Changed comments on manual page to say that semi.ReturnNewLines is true by default
* ver 1.8 : 10 Jun 08
* - Aniruddha Gore added Contains function and set returnNewLines as the default
* ver 1.7 : 17 Jun 06
* - added displayNewLines property
* ver 1.6 : 16 Jun 06
* - added CSemi member functions copy(), remove(int i), and remove(string tok).
* ver 1.5 : 12 Jun 05
* - added returnNewLines property
* - modified way get() behaves so that it will not hang on files that
* end with text that have no semiExp terminator.
* ver 1.4 : 30 May 05
* - removed CppCommentFilter, CCommentFilter, SQuoteFilter, DQuoteFilter
* since Toker now returns comments and quotes as tokens.
* - added isComment(string tok) member function
* ver 1.3 : 16 Sep 03
* - removed insert(tokenArray), added Add(tokenArray)
* Since this is a change to public interface it may break some code.
* It simply changes the name of the function to more directly
* describe what it does - append a token array.
* - added overrides of Equals(object) and GetHashCode()
* - completed Manual Page description of public interface
* ver 1.2 : 14 Sep 03
* - cosmetic changes to comments
* - Added formatting of extracted comments (see notes in code below)
* ver 1.1 : 13 Sep 03
* - fixed bug in CppCommentFilter() that caused collection to terminate
* if a C++ comment was on same line as a semiExpression.
* - added calls to semiExp.Add(currTok) in SQuoteFilter() and DQuoteFilter()
* which simplified getSemi().
* - added some functions to create and manipulate semi-expressions.
* ver 1.0 : 31 Aug 03
* - first release
*
* Planned Modifications:
* ----------------------
* - return, or don't return, comments based on discardComments property
* which is now present but inactive.
*/
//
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using CStoker;
namespace CSsemi
{
///////////////////////////////////////////////////////////////////////
// class CSemiExp - filters token stream and collects semiExpressions
/// <summary>
/// Collects tokens read from a CToker into semiExpressions: token
/// sequences that end with ";", "{" or "}". The current semiExpression is
/// held in an internal list that can be inspected and edited through the
/// indexer and the insert/Add/remove members.
/// </summary>
public class CSemiExp
{
    // Two-character sequences recognised by IsOperatorPair. Kept in a
    // static field so the table is built once rather than on every call.
    static readonly string[] operatorPairs = new string[]
    {
        "/*", "*/", "//", "!=", "==", ">=", "<=", "&&", "||", "--", "++",
        "+=", "-=", "*=", "/=", "%=", "&=", "^=", "|=", "<<", ">>",
        "\\n", "\\t", "\\r", "\\f"
    };

    CToker toker = null;            // tokenizer that supplies the tokens
    List<string> semiExp = null;    // tokens of the current semiExpression
    string currTok = "";            // token most recently read by get()
    string prevTok = "";            // token read just before currTok

    //----< line count property >----------------------------------------
    /// <summary>Line count as reported by the attached tokenizer.</summary>
    public int lineCount
    {
        get { return toker.lineCount; }
    }

    //----< constructor >------------------------------------------------
    /// <summary>
    /// Creates an empty semiExpression with its own tokenizer.
    /// Defaults: discardComments = true (not implemented yet),
    /// returnNewLines = true, displayNewLines = false.
    /// </summary>
    public CSemiExp()
    {
        toker = new CToker();
        semiExp = new List<string>();
        discardComments = true;   // not implemented yet
        returnNewLines = true;
        displayNewLines = false;
    }

    //----< test for equality >------------------------------------------
    /// <summary>
    /// Two semiExpressions are equal when they hold the same tokens in the
    /// same order.
    /// </summary>
    /// <param name="semi">Object to compare with.</param>
    /// <returns>
    /// false when <paramref name="semi"/> is null or is not a CSemiExp,
    /// otherwise the result of the token-by-token comparison.
    /// </returns>
    public override bool Equals(Object semi)
    {
        // "as" instead of a hard cast: Equals must answer false, not throw,
        // for null or for objects of an unrelated type.
        CSemiExp other = semi as CSemiExp;
        if (ReferenceEquals(other, null))
        {
            return false;
        }
        if (other.count != this.count)
        {
            return false;
        }
        for (int i = 0; i < this.count; ++i)
        {
            if (this[i] != other[i])
            {
                return false;
            }
        }
        return true;
    }

    //---< pos of first str in semi-expression if found, -1 otherwise >--
    /// <summary>Finds the first token equal to <paramref name="str"/>.</summary>
    /// <returns>Zero-based position, or -1 when there is no such token.</returns>
    public int FindFirst(string str)
    {
        // Scan every position, including the last one, so that FindFirst
        // and FindLast agree on which tokens are searched.
        for (int i = 0; i < count; ++i)
        {
            if (this[i] == str)
            {
                return i;
            }
        }
        return -1;
    }

    //---< pos of last str in semi-expression if found, -1 otherwise >---
    /// <summary>Finds the last token equal to <paramref name="str"/>.</summary>
    /// <returns>Zero-based position, or -1 when there is no such token.</returns>
    public int FindLast(string str)
    {
        for (int i = this.count - 1; i >= 0; --i)
        {
            if (this[i] == str)
            {
                return i;
            }
        }
        return -1;
    }

    //----< deprecated: here to avoid breakage with old code >-----------
    /// <summary>
    /// Deprecated alias for FindLast. Note that it returns a position
    /// (or -1), not a bool.
    /// </summary>
    public int Contains(string str)
    {
        return FindLast(str);
    }

    //----< have to override GetHashCode() >-----------------------------
    /// <summary>
    /// Hash code computed from the tokens, so that two semiExpressions
    /// that compare equal with Equals also produce the same hash code.
    /// The value changes whenever the token list changes.
    /// </summary>
    public override System.Int32 GetHashCode()
    {
        unchecked   // overflow simply wraps; that is fine for a hash
        {
            int hash = 17;
            foreach (string tok in semiExp)
            {
                hash = hash * 31 + (tok == null ? 0 : tok.GetHashCode());
            }
            return hash;
        }
    }

    //----< opens member tokenizer with specified file >-----------------
    /// <summary>Attaches the tokenizer to <paramref name="fileName"/>.</summary>
    /// <returns>Whatever the tokenizer's openFile reports.</returns>
    public bool open(string fileName)
    {
        return toker.openFile(fileName);
    }

    //----< close file stream >------------------------------------------
    /// <summary>Closes the tokenizer.</summary>
    public void close()
    {
        toker.close();
    }

    //----< is this the last token in the current semiExpression? >------
    // True for ";", "{" and "}" only.
    bool isTerminator(string tok)
    {
        return tok == ";" || tok == "{" || tok == "}";
    }

    //----< get next token, saving previous token >----------------------
    // Reads one token from the tokenizer into currTok, remembers the old
    // value in prevTok, and echoes the token to the Console when verbose
    // is set.
    string get()
    {
        prevTok = currTok;
        currTok = toker.getTok();
        if (verbose)
        {
            Console.Write("{0} ", currTok);
        }
        return currTok;
    }

    //----< is this character a punctuator? >----------------------------
    bool IsPunc(char ch)
    {
        return (Char.IsPunctuation(ch) || Char.IsSymbol(ch));
    }

    //----< are these characters an operator? >--------------------------
    // Returns true when first followed by second forms one of the entries
    // in operatorPairs.
    bool IsOperatorPair(char first, char second)
    {
        string candidate = new string(new char[] { first, second });
        foreach (string oper in operatorPairs)
        {
            if (oper == candidate)
            {
                return true;
            }
        }
        return false;
    }

    //----< collect semiExpression from filtered token stream >----------
    /// <summary>
    /// Discards the current tokens and reads tokens until a terminator
    /// (";", "{" or "}") has been stored. "\n" tokens are stored only when
    /// returnNewLines is true.
    /// </summary>
    /// <returns>
    /// true when a terminated semiExpression was collected; false when the
    /// tokenizer returned an empty token (end of file) first. In that case
    /// any tokens read since the last terminator remain in the container.
    /// </returns>
    public bool getSemi()
    {
        semiExp.Clear();   // empty container
        do
        {
            get();
            if (currTok == "")
            {
                return false;  // end of file
            }
            if (returnNewLines || currTok != "\n")
            {
                semiExp.Add(currTok);
            }
        } while (!isTerminator(currTok) || count == 0);
        return (semiExp.Count > 0);
    }

    //----< get length property >----------------------------------------
    /// <summary>Number of tokens currently held.</summary>
    public int count
    {
        get { return semiExp.Count; }
    }

    //----< indexer for semiExpression >---------------------------------
    /// <summary>
    /// Reads or replaces the token at position <paramref name="i"/>.
    /// No range check is made here; the underlying list rejects an index
    /// outside 0..count-1.
    /// </summary>
    public string this[int i]
    {
        get { return semiExp[i]; }
        set { semiExp[i] = value; }
    }

    //----< insert token - fails if out of range and returns false>------
    /// <summary>
    /// Inserts <paramref name="tok"/> in front of the token at position
    /// <paramref name="loc"/>.
    /// </summary>
    /// <returns>
    /// false, without changing anything, unless 0 &lt;= loc &lt; count.
    /// Consequently insert cannot append and cannot add to an empty
    /// semiExpression; use Add for that.
    /// </returns>
    public bool insert(int loc, string tok)
    {
        if (loc < 0 || loc >= semiExp.Count)
        {
            return false;
        }
        semiExp.Insert(loc, tok);
        return true;
    }

    //----< append token to end of semiExp >-----------------------------
    /// <summary>Appends one token.</summary>
    /// <returns>This instance, so calls can be chained.</returns>
    public CSemiExp Add(string token)
    {
        semiExp.Add(token);
        return this;
    }

    //----< load semiExp from array of strings >-------------------------
    /// <summary>Appends every token of <paramref name="source"/> in order.</summary>
    public void Add(string[] source)
    {
        foreach (string tok in source)
        {
            semiExp.Add(tok);
        }
    }

    //--< initialize semiExp with single ";" token - used for testing >--
    /// <summary>Stores a single ";" token in an empty semiExpression.</summary>
    /// <returns>false, without changing anything, when tokens are already present.</returns>
    public bool initialize()
    {
        if (semiExp.Count > 0)
        {
            return false;
        }
        semiExp.Add(";");
        return true;
    }

    //----< remove all contents of semiExp >-----------------------------
    /// <summary>Removes all tokens.</summary>
    public void flush()
    {
        semiExp.Clear();
    }

    //----< is this token a comment? >-----------------------------------
    /// <summary>
    /// True when <paramref name="tok"/> starts with "//" or "/*".
    /// A null token or a token shorter than two characters is not a comment.
    /// </summary>
    public bool isComment(string tok)
    {
        if (tok == null || tok.Length < 2)
        {
            return false;
        }
        return tok[0] == '/' && (tok[1] == '/' || tok[1] == '*');
    }

    //----< display semiExpression on Console >--------------------------
    /// <summary>Writes a newline followed by displayStr() to the Console.</summary>
    public void display()
    {
        Console.Write("\n");
        Console.Write(displayStr());
    }

    //----< return display string >--------------------------------------
    /// <summary>
    /// Concatenates the tokens, placing a space after each one except
    /// after a token whose first '\n' is its final character (and after
    /// an empty token).
    /// </summary>
    public string displayStr()
    {
        StringBuilder disp = new StringBuilder("");
        foreach (string tok in semiExp)
        {
            disp.Append(tok);
            if (tok.IndexOf('\n') != tok.Length - 1)
            {
                disp.Append(" ");
            }
        }
        return disp.ToString();
    }

    //----< announce tokens when verbose is true >-----------------------
    /// <summary>When true, get() echoes every token it reads to the Console.</summary>
    public bool verbose
    {
        get;
        set;
    }

    //----< determines whether new lines are returned with semi >--------
    /// <summary>When false, getSemi() leaves "\n" tokens out of the semiExpression.</summary>
    public bool returnNewLines
    {
        get;
        set;
    }

    //----< determines whether new lines are displayed >-----------------
    /// <summary>
    /// Flag intended to control display of newlines. It is stored but not
    /// consulted by any member of this class.
    /// </summary>
    public bool displayNewLines
    {
        get;
        set;
    }

    //----< determines whether comments are discarded >------------------
    /// <summary>
    /// Flag intended to control discarding of comments. Present but
    /// inactive: no member of this class reads it.
    /// </summary>
    public bool discardComments
    {
        get;
        set;
    }

    //----< make a copy of semiExpression >------------------------------
    /// <summary>
    /// Returns a new CSemiExp holding the same tokens. The copy is built
    /// with the default constructor, so it has its own tokenizer and the
    /// default property settings rather than this instance's.
    /// </summary>
    public CSemiExp clone()
    {
        CSemiExp copy = new CSemiExp();
        for (int i = 0; i < count; ++i)
        {
            copy.Add(this[i]);
        }
        return copy;
    }

    //----< remove a token from semiExpression >-------------------------
    /// <summary>Removes the token at position <paramref name="i"/>.</summary>
    /// <returns>false, without changing anything, when i is out of range.</returns>
    public bool remove(int i)
    {
        if (i < 0 || i >= semiExp.Count)
        {
            return false;
        }
        semiExp.RemoveAt(i);
        return true;
    }

    //----< remove a token from semiExpression >-------------------------
    /// <summary>Removes the first token equal to <paramref name="token"/>.</summary>
    /// <returns>true when a token was removed, false when none matched.</returns>
    public bool remove(string token)
    {
        // List.Remove already reports whether it found and removed the
        // item, so a separate Contains scan is unnecessary.
        return semiExp.Remove(token);
    }

#if(TEST_SEMI)
    //----< test stub >--------------------------------------------------
    [STAThread]
    static void Main(string[] args)
    {
        Console.Write("\n Testing semiExp Operations");
        Console.Write("\n ============================\n");

        CSemiExp test = new CSemiExp();
        test.returnNewLines = true;
        test.displayNewLines = true;

        string testFile = "../../testSemi.txt";
        if (test.open(testFile))
        {
            while (test.getSemi())
            {
                test.display();
            }
            test.close();
        }
        else
        {
            // Do not read from a tokenizer that failed to open its file.
            Console.Write("\n Can't open file {0}", testFile);
        }

        // getSemi() can leave unterminated tokens behind at end of file;
        // clear them so initialize() starts from an empty semiExpression.
        test.flush();
        test.initialize();
        test.insert(0, "this");
        test.insert(1, "is");
        test.insert(2, "a");
        test.insert(3, "test");
        test.display();

        Console.Write("\n 2nd token = \"{0}\"\n", test[1]);

        Console.Write("\n removing first token:");
        test.remove(0);
        test.display();
        Console.Write("\n");

        Console.Write("\n removing token \"test\":");
        test.remove("test");
        test.display();
        Console.Write("\n");

        Console.Write("\n making copy of semiExpression:");
        CSemiExp copy = test.clone();
        copy.display();
        Console.Write("\n");

        if (args.Length == 0)
        {
            Console.Write("\n Please enter name of file to analyze\n\n");
            return;
        }
        CSemiExp semi = new CSemiExp();
        semi.returnNewLines = true;
        if (!semi.open(args[0]))
        {
            Console.Write("\n can't open file {0}\n\n", args[0]);
            return;
        }

        Console.Write("\n Analyzing file {0}", args[0]);
        Console.Write("\n ----------------------------------\n");

        while (semi.getSemi())
        {
            semi.display();
        }
        semi.close();
    }
#endif
}
}