/Users/lyon/j4p/src/javassist/compiler/Lex.java
|
1 /*
2 * Javassist, a Java-bytecode translator toolkit.
3 * Copyright (C) 1999-2003 Shigeru Chiba. All Rights Reserved.
4 *
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. Alternatively, the contents of this file may be used under
8 * the terms of the GNU Lesser General Public License Version 2.1 or later.
9 *
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
14 */
15
16 package javassist.compiler;
17
/**
 * A single lexical token produced by {@link Lex}.
 *
 * <p>Tokens double as nodes of a singly linked list: {@code Lex} chains
 * them through {@link #next} to hold tokens that were read ahead.
 * Only the value field matching the token kind is meaningful.
 */
class Token {
    /** The following token in the look-ahead chain, or null if none. */
    public Token next;

    /** The token kind, as stored by the lexer after reading the token. */
    public int tokenId;

    /** Value of a character or integer literal. */
    public long longValue;

    /** Value of a floating-point literal. */
    public double doubleValue;

    /** Text of a string literal or the name of an identifier. */
    public String textValue;
}
26
27 public class Lex implements TokenId {
28 private int lastChar;
29 private StringBuffer textBuffer;
30 private Token currentToken;
31 private Token lookAheadTokens;
32
33 private String input;
34 private int position, maxlen, lineNumber;
35
/**
 * Constructs a lexical analyzer that reads tokens from the given text.
 *
 * @param s the source text to tokenize; must not be null because its
 *          length is read immediately.
 */
public Lex(String s) {
    // Source text and read cursor.
    this.input = s;
    this.maxlen = s.length();
    this.position = 0;
    this.lineNumber = 0;

    // No pushed-back character and no tokens read ahead yet.
    this.lastChar = -1;
    this.lookAheadTokens = null;

    // Reusable working storage.
    this.textBuffer = new StringBuffer();
    this.currentToken = new Token();
}
50
51 public int get() {
52 if (lookAheadTokens == null)
53 return get(currentToken);
54 else {
55 Token t;
56 currentToken = t = lookAheadTokens;
57 lookAheadTokens = lookAheadTokens.next;
58 return t.tokenId;
59 }
60 }
61
62 /**
63 * Looks at the next token.
64 */
65 public int lookAhead() {
66 return lookAhead(0);
67 }
68
69 public int lookAhead(int i) {
70 Token tk = lookAheadTokens;
71 if (tk == null) {
72 lookAheadTokens = tk = currentToken; // reuse an object!
73 tk.next = null;
74 get(tk);
75 }
76
77 for (; i-- > 0; tk = tk.next)
78 if (tk.next == null) {
79 Token tk2;
80 tk.next = tk2 = new Token();
81 get(tk2);
82 }
83
84 currentToken = tk;
85 return tk.tokenId;
86 }
87
88 public String getString() {
89 return currentToken.textValue;
90 }
91
92 public long getLong() {
93 return currentToken.longValue;
94 }
95
96 public double getDouble() {
97 return currentToken.doubleValue;
98 }
99
100 private int get(Token token) {
101 int t;
102 do {
103 t = readLine(token);
104 } while (t == '\n');
105 token.tokenId = t;
106 return t;
107 }
108
109 private int readLine(Token token) {
110 int c = getNextNonWhiteChar();
111 if (c < 0)
112 return c;
113 else if (c == '\n') {
114 ++lineNumber;
115 return '\n';
116 } else if (c == '\'')
117 return readCharConst(token);
118 else if (c == '"')
119 return readStringL(token);
120 else if ('0' <= c && c <= '9')
121 return readNumber(c, token);
122 else if (c == '.') {
123 c = getc();
124 if ('0' <= c && c <= '9') {
125 StringBuffer tbuf = textBuffer;
126 tbuf.setLength(0);
127 tbuf.append('.');
128 return readDouble(tbuf, c, token);
129 } else {
130 ungetc(c);
131 return readSeparator('.');
132 }
133 } else if ('A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || c == '_'
134 || c == '$')
135 return readIdentifier(c, token);
136 else
137 return readSeparator(c);
138 }
139
140 private int getNextNonWhiteChar() {
141 int c;
142 do {
143 c = getc();
144 if (c == '/') {
145 c = getc();
146 if (c == '/')
147 do {
148 c = getc();
149 } while (c != '\n' && c != '\r' && c != -1);
150 else if (c == '*')
151 while (true) {
152 c = getc();
153 if (c == -1)
154 break;
155 else if (c == '*')
156 if ((c = getc()) == '/') {
157 c = ' ';
158 break;
159 } else
160 ungetc(c);
161 }
162 else {
163 ungetc(c);
164 c = '/';
165 }
166 }
167 } while (isBlank(c));
168 return c;
169 }
170
171 private int readCharConst(Token token) {
172 int c;
173 int value = 0;
174 while ((c = getc()) != '\'')
175 if (c == '\\')
176 value = readEscapeChar();
177 else if (c < 0x20) {
178 if (c == '\n')
179 ++lineNumber;
180
181 return BadToken;
182 } else
183 value = c;
184
185 token.longValue = value;
186 return CharConstant;
187 }
188
189 private int readEscapeChar() {
190 int c = getc();
191 if (c == 'n')
192 c = '\n';
193 else if (c == 't')
194 c = '\t';
195 else if (c == 'r')
196 c = '\r';
197 else if (c == 'f')
198 c = '\f';
199 else if (c == '\n')
200 ++lineNumber;
201
202 return c;
203 }
204
205 private int readStringL(Token token) {
206 int c;
207 StringBuffer tbuf = textBuffer;
208 tbuf.setLength(0);
209 for (; ;) {
210 while ((c = getc()) != '"') {
211 if (c == '\\')
212 c = readEscapeChar();
213 else if (c == '\n' || c < 0) {
214 ++lineNumber;
215 return BadToken;
216 }
217
218 tbuf.append((char) c);
219 }
220
221 for (; ;) {
222 c = getc();
223 if (c == '\n')
224 ++lineNumber;
225 else if (!isBlank(c))
226 break;
227 }
228
229 if (c != '"') {
230 ungetc(c);
231 break;
232 }
233 }
234
235 token.textValue = tbuf.toString();
236 return StringL;
237 }
238
239 private int readNumber(int c, Token token) {
240 long value = 0;
241 int c2 = getc();
242 if (c == '0')
243 if (c2 == 'X' || c2 == 'x')
244 for (; ;) {
245 c = getc();
246 if ('0' <= c && c <= '9')
247 value = value * 16 + (long) (c - '0');
248 else if ('A' <= c && c <= 'F')
249 value = value * 16 + (long) (c - 'A' + 10);
250 else if ('a' <= c && c <= 'f')
251 value = value * 16 + (long) (c - 'a' + 10);
252 else {
253 token.longValue = value;
254 if (c == 'L' || c == 'l')
255 return LongConstant;
256 else {
257 ungetc(c);
258 return IntConstant;
259 }
260 }
261 }
262 else if ('0' <= c2 && c2 <= '7') {
263 value = c2 - '0';
264 for (; ;) {
265 c = getc();
266 if ('0' <= c && c <= '7')
267 value = value * 8 + (long) (c - '0');
268 else {
269 token.longValue = value;
270 if (c == 'L' || c == 'l')
271 return LongConstant;
272 else {
273 ungetc(c);
274 return IntConstant;
275 }
276 }
277 }
278 }
279
280 value = c - '0';
281 while ('0' <= c2 && c2 <= '9') {
282 value = value * 10 + c2 - '0';
283 c2 = getc();
284 }
285
286 token.longValue = value;
287 if (c2 == 'F' || c2 == 'f') {
288 token.doubleValue = (double) value;
289 return FloatConstant;
290 } else if (c2 == 'E' || c2 == 'e' || c2 == '.') {
291 StringBuffer tbuf = textBuffer;
292 tbuf.setLength(0);
293 tbuf.append(value);
294 return readDouble(tbuf, c2, token);
295 } else if (c2 == 'L' || c2 == 'l')
296 return LongConstant;
297 else {
298 ungetc(c2);
299 return IntConstant;
300 }
301 }
302
303 private int readDouble(StringBuffer sbuf, int c, Token token) {
304 if (c != 'E' && c != 'e') {
305 sbuf.append((char) c);
306 for (; ;) {
307 c = getc();
308 if ('0' <= c && c <= '9')
309 sbuf.append((char) c);
310 else
311 break;
312 }
313 }
314
315 if (c == 'E' || c == 'e') {
316 sbuf.append((char) c);
317 c = getc();
318 if (c == '+' || c == '-') {
319 sbuf.append((char) c);
320 c = getc();
321 }
322
323 while ('0' <= c && c <= '9') {
324 sbuf.append((char) c);
325 c = getc();
326 }
327 }
328
329 try {
330 token.doubleValue = Double.parseDouble(sbuf.toString());
331 } catch (NumberFormatException e) {
332 return BadToken;
333 }
334
335 if (c == 'F' || c == 'f')
336 return FloatConstant;
337 else {
338 ungetc(c);
339 return DoubleConstant;
340 }
341 }
342
343 // !"#$%&'( )*+,-./0 12345678 9:;<=>?
344 private static final int[] equalOps
345 = {NEQ, 0, 0, 0, MOD_E, AND_E, 0, 0,
346 0, MUL_E, PLUS_E, 0, MINUS_E, 0, DIV_E, 0,
347 0, 0, 0, 0, 0, 0, 0, 0,
348 0, 0, 0, LE, EQ, GE, 0};
349
350 private int readSeparator(int c) {
351 int c2, c3;
352 if ('!' <= c && c <= '?') {
353 int t = equalOps[c - '!'];
354 if (t == 0)
355 return c;
356 else {
357 c2 = getc();
358 if (c == c2)
359 switch (c) {
360 case '=':
361 return EQ;
362 case '+':
363 return PLUSPLUS;
364 case '-':
365 return MINUSMINUS;
366 case '&':
367 return ANDAND;
368 case '<':
369 c3 = getc();
370 if (c3 == '=')
371 return LSHIFT_E;
372 else {
373 ungetc(c3);
374 return LSHIFT;
375 }
376 case '>':
377 c3 = getc();
378 if (c3 == '=')
379 return RSHIFT_E;
380 else if (c3 == '>') {
381 c3 = getc();
382 if (c3 == '=')
383 return ARSHIFT_E;
384 else {
385 ungetc(c3);
386 return ARSHIFT;
387 }
388 } else {
389 ungetc(c3);
390 return RSHIFT;
391 }
392 default :
393 break;
394 }
395 else if (c2 == '=')
396 return t;
397 }
398 } else if (c == '^') {
399 c2 = getc();
400 if (c2 == '=')
401 return EXOR_E;
402 } else if (c == '|') {
403 c2 = getc();
404 if (c2 == '=')
405 return OR_E;
406 else if (c2 == '|')
407 return OROR;
408 } else
409 return c;
410
411 ungetc(c2);
412 return c;
413 }
414
415 private int readIdentifier(int c, Token token) {
416 StringBuffer tbuf = textBuffer;
417 tbuf.setLength(0);
418
419 do {
420 tbuf.append((char) c);
421 c = getc();
422 } while ('A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || c == '_'
423 || c == '$' || '0' <= c && c <= '9');
424
425 ungetc(c);
426
427 String name = tbuf.toString();
428 int t = ktable.lookup(name);
429 if (t >= 0)
430 return t;
431 else {
432 /* tbuf.toString() is executed quickly since it does not
433 * need memory copy. Using a hand-written extensible
434 * byte-array class instead of StringBuffer is not a good idea
435 * for execution speed. Converting a byte array to a String
436 * object is very slow. Using an extensible char array
437 * might be OK.
438 */
439 token.textValue = name;
440 return Identifier;
441 }
442 }
443
/**
 * Maps reserved words to their token ids; consulted by
 * readIdentifier() to tell keywords from identifiers.
 */
private static final KeywordTable ktable = new KeywordTable();

static {
    ktable.append("abstract", ABSTRACT);
    ktable.append("boolean", BOOLEAN);
    ktable.append("break", BREAK);
    ktable.append("byte", BYTE);
    ktable.append("case", CASE);
    ktable.append("catch", CATCH);
    ktable.append("char", CHAR);
    ktable.append("class", CLASS);
    ktable.append("const", CONST);
    ktable.append("continue", CONTINUE);
    ktable.append("default", DEFAULT);
    ktable.append("do", DO);
    ktable.append("double", DOUBLE);
    ktable.append("else", ELSE);
    ktable.append("extends", EXTENDS);
    ktable.append("false", FALSE);
    ktable.append("final", FINAL);
    ktable.append("finally", FINALLY);
    ktable.append("float", FLOAT);
    ktable.append("for", FOR);
    ktable.append("goto", GOTO);
    ktable.append("if", IF);
    ktable.append("implements", IMPLEMENTS);
    ktable.append("import", IMPORT);
    ktable.append("instanceof", INSTANCEOF);
    ktable.append("int", INT);
    ktable.append("interface", INTERFACE);
    ktable.append("long", LONG);
    ktable.append("native", NATIVE);
    ktable.append("new", NEW);
    ktable.append("null", NULL);
    ktable.append("package", PACKAGE);
    ktable.append("private", PRIVATE);
    ktable.append("protected", PROTECTED);
    ktable.append("public", PUBLIC);
    ktable.append("return", RETURN);
    ktable.append("short", SHORT);
    ktable.append("static", STATIC);
    // The Java keyword is "strictfp"; it was registered as "strict",
    // which left "strictfp" lexed as an identifier and wrongly
    // reserved the valid identifier "strict".
    ktable.append("strictfp", STRICT);
    ktable.append("super", SUPER);
    ktable.append("switch", SWITCH);
    ktable.append("synchronized", SYNCHRONIZED);
    ktable.append("this", THIS);
    ktable.append("throw", THROW);
    ktable.append("throws", THROWS);
    ktable.append("transient", TRANSIENT);
    ktable.append("true", TRUE);
    ktable.append("try", TRY);
    ktable.append("void", VOID);
    ktable.append("volatile", VOLATILE);
    ktable.append("while", WHILE);
}
499
500 private static boolean isBlank(int c) {
501 return c == ' ' || c == '\t' || c == '\f' || c == '\r'
502 || c == '\n';
503 }
504
505 private static boolean isDigit(int c) {
506 return '0' <= c && c <= '9';
507 }
508
509 private void ungetc(int c) {
510 lastChar = c;
511 }
512
513 private int getc() {
514 if (lastChar < 0)
515 if (position < maxlen)
516 return input.charAt(position++);
517 else
518 return -1;
519 else {
520 int c = lastChar;
521 lastChar = -1;
522 return c;
523 }
524 }
525 }
526