2009/05/20 - Apache Shale has been retired.
For more information, please explore the Attic.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 package org.apache.shale.clay.parser;
22
23 import java.util.ArrayList;
24 import java.util.Iterator;
25 import java.util.Map;
26
27 import org.apache.commons.logging.Log;
28 import org.apache.commons.logging.LogFactory;
29 import org.apache.shale.util.Messages;
30
31 /***
32 * <p>
33 * Tokenizes a portion of the document for attributes. The complete document is
34 * passed by reference and new {@link Token} offsets are created for the name
35 * and value of the discovered attributes.
36 * </p>
37 */
38
39 public class AttributeTokenizer {
40
41 /***
42 * <p>
43 * Message resources for this class.
44 * </p>
45 */
46 private static Messages messages = new Messages(
47 "org.apache.shale.clay.Bundle", AttributeTokenizer.class
48 .getClassLoader());
49
50 /***
51 * <p>
52 * Common logging utility.
53 * </p>
54 */
55 private static Log log;
56 static {
57 log = LogFactory.getLog(AttributeTokenizer.class);
58 }
59
60 /***
61 * <p>
62 * Internal document buffer.
63 * </p>
64 */
65 private StringBuffer buffer = null;
66
67 /***
68 * <p>
69 * Beginning offset of the starting node.
70 * </p>
71 */
72 private int beginOffset = 0;
73
74 /***
75 * <p>
76 * Ending offset of the starting node.
77 * </p>
78 */
79 private int endOffset = 0;
80
81 /***
82 * <p>Line number the target node is located in.</p>
83 */
84 private int lineNumber = 0;
85
86 /***
87 * <p>Line begining document offset where the target node is located.</p>
88 */
89 private int lineBeginOffset = 0;
90
91
92 /***
93 * <p>
94 * Overloaded constructor that is passed the complete document and the
95 * starting and ending offset of the node body within the document.
96 * </p>
97 *
98 * @param buffer document
99 * @param beginOffset start index of node body in the document
100 * @param endOffset end index of node body in the document
101 * @param lineNumber line number of the node within the document
102 * @param lineBeginOffset index in the document that the line begins
103 */
104 public AttributeTokenizer(StringBuffer buffer, int beginOffset,
105 int endOffset, int lineNumber, int lineBeginOffset) {
106 this.buffer = buffer;
107 this.beginOffset = beginOffset;
108 this.endOffset = endOffset;
109 this.lineBeginOffset = lineBeginOffset;
110 this.lineNumber = lineNumber;
111
112 }
113
114 /***
115 * <p>
116 * Inner class implementing the {@link Token} interface. This class will
117 * define an attribute's key and value offsets
118 * </p>
119 */
120 private class TokenOffset implements Token {
121 /***
122 * <p>Starting offset of the token.</p>
123 */
124 private int beginOffset = 0;
125
126 /***
127 * <p>Ending offset of the token.</p>
128 */
129 private int endOffset = 0;
130
131 /***
132 * @param beginOffset token start index
133 * @param endOffset token end index
134 */
135 public TokenOffset(int beginOffset, int endOffset) {
136 this.beginOffset = beginOffset;
137 this.endOffset = endOffset;
138 }
139
140 /***
141 * @return starting offset of the token in the document
142 */
143 public int getBeginOffset() {
144 return beginOffset;
145 }
146
147 /***
148 * @return ending offset of the token in the document
149 */
150 public int getEndOffset() {
151 return endOffset;
152 }
153
154 /***
155 * @return parsed document
156 */
157 public StringBuffer getDocument() {
158 return buffer;
159 }
160
161 /***
162 * @return token text between the beginOffset and endOffset
163 */
164 public String getRawText() {
165 String pickel = null;
166 try {
167 pickel = buffer.substring(beginOffset, endOffset);
168 } catch (RuntimeException e) {
169 log.error(toString(), e);
170 throw e;
171 }
172 return pickel;
173 }
174
175 /***
176 * @return line number the token is found on within the document
177 */
178 public int getLineNumber() {
179 return lineNumber;
180 }
181
182 /***
183 * @return offset within the document that the token line is found
184 */
185 public int getLineBeginOffset() {
186 return lineBeginOffset;
187 }
188
189 /***
190 * @return description of the token
191 */
192 public String toString() {
193 return messages.getMessage("node.token.range",
194 new Object[] {
195 new Integer(beginOffset),
196 new Integer(endOffset),
197 new Integer(lineNumber),
198 new Integer(lineBeginOffset)});
199 }
200
201 }
202
203 /***
204 * <p>
205 * This inner class implements the <code>Map.Entry</code> interfaces. It
206 * holds a reference to the key and value parts of an attribute. Both the
207 * key and value attributes are {@link Token} instances.
208 * </p>
209 */
210 private class AttributeEntry implements Map.Entry {
211 /***
212 * <p>Token offset of the attribute key.</p>
213 */
214 private TokenOffset key = null;
215
216 /***
217 * <p>Token offset of the attribute value.</p>
218 */
219 private TokenOffset value = null;
220
221 /***
222 * <p>
223 * Overloaded constructor is passed a {@link Token} for the key and
224 * value attributes.
225 * </p>
226 *
227 * @param key token key offset
228 * @param value token value offset
229 */
230 public AttributeEntry(TokenOffset key, TokenOffset value) {
231 this.key = key;
232 this.value = value;
233 }
234
235 /***
236 * <p>
237 * Returns the attribute name {@link Token} offset.
238 * </p>
239 *
240 * @return TokenOffset for the attribute key
241 */
242 public Object getKey() {
243 return key;
244 }
245
246 /***
247 * <p>
248 * Returns the attribute value {@link Token} offset.
249 * </p>
250 *
251 * @return TokenOffset of the attribute value
252 */
253 public Object getValue() {
254 return value;
255 }
256
257 /***
258 * <p>
259 * Sets the attribute value {@link Token} offset.
260 * </p>
261 *
262 * @param value TokenOffset value
263 * @return value token offset
264 */
265 public Object setValue(Object value) {
266 this.value = (TokenOffset) value;
267 return value;
268 }
269
270 /***
271 * @return description of the attribute
272 */
273 public String toString() {
274 StringBuffer buff = new StringBuffer();
275 TokenOffset key = (TokenOffset) getKey();
276 TokenOffset value = (TokenOffset) getValue();
277
278 buff.append("key: [").append((key != null ? key.getRawText() : null))
279 .append("]\n").append("value: [")
280 .append((value != null ? value.getRawText() : null))
281 .append("]");
282
283 return buff.toString();
284 }
285 }
286
287 /***
288 * <p>
289 * The current offset within the <code>beginOffset</code> and
290 * <code>endOffset</code> of the Node within the document.
291 */
292 private int currOffset = 0;
293
294 /***
295 * <p>
296 * Builds an <code>ArrayList</code> of
297 * {@link AttributeTokenizer.AttributeEntry} instances identifying
298 * name and value pairs.
299 * </p>
300 *
301 * @param tokenIndex populated attribute offset of a beging node body
302 */
303 protected synchronized void parse(ArrayList tokenIndex) {
304 currOffset = beginOffset;
305
306 if (log.isDebugEnabled()) {
307 log.debug(messages.getMessage("attribute.range", new Object[] {
308 new Integer(beginOffset), new Integer(endOffset) }));
309 }
310
311 while (currOffset < endOffset) {
312
313 int startOffset = currOffset;
314 while (Character.isWhitespace(buffer.charAt(currOffset))) {
315 currOffset++;
316 }
317
318 if (log.isDebugEnabled()) {
319 if (currOffset > startOffset) {
320 log.debug(messages
321 .getMessage("attribute.skip.space",
322 new Object[] { new Integer(currOffset
323 - startOffset) }));
324 }
325 }
326
327
328 TokenOffset key = nextToken(currOffset, " ", "=", true);
329 if (key == null) {
330 break;
331 }
332
333 boolean skipValue = false;
334 currOffset++;
335 String delim = " ";
336 String otherDelim = "\"";
337 if (currOffset < buffer.length()
338 && buffer.charAt(currOffset) == '"') {
339
340
341 delim = "\"";
342 otherDelim = " ";
343 currOffset++;
344 } else if (currOffset < buffer.length() && currOffset > 0
345 && buffer.charAt(currOffset - 1) == ' ') {
346
347
348 currOffset--;
349 skipValue = true;
350 }
351
352 TokenOffset value = null;
353 if (!skipValue) {
354 value = nextToken(currOffset, delim, otherDelim, false);
355 }
356
357 tokenIndex.add(new AttributeEntry(key, value));
358
359 currOffset++;
360 key = null;
361 value = null;
362 }
363
364 if (log.isDebugEnabled()) {
365 log.debug(messages.getMessage("attributes.total.found",
366 new Object[] { new Integer(tokenIndex.size()) }));
367 }
368
369 }
370
371 /***
372 * <p>
373 * Returns the next {@link Token} given an <code>startOffset</code> and a
374 * <code>endDelim</code>.
375 * </p>
376 *
377 * @param startOffset begining offset in the document
378 * @param endDelim primary token delimiter
379 * @param otherDelim secondary token delimiter
380 * @param isKey looking for an attribute name not a value
381 * @return next token offset
382 */
383 protected TokenOffset nextToken(int startOffset, String endDelim, String otherDelim, boolean isKey) {
384
385
386
387
388
389 if (isKey) {
390 int offsetEnd = Math.min(buffer.indexOf(endDelim, startOffset), endOffset);
391 int offsetOther = Math.min(buffer.indexOf(otherDelim, startOffset), endOffset);
392 if (offsetEnd == -1) {
393 currOffset = offsetOther;
394 } else if (offsetOther == -1) {
395 currOffset = offsetOther;
396 } else {
397 currOffset = Math.min(offsetEnd, offsetOther);
398 }
399 } else {
400 currOffset = Math.min(buffer.indexOf(endDelim, startOffset), endOffset);
401
402 if (currOffset == -1) {
403 currOffset = Math.min(buffer.indexOf(otherDelim, startOffset), endOffset);
404 }
405 }
406
407
408 if (currOffset == -1) {
409 currOffset = endOffset;
410 }
411
412
413
414 if (currOffset > -1 && currOffset <= endOffset && startOffset < currOffset) {
415
416 int e = currOffset;
417
418 if (buffer.charAt(e - 1) == '"'
419 || (Character.isWhitespace(buffer.charAt(e - 1))
420 && buffer.charAt(e - 1) != ' ')) {
421 --e;
422 }
423
424
425 TokenOffset value = new TokenOffset(startOffset, e);
426
427 if (log.isDebugEnabled()) {
428 log.debug(messages.getMessage("attribute.token.range",
429 new Object[] { new Integer(startOffset),
430 new Integer(e) }));
431 }
432
433 return value;
434 }
435
436 return null;
437 }
438
439 /***
440 * <p>Inner class implementing the <code>Iterator</code>
441 * interface. This class is a decorator of a <code>ArrayList</code>
442 * of nodes.
443 * </p>
444 */
445 private class TokenIterator implements Iterator {
446
447 /***
448 * <p>All the attribute entry tokens in the node body.</p>
449 */
450 private ArrayList tokenIndex = null;
451
452 /***
453 * <p>Internal <code>tokenIndex</code> iterator.</p>
454 */
455 private Iterator ti = null;
456
457 /***
458 * <p>Constructor parses the node body into a collection of
459 * {@link AttributeTokenizer.AttributeEntry}.
460 * </p>
461 */
462 public TokenIterator() {
463 tokenIndex = new ArrayList();
464 parse(tokenIndex);
465 ti = tokenIndex.iterator();
466 }
467
468 /***
469 * <p>Retuns <code>true</code> if there are more
470 * {@link AttributeTokenizer.AttributeEntry} in the collection.
471 * </p>
472 *
473 * @return <code>true</code> if there are more tokens
474 */
475 public boolean hasNext() {
476 return ti.hasNext();
477 }
478
479 /***
480 * <p>Retuns the next {@link AttributeTokenizer.AttributeEntry}
481 * in the collection.
482 * </p>
483 *
484 * @return returns the next token
485 */
486 public Object next() {
487 Map.Entry attribute = (Map.Entry) ti.next();
488 return attribute;
489 }
490
491 /***
492 * <p>This method is not implemented.</p>
493 *
494 * @deprecated
495 */
496 public void remove() {
497
498 }
499 }
500
501 /***
502 * <p>Returns an instance of an <code>Iterator</code> that
503 * will enumerate attributes in the document where the attributes
504 * are represented by a {@link AttributeTokenizer.AttributeEntry} instance.
505 * </p>
506 *
507 * @return returns a {@link AttributeTokenizer.TokenIterator} iterator.
508 */
509 public Iterator iterator() {
510 return new TokenIterator();
511 }
512
513 }