2009/05/20 - Apache Shale has been retired.
For more information, please explore the Attic.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 package org.apache.shale.clay.parser;
22
23 import java.util.Iterator;
24 import java.util.List;
25 import java.util.Map;
26 import java.util.TreeMap;
27
28 import org.apache.commons.logging.Log;
29 import org.apache.commons.logging.LogFactory;
30 import org.apache.shale.util.Messages;
31
32 /***
33 * <p>Parses the document into a tree of nodes using the
34 * {@link NodeTokenizer}. Nodes are defined by a token or
35 * offset range in the document, {@link Token}. Attributes in beginning
36 * nodes are also parsed into token offsets by the {@link AttributeTokenizer}.
37 * <br><br>
38 * A document tree is built representing nodes in the target document. The
39 * document can be a HTML fragment that is not well-formed or an XML
40 * fragment of a XHTML document.
41 * </p>
42 *
43 */
44 public class Parser {
45
46 /***
47 * <p>Common logging utility instance.</p>
48 */
49 private static Log log;
50 static {
51 log = LogFactory.getLog(Parser.class);
52 }
53
54 /***
55 * <p>
56 * Message resources for this class.
57 * </p>
58 */
59 private static Messages messages = new Messages(
60 "org.apache.shale.clay.Bundle", NodeTokenizer.class
61 .getClassLoader());
62
63
64 /***
65 * <p>This inner class is a subclass of a <code>TreeMap</code>.
66 * This wrapper handles the attribute key and value parts represented
67 * as a {@link Token} offsets. The value part of the attribute will be
68 * represented by a offset until is accessed using the <code>get</code>
69 * method to avoid creating a bunch of <code>String</code> instances.
70 * </p>
71 *
72 */
73 private class Attributes extends TreeMap implements Map {
74
75 /***
76 * <p>Unique serial id.</p>
77 */
78 private static final long serialVersionUID = 3906654111096190000L;
79
80 /***
81 * <p> Returns the value of the attribute using a offset
82 * range within the parsed document. The parameter <code>key</code>
83 * value is converted into a case neutral value.
84 * </p>
85 *
86 * @param key attribute name
87 * @return attribute value
88 */
89 public Object get(Object key) {
90 String tmp = (String) key;
91 if (tmp != null) {
92 tmp = tmp.toLowerCase();
93 }
94
95 Token e = (Token) super.get(tmp);
96 return (e != null) ? e.getRawText() : null;
97 }
98
99 /***
100 * <p>This method is overridden and not implemented. The
101 * <code>add</code> method should be used for this specific
102 * implementation. The <code>value</code> attribute's internal
103 * type realizes {@link Token}, but the <code>get</code> method
104 * will return a <code>String</code> object.
105 * </p>
106 *
107 * @deprecated
108 * @param key not supported
109 * @param value not supported
110 * @return not supported
111 */
112 public Object put(Object key, Object value) {
113
114 return null;
115 }
116
117 /***
118 * <p>Adds a attribute to the collection. The attribute is
119 * represented by two {@link Token} object for its key and
120 * value parts.
121 * </p>
122 *
123 * @param e token to be added
124 *
125 */
126 public void add(Map.Entry e) {
127
128 String key = ((Token) e.getKey()).getRawText();
129 if (key != null) {
130 key = key.toLowerCase();
131 }
132
133 super.put(key, e.getValue());
134 }
135
136 /***
137 * <p>This method is overridden to convert the key into a neutral
138 * case so that the <code>Map</code> access method will be case
139 * insensitive.</p>
140 *
141 * @param key attribute name
142 * @return <code>true</code> if attribute exists
143 */
144 public boolean containsKey(Object key) {
145 String tmp = (String) key;
146 if (tmp != null) {
147 tmp = tmp.toLowerCase();
148 }
149
150 return super.containsKey(tmp);
151 }
152
153
154
155 }
156
157 /***
158 * <p>This array of HTML tags can have optional ending tags.</p>
159 */
160 private static final String[] OPTIONAL_ENDING_TAG = {"TR", "TH", "TD", "LI", "DT", "DD", "LH", "OPTION"};
161 /***
162 * <p>This array of parent tags is cross referenced by the
163 * <code>OPTIONAL_ENDING_TAG</code> array.</p>
164 */
165 private static final String[][] TAG_PARENTS = {{"TABLE", "TBODY"}, {"TR"}, {"TR"},
166 {"OL", "UL"}, {"DL"}, {"DL"}, {"DL"}, {"SELECT"}};
167
168 /***
169 * <p>
170 * Determines if a HTML nodeName is a type of tag that can optionally have a
171 * ending tag.
172 * </p>
173 *
174 * @param nodeName the name of the html node
175 * @return <code>true</code> if the nodeName is in the
176 * <code>OPTIONAL-ENDING_TAG</code> array; otherwise, <code>false</code> is returned
177 */
178 protected boolean isOptionalEndingTag(String nodeName) {
179 if (nodeName != null) {
180 for (int i = 0; i < OPTIONAL_ENDING_TAG.length; i++) {
181 if (OPTIONAL_ENDING_TAG[i].equalsIgnoreCase(nodeName)) {
182 return true;
183 }
184 }
185 }
186
187 return false;
188 }
189
190 /***
191 * <p>
192 * Checks to see if a optional ending tag has a valid parent. This is use to
193 * detect a implicit ending tag
194 * </p>
195 *
196 * @param nodeName of the optional ending tag
197 * @param parentNodeName name of the parent
198 * @return <code>true</code> if the parentNodeName is a valid parent for
199 * the nodeName; otherwise, a <code>false</code> value is returned
200 */
201 protected boolean isValidOptionalEndingTagParent(String nodeName,
202 String parentNodeName) {
203 if (nodeName != null && parentNodeName != null) {
204 for (int i = 0; i < OPTIONAL_ENDING_TAG.length; i++) {
205 if (OPTIONAL_ENDING_TAG[i].equalsIgnoreCase(nodeName)) {
206 for (int j = 0; j < TAG_PARENTS[i].length; j++) {
207 if (TAG_PARENTS[i][j].equalsIgnoreCase(parentNodeName)) {
208 return true;
209 }
210 }
211 break;
212 }
213 }
214 }
215 return false;
216 }
217
218 /***
219 * @param current top of the stack
220 * @param node ending node
221 * @return begining node
222 */
223 protected Node findBeginingNode(Node current, Node node) {
224
225 pop: while (true) {
226 if (current == null) {
227 break pop;
228 }
229
230 if (isNodeNameEqual(current, node)) {
231
232
233
234 current.setWellFormed(true);
235
236
237
238 current = current.getParent();
239
240 break pop;
241 }
242
243 if (isOptionalEndingTag(current.getName())) {
244 current.setWellFormed(true);
245 }
246
247 if (current.getParent() == null) {
248 throw new RuntimeException(
249 messages.getMessage("parser.unmatched.endtoken",
250 new Object[] {node.getToken(), node.getToken().getRawText()}));
251 }
252
253 current = current.getParent();
254
255 }
256
257 return current;
258
259 }
260
261 /***
262 * <p>Starting remove block delimiter. It must be a self contained commment.<p>
263 */
264 private static final String BEGIN_REMOVE_TOKEN = "<!-- ### clay:remove ### -->";
265
266 /***
267 * <p>Ending remove block delimiter.</p>
268 */
269 private static final String END_REMOVE_TOKEN = "<!-- ### /clay:remove ### -->";
270
271 /***
272 * <p>The start of the comment token used to override the template
273 * encoding type.</p>
274 */
275 public static final String START_CHARSET_TOKEN = "<!-- ### clay:page ";
276
277 /***
278 * <p>The end of the comment token used to override the template
279 * encoding type.</p>
280 */
281 public static final String END_CHARSET_TOKEN = "/### -->";
282
283 /***
284 * <p>
285 * Parse a document fragment into graphs of {@link Node}. The resulting
286 * type is a list because the fragment might not be well-formed.
287 * </p>
288 *
289 * @param document input source
290 * @return collection of {@link Node}
291 */
292 public List parse(StringBuffer document) {
293
294 boolean isWithinRemoveBlock = false;
295 Node root = new Node(null);
296 Node current = root;
297 current.setName("namingContainer");
298 root.setWellFormed(true);
299
300 NodeTokenizer t = new NodeTokenizer(document);
301 Iterator i = t.iterator();
302 next: while (i.hasNext()) {
303 Token token = (Token) i.next();
304 Node node = buildNode(token);
305
306
307
308 if (node.isComment() && node.isStart() && node.isEnd()) {
309
310
311 if (!isWithinRemoveBlock && node.getToken().getRawText().startsWith(START_CHARSET_TOKEN)) {
312 continue next;
313 }
314
315 if (isWithinRemoveBlock && node.getToken().getRawText().equals(END_REMOVE_TOKEN)) {
316
317 isWithinRemoveBlock = false;
318 continue next;
319
320 } else if (node.getToken().getRawText().equals(BEGIN_REMOVE_TOKEN)) {
321
322 isWithinRemoveBlock = true;
323 continue next;
324
325 } else if (isWithinRemoveBlock) {
326 continue next;
327 }
328 } else if (isWithinRemoveBlock) {
329 continue next;
330 }
331
332
333
334
335 if ((node.isComment() || node.isCdata()) && node.isStart()) {
336
337
338 boolean isCommentBlock = node.isComment();
339 boolean isCdataBlock = node.isCdata();
340
341
342 if (!node.isEnd()) {
343
344 trash: while (i.hasNext()) {
345 token = (Token) i.next();
346 Node bodyNode = buildNode(token);
347
348 if (((bodyNode.isComment() && isCommentBlock)
349 || (bodyNode.isCdata() && isCdataBlock)) && bodyNode.isEnd()) {
350 node.addChild(bodyNode);
351 node.setEnd(true);
352 node.setWellFormed(true);
353 break trash;
354 } else {
355
356 node.setComment(isCommentBlock);
357 node.setCdata(isCdataBlock);
358 node.setWellFormed(true);
359 node.addChild(bodyNode);
360 }
361 }
362
363 }
364
365 current.addChild(node);
366 continue next;
367
368 }
369
370
371 if (!node.isStart() && node.isEnd()) {
372
373 current = findBeginingNode(current, node);
374
375 } else if (node.isStart() && !node.isEnd()) {
376
377
378
379
380
381
382
383
384
385
386
387 if (isOptionalEndingTag(current.getName())
388 && current.isStart() && !current.isEnd()
389 && current.getParent() != null
390 && isValidOptionalEndingTagParent(node.getName(), current.getParent().getName())) {
391
392 current.setWellFormed(true);
393 current.getParent().addChild(node);
394 current = node;
395
396 } else {
397
398
399
400 if (isOptionalEndingTag(node.getName())
401 && isValidOptionalEndingTagParent(current.getName(), node.getName())) {
402
403 current = this.findBeginingNode(current, node);
404 current.addChild(node);
405 current = node;
406
407 } else {
408
409
410 current.addChild(node);
411 current = node;
412 }
413 }
414 } else {
415 if (current != null) {
416 current.addChild(node);
417 } else {
418 current = node;
419 }
420 }
421
422 }
423
424 t = null;
425 i = null;
426
427 simpleWellFormedCheck(root);
428
429 return root.getChildren();
430
431 }
432
433 /***
434 * <p>A simple check to make sure that all nodes have been terminated including
435 * tags with optional ending tags.</p>
436 *
437 * @param node root markup
438 */
439 private void simpleWellFormedCheck(Node node) {
440 if (node.getName() != null && !node.isWellFormed()) {
441 throw new RuntimeException(
442 messages.getMessage("parser.unmatched.begintoken",
443 new Object[] {node.getToken(), node.getToken().getRawText()}));
444 }
445
446 if (!node.isComment() && !node.isCdata()) {
447 Iterator ci = node.getChildren().iterator();
448 while (ci.hasNext()) {
449 simpleWellFormedCheck((Node) (ci.next()));
450 }
451 }
452 }
453
454 /***
455 * <p>Compares two {@link Node} instances by <code>name</code>.
456 * This method is used to match a beginning tag with an ending tag
457 * while building the document stack. Returns <code>true</code> if
458 * the node <code>name</code> properties are the same.
459 * </p>
460 *
461 * @param node1 first node
462 * @param node2 secnod node
463 * @return <code>true</code> if they are the same
464 *
465 */
466 protected boolean isNodeNameEqual(Node node1, Node node2) {
467 boolean f = false;
468
469 if (node1 != null && node2 != null) {
470 if (node1.getName() != null && node2.getName() != null) {
471 if (node1.getName().equalsIgnoreCase(node2.getName())) {
472 if (node1.getQname() == null && node2.getQname() == null) {
473 f = true;
474 } else if (node1.getQname() != null
475 && node2.getQname() != null
476 && node1.getQname().equalsIgnoreCase(node2.getQname())) {
477 f = true;
478 }
479 }
480 }
481 }
482
483 if (log.isDebugEnabled()) {
484 StringBuffer msg = new StringBuffer();
485 msg.append("matching nodes (").append(node1.getName()).append(
486 (f ? "==" : "!=")).append(node2.getName()).append(")");
487 log.debug(msg.toString());
488 }
489 return f;
490 }
491
492 /***
493 * <p>Table of self terminating Html tags.</p>
494 */
495 private static final String[] SELF_TERMINATING = {"META", "LINK", "HR",
496 "BASEFONT", "IMG", "PARAM", "BR", "AREA", "INPUT", "ISINDEX",
497 "BASE"};
498
499 /***
500 * <p>
501 * Checks to see if the nodeName is within the <code>SELF_TERMINATING</code>
502 * table of values.
503 * </p>
504 *
505 * @param nodeName to check for self termination
506 * @return <code>true</code> if is self terminating otherwise
507 * <code>false</code>
508 */
509 protected boolean isSelfTerminating(String nodeName) {
510
511 if (nodeName != null) {
512 for (int i = 0; i < SELF_TERMINATING.length; i++) {
513 if (SELF_TERMINATING[i].equalsIgnoreCase(nodeName)) {
514 return true;
515 }
516 }
517 }
518
519 return false;
520 }
521
522 /***
523 * <p>This is a factory method that builds a {@link Node} from a
524 * {@link Token}.
525 * </p>
526 *
527 * @param token node offset in the document
528 * @return node that describes the structure of the token
529 */
530 protected Node buildNode(Token token) {
531
532 Node node = new Node(token);
533
534 discoverNodeShape(node);
535 discoverNodeName(node);
536 discoverNodeAttributes(node);
537 discoverNodeOverrides(node);
538
539 return node;
540 }
541
542
543 /***
544 * <p>Declare an array of {@link Parser.Rule}s that validate an ending {@link Token}.</p>
545 */
546 private static final Rule[] END_TAG_RULES = {new Rule('<', true, 0, true),
547 new Rule('/', true, 1, true),
548 new Rule('>', false, -1, true)};
549
550 /***
551 * <p>Declare an array of {@link Parser.Rule}s that validate self terminating {@link Token}.</p>
552 */
553 private static final Rule[] SELF_TERM_TAG_RULES = {new Rule('<', true, 0, true),
554 new Rule('/', false, -2, true),
555 new Rule('>', false, -1, true)};
556 /***
557 * <p>Declare an array of {@link Parser.Rule}s that validate self contained comment {@link Token}.</p>
558 */
559 private static final Rule[] SELF_CONTAINED_COMMENT_RULES = {new Rule('<', true, 0, true),
560 new Rule('!', true, 1, true),
561 new Rule('-', true, 2, true),
562 new Rule('-', true, 3, true),
563 new Rule('>', false, -1, true),
564 new Rule('-', false, -2, true),
565 new Rule('-', false, -3, true)};
566
567
568 /***
569 * <p>Declare an array of {@link Parser.Rule}s that validate self contained CDATA {@link Token}.</p>
570 */
571 private static final Rule[] SELF_CONTAINED_CDATA_RULES = {new Rule('<', true, 0, true),
572 new Rule('!', true, 1, true),
573 new Rule('[', true, 2, true),
574 new Rule('C', true, 3, true),
575 new Rule('D', true, 4, true),
576 new Rule('A', true, 5, true),
577 new Rule('T', true, 6, true),
578 new Rule('A', true, 7, true),
579 new Rule('[', true, 8, true),
580 new Rule('>', false, -1, true),
581 new Rule(']', false, -2, true),
582 new Rule(']', false, -3, true)};
583
584 /***
585 * <p>Declare an array of {@link Parser.Rule}s that validate a begin CDATA {@link Token}.</p>
586 */
587 public static final Rule[] BEGIN_CDATA_RULES = {new Rule('<', true, 0, true),
588 new Rule('!', true, 1, true),
589 new Rule('[', true, 2, true),
590 new Rule('C', true, 3, true),
591 new Rule('D', true, 4, true),
592 new Rule('A', true, 5, true),
593 new Rule('T', true, 6, true),
594 new Rule('A', true, 7, true),
595 new Rule('[', true, 8, true)};
596
597 /***
598 * <p>Declare an array of {@link Parser.Rule}s that validate an end CDATA {@link Token}.</p>
599 */
600 public static final Rule[] END_CDATA_RULES = {new Rule('>', false, -1, true),
601 new Rule(']', false, -2, true),
602 new Rule(']', false, -3, true)};
603
604
605 /***
606 * <p>Declare an array of {@link Parser.Rule}s that validate a begin comment {@link Token}.</p>
607 */
608 public static final Rule[] BEGIN_COMMENT_TAG_RULES = {new Rule('<', true, 0, true),
609 new Rule('!', true, 1, true),
610 new Rule('-', true, 2, true),
611 new Rule('-', true, 3, true)};
612
613 /***
614 * <p>Declare an array of {@link Parser.Rule}s that validate an end comment {@link Token}.</p>
615 */
616 public static final Rule[] END_COMMENT_TAG_RULES = {new Rule('>', false, -1, true),
617 new Rule('-', false, -2, true),
618 new Rule('-', false, -3, true)};
619
620 /***
621 * <p>Declare an array of {@link Parser.Rule}s that validate document type {@link Token}.</p>
622 */
623 public static final Rule[] DOCTYPE_TAG_RULES = {new Rule('<', true, 0, true),
624 new Rule('!', true, 1, true),
625 new Rule('>', false, -1, true)};
626
627 /***
628 * <p>Declare an array of {@link Parser.Rule}s that validate a begining {@link Token}.</p>
629 */
630 public static final Rule[] BEGIN_TAG_RULES = {new Rule('<', true, 0, true),
631 new Rule('-', true, 1, false),
632 new Rule('/', true, 1, false),
633 new Rule('?', true, 1, false),
634 new Rule('%', true, 1, false),
635 new Rule('>', false, -1, true)};
636
637
638 /***
639 * <p>Declare an array of {@link Parser.Rule}s that validate JSP block {@link Token}.</p>
640 */
641 private static final Rule[] JSP_RULES = {new Rule('<', true, 0, true),
642 new Rule('%', true, 1, true),
643 new Rule('>', false, -1, true),
644 new Rule('%', false, -2, true)};
645
646
647 /***
648 * <p>Declare an array of {@link Parser.Shape}s further defined by {@link Parser.Rule}s
649 * that are used to determine the type of {@link Node} the {@link Token} defines.</p>
650 */
651 private static final Shape[] NODE_SHAPES = {
652 new Shape(true, true, false, true, SELF_CONTAINED_CDATA_RULES),
653 new Shape(true, false, false, true, BEGIN_CDATA_RULES),
654 new Shape(false, true, false, true, END_CDATA_RULES),
655 new Shape(false, true, false, false, END_TAG_RULES),
656 new Shape(true, true, false, false, SELF_TERM_TAG_RULES),
657 new Shape(true, true, true, false, SELF_CONTAINED_COMMENT_RULES),
658 new Shape(true, false, true, false, BEGIN_COMMENT_TAG_RULES),
659 new Shape(false, true, true, false, END_COMMENT_TAG_RULES),
660 new Shape(true, true, true, false, DOCTYPE_TAG_RULES),
661 new Shape(true, false, false, false, BEGIN_TAG_RULES),
662 new Shape(true, true, true, false, JSP_RULES)};
663
664
665 /***
666 * <p>Determine if the {@link Node} is a starting, ending, or body text
667 * tag. The array of {@link Parser.Shape}s are used to determine the type of
668 * {@link Node} the {@link Token} representes.</p>
669 *
670 * @param node target node
671 */
672 protected void discoverNodeShape(Node node) {
673 Token token = node.getToken();
674
675 nextShape: for (int i = 0; i < NODE_SHAPES.length; i++) {
676
677 int maxBeginOffset = 0;
678 int minEndOffset = Integer.MAX_VALUE;
679
680 Shape shape = NODE_SHAPES[i];
681
682 Rule[] rules = shape.getRules();
683 for (int j = 0; j < rules.length; j++) {
684
685
686 int n = (rules[j].isBegin ? token.getBeginOffset() : token.getEndOffset()) + rules[j].getOffset();
687
688 if (rules[j].isBegin) {
689 maxBeginOffset = Math.max(n, maxBeginOffset);
690 } else {
691 minEndOffset = Math.min(n, minEndOffset);
692 }
693
694
695
696 if (n > token.getDocument().length() || n < 0) {
697 continue nextShape;
698 }
699
700
701 boolean match = false;
702 if (rules[j].isEqual) {
703 match = (token.getDocument().charAt(n) == rules[j].getMnemonic());
704 } else {
705 match = (token.getDocument().charAt(n) != rules[j].getMnemonic());
706 }
707
708 if (!match) {
709 continue nextShape;
710 }
711 }
712
713
714 if (minEndOffset <= maxBeginOffset) {
715 continue nextShape;
716 }
717
718 node.setStart(shape.isStart());
719 node.setEnd(shape.isEnd());
720 node.setComment(shape.isComment());
721 node.setCdata(shape.isCdata);
722
723 break nextShape;
724 }
725
726 }
727
728 /***
729 * <p>Extracts the node name from the {@link Token} if the {@link Node}
730 * is a starting or ending tag.</p>
731 *
732 * @param node target
733 */
734 protected void discoverNodeName(Node node) {
735 Token token = node.getToken();
736
737 if (node.isStart() || node.isEnd()) {
738
739
740 if (node.isComment()) {
741
742 node.setName("--");
743
744 } else if (node.isCdata()) {
745
746 node.setName("[CDATA[");
747
748 } else {
749
750
751
752
753 int etb = (node.isStart() && node.isEnd()) ? (token.getEndOffset() - 2)
754 : (token.getEndOffset() - 1);
755
756
757 int s = (!node.isStart() && node.isEnd()) ? token.getBeginOffset() + 2
758 : token.getBeginOffset() + 1;
759
760
761 int e = -1;
762 indexOf: for (int i = s; i < etb; i++) {
763 if (Character.isWhitespace(token.getDocument().charAt(i))) {
764 e = i;
765 break indexOf;
766 }
767 }
768
769
770 if (e == -1) {
771 e = etb;
772 }
773
774
775 String nodeName = token.getDocument().substring(s, e);
776
777 e = nodeName.indexOf(':');
778 if (e > -1) {
779 node.setQname(nodeName.substring(0, e));
780 }
781 node.setName(nodeName.substring(e + 1));
782 }
783
784 }
785
786 }
787
788 /***
789 * <p>If the {@link Node} is a starting tag and not a comment,
790 * use the {@link AttributeTokenizer} to realize the node attributes.</p>
791 *
792 * @param node target
793 */
794 protected void discoverNodeAttributes(Node node) {
795 Token token = node.getToken();
796 Attributes attributes = this.new Attributes();
797 node.setAttributes(attributes);
798
799
800 if (node.isStart() && (!node.isComment() && !node.isCdata())) {
801
802 int e = (node.isStart() && node.isEnd()) ? (token.getEndOffset() - 2)
803 : (token.getEndOffset() - 1);
804
805 int s = -1;
806 indexOf: for (int i = token.getBeginOffset() + 2; i < e; i++) {
807 if (Character.isWhitespace(token.getDocument().charAt(i))) {
808 s = i;
809 break indexOf;
810 }
811 }
812
813 if (s > -1 && s < e) {
814
815
816 AttributeTokenizer tokenizer = new AttributeTokenizer(token
817 .getDocument(), s, e, token.getLineNumber(), token.getLineBeginOffset());
818 Iterator at = tokenizer.iterator();
819 while (at.hasNext()) {
820 Map.Entry attribute = (Map.Entry) at.next();
821 attributes.add(attribute);
822 }
823 }
824
825 }
826
827 }
828
829 /***
830 * <p>Explicitly sets the <code>isEnd</code> {@link Node} property to <code>true</code> for
831 * self terminating tags. Sets the {@link Node}'s <code>isWellFormed</code> property
832 * to <code>true</code> if the <code>isStart</code> and <code>isEnd</code>
833 * {@link Node} properties are <code>true</code>.</p>
834 *
835 * @param node target
836 */
837 protected void discoverNodeOverrides(Node node) {
838
839 if (node.isStart() && isSelfTerminating(node.getName())) {
840 node.setEnd(true);
841 }
842
843
844 if (node.isStart() && node.isEnd()) {
845 node.setWellFormed(true);
846 }
847
848 }
849
850 /***
851 * <p>Defines a parsing {@link Parser.Rule} used to determine
852 * the {@link Parser.Shape} of a {@link Node}.</p>
853 */
854 static class Rule {
855 /***
856 * <p>The target char to check for in the {@link Token} document.</p>
857 */
858 private char mnemonic = ' ';
859
860 /***
861 * <p>A boolen flag that indicates if the <code>offset</code> is from
862 * the begining of the {@link Token} offset or the ending offset.</p>
863 */
864 private boolean isBegin = false;
865
866 /***
867 * <p>The offset from the start or end of the {@link Token} that the
868 * <code>mnemonic</code> should be found.</p>
869 */
870 private int offset = 0;
871 /***
872 * <p>A boolean value that determines the relational operator used
873 * to compare the <code>mnemonic</code> to the {@link Token} begin
874 * or ending offset plus the {@link Parser.Rule} offset. If the value
875 * is <code>true</code> the equals operator is used; otherwise,
876 * the not equals operator is used in the comparison.</p>
877 */
878 private boolean isEqual = false;
879
880 /***
881 * <p>Overloaded constructor for the immutable object.</p>
882 * @param mnemonic character looked for in the token
883 * @param isBegin boolean that determines if the begining or ending of the Token is used
884 * @param offset the offset from the begin or ending Token
885 * @param isEqual boolean that determines if the = or != operator is used to check the mnemonic
886 */
887 public Rule(char mnemonic, boolean isBegin, int offset, boolean isEqual) {
888 this.mnemonic = mnemonic;
889 this.isBegin = isBegin;
890 this.offset = offset;
891 this.isEqual = isEqual;
892 }
893 /***
894 * <p>Returns the character looked for in the {@link Token}.</p>
895 *
896 * @return searched for token
897 */
898 public char getMnemonic() {
899 return mnemonic;
900 }
901 /***
902 * <p>Returns <code>true</code> if the <code>mnemonic</code> is at the
903 * begin or end of the token plus the <code>offset</code>.</p>
904 *
905 * @return <code>true</code> search from the start
906 */
907 public boolean isBegin() {
908 return isBegin;
909 }
910 /***
911 * <p>Returns a positive or negative offset from the begin or ending
912 * {@link Token} offset withing the document.</p>
913 *
914 * @return offset for the begining or ending of the token
915 */
916 public int getOffset() {
917 return offset;
918 }
919
920 /***
921 * <p>Returns <code>true</code> if the equal relational operator is
922 * used for the <code>mnemonic</code> comparison; otherwise the not
923 * equal operator is used.</p>
924 *
925 * @return use relational operator
926 */
927 public boolean isEqual() {
928 return isEqual;
929 }
930 }
931
932 /***
933 * <p>This class defines the shape of the {@link Node} by characterizing
934 * if the {@link Token} is a begin, end or comment tag.</p>
935 */
936 static class Shape {
937
938 /***
939 * <p>If <code>true</code> it indicates a starting node.</p>
940 */
941 private boolean isStart = false;
942
943 /***
944 * <p>If <code>true</code> it indicates an ending node.</p>
945 */
946 private boolean isEnd = false;
947
948 /***
949 * <p>If <code>true</code> it indicates a comment node.</p>
950 */
951 private boolean isComment = false;
952
953
954 /***
955 * <p>If <code>true</code> it indicates a CDATA node.</p>
956 */
957 private boolean isCdata = false;
958
959
960 /***
961 * <p>An array of {@link Parser.Rule}s used to determine if the
962 * {@link Node} matches the {@link Parser.Shape}.</p>
963 */
964 private Rule[] rules = null;
965
966 /***
967 * <p>Overloaded constructor used to instantiate the immutable object.</p>
968 *
969 * @param isStart starting node
970 * @param isEnd ending node
971 * @param isComment comment node
972 * @param isCdata cdata node
973 * @param rules define the node
974 */
975 public Shape(boolean isStart, boolean isEnd, boolean isComment, boolean isCdata, Rule[] rules) {
976 this.isStart = isStart;
977 this.isEnd = isEnd;
978 this.isComment = isComment;
979 this.isCdata = isCdata;
980 this.rules = rules;
981 }
982
983 /***
984 * <p>Returns <code>true</code> if the {@link Token} is a starting tag.</p>
985 *
986 * @return is a starting tag
987 */
988 public boolean isStart() {
989 return isStart;
990 }
991 /***
992 * <p>Returns <code>true</code> if the {@link Token} is an ending tag.</p>
993 *
994 * @return is a ending tag
995 */
996 public boolean isEnd() {
997 return isEnd;
998 }
999 /***
1000 * <p>Returns <code>true</code> if the {@link Token} is a comment tag.</p>
1001 *
1002 * @return is a comment
1003 */
1004 public boolean isComment() {
1005 return isComment;
1006 }
1007 /***
1008 * <p>Returns <code>true</code> if the {@link Token} is a CDATA tag.</p>
1009 *
1010 * @return is a cdata
1011 */
1012 public boolean isCdata() {
1013 return isCdata;
1014 }
1015
1016 /***
1017 * <p>Returns the {@link Parser.Rule}s that define the <code>isStart</code>,
1018 * <code>isEnd</code> and <code>isComment</code> characteristics.</p>
1019 *
1020 * @return rules defining the type of node
1021 */
1022 public Rule[] getRules() {
1023 return rules;
1024 }
1025 }
1026
1027 }