blob: e5d953c15a8420337ad9fa493fba78bf251a0715 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
|
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package Parser;
import org.htmlparser.Tag;
import org.htmlparser.visitors.NodeVisitor;
import testapp.data.FrameItem;
import testapp.data.IframeItem;
import testapp.data.ImageItem;
import testapp.data.Item;
import testapp.data.LinkItem;
import testapp.data.ScriptItem;
import testapp.data.StyleItem;
import java.util.logging.*;
/**
 * Node visitor that converts the tags of a parsed HTML document into child
 * items of the {@link Item} handed to the constructor.
 *
 * <p>Recognised tags:
 * <ul>
 *   <li>{@code A} with a non-empty {@code HREF} attribute;</li>
 *   <li>{@code IFRAME}, {@code IMG}, {@code FRAME}, {@code SCRIPT} and
 *       {@code STYLE} with a non-empty {@code SRC} attribute;</li>
 *   <li>{@code STYLE} and {@code SCRIPT} without a usable {@code SRC}
 *       attribute (inline code).</li>
 * </ul>
 *
 * @author alex
 */
class Visitor extends NodeVisitor {

    /** Item that receives every child item created while visiting. */
    private final Item currentItem;

    /**
     * Creates a visitor that attaches its findings to the given item.
     *
     * @param initialItem item used as parent for all created child items
     */
    public Visitor(Item initialItem) {
        this.currentItem = initialItem;
    }

    /**
     * Called for every tag found in the document. Reads the tag's
     * {@code HREF} and {@code SRC} attributes and creates the matching items.
     *
     * @param currentTag the tag currently being visited
     */
    @Override
    public void visitTag(Tag currentTag) {
        final String name = currentTag.getTagName();

        // Links: only anchors carrying a non-empty HREF produce a LinkItem.
        final String href = currentTag.getAttribute("HREF");
        if (isPresent(href) && name.equals("A")) {
            currentItem.add(new LinkItem(href, currentItem));
        }

        // Everything else is dispatched on whether a non-empty SRC exists.
        final String src = currentTag.getAttribute("SRC");
        if (isPresent(src)) {
            handleSourcedTag(name, src);
        } else {
            handleInlineTag(name, currentTag);
        }
    }

    /** Returns true when the attribute value is neither null nor empty. */
    private static boolean isPresent(String attributeValue) {
        return attributeValue != null && attributeValue.length() > 0;
    }

    /** Creates the item that belongs to a tag referencing an external source. */
    private void handleSourcedTag(String name, String src) {
        if (name.equals("IFRAME")) {
            // NOTE(review): unlike FRAME, the iframe item is NOT added to
            // currentItem (the add call was disabled); it is only parsed.
            startNewHtmlParser(new IframeItem(src, currentItem));
            return;
        }
        if (name.equals("IMG")) {
            currentItem.add(new ImageItem(src, currentItem));
            return;
        }
        if (name.equals("FRAME")) {
            FrameItem frame = new FrameItem(src, currentItem);
            currentItem.add(frame);
            startNewHtmlParser(frame);
            return;
        }
        if (name.equals("SCRIPT")) {
            JavascriptParser scriptParser = new JavascriptParser(currentItem);
            currentItem.add(scriptParser.parse(src));
            return;
        }
        if (name.equals("STYLE")) {
            CssParser styleParser = new CssParser(currentItem);
            currentItem.add(styleParser.parse(src));
        }
    }

    /**
     * Creates the item for a STYLE or SCRIPT tag that has no usable SRC and
     * hands the tag's text to the matching parser.
     */
    private void handleInlineTag(String name, Tag tag) {
        // NOTE(review): Tag.getText() is passed on as the code to parse;
        // confirm that it yields the element body and not the tag's own text.
        if (name.equals("STYLE")) {
            StyleItem inlineStyle = new StyleItem("", currentItem);
            currentItem.add(inlineStyle);
            new CssParser(inlineStyle).parseCode(tag.getText());
        }
        if (name.equals("SCRIPT")) {
            ScriptItem inlineScript = new ScriptItem("", currentItem);
            currentItem.add(inlineScript);
            new JavascriptParser(inlineScript).parseCode(tag.getText(), currentItem.getUrl());
        }
    }

    /**
     * Starts a new HTML parser for a nested HTML page (frame or iframe).
     *
     * <p>NOTE(review): the original comment mentions restricting the search
     * depth to 10 external pages; no such limit is enforced in this method,
     * presumably it is handled elsewhere. Confirm.
     */
    private void startNewHtmlParser(Item nestedItem) {
        HtmlParser nestedParser = new HtmlParser();
        nestedItem.urlToValidUrl(false);
        nestedParser.parse(nestedItem);
    }
}
|