summary refs log tree commit diff stats
path: root/Master/Agile Software Development/TestApp/src/Parser
diff options
context:
space:
mode:
Diffstat (limited to 'Master/Agile Software Development/TestApp/src/Parser')
-rw-r--r-- Master/Agile Software Development/TestApp/src/Parser/CssParser.java 41
-rw-r--r-- Master/Agile Software Development/TestApp/src/Parser/HtmlParser.java 56
-rw-r--r-- Master/Agile Software Development/TestApp/src/Parser/JavascriptParser.java 44
-rw-r--r-- Master/Agile Software Development/TestApp/src/Parser/UrlAnalyzer.java 161
-rw-r--r-- Master/Agile Software Development/TestApp/src/Parser/Visitor.java 96
5 files changed, 398 insertions, 0 deletions
diff --git a/Master/Agile Software Development/TestApp/src/Parser/CssParser.java b/Master/Agile Software Development/TestApp/src/Parser/CssParser.java
new file mode 100644
index 0000000..9b11a1e
--- /dev/null
+++ b/Master/Agile Software Development/TestApp/src/Parser/CssParser.java
@@ -0,0 +1,41 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+
+package Parser;
+
+import testapp.data.ExternalCssItem;
+import testapp.data.Item;
+import java.util.logging.*;
+
+/**
+ * Creates {@link ExternalCssItem}s for style sheets and flags those whose
+ * code contains an {@code @import} rule as suspicious.
+ *
+ * @author robb
+ */
+public class CssParser {
+    /** Item that every created CSS item is attached to. */
+    private final Item rootNode;
+
+    /**
+     * @param rootNode item that created CSS items are added to
+     */
+    public CssParser(Item rootNode) {
+        this.rootNode = rootNode;
+    }
+
+    /**
+     * Creates the item for an external style sheet.
+     * The style sheet is not downloaded yet, so its code is treated as empty
+     * and the item is never marked suspicious by this method.
+     *
+     * @param url source URL of the style sheet; stored in the created item
+     * @return the created item, already added to the root node
+     */
+    public ExternalCssItem parse(String url) {
+        // Todo: http client to receive url and put contents into code
+        return createItem("", url);
+    }
+
+    /**
+     * Creates the item for a piece of CSS code that has no source URL.
+     *
+     * @param code CSS source text; {@code null} is treated like empty code
+     * @return the created item, already added to the root node
+     */
+    public ExternalCssItem parseCode(String code) {
+        return createItem(code, null);
+    }
+
+    /** Builds the item, attaches it to the root node and applies the {@code @import} check. */
+    private ExternalCssItem createItem(String code, String url) {
+        ExternalCssItem externalCssItem = new ExternalCssItem(url, rootNode);
+        rootNode.add(externalCssItem);
+
+        // An @import can pull in further style sheets, so mark the item suspicious.
+        if (code != null && code.contains("@import")) {
+            externalCssItem.setSuspiciousCode();
+        }
+
+        return externalCssItem;
+    }
+}
diff --git a/Master/Agile Software Development/TestApp/src/Parser/HtmlParser.java b/Master/Agile Software Development/TestApp/src/Parser/HtmlParser.java
new file mode 100644
index 0000000..96ad2b2
--- /dev/null
+++ b/Master/Agile Software Development/TestApp/src/Parser/HtmlParser.java
@@ -0,0 +1,56 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package Parser;
+
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import org.htmlparser.util.NodeList;
+import org.htmlparser.util.ParserException;
+import testapp.data.Item;
+import testapp.data.PageItem;
+import java.util.logging.*;
+
+/**
+ * Parses an HTML page with org.htmlparser and lets a {@link Visitor} attach
+ * the resources it finds to the page's item.
+ */
+public class HtmlParser {
+
+    private static final Logger LOGGER = Logger.getLogger(HtmlParser.class.getName());
+
+    public HtmlParser() {
+    }
+
+    /**
+     * Parses the page at the given URL into a new {@link PageItem}.
+     *
+     * @param url address of the page to parse
+     * @return the new root item, or {@code null} if the page could not be parsed
+     */
+    public Item parse(String url) {
+        return parse(new PageItem(url, null));
+    }
+
+    /**
+     * Parses the page the given item points to and visits all of its nodes
+     * with a {@link Visitor} created for that item.
+     *
+     * @param rootItem item whose URL is parsed
+     * @return {@code rootItem}, or {@code null} if the page could not be parsed
+     */
+    public Item parse(Item rootItem) {
+        String url = rootItem.getUrl();
+        try {
+            org.htmlparser.Parser parser = new org.htmlparser.Parser(url);
+            NodeList nodeList = parser.parse(null);
+
+            nodeList.visitAllNodesWith(new Visitor(rootItem));
+
+            return rootItem;
+        } catch (ParserException ex) {
+            // Include the URL so the log entry identifies the failing page.
+            LOGGER.log(Level.SEVERE, "Could not parse " + url, ex);
+            return null;
+        }
+    }
+}
diff --git a/Master/Agile Software Development/TestApp/src/Parser/JavascriptParser.java b/Master/Agile Software Development/TestApp/src/Parser/JavascriptParser.java
new file mode 100644
index 0000000..4cfe0ec
--- /dev/null
+++ b/Master/Agile Software Development/TestApp/src/Parser/JavascriptParser.java
@@ -0,0 +1,44 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+
+package Parser;
+
+import testapp.data.ExternalJavascriptItem;
+import testapp.data.Item;
+import java.util.logging.*;
+
+/**
+ * Creates {@link ExternalJavascriptItem}s for scripts and flags those whose
+ * code contains the text {@code src} as suspicious.
+ */
+public class JavascriptParser {
+    private static final Logger logger = Logger.getLogger(JavascriptParser.class.getName());
+
+    /** Item that every created script item is attached to. */
+    private final Item rootNode;
+
+    /**
+     * @param rootNode item that created script items are added to
+     */
+    public JavascriptParser(Item rootNode) {
+        this.rootNode = rootNode;
+    }
+
+    /**
+     * Creates the item for an external script.
+     * The script is not downloaded yet, so its code is treated as empty.
+     *
+     * @param url source URL of the script
+     * @return the created item, already added to the root node
+     */
+    public ExternalJavascriptItem parse(String url) {
+        // Todo: http client to receive url and put contents into code
+        return parseCode("", url);
+    }
+
+    /**
+     * Creates the item for the given script code and marks it suspicious
+     * when the code contains the text {@code src}.
+     *
+     * @param code script source text; {@code null} is treated like empty code
+     * @param url  URL stored in the created item
+     * @return the created item, already added to the root node
+     */
+    public ExternalJavascriptItem parseCode(String code, String url) {
+        // the only evil js code should be
+        //document.write('<script type="text/javascript" src="'+ jsFile + '"></scr' + 'ipt>');
+        // => nearly impossible to parse, so we search only for the "src" string;
+        // if found, the code is marked suspicious.
+        // This can be tricked by splitting the string!
+
+        ExternalJavascriptItem externalJavascriptItem = new ExternalJavascriptItem(url, rootNode);
+        rootNode.add(externalJavascriptItem);
+        if (code != null && code.contains("src")) {
+            externalJavascriptItem.setSuspiciousCode();
+            logger.log(Level.FINE, "Marked script from {0} as suspicious", url);
+        }
+
+        return externalJavascriptItem;
+    }
+}
diff --git a/Master/Agile Software Development/TestApp/src/Parser/UrlAnalyzer.java b/Master/Agile Software Development/TestApp/src/Parser/UrlAnalyzer.java
new file mode 100644
index 0000000..c543849
--- /dev/null
+++ b/Master/Agile Software Development/TestApp/src/Parser/UrlAnalyzer.java
@@ -0,0 +1,161 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+
+package Parser;
+
+import java.util.HashMap;
+import javax.swing.JOptionPane;
+
+/**
+ * Static helpers that take a URL string apart (protocol, host, path, file
+ * name, query parameters, anchor) using plain string operations.
+ * All methods accept {@code null}; the value returned for it is documented
+ * per method.
+ *
+ * @author sven
+ */
+public class UrlAnalyzer {
+    public static final String PROTOCOL_HTTP = "http";
+    public static final String PROTOCOL_HTTPS = "https";
+    public static final String PROTOCOL_FTP = "ftp";
+    public static final String PROTOCOL_FILE = "file";
+    public static final String NO_PROTOCOL = "NO_PROTOCOL";
+    public static final String NO_HOST = "NO_HOST";
+    public static final String NO_VALID_PATH = "NO_VALID_PATH";
+    public static final String NO_VALID_FILE = "NO_VALID_FILE";
+    public static final String PROTOCOL_SEPARATOR = "://";
+    public static final String PATH_SEPARATOR = "/";
+    public static final String PARAMS_INDICATOR = "?";
+    public static final String ANCHOR = "#";
+    public static final String PARAMS_SEPARATOR = "&";
+    public static final String PARAMS_ASSIGN = "=";
+    public static final String[] VALID_PROTOCOLS =
+            {PROTOCOL_HTTP, PROTOCOL_HTTPS, PROTOCOL_FTP, PROTOCOL_FILE};
+    public static final String NO_ANCHOR = "NO_ANCHOR";
+
+    /**
+     * Extracts the query parameters of a URL.
+     *
+     * @param url URL to inspect, may be {@code null}
+     * @return map from parameter name to value; the value is {@code null}
+     *         when the parameter has no value. The map is empty when the URL
+     *         is {@code null} or has no query part.
+     */
+    public static HashMap<String, String> getParams(String url) {
+        HashMap<String, String> result = new HashMap<String, String>();
+        if (url == null) {
+            return result;
+        }
+
+        // The anchor is not part of the query, so cut it off first.
+        String withoutAnchor = stripAnchor(url);
+        int queryStart = withoutAnchor.indexOf(PARAMS_INDICATOR);
+        if (queryStart < 0) {
+            return result;
+        }
+
+        String query = withoutAnchor.substring(queryStart + 1);
+        for (String param : query.split(PARAMS_SEPARATOR)) {
+            // Split at the FIRST '=' only, so values may contain '=' themselves.
+            int assign = param.indexOf(PARAMS_ASSIGN);
+            String key = (assign < 0) ? param : param.substring(0, assign);
+            if (key.length() == 0) {
+                continue;
+            }
+            boolean hasValue = assign >= 0 && assign < param.length() - 1;
+            result.put(key, hasValue ? param.substring(assign + 1) : null);
+        }
+        return result;
+    }
+
+    /**
+     * Determines which of the {@link #VALID_PROTOCOLS} a URL starts with.
+     * The protocol must be followed by {@code ://}; the comparison ignores case.
+     *
+     * @param url URL to inspect, may be {@code null}
+     * @return the matching protocol constant, or {@link #NO_PROTOCOL}
+     */
+    public static final String getProtocol(final String url) {
+        if (url == null) {
+            return NO_PROTOCOL;
+        }
+
+        for (String proto : VALID_PROTOCOLS) {
+            // Matching "proto://" instead of just "proto" keeps "https" from
+            // being reported as "http".
+            String prefix = proto + PROTOCOL_SEPARATOR;
+            if (url.regionMatches(true, 0, prefix, 0, prefix.length())) {
+                return proto;
+            }
+        }
+        return NO_PROTOCOL;
+    }
+
+    /**
+     * Extracts the text between {@code ://} and the next {@code /}, {@code ?}
+     * or {@code #} (or the end of the URL). User info and port, if present,
+     * are part of the returned text.
+     *
+     * @param url URL to inspect, may be {@code null}
+     * @return the host part, or {@link #NO_HOST} when the URL is {@code null},
+     *         has no {@code ://} or has an empty host
+     */
+    public static final String getHostname(final String url) {
+        if (url == null) {
+            return NO_HOST;
+        }
+
+        int separator = url.indexOf(PROTOCOL_SEPARATOR);
+        if (separator < 0) {
+            return NO_HOST;
+        }
+
+        int hostStart = separator + PROTOCOL_SEPARATOR.length();
+        int hostEnd = url.length();
+        for (String delimiter : new String[] {PATH_SEPARATOR, PARAMS_INDICATOR, ANCHOR}) {
+            int index = url.indexOf(delimiter, hostStart);
+            if (index >= 0 && index < hostEnd) {
+                hostEnd = index;
+            }
+        }
+        if (hostEnd == hostStart) {
+            return NO_HOST;
+        }
+        return url.substring(hostStart, hostEnd);
+    }
+
+    /**
+     * Extracts the last path segment of a URL, without query and anchor.
+     *
+     * @param url URL or relative path to inspect, may be {@code null}
+     * @return the file name (possibly empty); an empty string for
+     *         {@code null}; {@link #NO_VALID_FILE} when the URL has a
+     *         protocol but no path after the host
+     */
+    public static final String getFilename(final String url) {
+        if (url == null) {
+            return "";
+        }
+
+        // Drop query and anchor first: a '/' inside them is not a path separator.
+        String rest = stripParamsAndAnchor(url);
+        int separator = rest.indexOf(PROTOCOL_SEPARATOR);
+        if (separator >= 0) {
+            rest = rest.substring(separator + PROTOCOL_SEPARATOR.length());
+            if (rest.indexOf(PATH_SEPARATOR) < 0) {
+                return NO_VALID_FILE;
+            }
+        }
+        int fileStart = rest.lastIndexOf(PATH_SEPARATOR);
+        return (fileStart >= 0) ? rest.substring(fileStart + 1) : rest;
+    }
+
+    /**
+     * Extracts the directory part of a URL, always ending with {@code /}.
+     *
+     * @param url URL or relative path to inspect, may be {@code null}
+     * @return the directory part; {@code /} for an item at root level;
+     *         {@link #NO_VALID_PATH} for {@code null} or for a full URL
+     *         without any path after the host
+     */
+    public static final String getPath(String url) {
+        if (url == null) {
+            return NO_VALID_PATH;
+        }
+
+        // Drop query and anchor first: a '/' inside them is not a path separator.
+        String location = stripParamsAndAnchor(url);
+        int pathEnd = location.lastIndexOf(PATH_SEPARATOR);
+        if (pathEnd < 1) {
+            // item at root level
+            // e.g. foobar.html or cool.gif
+            return PATH_SEPARATOR;
+        }
+
+        // For a relative path on the site (e.g. img/coolimg.jpg or
+        // res/img/anotherimg.gif) the path starts at the beginning.
+        int pathStart = 0;
+        int separator = location.indexOf(PROTOCOL_SEPARATOR);
+        if (separator >= 0) {
+            // we have a full url
+            // e.g. http://foo.bar.com/dir/subdir/index.html
+            pathStart = location.indexOf(PATH_SEPARATOR, separator + PROTOCOL_SEPARATOR.length());
+            if (pathStart < 0) {
+                return NO_VALID_PATH;
+            }
+        }
+        return location.substring(pathStart, pathEnd) + PATH_SEPARATOR;
+    }
+
+    /**
+     * Extracts the text after the first {@code #}.
+     *
+     * @param url URL to inspect, may be {@code null}
+     * @return the anchor; {@link #NO_ANCHOR} when the URL is {@code null};
+     *         {@code null} when the URL contains no {@code #} (this asymmetry
+     *         is kept for backward compatibility)
+     */
+    public static final String getAnchor(String url) {
+        if (url == null) {
+            return NO_ANCHOR;
+        }
+
+        int anchorStart = url.indexOf(ANCHOR);
+        return (anchorStart >= 0) ? url.substring(anchorStart + 1) : null;
+    }
+
+    /**
+     * @param url URL to inspect, may be {@code null}
+     * @return {@code true} if the URL contains a {@code ?}
+     */
+    public static final boolean hasParams(String url) {
+        return url != null && url.contains(PARAMS_INDICATOR);
+    }
+
+    /**
+     * @param url URL to inspect, may be {@code null}
+     * @return {@code true} if the URL contains a {@code #}
+     */
+    public static final boolean hasAnchor(String url) {
+        return url != null && url.contains(ANCHOR);
+    }
+
+    /**
+     * Removes everything up to and including the first {@code ://}.
+     *
+     * @param url URL to shorten, may be {@code null}
+     * @return the URL without protocol; the unchanged argument when it is
+     *         {@code null} or contains no {@code ://}
+     */
+    public static final String stripProtocol(String url) {
+        if (url == null) {
+            return null;
+        }
+        int separator = url.indexOf(PROTOCOL_SEPARATOR);
+        return (separator < 0) ? url : url.substring(separator + PROTOCOL_SEPARATOR.length());
+    }
+
+    /**
+     * Checks that a URL starts with one of the {@link #VALID_PROTOCOLS}
+     * followed by {@code ://} and contains a dot. When the check fails, an
+     * error dialog is shown unless the JVM runs headless.
+     *
+     * @param url URL to check, may be {@code null}
+     * @return {@code true} if the URL passes the check
+     */
+    public static final boolean validateURL(String url)
+    {
+        boolean valid = url != null
+                && !NO_PROTOCOL.equals(getProtocol(url))
+                && url.contains(".");
+        if (valid) {
+            return true;
+        }
+
+        // showMessageDialog throws HeadlessException without a display.
+        if (!java.awt.GraphicsEnvironment.isHeadless()) {
+            JOptionPane.showMessageDialog(null, "Falsche Url. Bitte geben Sie eine korrekte Url ein!");
+        }
+
+        return false;
+    }
+
+    /** Returns the URL without its anchor ({@code #} and everything after it). */
+    private static String stripAnchor(String url) {
+        int anchorStart = url.indexOf(ANCHOR);
+        return (anchorStart >= 0) ? url.substring(0, anchorStart) : url;
+    }
+
+    /** Returns the URL without its anchor and without its query part. */
+    private static String stripParamsAndAnchor(String url) {
+        String result = stripAnchor(url);
+        int paramsStart = result.indexOf(PARAMS_INDICATOR);
+        return (paramsStart >= 0) ? result.substring(0, paramsStart) : result;
+    }
+}
diff --git a/Master/Agile Software Development/TestApp/src/Parser/Visitor.java b/Master/Agile Software Development/TestApp/src/Parser/Visitor.java
new file mode 100644
index 0000000..e5d953c
--- /dev/null
+++ b/Master/Agile Software Development/TestApp/src/Parser/Visitor.java
@@ -0,0 +1,96 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package Parser;
+
+import org.htmlparser.Tag;
+import org.htmlparser.visitors.NodeVisitor;
+import testapp.data.FrameItem;
+import testapp.data.IframeItem;
+import testapp.data.ImageItem;
+import testapp.data.Item;
+import testapp.data.LinkItem;
+import testapp.data.ScriptItem;
+import testapp.data.StyleItem;
+import java.util.logging.*;
+
+/**
+ * Node visitor that turns the tags of a parsed HTML document into child
+ * items of the item it was created for.
+ *
+ * @author alex
+ */
+class Visitor extends NodeVisitor {
+
+    /** Maximum number of frame/iframe pages that are parsed nested inside each other. */
+    private static final int MAX_NESTING_DEPTH = 10;
+
+    /** Number of nested page parses currently running on the calling thread. */
+    private static final ThreadLocal<Integer> NESTING_DEPTH = new ThreadLocal<Integer>() {
+        @Override
+        protected Integer initialValue() {
+            return 0;
+        }
+    };
+
+    /** Item that receives the items created for the visited tags. */
+    private final Item currentItem;
+
+    /**
+     * @param initialItem item that the items found in the document are added to
+     */
+    public Visitor(Item initialItem) {
+        currentItem = initialItem;
+    }
+
+    /**
+     * Called for every tag found in the document. Creates a link item for
+     * {@code A} tags with an {@code HREF}, and a type-specific item for tags
+     * with a {@code SRC} as well as for inline {@code STYLE}/{@code SCRIPT} tags.
+     *
+     * @param currentTag the tag being visited
+     */
+    @Override
+    public void visitTag(Tag currentTag) {
+        String tagName = currentTag.getTagName();
+
+        // Search for a link and create a link item
+        String hrefAttribute = currentTag.getAttribute("HREF");
+        if (hasText(hrefAttribute) && "A".equals(tagName)) {
+            currentItem.add(new LinkItem(hrefAttribute, currentItem));
+        }
+
+        // Search for a source and create a specific item depending on the source type
+        String srcAttribute = currentTag.getAttribute("SRC");
+        if (hasText(srcAttribute)) {
+            visitTagWithSource(tagName, srcAttribute);
+        } else {
+            visitInlineTag(tagName, currentTag);
+        }
+    }
+
+    /** Handles a tag that references an external resource through its SRC attribute. */
+    private void visitTagWithSource(String tagName, String srcAttribute) {
+        if ("IFRAME".equals(tagName)) {
+            // NOTE(review): unlike FRAME, the iframe item is not added to
+            // currentItem (the add call was commented out in the original) -
+            // confirm whether that is intended.
+            startNewHtmlParser(new IframeItem(srcAttribute, currentItem));
+        } else if ("IMG".equals(tagName)) {
+            currentItem.add(new ImageItem(srcAttribute, currentItem));
+        } else if ("FRAME".equals(tagName)) {
+            FrameItem frameItem = new FrameItem(srcAttribute, currentItem);
+            currentItem.add(frameItem);
+            startNewHtmlParser(frameItem);
+        } else if ("SCRIPT".equals(tagName)) {
+            // parse() already adds the created item to currentItem;
+            // adding the return value again would register it twice.
+            new JavascriptParser(currentItem).parse(srcAttribute);
+        } else if ("STYLE".equals(tagName)) {
+            // parse() already adds the created item to currentItem (see above).
+            new CssParser(currentItem).parse(srcAttribute);
+        }
+    }
+
+    /** Handles STYLE and SCRIPT tags that carry no SRC attribute. */
+    private void visitInlineTag(String tagName, Tag currentTag) {
+        if ("STYLE".equals(tagName)) {
+            StyleItem styleItem = new StyleItem("", currentItem);
+            currentItem.add(styleItem);
+            new CssParser(styleItem).parseCode(currentTag.getText());
+        } else if ("SCRIPT".equals(tagName)) {
+            ScriptItem scriptItem = new ScriptItem("", currentItem);
+            currentItem.add(scriptItem);
+            new JavascriptParser(scriptItem).parseCode(currentTag.getText(), currentItem.getUrl());
+        }
+    }
+
+    /**
+     * Starts a new html parser for a nested html page. Nesting is restricted
+     * to {@link #MAX_NESTING_DEPTH} pages so that frames which include each
+     * other cannot recurse without end.
+     */
+    private void startNewHtmlParser(Item nestedItem) {
+        int depth = NESTING_DEPTH.get();
+        if (depth >= MAX_NESTING_DEPTH) {
+            return;
+        }
+
+        NESTING_DEPTH.set(depth + 1);
+        try {
+            nestedItem.urlToValidUrl(false);
+            new HtmlParser().parse(nestedItem);
+        } finally {
+            // Restore the counter even if parsing throws.
+            NESTING_DEPTH.set(depth);
+        }
+    }
+
+    /** Returns true if the attribute value is present and not empty. */
+    private static boolean hasText(String attributeValue) {
+        return attributeValue != null && attributeValue.length() > 0;
+    }
+}