diff options
Diffstat (limited to 'Master/Agile Software Development/TestApp/src/Parser')
5 files changed, 398 insertions, 0 deletions
diff --git a/Master/Agile Software Development/TestApp/src/Parser/CssParser.java b/Master/Agile Software Development/TestApp/src/Parser/CssParser.java new file mode 100644 index 0000000..9b11a1e --- /dev/null +++ b/Master/Agile Software Development/TestApp/src/Parser/CssParser.java @@ -0,0 +1,41 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ + +package Parser; + +import testapp.data.ExternalCssItem; +import testapp.data.Item; +import java.util.logging.*; + +/** + * + * @author robb + */ +public class CssParser { + private Item rootNode; + private String srcUrl; + + public CssParser(Item rootNode) { + this.rootNode=rootNode; + } + + public ExternalCssItem parse(String url) { + // Todo: http client to recieve url and put contents into code + String code=new String(); + return parseCode(code); + } + + public ExternalCssItem parseCode(String code) { + ExternalCssItem externalCssItem=new ExternalCssItem(srcUrl,rootNode); + rootNode.add(externalCssItem); + + // check for @import and mark Suspicious + if(code.contains("@import")) { + externalCssItem.setSuspiciousCode(); + } + + return externalCssItem; + } +} diff --git a/Master/Agile Software Development/TestApp/src/Parser/HtmlParser.java b/Master/Agile Software Development/TestApp/src/Parser/HtmlParser.java new file mode 100644 index 0000000..96ad2b2 --- /dev/null +++ b/Master/Agile Software Development/TestApp/src/Parser/HtmlParser.java @@ -0,0 +1,56 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. 
+ */ +package Parser; + +import java.util.logging.Level; +import java.util.logging.Logger; +import org.htmlparser.util.NodeList; +import org.htmlparser.util.ParserException; +import testapp.data.Item; +import testapp.data.PageItem; +import java.util.logging.*; + +/** + * + * @author + */ +public class HtmlParser //extends NodeVisitor { +{ + + public HtmlParser() { + } + + public Item parse(String url) { + try { + PageItem rootNode = new PageItem(url,null); + + org.htmlparser.Parser parser = new org.htmlparser.Parser(rootNode.getUrl()); + NodeList nodeList = parser.parse(null); + + nodeList.visitAllNodesWith(new Visitor(rootNode)); + + return rootNode; + + } catch (ParserException ex) { + Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); + return null; + } + } + + public Item parse(Item rootItem) { + try { + org.htmlparser.Parser parser = new org.htmlparser.Parser(rootItem.getUrl()); + NodeList nodeList = parser.parse(null); + + nodeList.visitAllNodesWith(new Visitor(rootItem)); + + return rootItem; + + } catch (ParserException ex) { + Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); + return null; + } + } +} diff --git a/Master/Agile Software Development/TestApp/src/Parser/JavascriptParser.java b/Master/Agile Software Development/TestApp/src/Parser/JavascriptParser.java new file mode 100644 index 0000000..4cfe0ec --- /dev/null +++ b/Master/Agile Software Development/TestApp/src/Parser/JavascriptParser.java @@ -0,0 +1,44 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. 
+ */ + +package Parser; + +import testapp.data.ExternalJavascriptItem; +import testapp.data.Item; +import java.util.logging.*; + +public class JavascriptParser { + private Item rootNode; + + private static final Logger logger = Logger.getLogger(JavascriptParser.class.getName()); + + + public JavascriptParser(Item rootNode) { + this.rootNode=rootNode; + } + + public ExternalJavascriptItem parse(String url) { + // Todo: http client to recieve url and put contents into code + String code=new String(); + return parseCode(code, url); + } + + public ExternalJavascriptItem parseCode(String code, String url) { + // the only evil js code should be + //document.write('<script type="text/javascript" src="'+ jsFile + '"></scr' + 'ipt>'); + // => nearly impossible to parse, so we search only for "src" string + // if found, code is marked suspicious + // could be tricked by spliting the string! + // + + ExternalJavascriptItem externalJavascriptItem=new ExternalJavascriptItem(url,rootNode); + rootNode.add(externalJavascriptItem); + if(code.contains("src")) { + externalJavascriptItem.setSuspiciousCode(); + } + + return externalJavascriptItem; + } +} diff --git a/Master/Agile Software Development/TestApp/src/Parser/UrlAnalyzer.java b/Master/Agile Software Development/TestApp/src/Parser/UrlAnalyzer.java new file mode 100644 index 0000000..c543849 --- /dev/null +++ b/Master/Agile Software Development/TestApp/src/Parser/UrlAnalyzer.java @@ -0,0 +1,161 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. 
// Source file: Master/Agile Software Development/TestApp/src/Parser/UrlAnalyzer.java

import java.util.HashMap;
import javax.swing.JOptionPane;

/**
 * Static helpers that split a URL string into protocol, host, path, file
 * name, query parameters and anchor using plain string operations.
 *
 * @author sven
 */
public class UrlAnalyzer {
    public static final String PROTOCOL_HTTP = "http";
    public static final String PROTOCOL_HTTPS = "https";
    public static final String PROTOCOL_FTP = "ftp";
    public static final String PROTOCOL_FILE = "file";
    public static final String NO_PROTOCOL = "NO_PROTOCOL";
    public static final String NO_HOST = "NO_HOST";
    public static final String NO_VALID_PATH = "NO_VALID_PATH";
    public static final String NO_VALID_FILE = "NO_VALID_FILE";
    public static final String PROTOCOL_SEPARATOR = "://";
    public static final String PATH_SEPARATOR = "/";
    public static final String PARAMS_INDICATOR = "?";
    public static final String ANCHOR = "#";
    public static final String PARAMS_SEPARATOR = "&";
    public static final String PARAMS_ASSIGN = "=";
    public static final String[] VALID_PROTOCOLS =
            {PROTOCOL_HTTP, PROTOCOL_HTTPS, PROTOCOL_FTP, PROTOCOL_FILE};
    public static final String NO_ANCHOR = "NO_ANCHOR";

    /**
     * Extracts the query parameters of a URL.
     *
     * @param url URL to inspect, may be {@code null}
     * @return map from parameter name to value; a parameter without a value
     *         (or with an empty value) maps to {@code null}. The map is empty
     *         when {@code url} is {@code null} or has no query part.
     */
    public static HashMap<String, String> getParams(String url) {
        HashMap<String, String> result = new HashMap<String, String>();
        if (url == null) {
            return result;
        }

        int queryStart = url.indexOf(PARAMS_INDICATOR);
        if (queryStart < 0) {
            // Without this check the whole URL was treated as one parameter.
            return result;
        }

        String query = url.substring(queryStart + 1);
        int anchorStart = query.indexOf(ANCHOR);
        if (anchorStart >= 0) {
            // The anchor is not part of the last parameter value.
            query = query.substring(0, anchorStart);
        }

        for (String param : query.split(PARAMS_SEPARATOR)) {
            if (param.length() == 0) {
                continue;
            }
            // Split at the first '=' only, so values may contain '='.
            int assign = param.indexOf(PARAMS_ASSIGN);
            if (assign < 0) {
                result.put(param, null);
            } else {
                String value = param.substring(assign + 1);
                result.put(param.substring(0, assign), value.length() == 0 ? null : value);
            }
        }
        return result;
    }

    /**
     * Determines which of the {@link #VALID_PROTOCOLS} a URL starts with.
     * The protocol name must be followed by a colon, so {@code https://...}
     * is not reported as {@code http} and a host such as
     * {@code ftp.example.com} is not reported as {@code ftp}. The comparison
     * ignores case.
     *
     * @param url URL to inspect, may be {@code null}
     * @return the matching protocol constant, or {@link #NO_PROTOCOL}
     */
    public static final String getProtocol(final String url) {
        if (url == null) {
            return NO_PROTOCOL;
        }

        for (String proto : VALID_PROTOCOLS) {
            boolean nameMatches = url.regionMatches(true, 0, proto, 0, proto.length());
            if (nameMatches && url.startsWith(":", proto.length())) {
                return proto;
            }
        }
        return NO_PROTOCOL;
    }

    /**
     * Extracts the host part of an absolute URL, i.e. the text between
     * {@code ://} and the next {@code /}, {@code ?} or {@code #} (or the end
     * of the URL). A port, if present, is part of the returned text.
     *
     * @param url URL to inspect, may be {@code null}
     * @return the host, or {@link #NO_HOST} if the URL is {@code null}, has no
     *         {@code ://} or has an empty host
     */
    public static final String getHostname(final String url) {
        if (url == null) {
            return NO_HOST;
        }

        int separator = url.indexOf(PROTOCOL_SEPARATOR);
        if (separator < 0) {
            return NO_HOST;
        }

        int hostStart = separator + PROTOCOL_SEPARATOR.length();
        // A URL without any path ("http://example.com") still has a host.
        int hostEnd = firstIndexOfAny(url, hostStart, PATH_SEPARATOR + PARAMS_INDICATOR + ANCHOR);
        if (hostEnd <= hostStart) {
            return NO_HOST;
        }
        return url.substring(hostStart, hostEnd);
    }

    /**
     * Extracts the file name of a URL: the text after the last {@code /} of
     * the location, without query parameters and anchor.
     *
     * @param url URL to inspect, may be {@code null}
     * @return the file name (possibly empty); {@code ""} for a {@code null}
     *         URL; {@link #NO_VALID_FILE} for an absolute URL that has no
     *         {@code /} after the host
     */
    public static final String getFilename(final String url) {
        if (url == null) {
            return "";
        }

        // Drop query and anchor first so a '/' inside them cannot be mistaken
        // for a path separator.
        String rest = stripQueryAndAnchor(url);
        int separator = rest.indexOf(PROTOCOL_SEPARATOR);
        if (separator >= 0) {
            rest = rest.substring(separator + PROTOCOL_SEPARATOR.length());
            if (rest.indexOf(PATH_SEPARATOR) == -1) {
                return NO_VALID_FILE;
            }
        }

        int fileStart = rest.lastIndexOf(PATH_SEPARATOR);
        return (fileStart != -1) ? rest.substring(fileStart + 1) : rest;
    }

    /**
     * Extracts the directory part of a URL, always ending in {@code /}.
     * Examples: {@code http://foo.bar.com/dir/subdir/index.html} gives
     * {@code /dir/subdir/}, {@code img/coolimg.jpg} gives {@code img/} and
     * {@code foobar.html} gives {@code /}.
     *
     * @param url URL to inspect, may be {@code null}
     * @return the path, or {@link #NO_VALID_PATH} if the URL is {@code null}
     *         or is an absolute URL without any {@code /} after the host
     */
    public static final String getPath(String url) {
        if (url == null) {
            return NO_VALID_PATH;
        }

        // Ignore '/' characters that belong to the query or the anchor.
        String location = stripQueryAndAnchor(url);
        int pathEnd = location.lastIndexOf(PATH_SEPARATOR);
        if (pathEnd < 1) {
            // Item at root level, e.g. foobar.html or cool.gif
            return PATH_SEPARATOR;
        }

        int pathStart = 0;
        int separator = location.indexOf(PROTOCOL_SEPARATOR);
        if (separator >= 0) {
            // Full URL, e.g. http://foo.bar.com/dir/subdir/index.html
            pathStart = location.indexOf(PATH_SEPARATOR, separator + PROTOCOL_SEPARATOR.length());
            if (pathStart < 0) {
                return NO_VALID_PATH;
            }
        }
        // Otherwise: relative path on the site, e.g. img/coolimg.jpg
        return location.substring(pathStart, pathEnd) + PATH_SEPARATOR;
    }

    /**
     * Extracts the anchor (the text after the first {@code #}).
     *
     * @param url URL to inspect, may be {@code null}
     * @return the anchor text; {@code null} if the URL has no anchor;
     *         {@link #NO_ANCHOR} if {@code url} itself is {@code null}.
     *         The differing "no anchor" results are kept as they were so
     *         existing callers keep working.
     */
    public static final String getAnchor(String url) {
        if (url == null) {
            return NO_ANCHOR;
        }

        return hasAnchor(url) ? url.substring(url.indexOf(ANCHOR) + 1) : null;
    }

    /**
     * @param url URL to inspect, may be {@code null}
     * @return {@code true} if the URL contains a {@code ?}
     */
    public static final boolean hasParams(String url) {
        return url != null && url.contains(PARAMS_INDICATOR);
    }

    /**
     * @param url URL to inspect, may be {@code null}
     * @return {@code true} if the URL contains a {@code #}
     */
    public static final boolean hasAnchor(String url) {
        return url != null && url.contains(ANCHOR);
    }

    /**
     * Removes everything up to and including the first {@code ://}.
     *
     * @param url URL to shorten, may be {@code null}
     * @return the URL without its protocol; the unchanged argument if it is
     *         {@code null} or contains no {@code ://}
     */
    public static final String stripProtocol(String url) {
        if (url == null) {
            return null;
        }
        int separator = url.indexOf(PROTOCOL_SEPARATOR);
        if (separator < 0) {
            // Previously indexOf's -1 made this cut off the first two characters.
            return url;
        }
        return url.substring(separator + PROTOCOL_SEPARATOR.length());
    }

    /**
     * Checks that a URL starts with one of the {@link #VALID_PROTOCOLS}
     * followed by {@code ://} and contains a dot. On failure an error dialog
     * is shown, unless the environment is headless.
     *
     * @param url URL to check, may be {@code null}
     * @return {@code true} if the URL looks valid
     */
    public static final boolean validateURL(String url) {
        if (url != null) {
            String proto = getProtocol(url);
            if (!NO_PROTOCOL.equals(proto)
                    && url.startsWith(PROTOCOL_SEPARATOR, proto.length())
                    && url.contains(".")) {
                return true;
            }
        }

        // Showing a dialog without a display would throw instead of reporting.
        if (!java.awt.GraphicsEnvironment.isHeadless()) {
            JOptionPane.showMessageDialog(null, "Falsche Url. Bitte geben Sie eine korrekte Url ein!");
        }

        return false;
    }

    /** Returns the URL up to, but excluding, its first {@code ?} or {@code #}. */
    private static String stripQueryAndAnchor(String url) {
        return url.substring(0, firstIndexOfAny(url, 0, PARAMS_INDICATOR + ANCHOR));
    }

    /**
     * Returns the index of the first character at or after {@code from} that
     * occurs in {@code stopChars}, or the length of {@code text} if none does.
     */
    private static int firstIndexOfAny(String text, int from, String stopChars) {
        for (int i = from; i < text.length(); i++) {
            if (stopChars.indexOf(text.charAt(i)) >= 0) {
                return i;
            }
        }
        return text.length();
    }
}
+ */ +package Parser; + +import org.htmlparser.Tag; +import org.htmlparser.visitors.NodeVisitor; +import testapp.data.FrameItem; +import testapp.data.IframeItem; +import testapp.data.ImageItem; +import testapp.data.Item; +import testapp.data.LinkItem; +import testapp.data.ScriptItem; +import testapp.data.StyleItem; +import java.util.logging.*; + +/** + * + * @author alex + */ +class Visitor extends NodeVisitor { + + private Item currentItem; + + public Visitor(Item initialItem) { + currentItem = initialItem; + } + + /// This methode will be called for every Tag found in the document + @Override + public void visitTag(Tag currentTag) { + + String tagName = currentTag.getTagName(); + + + /// Search for a link and create a link item + String hrefAttribute = currentTag.getAttribute("HREF"); + if (hrefAttribute != null && hrefAttribute.length() > 0) { + if (tagName.equals("A")) { + LinkItem li = new LinkItem(hrefAttribute,currentItem); + currentItem.add(li); + } + } + + /// Search for a source and create a specific item depending on the source type + String srcAttribute = currentTag.getAttribute("SRC"); + if (srcAttribute != null && srcAttribute.length() > 0) { + if (tagName.equals("IFRAME")) { + IframeItem iframeItem; + iframeItem = new IframeItem(srcAttribute,currentItem); + + //currentItem.add(iframeItem); + startNewHtmlParser(iframeItem); + } else if (tagName.equals("IMG")) { + ImageItem ii = new ImageItem(srcAttribute,currentItem); + currentItem.add(ii); + } else if (tagName.equals("FRAME")) { + FrameItem frameItem; + frameItem = new FrameItem(srcAttribute,currentItem); + currentItem.add(frameItem); + startNewHtmlParser(frameItem); + } else if (tagName.equals("SCRIPT")) { + JavascriptParser javascriptParser = new JavascriptParser(currentItem); + currentItem.add(javascriptParser.parse(srcAttribute)); + } else if (tagName.equals("STYLE")) { + CssParser cssParser = new CssParser(currentItem); + currentItem.add(cssParser.parse(srcAttribute)); + } + } else { + if 
(tagName.equals("STYLE")) { + StyleItem styleItem = new StyleItem("",currentItem); + currentItem.add(styleItem); + CssParser cssParser = new CssParser(styleItem); + cssParser.parseCode(currentTag.getText()); + } + + if (tagName.equals("SCRIPT")) { + ScriptItem scriptItem = new ScriptItem("",currentItem); + currentItem.add(scriptItem); + JavascriptParser javascriptParser = new JavascriptParser(scriptItem); + javascriptParser.parseCode(currentTag.getText(), this.currentItem.getUrl()); + } + } + + + } + + /// Start a new html parser for nested html pages + private void startNewHtmlParser(Item currentItem) { + ///restict the search dept of the html page to 10s external pages + HtmlParser htmlParser = new HtmlParser(); + currentItem.urlToValidUrl(false); + htmlParser.parse(currentItem); + } +} |
