Commit ec10efac authored by mkoscher's avatar mkoscher
Browse files

Merge branch 'testCases' into 'master'

Add Test Resources

See merge request mkoscher/cc-assignmen-1!3
parents 08e51bd8 525e342a
......@@ -60,18 +60,22 @@ public class Crawler {
}
*/
public void analyze(String url, int depth) {
Document webpage = null;
try {
webpage = Jsoup.connect(url).get();
}catch (IOException e){
}
/**
 * Fills the crawler's report with the values gathered from an already loaded page.
 *
 * @param webpage parsed page to analyse; must not be {@code null}
 * @param depth   value passed through to {@code getAllLinks}
 * @return the crawler's {@code report} after its links, image count, video count,
 *         word count and broken links have been set
 * @throws IllegalArgumentException if {@code webpage} is {@code null}
 */
public Report analyze(Document webpage, int depth) {
    if (webpage == null) {
        // loadDocumentFromWebpage returns null when the page could not be fetched;
        // reject that here with a clear message instead of failing inside a helper.
        throw new IllegalArgumentException("webpage must not be null");
    }
    report.setLinks(getAllLinks(webpage, depth));
    report.setImageCount(countImages(webpage));
    report.setVideoCount(countVideos(webpage));
    report.setWordCount(countWords(webpage));
    // NOTE(review): getAllLinks is evaluated a second time here; consider reusing the
    // first result once its return type and side effects have been confirmed.
    report.setBrokenLinks(findBrokenLinks(getAllLinks(webpage, depth)));
    return report;
}
/**
 * Fetches and parses the page at the given URL with Jsoup.
 *
 * @param url address of the page to fetch
 * @return the parsed document, or {@code null} if an {@link IOException} occurred
 *         while fetching it
 */
public Document loadDocumentFromWebpage(String url){
    try {
        return Jsoup.connect(url).get();
    } catch (IOException e) {
        // Keep the null-on-failure contract callers rely on, but report the failure
        // instead of dropping it silently.
        System.err.println("Could not load " + url + ": " + e.getMessage());
        return null;
    }
}
private Integer countWords(Document webpage){
......
......@@ -14,6 +14,7 @@ public class Main {
Integer depth = 2;
Crawler crawler = new Crawler();
Report report = new Report();
Document webpage;
options.addOption(urlArg);
options.addOption(depthArg);
......@@ -30,7 +31,8 @@ public class Main {
}
if(cmd.hasOption("url")){
crawler.analyzeWebpage(cmd.getOptionValue("url"), depth);
webpage = crawler.loadDocumentFromWebpage(cmd.getOptionValue("url"));
crawler.analyze(webpage, depth);
}
System.out.println("Count Links: " + crawler.countLinks);
......
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.PrintStream;
import java.io.*;
import java.nio.file.Path;
import java.nio.file.Paths;
import static org.junit.Assert.assertEquals;
public class CrawlerTest {
public static Crawler crawler1;
public static Crawler crawler1 = new Crawler();
public static Crawler crawler2;
public static Report report1;
public static Report report2;
public static Document webpage;
@BeforeAll
public static void init(){
Path resourceDirectory = Paths.get("src","test","resources","testPage.html");
File input = new File(resourceDirectory.toUri());
try {
webpage = Jsoup.parse(input, "UTF-8", "http://example.com/");
}catch (Exception ex){
}
crawler1 = new Crawler();
crawler2 = new Crawler();
report1 = new Report();
......@@ -30,15 +40,43 @@ public class CrawlerTest {
/**
 * Expects the calls inside the lambda to raise an {@link IllegalArgumentException}.
 */
@Test
public void testAnalyzeEmptyURL(){
Assert.assertThrows(IllegalArgumentException.class, () ->{
// NOTE(review): the empty-string call looks like the pre-refactor variant kept next to
// the null call by the diff view — confirm which of the two is current.
crawler1.analyze("", 1);
crawler1.analyze(null, 1);
});
}
/**
 * Checks that analysing the static {@code webpage} fixture reports a word count of 305.
 */
@Test
public void testWordCount(){
// NOTE(review): the two URL-based calls below look like pre-refactor lines kept by the
// diff view next to the document-based call — confirm before relying on them.
crawler1.analyze("https://www.aau.at/", 1);
crawler2.analyze("https://www.aau.at/", 1);
Report report;
report = crawler1.analyze(webpage, 1);
Assert.assertEquals(305L,report.getWordCount().longValue());
}
/**
 * Checks that analysing the static {@code webpage} fixture reports an image count of 2.
 */
@Test
public void testImageCount(){
    final Report analysis = crawler1.analyze(webpage, 1);
    final long imageCount = analysis.getImageCount().longValue();
    Assert.assertEquals(2L, imageCount);
}
/**
 * Checks that analysing the static {@code webpage} fixture reports a video count of 1.
 */
@Test
public void testVideoCount(){
    final Report analysis = crawler1.analyze(webpage, 1);
    final long videoCount = analysis.getVideoCount().longValue();
    Assert.assertEquals(1L, videoCount);
}
/**
 * Checks that analysing the static {@code webpage} fixture reports 3 links.
 */
@Test
public void testLinkCount(){
    final Report analysis = crawler1.analyze(webpage, 1);
    Assert.assertEquals(3L, analysis.getLinks().size());
}
/**
 * Checks that analysing the static {@code webpage} fixture reports 1 broken link.
 */
@Test
public void testBrokenLinkCount(){
    final Report analysis = crawler1.analyze(webpage, 1);
    Assert.assertEquals(1L, analysis.getBrokenLinks().size());
}
@AfterEach
......
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Title</title>
</head>
<body>
<h1>Test Page</h1>
<img src="aauLogo.png">
<p>
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat,
sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat.
Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi.
Nam liber tempor cum soluta nobis eleifend option congue nihil imperdiet doming id quod mazim placerat facer
</p>
<img src="aauLogo.png">
<ul>
<li><a href="testPage.html">Test Link 1</a> </li>
<li><a href="testPage.html">Test Link 2</a> </li>
<li><a href="https://broken.link">Broken Link</a> </li>
</ul>
<video src="testVideo.mp4"></video>
</body>
</html>
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment