Commit 18cbdfcb authored by mkoscher
Browse files

Merge branch 'michi/InitSoupAndPOM' into 'master'

Add Dependencies and Test Jsoup

See merge request mkoscher/cc-assignmen-1!1
parents 8ee1d93f c27571a0
......@@ -13,4 +13,24 @@
<maven.compiler.target>11</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.13.1</version>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.2</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>
\ No newline at end of file
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.IOException;
import java.util.List;
/**
 * Fetches a web page with jsoup and prints simple statistics about it.
 *
 * <p>Only {@link #analyzeWebpage(String, Integer)} does real work at the moment; the private
 * helpers below are placeholders that still have to be implemented.
 */
public class Crawler {

    /**
     * Downloads the page at {@code url} and prints, to standard output, the number of
     * {@code <img>} elements, the {@code href} attribute of every {@code <a>} element, and the
     * number of {@code <a>} elements.
     *
     * <p>If the page cannot be fetched, the failure is reported on standard error and the method
     * returns without printing any statistics.
     *
     * @param url   address of the page to analyze
     * @param depth crawl depth requested by the caller; accepted but not used yet
     */
    public void analyzeWebpage(String url, Integer depth) {
        Document webpage;
        try {
            // Fetch the page the caller asked for.
            webpage = Jsoup.connect(url).get();
        } catch (IOException | IllegalArgumentException e) {
            // IOException: network/HTTP failure. IllegalArgumentException: jsoup rejects an
            // empty or malformed URL. Either way there is no document to analyze, so stop here
            // instead of dereferencing a null document.
            System.err.println("Could not fetch " + url);
            e.printStackTrace();
            return;
        }

        int countImg = webpage.getElementsByTag("img").size();
        System.out.println("Images Counted: " + countImg);

        webpage.getElementsByTag("a").forEach(link -> System.out.println(link.attr("href")));
        System.out.println("Count LInks: " + webpage.getElementsByTag("a").size());
    }

    /**
     * Placeholder for counting the words on a page.
     *
     * @param webpage parsed page
     * @return always {@code 0} until implemented
     */
    private Integer countWords(Document webpage) {
        // TODO Implement, e.g. take the document text, split it on spaces and count the parts.
        return 0;
    }

    /**
     * Placeholder for counting the links on a page.
     *
     * @param webpage parsed page
     * @return always {@code 0} until implemented
     */
    private Integer countLinks(Document webpage) {
        // TODO Implement
        return 0;
    }

    /**
     * Placeholder for counting the images on a page.
     *
     * @param webpage parsed page
     * @return always {@code 0} until implemented
     */
    private Integer countImages(Document webpage) {
        // TODO Implement
        return 0;
    }

    /**
     * Placeholder for counting the videos on a page.
     *
     * @param webpage parsed page
     * @return always {@code 0} until implemented
     */
    private Integer countVideos(Document webpage) {
        // TODO Implement
        return 0;
    }

    /**
     * Placeholder for collecting the link targets of a page.
     *
     * @param webpage parsed page
     * @return an empty, unmodifiable list until implemented (never {@code null})
     */
    private List<String> getLinks(Document webpage) {
        // TODO Implement
        return List.of();
    }

    /**
     * Placeholder for filtering a list of links down to the broken ones.
     *
     * @param links link targets to check
     * @return an empty, unmodifiable list until implemented (never {@code null})
     */
    private List<String> findBrokenLinks(List<String> links) {
        // TODO Implement
        return List.of();
    }

    /**
     * Placeholder for checking a single link.
     *
     * @param url link target to check
     * @return always {@code false} until implemented
     */
    private boolean checkLink(String url) {
        // TODO Implement
        return false;
    }
}
import org.apache.commons.cli.*;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.IOException;
/**
 * Command-line entry point: reads {@code --url} and {@code --depth} and hands them to
 * {@link Crawler#analyzeWebpage(String, Integer)}.
 */
public class Main {

    /** Depth used when {@code --depth} is not given on the command line. */
    private static final int DEFAULT_DEPTH = 2;

    /**
     * Parses the command line and starts the analysis.
     *
     * <p>Exits with status 1 and a usage message on standard error when the arguments cannot be
     * parsed, when {@code --depth} is not an integer, or when {@code --url} is missing.
     *
     * @param args raw command-line arguments
     */
    public static void main(String[] args) {
        Option urlArg = Option.builder().longOpt("url").argName("u").hasArg().desc("URL to parse").build();
        Option depthArg = Option.builder().longOpt("depth").argName("d").hasArg().desc("Depth to parse").build();

        Options options = new Options();
        options.addOption(urlArg);
        options.addOption(depthArg);

        CommandLine cmd;
        try {
            CommandLineParser parser = new DefaultParser();
            cmd = parser.parse(options, args);
        } catch (ParseException e) {
            // Without a parsed command line there is nothing to query; stop here rather than
            // continuing with a null CommandLine.
            fail("Invalid arguments: " + e.getMessage());
            return;
        }

        Integer depth = DEFAULT_DEPTH;
        if (cmd.hasOption("depth")) {
            String rawDepth = cmd.getOptionValue("depth");
            try {
                depth = Integer.valueOf(rawDepth);
            } catch (NumberFormatException e) {
                fail("--depth must be an integer, got: " + rawDepth);
                return;
            }
        }

        if (!cmd.hasOption("url")) {
            fail("Missing required option --url");
            return;
        }

        Crawler crawler = new Crawler();
        crawler.analyzeWebpage(cmd.getOptionValue("url"), depth);
    }

    /** Prints {@code message} and a short usage line to standard error, then exits with status 1. */
    private static void fail(String message) {
        System.err.println(message);
        System.err.println("Usage: --url <u> [--depth <d>]");
        System.exit(1);
    }
}
import java.util.List;
/**
 * Mutable holder for the results gathered about one web page: image, video and word counts plus
 * the lists of links and broken links.
 *
 * <p>Every property starts out as {@code null} until its setter is called. Lists are stored and
 * returned by reference; no copies are made.
 */
public class Report {

    // --- image count ---

    private Integer imageCount;

    /** @return the stored image count, or {@code null} if none was set */
    public Integer getImageCount() {
        return this.imageCount;
    }

    /** @param imageCount image count to store */
    public void setImageCount(Integer imageCount) {
        this.imageCount = imageCount;
    }

    // --- video count ---

    private Integer videoCount;

    /** @return the stored video count, or {@code null} if none was set */
    public Integer getVideoCount() {
        return this.videoCount;
    }

    /** @param videoCount video count to store */
    public void setVideoCount(Integer videoCount) {
        this.videoCount = videoCount;
    }

    // --- word count ---

    private Integer wordCount;

    /** @return the stored word count, or {@code null} if none was set */
    public Integer getWordCount() {
        return this.wordCount;
    }

    /** @param wordCount word count to store */
    public void setWordCount(Integer wordCount) {
        this.wordCount = wordCount;
    }

    // --- links ---

    private List<String> links;

    /** @return the stored list of links (same instance that was set), or {@code null} */
    public List<String> getLinks() {
        return this.links;
    }

    /** @param links list of links to store; kept by reference */
    public void setLinks(List<String> links) {
        this.links = links;
    }

    // --- broken links ---

    private List<String> brokenLinks;

    /** @return the stored list of broken links (same instance that was set), or {@code null} */
    public List<String> getBrokenLinks() {
        return this.brokenLinks;
    }

    /** @param brokenLinks list of broken links to store; kept by reference */
    public void setBrokenLinks(List<String> brokenLinks) {
        this.brokenLinks = brokenLinks;
    }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment