Commit 0100c3d7 authored by mkoscher
Browse files

Merge branch 'michi/InitSoupAndPOM' into 'master'

Implement simple Methods

See merge request mkoscher/cc-assignmen-1!2
parents 18cbdfcb 9b442ff0
......@@ -2,6 +2,8 @@ import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
public class Crawler {
......@@ -17,52 +19,51 @@ public class Crawler {
} catch (IOException e) {
e.printStackTrace();
}
Integer countImg = webpage.getElementsByTag("img").size();
System.out.println("Images Counted: "+countImg);
webpage.getElementsByTag("a").forEach(link -> {
System.out.println(link.attr("href"));
});
System.out.println("Count LInks: " + webpage.getElementsByTag("a").size());
// Integer countImg = webpage.getElementsByTag("img").size();
// System.out.println("Images Counted: "+countImg);
//
// webpage.getElementsByTag("a").forEach(link -> {
// System.out.println(link.attr("href"));
// });
//
// System.out.println("Count LInks: " + webpage.getElementsByTag("a").size());
System.out.println("Words: " + countWords(webpage));
System.out.println("Images: " + countImages(webpage));
System.out.println("Videos: " + countVideos(webpage));
System.out.println("Links: " + getLinks(webpage).size());
}
/**
 * Counts the words in the text of the given page.
 *
 * <p>The text is obtained from {@code webpage.text()}, trimmed, and split on
 * runs of whitespace. A page with no text yields 0 (a plain
 * {@code "".split(" ")} would report 1).
 *
 * @param webpage the parsed page to inspect
 * @return the number of whitespace-separated tokens in the page text
 */
private Integer countWords(Document webpage){
    String text = webpage.text().trim();
    if (text.isEmpty()) {
        return 0;
    }
    return text.split("\\s+").length;
}
/**
 * Counts the anchor ({@code <a>}) elements of the given page.
 *
 * <p>Replaces the former stub, which returned 0 and was followed by
 * unreachable word-counting statements.
 *
 * @param webpage the parsed page to inspect
 * @return the number of {@code <a>} elements found in the page
 */
private Integer countLinks(Document webpage){
    return webpage.getElementsByTag("a").size();
}
/**
 * Counts the image ({@code <img>}) elements of the given page.
 *
 * @param webpage the parsed page to inspect
 * @return the number of {@code <img>} elements found in the page
 */
private Integer countImages(Document webpage){
    return webpage.getElementsByTag("img").size();
}
/**
 * Counts the video ({@code <video>}) elements of the given page.
 *
 * @param webpage the parsed page to inspect
 * @return the number of {@code <video>} elements found in the page
 */
private Integer countVideos(Document webpage){
    return webpage.getElementsByTag("video").size();
}
/**
 * Collects the {@code href} attribute value of every anchor ({@code <a>})
 * element of the given page, in the order the elements are returned by
 * {@code getElementsByTag}.
 *
 * @param webpage the parsed page to inspect
 * @return a new mutable list with one entry per {@code <a>} element; never {@code null}
 */
private List<String> getLinks(Document webpage){
    List<String> links = new ArrayList<>();
    webpage.getElementsByTag("a").forEach(link -> links.add(link.attr("href")));
    return links;
}
/**
 * Filters the given links down to those selected by {@code checkLink}.
 *
 * @param links the link URLs to examine
 * @return a new list containing every link for which {@code checkLink} returned {@code true}
 */
private List<String> findBrokenLinks(List<String> links){
    List<String> brokenLinks = new ArrayList<>();
    for (String link : links) {
        // NOTE(review): this assumes checkLink returns true for a BROKEN link.
        // If it returns true for a reachable link, the condition must be negated — confirm.
        if (checkLink(link)) {
            brokenLinks.add(link);
        }
    }
    return brokenLinks;
}
private boolean checkLink(String url){
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment