Commit 39d264fa authored by mirako
Browse files

CheckLink

parent 0100c3d7
import org.jsoup.Connection;
import org.jsoup.HttpStatusException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.IOException;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
......@@ -14,7 +17,6 @@ public class Crawler {
Document webpage = null;
try {
webpage = Jsoup.connect("https://www.aau.at").get();
} catch (IOException e) {
e.printStackTrace();
......@@ -26,13 +28,15 @@ public class Crawler {
// System.out.println(link.attr("href"));
// });
//
// System.out.println("Count LInks: " + webpage.getElementsByTag("a").size());
System.out.println("Count LInks: " + webpage.getElementsByTag("a").size());
System.out.println("Words: " + countWords(webpage));
System.out.println("Images: " + countImages(webpage));
System.out.println("Videos: " + countVideos(webpage));
System.out.println("Links: " + getLinks(webpage).size());
//System.out.println("Broken Links: " + findBrokenLinks(getLinks(webpage)).size());
}
private Integer countWords(Document webpage){
......@@ -67,7 +71,24 @@ public class Crawler {
}
/**
 * Probes a single URL and reports whether it should be treated as a broken link.
 *
 * <p>The URL is requested once through Jsoup. A request that completes without
 * an exception counts as a working link; every failure counts as broken.
 *
 * @param url the link target to request
 * @return {@code false} if the request succeeded, {@code true} if it failed
 *         (HTTP error status, unresolvable host, other I/O failure, or a URL
 *         that Jsoup refuses to connect to)
 */
private boolean checkLink(String url){
    try {
        // ignoreContentType(true): non-HTML targets (images, PDFs, ...) must not
        // be reported as broken just because Jsoup cannot parse them.
        Jsoup.connect(url).ignoreContentType(true).execute();
        return false;
    } catch (HttpStatusException | UnknownHostException e) {
        // Expected outcomes for a dead link: error status code or unknown host.
        return true;
    } catch (IOException e) {
        // Unexpected I/O problem (timeout, TLS failure, ...): still not reachable,
        // but keep the trace so the cause can be investigated.
        e.printStackTrace();
        return true;
    } catch (IllegalArgumentException e) {
        // Jsoup rejects null, empty or malformed URLs with an unchecked exception;
        // treat those hrefs as broken instead of aborting the whole crawl.
        return true;
    }
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment