Commit 6b8653d5 authored by mirako's avatar mirako
Browse files

CheckLink

parent 39d264fa
......@@ -10,6 +10,11 @@ import java.util.LinkedList;
import java.util.List;
public class Crawler {
public int countLinks;
public int countWords;
public int countImages;
public int countVideos;
public int getLinks;
public void analyzeWebpage(String url, Integer depth){
// Document webpage = Jsoup.connect(url).get();
......@@ -28,14 +33,20 @@ public class Crawler {
// System.out.println(link.attr("href"));
// });
//
countLinks = webpage.getElementsByTag("a").size();
countWords = countWords(webpage);
countImages = countImages(webpage);
countVideos = countVideos(webpage);
getLinks = getLinks(webpage).size();
System.out.println("Count LInks: " + webpage.getElementsByTag("a").size());
System.out.println("Words: " + countWords(webpage));
System.out.println("Images: " + countImages(webpage));
System.out.println("Videos: " + countVideos(webpage));
System.out.println("Links: " + getLinks(webpage).size());
//System.out.println("Broken Links: " + findBrokenLinks(getLinks(webpage)).size());
System.out.println("Broken Links: " + findBrokenLinks(getLinks(webpage)).size());
}
......@@ -71,23 +82,18 @@ public class Crawler {
}
/**
 * Reports the status of the link at {@code url}.
 *
 * <p>As written, this method performs no check: its only live statement is
 * {@code return true}, so every URL yields {@code true} and {@code url} is never read.
 * The network probe is disabled inside the block comment in the body.
 *
 * <p>NOTE(review): in the disabled code a successful request returns {@code false} and an
 * {@code HttpStatusException} or {@code UnknownHostException} returns {@code true}, so
 * {@code true} presumably means "link is broken" - confirm against the caller.
 *
 * @param url address of the link; unused while the probe is disabled
 * @return always {@code true}
 */
private boolean checkLink(String url){
// Disabled implementation, kept for reference. Points to fix before re-enabling it:
//  - `i` is a local reset to 0 on every call, so the printed value would always be 1.
//  - the IOException branch only prints the stack trace and has no return of its own,
//    so it would fall through to the final `return true`.
/**
int i = 0;
try {
Connection.Response response = Jsoup.connect(url).ignoreContentType(true).execute();
return false;
} catch (HttpStatusException e1) {
i++;
System.out.println(i);
return true;
} catch (UnknownHostException e2) {
i++;
System.out.println(i);
return true;
} catch (IOException e) {
e.printStackTrace();
}
*/
// Unconditional result while the probe above stays commented out.
return true;
}
......
......@@ -23,6 +23,7 @@ public class Main {
e.printStackTrace();
}
assert cmd != null;
if(cmd.hasOption("depth")){
depth = Integer.valueOf(cmd.getOptionValue("depth"));
}
......@@ -31,5 +32,13 @@ public class Main {
crawler.analyzeWebpage(cmd.getOptionValue("url"), depth);
}
System.out.println("Count Links: " + crawler.countLinks);
System.out.println("Words: " + crawler.countWords);
System.out.println("Images: " + crawler.countImages);
System.out.println("Videos: " + crawler.countVideos);
System.out.println("Links: " + crawler.getLinks);
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment