Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
mkoscher
CC Assignmen 1
Commits
39d264fa
Commit
39d264fa
authored
Mar 31, 2021
by
mirako
Browse files
CheckLink
parent
0100c3d7
Changes
4
Hide whitespace changes
Inline
Side-by-side
src/main/java/Crawler.java
View file @
39d264fa
import org.jsoup.Connection;
import org.jsoup.HttpStatusException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
...
...
@@ -14,7 +17,6 @@ public class Crawler {
Document
webpage
=
null
;
try
{
webpage
=
Jsoup
.
connect
(
"https://www.aau.at"
).
get
();
}
catch
(
IOException
e
)
{
e
.
printStackTrace
();
...
...
@@ -26,13 +28,15 @@ public class Crawler {
// System.out.println(link.attr("href"));
// });
//
//
System.out.println("Count LInks: " + webpage.getElementsByTag("a").size());
System
.
out
.
println
(
"Count LInks: "
+
webpage
.
getElementsByTag
(
"a"
).
size
());
System
.
out
.
println
(
"Words: "
+
countWords
(
webpage
));
System
.
out
.
println
(
"Images: "
+
countImages
(
webpage
));
System
.
out
.
println
(
"Videos: "
+
countVideos
(
webpage
));
System
.
out
.
println
(
"Links: "
+
getLinks
(
webpage
).
size
());
//System.out.println("Broken Links: " + findBrokenLinks(getLinks(webpage)).size());
}
private
Integer
countWords
(
Document
webpage
){
...
...
@@ -67,7 +71,24 @@ public class Crawler {
}
/**
 * Probes a link over HTTP(S) and reports whether it is broken.
 *
 * <p>The return polarity follows the earlier commented-out draft of this
 * method: {@code true} means the link is broken, {@code false} means the
 * server answered with a non-error status.
 *
 * <p>A link counts as broken when:
 * <ul>
 *   <li>{@code url} is {@code null} or blank,</li>
 *   <li>it cannot be parsed as a URL or does not use http/https,</li>
 *   <li>the connection fails (unknown host, timeout, any other I/O error),</li>
 *   <li>the response status is below 200 or is 400 and above.</li>
 * </ul>
 *
 * @param url absolute URL of the link to probe
 * @return {@code true} if the link is broken or cannot be verified,
 *         {@code false} if the server responded with a non-error status
 */
private boolean checkLink(String url) {
    // Nothing to probe: treat a missing target like an unreachable one.
    if (url == null || url.trim().isEmpty()) {
        return true;
    }

    HttpURLConnection connection = null;
    try {
        URL target = new URL(url.trim());

        // Only http/https can be cast to HttpURLConnection and probed here;
        // mailto:, ftp:, file: and similar schemes cannot be verified.
        String protocol = target.getProtocol();
        if (!"http".equalsIgnoreCase(protocol) && !"https".equalsIgnoreCase(protocol)) {
            return true;
        }

        connection = (HttpURLConnection) target.openConnection();
        connection.setRequestMethod("GET");
        // Bounded timeouts so a single dead host cannot stall the whole crawl.
        connection.setConnectTimeout(5000);
        connection.setReadTimeout(5000);
        connection.setInstanceFollowRedirects(true);

        // getResponseCode() yields -1 for a non-HTTP reply, which the
        // range check below also classifies as broken.
        int status = connection.getResponseCode();
        return status < 200 || status >= 400;
    } catch (IOException e) {
        // Covers MalformedURLException, UnknownHostException, timeouts and
        // every other transport failure: the link could not be reached.
        return true;
    } finally {
        if (connection != null) {
            connection.disconnect();
        }
    }
}
}
target/classes/Crawler.class
View file @
39d264fa
No preview for this file type
target/classes/Main.class
View file @
39d264fa
No preview for this file type
target/classes/Report.class
View file @
39d264fa
No preview for this file type
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment