Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
mkoscher
CC Assignmen 1
Commits
9b442ff0
Commit
9b442ff0
authored
Mar 24, 2021
by
Michael Koscher
Browse files
Implement simple Methods
parent
c27571a0
Changes
3
Hide whitespace changes
Inline
Side-by-side
src/main/java/Crawler.java
View file @
9b442ff0
...
...
@@ -2,6 +2,8 @@ import org.jsoup.Jsoup;
import
org.jsoup.nodes.Document
;
import
java.io.IOException
;
import
java.util.ArrayList
;
import
java.util.LinkedList
;
import
java.util.List
;
public
class
Crawler
{
...
...
@@ -17,52 +19,51 @@ public class Crawler {
}
catch
(
IOException
e
)
{
e
.
printStackTrace
();
}
Integer
countImg
=
webpage
.
getElementsByTag
(
"img"
).
size
();
System
.
out
.
println
(
"Images Counted: "
+
countImg
);
webpage
.
getElementsByTag
(
"a"
).
forEach
(
link
->
{
System
.
out
.
println
(
link
.
attr
(
"href"
));
});
System
.
out
.
println
(
"Count LInks: "
+
webpage
.
getElementsByTag
(
"a"
).
size
());
// Integer countImg = webpage.getElementsByTag("img").size();
// System.out.println("Images Counted: "+countImg);
//
// webpage.getElementsByTag("a").forEach(link -> {
// System.out.println(link.attr("href"));
// });
//
// System.out.println("Count LInks: " + webpage.getElementsByTag("a").size());
System
.
out
.
println
(
"Words: "
+
countWords
(
webpage
));
System
.
out
.
println
(
"Images: "
+
countImages
(
webpage
));
System
.
out
.
println
(
"Videos: "
+
countVideos
(
webpage
));
System
.
out
.
println
(
"Links: "
+
getLinks
(
webpage
).
size
());
}
/**
 * Counts the whitespace-separated words in the text of the given page.
 *
 * @param webpage the parsed document whose text is counted
 * @return the number of words, or 0 when the page text is empty
 */
private Integer countWords(Document webpage){
    // Trim first so leading/trailing whitespace cannot produce empty tokens.
    String text = webpage.text().trim();
    if (text.isEmpty()) {
        // "".split(...) yields one empty token, which would be miscounted as a word.
        return 0;
    }
    // Split on whitespace runs so consecutive blanks do not inflate the count.
    return text.split("\\s+").length;
}
/**
 * Counts the anchor ({@code <a>}) elements of the given page.
 *
 * @param webpage the parsed document to inspect
 * @return the number of {@code <a>} elements found
 */
private Integer countLinks(Document webpage){
    // Same tag lookup the link listing in this class uses, so both agree on what a link is.
    return webpage.getElementsByTag("a").size();
}
/**
 * Counts the image ({@code <img>}) elements of the given page.
 *
 * @param webpage the parsed document to inspect
 * @return the number of {@code <img>} elements found
 */
private Integer countImages(Document webpage){
    return webpage.getElementsByTag("img").size();
}
/**
 * Counts the video ({@code <video>}) elements of the given page.
 *
 * @param webpage the parsed document to inspect
 * @return the number of {@code <video>} elements found
 */
private Integer countVideos(Document webpage){
    return webpage.getElementsByTag("video").size();
}
/**
 * Collects the {@code href} attribute value of every anchor ({@code <a>})
 * element of the given page, in the order the elements are returned.
 *
 * @param webpage the parsed document to inspect
 * @return a list with one entry per {@code <a>} element; never {@code null}
 */
private List<String> getLinks(Document webpage){
    List<String> links = new ArrayList<>();
    // One entry is added per anchor, so the list size equals the anchor count.
    webpage.getElementsByTag("a").forEach(anchor -> links.add(anchor.attr("href")));
    return links;
}
/**
 * Filters the given links down to those for which {@code checkLink}
 * returns {@code true}.
 *
 * @param links the link URLs to check
 * @return a new list holding every link accepted by {@code checkLink},
 *         in input order; never {@code null}
 */
private List<String> findBrokenLinks(List<String> links){
    List<String> brokenLinks = new ArrayList<>();
    for (String link : links) {
        // NOTE(review): this treats a true result from checkLink as "broken";
        // confirm against checkLink's contract, the condition may need negating.
        if (checkLink(link)) {
            brokenLinks.add(link);
        }
    }
    return brokenLinks;
}
private
boolean
checkLink
(
String
url
){
...
...
target/classes/Crawler.class
View file @
9b442ff0
No preview for this file type
target/classes/Report.class
0 → 100644
View file @
9b442ff0
File added
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment