added basic webscraper
This commit is contained in:
parent
7d3a9106dc
commit
7dad75333c
4
.gitignore
vendored
4
.gitignore
vendored
@ -1,5 +1,7 @@
|
|||||||
.db/
|
.db/
|
||||||
|
todo
|
||||||
|
*temp.java
|
||||||
|
./src/main/java/dev/ksan/etfoglasiserver/temp.java
|
||||||
|
|
||||||
HELP.md
|
HELP.md
|
||||||
.gradle
|
.gradle
|
||||||
|
|||||||
@ -33,6 +33,8 @@ dependencies {
|
|||||||
runtimeOnly 'org.postgresql:postgresql'
|
runtimeOnly 'org.postgresql:postgresql'
|
||||||
testImplementation 'org.springframework.boot:spring-boot-starter-test'
|
testImplementation 'org.springframework.boot:spring-boot-starter-test'
|
||||||
testRuntimeOnly 'org.junit.platform:junit-platform-launcher'
|
testRuntimeOnly 'org.junit.platform:junit-platform-launcher'
|
||||||
|
|
||||||
|
implementation("net.sourceforge.htmlunit:htmlunit:2.70.0")
|
||||||
}
|
}
|
||||||
|
|
||||||
generateJava {
|
generateJava {
|
||||||
|
|||||||
@ -1,17 +1,9 @@
|
|||||||
package dev.ksan.etfoglasiserver;
|
package dev.ksan.etfoglasiserver;
|
||||||
|
|
||||||
import java.sql.Connection;
|
import dev.ksan.etfoglasiserver.service.Scraper;
|
||||||
import java.sql.DriverManager;
|
import java.util.Scanner;
|
||||||
import java.sql.PreparedStatement;
|
|
||||||
import java.sql.SQLException;
|
|
||||||
import java.time.LocalDate;
|
|
||||||
import java.time.LocalDateTime;
|
|
||||||
|
|
||||||
import org.springframework.boot.CommandLineRunner;
|
|
||||||
import org.springframework.boot.SpringApplication;
|
import org.springframework.boot.SpringApplication;
|
||||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||||
import org.springframework.context.annotation.Bean;
|
|
||||||
import org.springframework.jdbc.core.JdbcTemplate;
|
|
||||||
|
|
||||||
@SpringBootApplication
|
@SpringBootApplication
|
||||||
public class EtfoglasiServerApplication {
|
public class EtfoglasiServerApplication {
|
||||||
@ -19,7 +11,36 @@ public class EtfoglasiServerApplication {
|
|||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
SpringApplication.run(EtfoglasiServerApplication.class, args);
|
SpringApplication.run(EtfoglasiServerApplication.class, args);
|
||||||
|
|
||||||
|
boolean running = true;
|
||||||
|
|
||||||
|
//temp.run();
|
||||||
|
System.out.println("EtfoglasiServerApplication started");
|
||||||
|
|
||||||
|
Scraper scraper = new Scraper();
|
||||||
|
Thread webClientThread = new Thread(scraper, "WebClientThread");
|
||||||
|
Scanner scanner = new Scanner(System.in);
|
||||||
|
webClientThread.start();
|
||||||
|
|
||||||
|
try {
|
||||||
|
while (running) {
|
||||||
|
|
||||||
|
String command = scanner.nextLine();
|
||||||
|
|
||||||
|
switch (command) {
|
||||||
|
case "stop":
|
||||||
|
scraper.stop();
|
||||||
|
webClientThread.interrupt();
|
||||||
|
running = false;
|
||||||
|
System.out.println("Stopping...");
|
||||||
|
break;
|
||||||
|
case "list":
|
||||||
|
System.out.println();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
} finally {
|
||||||
|
scanner.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -25,6 +25,32 @@ public class Entry {
|
|||||||
|
|
||||||
public Entry() {}
|
public Entry() {}
|
||||||
|
|
||||||
|
/**
 * Creates an entry from already-prepared field values; each argument is stored as given.
 *
 * @param title value stored in {@code title}
 * @param time_published value stored in {@code time_published}
 * @param info_entry value stored in {@code info_entry}
 * @param paragraph value stored in {@code paragraph}
 * @param filepath value stored in {@code filepath}
 */
public Entry(
    String title,
    LocalDateTime time_published,
    String info_entry,
    String paragraph,
    String filepath) {
  // Plain field initialisation; the assignments are independent of each other.
  this.filepath = filepath;
  this.paragraph = paragraph;
  this.info_entry = info_entry;
  this.time_published = time_published;
  this.title = title;
}
|
||||||
|
|
||||||
|
/**
 * Creates an entry whose body text is given as a list of paragraphs.
 *
 * <p>The paragraphs are joined with a newline ({@code "\n"}) into a single string. A
 * {@code null} list results in a {@code null} paragraph instead of a
 * {@link NullPointerException}, matching what the String-based constructor accepts.
 *
 * @param title value stored in {@code title}
 * @param time_published value stored in {@code time_published}
 * @param info_entry value stored in {@code info_entry}
 * @param paragraph paragraphs to join, in order; may be {@code null}
 * @param filepath value stored in {@code filepath}
 */
public Entry(
    String title,
    LocalDateTime time_published,
    String info_entry,
    List<String> paragraph,
    String filepath) {
  // Delegate to the String-based constructor so both constructors initialise fields identically.
  this(
      title,
      time_published,
      info_entry,
      paragraph == null ? null : String.join("\n", paragraph),
      filepath);
}
|
||||||
|
|
||||||
public Entry(EntryDTO entry) {
|
public Entry(EntryDTO entry) {
|
||||||
this.title = entry.getTitle();
|
this.title = entry.getTitle();
|
||||||
this.paragraph = entry.getParagraph();
|
this.paragraph = entry.getParagraph();
|
||||||
@ -56,6 +82,7 @@ public class Entry {
|
|||||||
public void setTitle(String title) {
|
public void setTitle(String title) {
|
||||||
this.title = title;
|
this.title = title;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getTitle() {
|
public String getTitle() {
|
||||||
return title;
|
return title;
|
||||||
}
|
}
|
||||||
|
|||||||
128
src/main/java/dev/ksan/etfoglasiserver/service/Scraper.java
Normal file
128
src/main/java/dev/ksan/etfoglasiserver/service/Scraper.java
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
package dev.ksan.etfoglasiserver.service;
|
||||||
|
|
||||||
|
import com.gargoylesoftware.htmlunit.BrowserVersion;
|
||||||
|
import com.gargoylesoftware.htmlunit.WebClient;
|
||||||
|
import com.gargoylesoftware.htmlunit.html.DomElement;
|
||||||
|
import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
|
||||||
|
import com.gargoylesoftware.htmlunit.html.HtmlElement;
|
||||||
|
import com.gargoylesoftware.htmlunit.html.HtmlPage;
|
||||||
|
import dev.ksan.etfoglasiserver.model.Entry;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
import java.time.format.DateTimeFormatter;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.logging.ConsoleHandler;
|
||||||
|
import java.util.logging.Handler;
|
||||||
|
import java.util.logging.Level;
|
||||||
|
import java.util.logging.Logger;
|
||||||
|
|
||||||
|
public class Scraper implements Runnable {
|
||||||
|
private static List<Entry> entries = new ArrayList<>();
|
||||||
|
|
||||||
|
private WebClient webClient;
|
||||||
|
private volatile boolean running = true;
|
||||||
|
|
||||||
|
public Scraper() {
|
||||||
|
this.webClient = new WebClient(BrowserVersion.CHROME);
|
||||||
|
webClient.getOptions().setJavaScriptEnabled(true);
|
||||||
|
webClient.getOptions().setCssEnabled(false);
|
||||||
|
webClient.getOptions().setThrowExceptionOnScriptError(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String getTextOrEmpty(HtmlElement parent, String xPath) {
|
||||||
|
HtmlElement element = parent.getFirstByXPath(xPath);
|
||||||
|
return element == null ? "" : element.asNormalizedText();
|
||||||
|
}
|
||||||
|
|
||||||
|
static List<Entry> getEntries() {
|
||||||
|
synchronized (entries) {
|
||||||
|
return new ArrayList<>(entries);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("dd.MM.yyyy HH:mm:ss");
|
||||||
|
|
||||||
|
private void configureHtmlUnitLogging() {
|
||||||
|
Logger htmlUnitLogger = Logger.getLogger("com.gargoylesoftware.htmlunit");
|
||||||
|
htmlUnitLogger.setLevel(Level.SEVERE);
|
||||||
|
Handler consoleHandler = new ConsoleHandler();
|
||||||
|
consoleHandler.setLevel(Level.SEVERE);
|
||||||
|
htmlUnitLogger.addHandler(consoleHandler);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
configureHtmlUnitLogging();
|
||||||
|
while (running && !Thread.currentThread().isInterrupted()) {
|
||||||
|
|
||||||
|
try {
|
||||||
|
System.out.println("Performing WebClient task...");
|
||||||
|
|
||||||
|
HtmlPage mainPage = webClient.getPage("https://efee.etf.unibl.org/oglasi/");
|
||||||
|
webClient.waitForBackgroundJavaScript(1000);
|
||||||
|
|
||||||
|
List<DomElement> rawToggles = mainPage.getByXPath("//a[@href='#']");
|
||||||
|
List<HtmlAnchor> toggles = new ArrayList<>();
|
||||||
|
for (DomElement el : rawToggles) {
|
||||||
|
if (el instanceof HtmlAnchor) {
|
||||||
|
toggles.add((HtmlAnchor) el);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int ul_idSelection = 1;
|
||||||
|
for (HtmlAnchor anchor : toggles) {
|
||||||
|
String groupName = anchor.asNormalizedText().split("\n")[0].trim();
|
||||||
|
System.out.println("Group name: " + groupName);
|
||||||
|
HtmlPage updatedPage = anchor.click();
|
||||||
|
webClient.waitForBackgroundJavaScript(1000);
|
||||||
|
|
||||||
|
String ul_id = "ul_id_" + Integer.toString(ul_idSelection);
|
||||||
|
|
||||||
|
DomElement rawElement = updatedPage.getElementById(ul_id);
|
||||||
|
HtmlElement listElement =
|
||||||
|
rawElement instanceof HtmlElement ? (HtmlElement) rawElement : null;
|
||||||
|
|
||||||
|
if (listElement == null) {
|
||||||
|
System.out.println("An element with id " + ul_id + " was not found");
|
||||||
|
ul_idSelection++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
List<HtmlElement> items = listElement.getElementsByTagName("li");
|
||||||
|
for (HtmlElement item : items) {
|
||||||
|
String title = getTextOrEmpty(item, ".//h1");
|
||||||
|
String date = getTextOrEmpty(item, ".//h2[1]");
|
||||||
|
String info = getTextOrEmpty(item, ".//h2[2]");
|
||||||
|
List<String> paragraphs = new ArrayList<>();
|
||||||
|
List<HtmlElement> pTags = item.getByXPath(".//p");
|
||||||
|
for (HtmlElement pTag : pTags) {
|
||||||
|
paragraphs.add(pTag.asNormalizedText());
|
||||||
|
}
|
||||||
|
// Entry entry = new Entry(title, groupName, date, info, paragraphs);
|
||||||
|
Entry entry =
|
||||||
|
new Entry(title, LocalDateTime.parse(date, formatter), info, paragraphs, null);
|
||||||
|
entry.setParagraph(paragraphs);
|
||||||
|
System.out.println(entry);
|
||||||
|
|
||||||
|
Thread.sleep(2000);
|
||||||
|
entries.add(entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
ul_idSelection++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Thread.sleep(20000);
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
System.out.println("ERROR: " + e.getMessage());
|
||||||
|
} finally {
|
||||||
|
this.webClient.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
System.out.println("WebScraper thread stopped");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void stop() {
|
||||||
|
running = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -44,6 +44,9 @@ public class UserService {
|
|||||||
|
|
||||||
public void updateUser(UserCreationDTO user) {
|
public void updateUser(UserCreationDTO user) {
|
||||||
Optional<User> existingUserOpt = userRepo.findByEmail(user.getEmail());
|
Optional<User> existingUserOpt = userRepo.findByEmail(user.getEmail());
|
||||||
|
if(userRepo.findByEmail(user.getNewEmail()).isPresent()) {
|
||||||
|
throw new RuntimeException("Email taken");
|
||||||
|
}
|
||||||
if (userRepo.findByEmail(user.getEmail()).isPresent()) {
|
if (userRepo.findByEmail(user.getEmail()).isPresent()) {
|
||||||
|
|
||||||
if (this.isValidEmail(user.getEmail())) {
|
if (this.isValidEmail(user.getEmail())) {
|
||||||
@ -51,18 +54,16 @@ public class UserService {
|
|||||||
if (this.isValidPassword(user.getPassword())) {
|
if (this.isValidPassword(user.getPassword())) {
|
||||||
User existingUser = existingUserOpt.get();
|
User existingUser = existingUserOpt.get();
|
||||||
|
|
||||||
/*
|
|
||||||
if(user.getNewEmail() == null){
|
|
||||||
|
|
||||||
existingUser.setEmail(user.getEmail());
|
if(user.getNewEmail() != null && user.getNewEmail() != existingUser.getEmail()) {
|
||||||
}else{
|
|
||||||
|
|
||||||
existingUser.setEmail(user.getNewEmail());
|
existingUser.setEmail(user.getNewEmail());
|
||||||
}
|
}else{
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
existingUser.setEmail(user.getEmail());
|
existingUser.setEmail(user.getEmail());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
existingUser.setPassword(user.getPassword());
|
existingUser.setPassword(user.getPassword());
|
||||||
userRepo.save(existingUser);
|
userRepo.save(existingUser);
|
||||||
} else throw new RuntimeException("Password too short");
|
} else throw new RuntimeException("Password too short");
|
||||||
|
|||||||
30
src/main/java/dev/ksan/etfoglasiserver/temp.java
Normal file
30
src/main/java/dev/ksan/etfoglasiserver/temp.java
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
package dev.ksan.etfoglasiserver;
|
||||||
|
|
||||||
|
import org.springframework.http.HttpEntity;
|
||||||
|
import org.springframework.http.HttpHeaders;
|
||||||
|
import org.springframework.http.MediaType;
|
||||||
|
import org.springframework.web.client.RestTemplate;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class temp {
|
||||||
|
private static final String URL = "http://localhost:8080/subject";
|
||||||
|
|
||||||
|
public static void run() {
|
||||||
|
System.out.println("started to add valjda");
|
||||||
|
List<String> subjects = List.of(
|
||||||
|
|
||||||
|
);
|
||||||
|
|
||||||
|
RestTemplate restTemplate = new RestTemplate();
|
||||||
|
HttpHeaders headers = new HttpHeaders();
|
||||||
|
headers.setContentType(MediaType.APPLICATION_JSON);
|
||||||
|
|
||||||
|
for (String name : subjects) {
|
||||||
|
String json = "{ \"name\": \"" + name + "\", \"code\": \"\" }";
|
||||||
|
HttpEntity<String> request = new HttpEntity<>(json, headers);
|
||||||
|
restTemplate.postForObject(URL, request, Void.class);
|
||||||
|
System.out.println("Added subject: " + name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user