added basic webscraper
This commit is contained in:
parent
7d3a9106dc
commit
7dad75333c
4
.gitignore
vendored
4
.gitignore
vendored
@ -1,5 +1,7 @@
|
||||
.db/
|
||||
|
||||
todo
|
||||
*temp.java
|
||||
./src/main/java/dev/ksan/etfoglasiserver/temp.java
|
||||
|
||||
HELP.md
|
||||
.gradle
|
||||
|
||||
@ -33,6 +33,8 @@ dependencies {
|
||||
runtimeOnly 'org.postgresql:postgresql'
|
||||
testImplementation 'org.springframework.boot:spring-boot-starter-test'
|
||||
testRuntimeOnly 'org.junit.platform:junit-platform-launcher'
|
||||
|
||||
implementation("net.sourceforge.htmlunit:htmlunit:2.70.0")
|
||||
}
|
||||
|
||||
generateJava {
|
||||
|
||||
@ -1,17 +1,9 @@
|
||||
package dev.ksan.etfoglasiserver;
|
||||
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.SQLException;
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
import org.springframework.boot.CommandLineRunner;
|
||||
import dev.ksan.etfoglasiserver.service.Scraper;
|
||||
import java.util.Scanner;
|
||||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
|
||||
@SpringBootApplication
|
||||
public class EtfoglasiServerApplication {
|
||||
@ -19,7 +11,36 @@ public class EtfoglasiServerApplication {
|
||||
public static void main(String[] args) {
|
||||
SpringApplication.run(EtfoglasiServerApplication.class, args);
|
||||
|
||||
boolean running = true;
|
||||
|
||||
//temp.run();
|
||||
System.out.println("EtfoglasiServerApplication started");
|
||||
|
||||
Scraper scraper = new Scraper();
|
||||
Thread webClientThread = new Thread(scraper, "WebClientThread");
|
||||
Scanner scanner = new Scanner(System.in);
|
||||
webClientThread.start();
|
||||
|
||||
try {
|
||||
while (running) {
|
||||
|
||||
String command = scanner.nextLine();
|
||||
|
||||
switch (command) {
|
||||
case "stop":
|
||||
scraper.stop();
|
||||
webClientThread.interrupt();
|
||||
running = false;
|
||||
System.out.println("Stopping...");
|
||||
break;
|
||||
case "list":
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
scanner.close();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -25,6 +25,32 @@ public class Entry {
|
||||
|
||||
public Entry() {}
|
||||
|
||||
/**
 * Creates an entry from already-prepared field values.
 *
 * @param title title of the entry
 * @param time_published publication timestamp
 * @param info_entry additional info line of the entry
 * @param paragraph body text, stored as given
 * @param filepath file path associated with the entry
 */
public Entry(String title, LocalDateTime time_published, String info_entry, String paragraph, String filepath) {
    // Plain field-by-field initialisation; no validation or copying is performed.
    this.filepath = filepath;
    this.paragraph = paragraph;
    this.info_entry = info_entry;
    this.time_published = time_published;
    this.title = title;
}
|
||||
|
||||
/**
 * Creates an entry whose body is supplied as separate paragraphs.
 *
 * <p>The paragraphs are joined with a newline ({@code "\n"}) into the single body string;
 * every other argument is stored as given.
 *
 * @param title title of the entry
 * @param time_published publication timestamp
 * @param info_entry additional info line of the entry
 * @param paragraph body paragraphs, joined with {@code "\n"}
 * @param filepath file path associated with the entry
 */
public Entry(String title, LocalDateTime time_published, String info_entry, List<String> paragraph, String filepath) {
    // Delegate to the String-body constructor after flattening the paragraphs.
    this(title, time_published, info_entry, String.join("\n", paragraph), filepath);
}
|
||||
|
||||
public Entry(EntryDTO entry) {
|
||||
this.title = entry.getTitle();
|
||||
this.paragraph = entry.getParagraph();
|
||||
@ -56,6 +82,7 @@ public class Entry {
|
||||
public void setTitle(String title) {
|
||||
this.title = title;
|
||||
}
|
||||
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
|
||||
128
src/main/java/dev/ksan/etfoglasiserver/service/Scraper.java
Normal file
128
src/main/java/dev/ksan/etfoglasiserver/service/Scraper.java
Normal file
@ -0,0 +1,128 @@
|
||||
package dev.ksan.etfoglasiserver.service;
|
||||
|
||||
import com.gargoylesoftware.htmlunit.BrowserVersion;
|
||||
import com.gargoylesoftware.htmlunit.WebClient;
|
||||
import com.gargoylesoftware.htmlunit.html.DomElement;
|
||||
import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
|
||||
import com.gargoylesoftware.htmlunit.html.HtmlElement;
|
||||
import com.gargoylesoftware.htmlunit.html.HtmlPage;
|
||||
import dev.ksan.etfoglasiserver.model.Entry;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.logging.ConsoleHandler;
|
||||
import java.util.logging.Handler;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
public class Scraper implements Runnable {
|
||||
private static List<Entry> entries = new ArrayList<>();
|
||||
|
||||
private WebClient webClient;
|
||||
private volatile boolean running = true;
|
||||
|
||||
public Scraper() {
|
||||
this.webClient = new WebClient(BrowserVersion.CHROME);
|
||||
webClient.getOptions().setJavaScriptEnabled(true);
|
||||
webClient.getOptions().setCssEnabled(false);
|
||||
webClient.getOptions().setThrowExceptionOnScriptError(false);
|
||||
}
|
||||
|
||||
private static String getTextOrEmpty(HtmlElement parent, String xPath) {
|
||||
HtmlElement element = parent.getFirstByXPath(xPath);
|
||||
return element == null ? "" : element.asNormalizedText();
|
||||
}
|
||||
|
||||
static List<Entry> getEntries() {
|
||||
synchronized (entries) {
|
||||
return new ArrayList<>(entries);
|
||||
}
|
||||
}
|
||||
|
||||
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("dd.MM.yyyy HH:mm:ss");
|
||||
|
||||
private void configureHtmlUnitLogging() {
|
||||
Logger htmlUnitLogger = Logger.getLogger("com.gargoylesoftware.htmlunit");
|
||||
htmlUnitLogger.setLevel(Level.SEVERE);
|
||||
Handler consoleHandler = new ConsoleHandler();
|
||||
consoleHandler.setLevel(Level.SEVERE);
|
||||
htmlUnitLogger.addHandler(consoleHandler);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
configureHtmlUnitLogging();
|
||||
while (running && !Thread.currentThread().isInterrupted()) {
|
||||
|
||||
try {
|
||||
System.out.println("Performing WebClient task...");
|
||||
|
||||
HtmlPage mainPage = webClient.getPage("https://efee.etf.unibl.org/oglasi/");
|
||||
webClient.waitForBackgroundJavaScript(1000);
|
||||
|
||||
List<DomElement> rawToggles = mainPage.getByXPath("//a[@href='#']");
|
||||
List<HtmlAnchor> toggles = new ArrayList<>();
|
||||
for (DomElement el : rawToggles) {
|
||||
if (el instanceof HtmlAnchor) {
|
||||
toggles.add((HtmlAnchor) el);
|
||||
}
|
||||
}
|
||||
int ul_idSelection = 1;
|
||||
for (HtmlAnchor anchor : toggles) {
|
||||
String groupName = anchor.asNormalizedText().split("\n")[0].trim();
|
||||
System.out.println("Group name: " + groupName);
|
||||
HtmlPage updatedPage = anchor.click();
|
||||
webClient.waitForBackgroundJavaScript(1000);
|
||||
|
||||
String ul_id = "ul_id_" + Integer.toString(ul_idSelection);
|
||||
|
||||
DomElement rawElement = updatedPage.getElementById(ul_id);
|
||||
HtmlElement listElement =
|
||||
rawElement instanceof HtmlElement ? (HtmlElement) rawElement : null;
|
||||
|
||||
if (listElement == null) {
|
||||
System.out.println("An element with id " + ul_id + " was not found");
|
||||
ul_idSelection++;
|
||||
continue;
|
||||
}
|
||||
|
||||
List<HtmlElement> items = listElement.getElementsByTagName("li");
|
||||
for (HtmlElement item : items) {
|
||||
String title = getTextOrEmpty(item, ".//h1");
|
||||
String date = getTextOrEmpty(item, ".//h2[1]");
|
||||
String info = getTextOrEmpty(item, ".//h2[2]");
|
||||
List<String> paragraphs = new ArrayList<>();
|
||||
List<HtmlElement> pTags = item.getByXPath(".//p");
|
||||
for (HtmlElement pTag : pTags) {
|
||||
paragraphs.add(pTag.asNormalizedText());
|
||||
}
|
||||
// Entry entry = new Entry(title, groupName, date, info, paragraphs);
|
||||
Entry entry =
|
||||
new Entry(title, LocalDateTime.parse(date, formatter), info, paragraphs, null);
|
||||
entry.setParagraph(paragraphs);
|
||||
System.out.println(entry);
|
||||
|
||||
Thread.sleep(2000);
|
||||
entries.add(entry);
|
||||
}
|
||||
|
||||
ul_idSelection++;
|
||||
}
|
||||
|
||||
// Thread.sleep(20000);
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
System.out.println("ERROR: " + e.getMessage());
|
||||
} finally {
|
||||
this.webClient.close();
|
||||
}
|
||||
}
|
||||
System.out.println("WebScraper thread stopped");
|
||||
}
|
||||
|
||||
public void stop() {
|
||||
running = false;
|
||||
}
|
||||
}
|
||||
@ -44,6 +44,9 @@ public class UserService {
|
||||
|
||||
public void updateUser(UserCreationDTO user) {
|
||||
Optional<User> existingUserOpt = userRepo.findByEmail(user.getEmail());
|
||||
if(userRepo.findByEmail(user.getNewEmail()).isPresent()) {
|
||||
throw new RuntimeException("Email taken");
|
||||
}
|
||||
if (userRepo.findByEmail(user.getEmail()).isPresent()) {
|
||||
|
||||
if (this.isValidEmail(user.getEmail())) {
|
||||
@ -51,18 +54,16 @@ public class UserService {
|
||||
if (this.isValidPassword(user.getPassword())) {
|
||||
User existingUser = existingUserOpt.get();
|
||||
|
||||
/*
|
||||
if(user.getNewEmail() == null){
|
||||
|
||||
existingUser.setEmail(user.getEmail());
|
||||
}else{
|
||||
if(user.getNewEmail() != null && user.getNewEmail() != existingUser.getEmail()) {
|
||||
|
||||
existingUser.setEmail(user.getNewEmail());
|
||||
}
|
||||
|
||||
*/
|
||||
}else{
|
||||
|
||||
existingUser.setEmail(user.getEmail());
|
||||
}
|
||||
|
||||
|
||||
existingUser.setPassword(user.getPassword());
|
||||
userRepo.save(existingUser);
|
||||
} else throw new RuntimeException("Password too short");
|
||||
|
||||
30
src/main/java/dev/ksan/etfoglasiserver/temp.java
Normal file
30
src/main/java/dev/ksan/etfoglasiserver/temp.java
Normal file
@ -0,0 +1,30 @@
|
||||
package dev.ksan.etfoglasiserver;
|
||||
|
||||
import org.springframework.http.HttpEntity;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class temp {
|
||||
private static final String URL = "http://localhost:8080/subject";
|
||||
|
||||
public static void run() {
|
||||
System.out.println("started to add valjda");
|
||||
List<String> subjects = List.of(
|
||||
|
||||
);
|
||||
|
||||
RestTemplate restTemplate = new RestTemplate();
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setContentType(MediaType.APPLICATION_JSON);
|
||||
|
||||
for (String name : subjects) {
|
||||
String json = "{ \"name\": \"" + name + "\", \"code\": \"\" }";
|
||||
HttpEntity<String> request = new HttpEntity<>(json, headers);
|
||||
restTemplate.postForObject(URL, request, Void.class);
|
||||
System.out.println("Added subject: " + name);
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user