it all seems to work somewhat well
This commit is contained in:
5
pom.xml
5
pom.xml
@@ -22,6 +22,11 @@
|
|||||||
<artifactId>json</artifactId>
|
<artifactId>json</artifactId>
|
||||||
<version>20250517</version>
|
<version>20250517</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>xyz.nextn</groupId>
|
||||||
|
<artifactId>simple-levenshtein-distance</artifactId>
|
||||||
|
<version>1.0.0</version>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
|
|||||||
@@ -4,12 +4,24 @@ import com.microsoft.playwright.ElementHandle;
|
|||||||
|
|
||||||
public class BonusItem {
|
public class BonusItem {
|
||||||
String title;
|
String title;
|
||||||
|
String description = "";
|
||||||
String bonusText;
|
String bonusText;
|
||||||
String category;
|
String category;
|
||||||
String imageURL;
|
String imageURL;
|
||||||
|
|
||||||
|
String moreInfoURL;
|
||||||
|
|
||||||
|
float originalPrice = 0.0f;
|
||||||
|
float bonusPrice = 0.0f;
|
||||||
|
|
||||||
BonusItem(ElementHandle element) {
|
BonusItem(ElementHandle element) {
|
||||||
this.title = element.getAttribute("title");
|
this.title = element.getAttribute("title");
|
||||||
|
this.moreInfoURL = "https://www.ah.nl"+element.getAttribute("href");
|
||||||
|
|
||||||
|
ElementHandle descriptionElement = element.querySelector("[data-testhook=\"card-description\"]");
|
||||||
|
if (descriptionElement != null) {
|
||||||
|
this.description = descriptionElement.innerText();
|
||||||
|
}
|
||||||
|
|
||||||
ElementHandle bonusElement = element.querySelector(".promotion-label-base_textContainer__DFx6D");
|
ElementHandle bonusElement = element.querySelector(".promotion-label-base_textContainer__DFx6D");
|
||||||
this.bonusText = bonusElement.innerHTML().replaceAll("<[^>]*>", " ");
|
this.bonusText = bonusElement.innerHTML().replaceAll("<[^>]*>", " ");
|
||||||
@@ -17,7 +29,20 @@ public class BonusItem {
|
|||||||
ElementHandle categoryContainer = element.evaluateHandle("el => el.closest('section')").asElement();
|
ElementHandle categoryContainer = element.evaluateHandle("el => el.closest('section')").asElement();
|
||||||
this.category = categoryContainer.getAttribute("id");
|
this.category = categoryContainer.getAttribute("id");
|
||||||
|
|
||||||
|
|
||||||
this.imageURL = element.querySelector(".promotion-card-image_img__Ca5n8").getAttribute("data-src");
|
this.imageURL = element.querySelector(".promotion-card-image_img__Ca5n8").getAttribute("data-src");
|
||||||
|
|
||||||
|
ElementHandle priceContainer = element.querySelector("[data-testhook=\"price\"]");
|
||||||
|
if (priceContainer != null) {
|
||||||
|
float priceInteger = Float.parseFloat(priceContainer.querySelector(".promotion-price_integer__Tq2rf").innerText());
|
||||||
|
|
||||||
|
int priceDecimalsTemp = Integer.parseInt(priceContainer.querySelector(".promotion-price_fractional__U-irD").innerText());
|
||||||
|
float priceDecimals = (float) priceDecimalsTemp / 100;
|
||||||
|
|
||||||
|
this.bonusPrice = priceInteger + priceDecimals;
|
||||||
|
ElementHandle originalPriceElement = priceContainer.querySelector(".promotion-price_was__jhW9R");
|
||||||
|
if (originalPriceElement != null) { // those annoying "Christmas only items" are so annoying
|
||||||
|
this.originalPrice = Float.parseFloat(originalPriceElement.innerText());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,86 @@
|
|||||||
|
package nl.herpiederpiee.appie_scraper;
|
||||||
|
|
||||||
|
import com.microsoft.playwright.*;
|
||||||
|
import xyz.nextn.levenshteindistance.LevenshteinDistance;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
public class BonusManager {
|
||||||
|
ArrayList<BonusItem> bonusItems = new ArrayList<BonusItem>();;
|
||||||
|
|
||||||
|
public void updateBonusItems(){
|
||||||
|
try (Playwright playwright = Playwright.create()) {
|
||||||
|
|
||||||
|
Browser browser = playwright.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false));
|
||||||
|
|
||||||
|
BrowserContext context = browser.newContext();
|
||||||
|
Page bonusPagina = context.newPage();
|
||||||
|
|
||||||
|
|
||||||
|
bonusPagina.navigate("https://www.ah.nl/bonus");
|
||||||
|
TimeUnit.SECONDS.sleep(5); // wait for page to actaully fully load
|
||||||
|
|
||||||
|
Locator bonusElements = bonusPagina.locator(".promotion-card_root__tQA3z");
|
||||||
|
for (ElementHandle bonusElement : bonusElements.elementHandles()){
|
||||||
|
BonusItem bonusItem = new BonusItem(bonusElement);
|
||||||
|
|
||||||
|
// exclude annoying elements
|
||||||
|
if (bonusItem.category.equals( "onlineOnly")) continue;
|
||||||
|
if (bonusItem.category.equals( "gall")) continue;
|
||||||
|
if (bonusItem.category.equals( "gall-card")) continue;
|
||||||
|
if (bonusItem.category.equals( "etos")) continue;
|
||||||
|
|
||||||
|
this.bonusItems.add(bonusItem);
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public ArrayList<BonusItem> getBonusItems(String name){
|
||||||
|
ArrayList<Pair<BonusItem, Integer>> list = new ArrayList<>();
|
||||||
|
|
||||||
|
for (BonusItem bonusItem : bonusItems) {
|
||||||
|
Integer score = fuzzyMatchScore(name, bonusItem.title);
|
||||||
|
list.add(Pair.pair(bonusItem, score));
|
||||||
|
}
|
||||||
|
list.sort((a, b) -> Integer.compare(b.second, a.second));
|
||||||
|
|
||||||
|
ArrayList<BonusItem> top10 = new ArrayList<>();
|
||||||
|
int i = 0;
|
||||||
|
while (top10.size() < 10) {
|
||||||
|
top10.add(list.get(i).first);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return top10;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int fuzzyMatchScore(String query, String title) {
|
||||||
|
query = query.toLowerCase();
|
||||||
|
title = title.toLowerCase();
|
||||||
|
|
||||||
|
if (title.contains(query)) {
|
||||||
|
return 100; // perfect match
|
||||||
|
}
|
||||||
|
|
||||||
|
int best = Integer.MAX_VALUE;
|
||||||
|
|
||||||
|
int qlen = query.length();
|
||||||
|
int tlen = title.length();
|
||||||
|
|
||||||
|
for (int i = 0; i <= tlen - qlen; i++) {
|
||||||
|
String sub = title.substring(i, i + qlen);
|
||||||
|
int dist = LevenshteinDistance.calculate(query, sub);
|
||||||
|
if (dist < best) best = dist;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert distance to similarity percentage
|
||||||
|
int score = (int)(100.0 * (1.0 - (best / (double) qlen)));
|
||||||
|
|
||||||
|
return Math.max(0, Math.min(100, score));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,46 +1,30 @@
|
|||||||
package nl.herpiederpiee.appie_scraper;
|
package nl.herpiederpiee.appie_scraper;
|
||||||
import com.microsoft.playwright.*;
|
|
||||||
import com.microsoft.playwright.options.WaitUntilState;
|
|
||||||
import org.json.*;
|
|
||||||
|
|
||||||
|
import com.microsoft.playwright.*;
|
||||||
|
import org.json.*;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Random;
|
import java.util.Scanner;
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
|
|
||||||
public class Main {
|
public class Main {
|
||||||
|
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
try (Playwright playwright = Playwright.create()) {
|
BonusManager bonusManager = new BonusManager();
|
||||||
|
bonusManager.updateBonusItems();
|
||||||
|
|
||||||
Browser browser = playwright.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false));
|
Scanner input = new Scanner(System.in);
|
||||||
|
|
||||||
BrowserContext context = browser.newContext();
|
|
||||||
Page bonusPagina = context.newPage();
|
|
||||||
|
|
||||||
|
|
||||||
bonusPagina.navigate("https://www.ah.nl/bonus");
|
|
||||||
System.out.println("Dom Content Loaded!");
|
|
||||||
|
|
||||||
TimeUnit.SECONDS.sleep(5);
|
while (true) {
|
||||||
ArrayList<BonusItem> bonusItems = new ArrayList<BonusItem>();
|
System.out.println("\n\nWhat item would you like to look for?");
|
||||||
int counter = 0;
|
String userInput = input.nextLine();
|
||||||
Locator bonusElements = bonusPagina.locator(".promotion-card_root__tQA3z");
|
if (userInput.equals("qqq")) break;
|
||||||
for (ElementHandle bonusElement : bonusElements.elementHandles()){
|
ArrayList<BonusItem> userResults = bonusManager.getBonusItems(userInput);
|
||||||
BonusItem bonusItem = new BonusItem(bonusElement);
|
|
||||||
bonusItems.add(bonusItem);
|
for (BonusItem bonusItem : userResults) {
|
||||||
counter++;
|
System.out.println(bonusItem.title + " => " + bonusItem.bonusText);
|
||||||
}
|
}
|
||||||
System.out.println("Amount of items: " + counter);
|
|
||||||
// get random item from array
|
|
||||||
Random random = new Random();
|
|
||||||
BonusItem chosenItem = bonusItems.get(random.nextInt(bonusItems.size()));
|
|
||||||
System.out.println("Random Item:\n"+chosenItem.title+" => "+chosenItem.bonusText + " ("+chosenItem.category+")\nImage URL:"+chosenItem.imageURL);
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
17
src/main/java/nl/herpiederpiee/appie_scraper/Pair.java
Normal file
17
src/main/java/nl/herpiederpiee/appie_scraper/Pair.java
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
package nl.herpiederpiee.appie_scraper;
|
||||||
|
|
||||||
|
/* Generated by ChatGPT, since I couldn't find a proper library that works the way I want. */
|
||||||
|
/**
 * Immutable holder for two related values.
 *
 * <p>Two pairs are equal when both components are equal, so instances can be
 * compared and used in hash-based collections.
 *
 * @param <F> type of the first component
 * @param <S> type of the second component
 */
public class Pair<F, S> {

    /** First component; may be {@code null}. */
    public final F first;

    /** Second component; may be {@code null}. */
    public final S second;

    /**
     * Creates a pair from the two given values.
     *
     * @param first value stored in {@link #first}
     * @param second value stored in {@link #second}
     */
    public Pair(F first, S second) {
        this.first = first;
        this.second = second;
    }

    /**
     * Static factory equivalent to the constructor, letting the compiler infer
     * the type arguments.
     *
     * @param first value stored in {@link #first}
     * @param second value stored in {@link #second}
     * @return a new pair holding both values
     */
    public static <F, S> Pair<F, S> pair(F first, S second) {
        return new Pair<>(first, second);
    }

    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (!(other instanceof Pair)) {
            return false;
        }
        Pair<?, ?> that = (Pair<?, ?>) other;
        return java.util.Objects.equals(first, that.first)
                && java.util.Objects.equals(second, that.second);
    }

    @Override
    public int hashCode() {
        return java.util.Objects.hash(first, second);
    }

    @Override
    public String toString() {
        return "Pair(" + first + ", " + second + ")";
    }
}
|
||||||
Reference in New Issue
Block a user