it all seems to work somewhat well

This commit is contained in:
Valentijn van der Jagt
2025-11-22 15:00:09 +01:00
parent ec2db75eb8
commit e2ee4dd30e
5 changed files with 148 additions and 31 deletions

View File

@@ -22,6 +22,11 @@
<artifactId>json</artifactId>
<version>20250517</version>
</dependency>
<dependency>
<groupId>xyz.nextn</groupId>
<artifactId>simple-levenshtein-distance</artifactId>
<version>1.0.0</version>
</dependency>
</dependencies>
<build>
<plugins>

View File

@@ -4,12 +4,24 @@ import com.microsoft.playwright.ElementHandle;
/**
 * One promotion card scraped from the bonus page.
 * All fields are filled in by the constructor from a single card element.
 */
public class BonusItem {
// Value of the card element's "title" attribute.
String title;
// Inner text of the card-description element; stays "" when the card has none.
String description = "";
// Promotion label HTML with every tag replaced by a single space.
String bonusText;
// "id" attribute of the closest enclosing <section> of the card.
String category;
// "data-src" attribute of the card image.
String imageURL;
// "https://www.ah.nl" followed by the card's "href" attribute.
String moreInfoURL;
// Price before the promotion; stays 0.0f when the card shows no "was" price.
float originalPrice = 0.0f;
// Promotion price; stays 0.0f when the card has no price container.
float bonusPrice = 0.0f;
/**
 * Builds a bonus item by reading attributes and child elements of a promotion card.
 *
 * @param element handle of the promotion card element to read from
 */
BonusItem(ElementHandle element) {
this.title = element.getAttribute("title");
this.moreInfoURL = "https://www.ah.nl"+element.getAttribute("href");
ElementHandle descriptionElement = element.querySelector("[data-testhook=\"card-description\"]");
if (descriptionElement != null) {
this.description = descriptionElement.innerText();
}
// NOTE(review): unlike the description above, this lookup is not null-checked;
// a card without a promotion label would presumably fail on the next line - confirm.
ElementHandle bonusElement = element.querySelector(".promotion-label-base_textContainer__DFx6D");
// Strip markup: every HTML tag is replaced by a space.
this.bonusText = bonusElement.innerHTML().replaceAll("<[^>]*>", " ");
@@ -17,7 +29,20 @@ public class BonusItem {
// The category is taken from the id of the nearest <section> ancestor.
// NOTE(review): assumes every card sits inside a <section>; otherwise categoryContainer
// is presumably null - confirm.
ElementHandle categoryContainer = element.evaluateHandle("el => el.closest('section')").asElement();
this.category = categoryContainer.getAttribute("id");
// NOTE(review): the image lookup is not null-checked either - confirm every card has one.
this.imageURL = element.querySelector(".promotion-card-image_img__Ca5n8").getAttribute("data-src");
ElementHandle priceContainer = element.querySelector("[data-testhook=\"price\"]");
if (priceContainer != null) {
// The price is rendered as separate integer and fractional elements.
float priceInteger = Float.parseFloat(priceContainer.querySelector(".promotion-price_integer__Tq2rf").innerText());
int priceDecimalsTemp = Integer.parseInt(priceContainer.querySelector(".promotion-price_fractional__U-irD").innerText());
// NOTE(review): dividing by 100 assumes the fractional part is always two digits - confirm.
float priceDecimals = (float) priceDecimalsTemp / 100;
this.bonusPrice = priceInteger + priceDecimals;
ElementHandle originalPriceElement = priceContainer.querySelector(".promotion-price_was__jhW9R");
if (originalPriceElement != null) { // some cards (e.g. the "Christmas only" items) show no original price
// NOTE(review): assumes the text is a plain dot-separated decimal - confirm.
this.originalPrice = Float.parseFloat(originalPriceElement.innerText());
}
}
}
}

View File

@@ -0,0 +1,86 @@
package nl.herpiederpiee.appie_scraper;
import com.microsoft.playwright.*;
import xyz.nextn.levenshteindistance.LevenshteinDistance;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.TimeUnit;
/**
 * Scrapes the promotion cards from the ah.nl bonus page and offers a fuzzy,
 * title-based lookup over the scraped items.
 */
public class BonusManager {
    /** Section ids whose items are skipped while scraping. */
    private static final List<String> EXCLUDED_CATEGORIES =
            Arrays.asList("onlineOnly", "gall", "gall-card", "etos");

    /** Upper bound on the number of items returned by {@link #getBonusItems(String)}. */
    private static final int MAX_RESULTS = 10;

    /** Items collected by the most recent successful {@link #updateBonusItems()} call. */
    ArrayList<BonusItem> bonusItems = new ArrayList<>();

    /**
     * Opens the bonus page in a Chromium browser, turns every promotion card into a
     * {@link BonusItem} and stores the ones whose category is not excluded.
     *
     * <p>The stored list is replaced only after the scrape finished, so calling this
     * method repeatedly does not accumulate duplicates.
     *
     * @throws RuntimeException wrapping the {@link InterruptedException} if the thread is
     *         interrupted while waiting for the page; the interrupt flag is restored first
     */
    public void updateBonusItems() {
        ArrayList<BonusItem> scraped = new ArrayList<>();
        try (Playwright playwright = Playwright.create()) {
            Browser browser = playwright.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false));
            BrowserContext context = browser.newContext();
            Page bonusPagina = context.newPage();
            bonusPagina.navigate("https://www.ah.nl/bonus");
            TimeUnit.SECONDS.sleep(5); // wait for page to actually fully load
            Locator bonusElements = bonusPagina.locator(".promotion-card_root__tQA3z");
            for (ElementHandle bonusElement : bonusElements.elementHandles()) {
                BonusItem bonusItem = new BonusItem(bonusElement);
                // Null-safe: an item without a category is kept instead of crashing the scrape.
                if (EXCLUDED_CATEGORIES.contains(bonusItem.category)) {
                    continue;
                }
                scraped.add(bonusItem);
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can still observe it.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
        this.bonusItems.clear();
        this.bonusItems.addAll(scraped);
    }

    /**
     * Returns the best matches for a search term, best match first.
     *
     * @param name search term typed by the user; must not be {@code null}
     * @return at most {@value #MAX_RESULTS} items ordered by descending
     *         {@link #fuzzyMatchScore(String, String)}; fewer (possibly none) when fewer
     *         items have been scraped
     */
    public ArrayList<BonusItem> getBonusItems(String name) {
        ArrayList<Pair<BonusItem, Integer>> scored = new ArrayList<>(bonusItems.size());
        for (BonusItem bonusItem : bonusItems) {
            scored.add(Pair.pair(bonusItem, fuzzyMatchScore(name, bonusItem.title)));
        }
        // List.sort is stable, so equally scored items keep their scrape order.
        scored.sort((a, b) -> Integer.compare(b.second, a.second));

        // Never read past the end of the list when fewer than MAX_RESULTS items exist.
        int limit = Math.min(MAX_RESULTS, scored.size());
        ArrayList<BonusItem> top = new ArrayList<>(limit);
        for (int i = 0; i < limit; i++) {
            top.add(scored.get(i).first);
        }
        return top;
    }

    /**
     * Scores how well {@code query} matches {@code title}, ignoring case.
     *
     * <p>A title containing the query scores 100. Otherwise the query is compared, by
     * Levenshtein distance, with every query-length window of the title (or with the
     * whole title when the title is shorter than the query) and the smallest distance
     * is converted to a percentage of the query length.
     *
     * @param query search term; must not be {@code null}
     * @param title item title; {@code null} scores 0
     * @return similarity in the range 0..100
     */
    public int fuzzyMatchScore(String query, String title) {
        // Locale.ROOT keeps the comparison independent of the machine's default locale.
        String needle = query.toLowerCase(Locale.ROOT);
        if (title == null) {
            return 0; // scraped cards may lack a title attribute
        }
        String haystack = title.toLowerCase(Locale.ROOT);
        if (haystack.contains(needle)) {
            return 100; // perfect match
        }

        // needle is non-empty here: an empty string is contained in every title.
        int qlen = needle.length();
        int tlen = haystack.length();
        int best;
        if (tlen < qlen) {
            // No query-length window exists; compare against the whole title instead
            // of unconditionally scoring 0.
            best = LevenshteinDistance.calculate(needle, haystack);
        } else {
            best = Integer.MAX_VALUE;
            for (int i = 0; i <= tlen - qlen; i++) {
                String window = haystack.substring(i, i + qlen);
                int dist = LevenshteinDistance.calculate(needle, window);
                if (dist < best) {
                    best = dist;
                }
            }
        }
        // Convert distance to similarity percentage
        int score = (int) (100.0 * (1.0 - (best / (double) qlen)));
        return Math.max(0, Math.min(100, score));
    }
}

View File

@@ -1,46 +1,30 @@
package nl.herpiederpiee.appie_scraper;
import com.microsoft.playwright.*;
import com.microsoft.playwright.options.WaitUntilState;
import org.json.*;
import com.microsoft.playwright.*;
import org.json.*;
import java.util.ArrayList;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import java.util.Scanner;
public class Main {
public static void main(String[] args) {
try (Playwright playwright = Playwright.create()) {
BonusManager bonusManager = new BonusManager();
bonusManager.updateBonusItems();
Browser browser = playwright.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false));
BrowserContext context = browser.newContext();
Page bonusPagina = context.newPage();
Scanner input = new Scanner(System.in);
bonusPagina.navigate("https://www.ah.nl/bonus");
System.out.println("Dom Content Loaded!");
TimeUnit.SECONDS.sleep(5);
ArrayList<BonusItem> bonusItems = new ArrayList<BonusItem>();
int counter = 0;
Locator bonusElements = bonusPagina.locator(".promotion-card_root__tQA3z");
for (ElementHandle bonusElement : bonusElements.elementHandles()){
BonusItem bonusItem = new BonusItem(bonusElement);
bonusItems.add(bonusItem);
counter++;
while (true) {
System.out.println("\n\nWhat item would you like to look for?");
String userInput = input.nextLine();
if (userInput.equals("qqq")) break;
ArrayList<BonusItem> userResults = bonusManager.getBonusItems(userInput);
for (BonusItem bonusItem : userResults) {
System.out.println(bonusItem.title + " => " + bonusItem.bonusText);
}
System.out.println("Amount of items: " + counter);
// get random item from array
Random random = new Random();
BonusItem chosenItem = bonusItems.get(random.nextInt(bonusItems.size()));
System.out.println("Random Item:\n"+chosenItem.title+" => "+chosenItem.bonusText + " ("+chosenItem.category+")\nImage URL:"+chosenItem.imageURL);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
}

View File

@@ -0,0 +1,17 @@
package nl.herpiederpiee.appie_scraper;
/* Generated by ChatGPT, since I couldn't find a suitable library that works the way I want. */
/**
 * Immutable holder for two related values.
 *
 * @param <F> type of the first value
 * @param <S> type of the second value
 */
public class Pair<F, S> {
    /** First value; may be {@code null}. */
    public final F first;
    /** Second value; may be {@code null}. */
    public final S second;

    /**
     * Creates a pair from the two given values.
     *
     * @param first  value stored in {@link #first}
     * @param second value stored in {@link #second}
     */
    public Pair(F first, S second) {
        this.first = first;
        this.second = second;
    }

    /**
     * Factory shorthand for {@code new Pair<>(first, second)} that lets the compiler
     * infer both type arguments.
     *
     * @param first  value stored in {@link #first}
     * @param second value stored in {@link #second}
     * @param <F>    type of the first value
     * @param <S>    type of the second value
     * @return a new pair holding both values
     */
    public static <F, S> Pair<F, S> pair(F first, S second) {
        return new Pair<>(first, second);
    }

    /** Returns {@code "(first, second)"}, which is handy when debugging scored results. */
    @Override
    public String toString() {
        return "(" + first + ", " + second + ")";
    }
}