it all seems to work somewhat well
This commit is contained in:
5
pom.xml
5
pom.xml
@@ -22,6 +22,11 @@
|
||||
<artifactId>json</artifactId>
|
||||
<version>20250517</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>xyz.nextn</groupId>
|
||||
<artifactId>simple-levenshtein-distance</artifactId>
|
||||
<version>1.0.0</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<build>
|
||||
<plugins>
|
||||
|
||||
@@ -4,12 +4,24 @@ import com.microsoft.playwright.ElementHandle;
|
||||
|
||||
public class BonusItem {
|
||||
String title;
|
||||
String description = "";
|
||||
String bonusText;
|
||||
String category;
|
||||
String imageURL;
|
||||
|
||||
String moreInfoURL;
|
||||
|
||||
float originalPrice = 0.0f;
|
||||
float bonusPrice = 0.0f;
|
||||
|
||||
BonusItem(ElementHandle element) {
|
||||
this.title = element.getAttribute("title");
|
||||
this.moreInfoURL = "https://www.ah.nl"+element.getAttribute("href");
|
||||
|
||||
ElementHandle descriptionElement = element.querySelector("[data-testhook=\"card-description\"]");
|
||||
if (descriptionElement != null) {
|
||||
this.description = descriptionElement.innerText();
|
||||
}
|
||||
|
||||
ElementHandle bonusElement = element.querySelector(".promotion-label-base_textContainer__DFx6D");
|
||||
this.bonusText = bonusElement.innerHTML().replaceAll("<[^>]*>", " ");
|
||||
@@ -17,7 +29,20 @@ public class BonusItem {
|
||||
ElementHandle categoryContainer = element.evaluateHandle("el => el.closest('section')").asElement();
|
||||
this.category = categoryContainer.getAttribute("id");
|
||||
|
||||
|
||||
this.imageURL = element.querySelector(".promotion-card-image_img__Ca5n8").getAttribute("data-src");
|
||||
|
||||
ElementHandle priceContainer = element.querySelector("[data-testhook=\"price\"]");
|
||||
if (priceContainer != null) {
|
||||
float priceInteger = Float.parseFloat(priceContainer.querySelector(".promotion-price_integer__Tq2rf").innerText());
|
||||
|
||||
int priceDecimalsTemp = Integer.parseInt(priceContainer.querySelector(".promotion-price_fractional__U-irD").innerText());
|
||||
float priceDecimals = (float) priceDecimalsTemp / 100;
|
||||
|
||||
this.bonusPrice = priceInteger + priceDecimals;
|
||||
ElementHandle originalPriceElement = priceContainer.querySelector(".promotion-price_was__jhW9R");
|
||||
if (originalPriceElement != null) { // those annoying "Christmas only items" are so annoying
|
||||
this.originalPrice = Float.parseFloat(originalPriceElement.innerText());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,86 @@
|
||||
package nl.herpiederpiee.appie_scraper;
|
||||
|
||||
import com.microsoft.playwright.*;
|
||||
import xyz.nextn.levenshteindistance.LevenshteinDistance;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
public class BonusManager {
|
||||
ArrayList<BonusItem> bonusItems = new ArrayList<BonusItem>();;
|
||||
|
||||
public void updateBonusItems(){
|
||||
try (Playwright playwright = Playwright.create()) {
|
||||
|
||||
Browser browser = playwright.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false));
|
||||
|
||||
BrowserContext context = browser.newContext();
|
||||
Page bonusPagina = context.newPage();
|
||||
|
||||
|
||||
bonusPagina.navigate("https://www.ah.nl/bonus");
|
||||
TimeUnit.SECONDS.sleep(5); // wait for page to actaully fully load
|
||||
|
||||
Locator bonusElements = bonusPagina.locator(".promotion-card_root__tQA3z");
|
||||
for (ElementHandle bonusElement : bonusElements.elementHandles()){
|
||||
BonusItem bonusItem = new BonusItem(bonusElement);
|
||||
|
||||
// exclude annoying elements
|
||||
if (bonusItem.category.equals( "onlineOnly")) continue;
|
||||
if (bonusItem.category.equals( "gall")) continue;
|
||||
if (bonusItem.category.equals( "gall-card")) continue;
|
||||
if (bonusItem.category.equals( "etos")) continue;
|
||||
|
||||
this.bonusItems.add(bonusItem);
|
||||
}
|
||||
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public ArrayList<BonusItem> getBonusItems(String name){
|
||||
ArrayList<Pair<BonusItem, Integer>> list = new ArrayList<>();
|
||||
|
||||
for (BonusItem bonusItem : bonusItems) {
|
||||
Integer score = fuzzyMatchScore(name, bonusItem.title);
|
||||
list.add(Pair.pair(bonusItem, score));
|
||||
}
|
||||
list.sort((a, b) -> Integer.compare(b.second, a.second));
|
||||
|
||||
ArrayList<BonusItem> top10 = new ArrayList<>();
|
||||
int i = 0;
|
||||
while (top10.size() < 10) {
|
||||
top10.add(list.get(i).first);
|
||||
i++;
|
||||
}
|
||||
|
||||
return top10;
|
||||
}
|
||||
|
||||
public int fuzzyMatchScore(String query, String title) {
|
||||
query = query.toLowerCase();
|
||||
title = title.toLowerCase();
|
||||
|
||||
if (title.contains(query)) {
|
||||
return 100; // perfect match
|
||||
}
|
||||
|
||||
int best = Integer.MAX_VALUE;
|
||||
|
||||
int qlen = query.length();
|
||||
int tlen = title.length();
|
||||
|
||||
for (int i = 0; i <= tlen - qlen; i++) {
|
||||
String sub = title.substring(i, i + qlen);
|
||||
int dist = LevenshteinDistance.calculate(query, sub);
|
||||
if (dist < best) best = dist;
|
||||
}
|
||||
|
||||
// Convert distance to similarity percentage
|
||||
int score = (int)(100.0 * (1.0 - (best / (double) qlen)));
|
||||
|
||||
return Math.max(0, Math.min(100, score));
|
||||
}
|
||||
}
|
||||
@@ -1,46 +1,30 @@
|
||||
package nl.herpiederpiee.appie_scraper;
|
||||
import com.microsoft.playwright.*;
|
||||
import com.microsoft.playwright.options.WaitUntilState;
|
||||
import org.json.*;
|
||||
|
||||
import com.microsoft.playwright.*;
|
||||
import org.json.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.Scanner;
|
||||
|
||||
public class Main {
|
||||
|
||||
|
||||
public static void main(String[] args) {
|
||||
try (Playwright playwright = Playwright.create()) {
|
||||
BonusManager bonusManager = new BonusManager();
|
||||
bonusManager.updateBonusItems();
|
||||
|
||||
Browser browser = playwright.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false));
|
||||
|
||||
BrowserContext context = browser.newContext();
|
||||
Page bonusPagina = context.newPage();
|
||||
|
||||
|
||||
bonusPagina.navigate("https://www.ah.nl/bonus");
|
||||
System.out.println("Dom Content Loaded!");
|
||||
|
||||
TimeUnit.SECONDS.sleep(5);
|
||||
ArrayList<BonusItem> bonusItems = new ArrayList<BonusItem>();
|
||||
int counter = 0;
|
||||
Locator bonusElements = bonusPagina.locator(".promotion-card_root__tQA3z");
|
||||
for (ElementHandle bonusElement : bonusElements.elementHandles()){
|
||||
BonusItem bonusItem = new BonusItem(bonusElement);
|
||||
bonusItems.add(bonusItem);
|
||||
counter++;
|
||||
}
|
||||
System.out.println("Amount of items: " + counter);
|
||||
// get random item from array
|
||||
Random random = new Random();
|
||||
BonusItem chosenItem = bonusItems.get(random.nextInt(bonusItems.size()));
|
||||
System.out.println("Random Item:\n"+chosenItem.title+" => "+chosenItem.bonusText + " ("+chosenItem.category+")\nImage URL:"+chosenItem.imageURL);
|
||||
Scanner input = new Scanner(System.in);
|
||||
|
||||
|
||||
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
while (true) {
|
||||
System.out.println("\n\nWhat item would you like to look for?");
|
||||
String userInput = input.nextLine();
|
||||
if (userInput.equals("qqq")) break;
|
||||
ArrayList<BonusItem> userResults = bonusManager.getBonusItems(userInput);
|
||||
|
||||
for (BonusItem bonusItem : userResults) {
|
||||
System.out.println(bonusItem.title + " => " + bonusItem.bonusText);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
17
src/main/java/nl/herpiederpiee/appie_scraper/Pair.java
Normal file
17
src/main/java/nl/herpiederpiee/appie_scraper/Pair.java
Normal file
@@ -0,0 +1,17 @@
|
||||
package nl.herpiederpiee.appie_scraper;
|
||||
|
||||
/* generated by chatGPT, since i couldnt find a proper library that works how i want */
|
||||
/**
 * Minimal mutable holder for two values of independent types.
 *
 * @param <F> type of the first value
 * @param <S> type of the second value
 */
public class Pair<F, S> {

    /** First value; publicly readable and writable. */
    public F first;

    /** Second value; publicly readable and writable. */
    public S second;

    /**
     * Static shorthand for the constructor.
     *
     * @param first value stored in {@link #first}
     * @param second value stored in {@link #second}
     * @param <F> type of the first value
     * @param <S> type of the second value
     * @return a new pair holding both arguments
     */
    public static <F, S> Pair<F, S> pair(F first, S second) {
        Pair<F, S> created = new Pair<F, S>(first, second);
        return created;
    }

    /**
     * Creates a pair from the two given values.
     *
     * @param first value stored in {@link #first}
     * @param second value stored in {@link #second}
     */
    public Pair(F first, S second) {
        this.second = second;
        this.first = first;
    }
}
|
||||
Reference in New Issue
Block a user