From dd4cd72bf4e5fcaf048d91156525c75df594d443 Mon Sep 17 00:00:00 2001 From: mehbark Date: Sat, 10 Feb 2024 18:51:17 -0500 Subject: [PATCH] update to query hs2's new, more semantic html --- serverside/hs2-last-updated.ts | 50 ++++++---------------------------- 1 file changed, 8 insertions(+), 42 deletions(-) diff --git a/serverside/hs2-last-updated.ts b/serverside/hs2-last-updated.ts index 2aba00e..5ed042e 100644 --- a/serverside/hs2-last-updated.ts +++ b/serverside/hs2-last-updated.ts @@ -9,8 +9,6 @@ import { contentType } from "https://deno.land/std@0.202.0/media_types/mod.ts"; import { DOMParser, - NodeList, - NodeType, } from "https://deno.land/x/deno_dom@v0.1.38/deno-dom-wasm.ts"; import { Image, @@ -20,29 +18,6 @@ import { // not sure how worthwhile this optimization is but const dom_parser = new DOMParser(); -const murica_short_date = new Intl.DateTimeFormat("en-US", { - month: "numeric", - day: "numeric", - year: "numeric", -}); - -function get_updates_for_date( - { date, ps }: { date: Date; ps: NodeList }, -): number { - let count = 0; - - const looking_for = murica_short_date.format(date) + " - "; - - ps.forEach((p) => { - if ( - p.childNodes[0] && p.childNodes[0].nodeType == NodeType.TEXT_NODE && - p.childNodes[0].textContent == looking_for - ) count++; - }); - - return count; -} - async function get_last_update_date(): Promise< { last_updated: Date; last_update_count: number } | string > { @@ -55,26 +30,17 @@ async function get_last_update_date(): Promise< const doc = dom_parser.parseFromString(body, "text/html"); if (!doc) return "couldn't parse the body into the DOM"; - const ps = doc.querySelectorAll("p"); - if (!ps) return "couldn't get the ps from the doc"; + const time = doc.querySelector("time"); + if (!time) return "couldn't get even a single time from the doc (bad!)"; - if (!ps[0]) return "couldn't get even a single p from the doc (bad!)"; - - // should really enable strict indexing - const us_date_node = ps[0].childNodes[0]; - if (!us_date_node || us_date_node.nodeType != NodeType.TEXT_NODE) { - return "couldn't get a date node from the log entry"; - } - - const us_date = us_date_node.textContent.replace(" - ", ""); - // apparently doing new Date("10/8/2023") is implementation defined which is icky but it works with deno - const date = new Date(us_date); - if (Number.isNaN(date.valueOf())) { - return `got an invalid date :(. the text_content was '${us_date}'`; - } + const datetime = time.attributes.getNamedItem("datetime")?.value; + if (!datetime) return "time didn't have datetime attr??"; + const date = new Date(datetime); return { last_updated: date, - last_update_count: get_updates_for_date({ date, ps }), + last_update_count: + doc.querySelectorAll(`time[datetime="${date.toISOString()}"]`) + .length, }; } catch (e) { return `caught error: ${e}`;