update to query hs2's new, more semantic html

This commit is contained in:
mehbark 2024-02-10 18:51:17 -05:00
parent d8149f00b4
commit dd4cd72bf4

View file

@ -9,8 +9,6 @@
import { contentType } from "https://deno.land/std@0.202.0/media_types/mod.ts";
import {
DOMParser,
NodeList,
NodeType,
} from "https://deno.land/x/deno_dom@v0.1.38/deno-dom-wasm.ts";
import {
Image,
@ -20,29 +18,6 @@ import {
// One DOMParser instance is created here and reused for parsing instead of
// constructing a new one each time. Not sure how worthwhile this optimization
// is, but it is cheap to keep.
const dom_parser = new DOMParser();
/**
 * Formatter producing US-style short dates with numeric month, day and year
 * (for example `10/8/2023`), in the runtime's local time zone.
 */
const murica_short_date = new Intl.DateTimeFormat("en-US", {
  year: "numeric",
  month: "numeric",
  day: "numeric",
});
/**
 * Counts how many nodes in `ps` start with a text node whose content is
 * exactly the US short form of `date` followed by " - ".
 *
 * @param date - the day to look for; formatted with `murica_short_date`.
 * @param ps - the candidate nodes (the caller passes the document's `p` elements).
 * @returns the number of nodes whose first child matches.
 */
function get_updates_for_date(
  { date, ps }: { date: Date; ps: NodeList },
): number {
  const expected_prefix = murica_short_date.format(date) + " - ";
  let matches = 0;
  ps.forEach((entry) => {
    const first_child = entry.childNodes[0];
    // No children at all: nothing to compare against.
    if (!first_child) return;
    // Only a leading text node can carry the date prefix.
    if (first_child.nodeType !== NodeType.TEXT_NODE) return;
    if (first_child.textContent === expected_prefix) matches++;
  });
  return matches;
}
async function get_last_update_date(): Promise<
{ last_updated: Date; last_update_count: number } | string
> {
@ -55,26 +30,17 @@ async function get_last_update_date(): Promise<
const doc = dom_parser.parseFromString(body, "text/html");
if (!doc) return "couldn't parse the body into the DOM";
const ps = doc.querySelectorAll("p");
if (!ps) return "couldn't get the ps from the doc";
const time = doc.querySelector("time");
if (!time) return "couldn't get even a single time from the doc (bad!)";
if (!ps[0]) return "couldn't get even a single p from the doc (bad!)";
// should really enable strict indexing
const us_date_node = ps[0].childNodes[0];
if (!us_date_node || us_date_node.nodeType != NodeType.TEXT_NODE) {
return "couldn't get a date node from the log entry";
}
const us_date = us_date_node.textContent.replace(" - ", "");
// apparently doing new Date("10/8/2023") is implementation defined which is icky but it works with deno
const date = new Date(us_date);
if (Number.isNaN(date.valueOf())) {
return `got an invalid date :(. the text_content was '${us_date}'`;
}
const datetime = time.attributes.getNamedItem("datetime")?.value;
if (!datetime) return "time didn't have datetime attr??";
const date = new Date(datetime);
return {
last_updated: date,
last_update_count: get_updates_for_date({ date, ps }),
last_update_count:
doc.querySelectorAll(`time[datetime="${date.toISOString()}"]`)
.length,
};
} catch (e) {
return `caught error: ${e}`;