Update to query hs2's new, more semantic HTML
This commit is contained in:
parent
d8149f00b4
commit
dd4cd72bf4
1 changed file with 8 additions and 42 deletions
|
@ -9,8 +9,6 @@
|
||||||
import { contentType } from "https://deno.land/std@0.202.0/media_types/mod.ts";
|
import { contentType } from "https://deno.land/std@0.202.0/media_types/mod.ts";
|
||||||
import {
|
import {
|
||||||
DOMParser,
|
DOMParser,
|
||||||
NodeList,
|
|
||||||
NodeType,
|
|
||||||
} from "https://deno.land/x/deno_dom@v0.1.38/deno-dom-wasm.ts";
|
} from "https://deno.land/x/deno_dom@v0.1.38/deno-dom-wasm.ts";
|
||||||
import {
|
import {
|
||||||
Image,
|
Image,
|
||||||
|
@ -20,29 +18,6 @@ import {
|
||||||
// not sure how worthwhile this optimization is but
|
// not sure how worthwhile this optimization is but
|
||||||
const dom_parser = new DOMParser();
|
const dom_parser = new DOMParser();
|
||||||
|
|
||||||
const murica_short_date = new Intl.DateTimeFormat("en-US", {
|
|
||||||
month: "numeric",
|
|
||||||
day: "numeric",
|
|
||||||
year: "numeric",
|
|
||||||
});
|
|
||||||
|
|
||||||
function get_updates_for_date(
|
|
||||||
{ date, ps }: { date: Date; ps: NodeList },
|
|
||||||
): number {
|
|
||||||
let count = 0;
|
|
||||||
|
|
||||||
const looking_for = murica_short_date.format(date) + " - ";
|
|
||||||
|
|
||||||
ps.forEach((p) => {
|
|
||||||
if (
|
|
||||||
p.childNodes[0] && p.childNodes[0].nodeType == NodeType.TEXT_NODE &&
|
|
||||||
p.childNodes[0].textContent == looking_for
|
|
||||||
) count++;
|
|
||||||
});
|
|
||||||
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function get_last_update_date(): Promise<
|
async function get_last_update_date(): Promise<
|
||||||
{ last_updated: Date; last_update_count: number } | string
|
{ last_updated: Date; last_update_count: number } | string
|
||||||
> {
|
> {
|
||||||
|
@ -55,26 +30,17 @@ async function get_last_update_date(): Promise<
|
||||||
const doc = dom_parser.parseFromString(body, "text/html");
|
const doc = dom_parser.parseFromString(body, "text/html");
|
||||||
if (!doc) return "couldn't parse the body into the DOM";
|
if (!doc) return "couldn't parse the body into the DOM";
|
||||||
|
|
||||||
const ps = doc.querySelectorAll("p");
|
const time = doc.querySelector("time");
|
||||||
if (!ps) return "couldn't get the ps from the doc";
|
if (!time) return "couldn't get even a single time from the doc (bad!)";
|
||||||
|
|
||||||
if (!ps[0]) return "couldn't get even a single p from the doc (bad!)";
|
const datetime = time.attributes.getNamedItem("datetime")?.value;
|
||||||
|
if (!datetime) return "time didn't have datetime attr??";
|
||||||
// should really enable strict indexing
|
const date = new Date(datetime);
|
||||||
const us_date_node = ps[0].childNodes[0];
|
|
||||||
if (!us_date_node || us_date_node.nodeType != NodeType.TEXT_NODE) {
|
|
||||||
return "couldn't get a date node from the log entry";
|
|
||||||
}
|
|
||||||
|
|
||||||
const us_date = us_date_node.textContent.replace(" - ", "");
|
|
||||||
// apparently doing new Date("10/8/2023") is implementation defined which is icky but it works with deno
|
|
||||||
const date = new Date(us_date);
|
|
||||||
if (Number.isNaN(date.valueOf())) {
|
|
||||||
return `got an invalid date :(. the text_content was '${us_date}'`;
|
|
||||||
}
|
|
||||||
return {
|
return {
|
||||||
last_updated: date,
|
last_updated: date,
|
||||||
last_update_count: get_updates_for_date({ date, ps }),
|
last_update_count:
|
||||||
|
doc.querySelectorAll(`time[datetime="${date.toISOString()}"]`)
|
||||||
|
.length,
|
||||||
};
|
};
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
return `caught error: ${e}`;
|
return `caught error: ${e}`;
|
||||||
|
|
Loading…
Reference in a new issue