plc-replica/directory-tailer.ts
Charlotte Som e027bc234e use did/createdAt/cid prefix on written records
this means we can skip over dids we don't care about without parsing
JSON *AND* we get good lexicographic sorting of log files (which will be
useful for more complex compaction later)
2025-01-03 06:45:38 +00:00

102 lines
2.7 KiB
TypeScript

import { IterLines } from "./util.ts";
import { writeLiveRecord } from "./write-live.ts";
/** Payload of a PLC operation. Kept opaque: nothing in this file inspects it. */
type PlcOperation = unknown;

/**
 * One parsed line of the directory's export response (each line is a JSON
 * document that is cast to this shape).
 */
export interface ExportEntry {
  /** DID the entry is recorded under. */
  did: string;
  /** Content identifier of the entry; the tailer uses it to recognise repeats. */
  cid: string;
  /** ISO timestamp; the tailer sends the latest one back as the `after` cursor. */
  createdAt: string;
  /** Flag carried through from the export as-is; not read by the code in this file. */
  nullified: boolean;
  /** The operation payload, passed along untouched. */
  operation: PlcOperation;
}
/**
 * Resolves (with no value) once `timeout` milliseconds have elapsed.
 *
 * @param timeout delay in milliseconds, handed straight to `setTimeout`
 */
function sleep(timeout: number): Promise<unknown> {
  return new Promise((resolve) => {
    setTimeout(resolve, timeout);
  });
}
/**
 * Polls `https://plc.directory/export` in a loop and hands every entry it has
 * not just seen to `writeLiveRecord`.
 *
 * The tailer keeps a cursor (`latestDate`) that is sent as the `after` query
 * parameter and persisted to `./data/latest-date` after each non-empty batch.
 */
export class DirectoryTailer {
  /** Aborting this controller makes `fetchExports` stop before its next request. */
  public abort = new AbortController();

  /**
   * CIDs of every entry contained in the previous response. Entries whose CID
   * is in this set are skipped when they show up again in the next response.
   */
  lastBatchCIDs = new Set<string>();

  /** `createdAt` of the newest entry of the last completed batch (or the start date). */
  latestDate: string | undefined;

  saveRaw: boolean = true; // set to false in production so you don't double-store plc data

  /**
   * @param startDate optional cursor to resume from; when omitted the first
   *   request is sent without an `after` parameter
   */
  constructor(startDate?: string) {
    this.latestDate = startDate;
  }

  /**
   * Logs a one-line summary of `entry` and writes it via `writeLiveRecord`.
   *
   * @param entry the parsed export entry
   * @param raw the original JSON line `entry` was parsed from
   */
  async processRecord(entry: ExportEntry, raw: string): Promise<void> {
    console.log(
      Deno.inspect(
        {
          createdAt: entry.createdAt,
          did: entry.did,
          cid: entry.cid,
        },
        { breakLength: Infinity, compact: true, colors: true }
      )
    );
    await writeLiveRecord(entry, raw);
  }

  /**
   * Runs the polling loop until `abort` is signalled or the server answers
   * with a non-200 status (which is logged and ends the loop).
   *
   * Rejects if a request, a JSON parse, or any write fails.
   */
  async fetchExports(): Promise<void> {
    const url = new URL("https://plc.directory/export");
    url.searchParams.set("count", "1000");

    while (!this.abort.signal.aborted) {
      if (this.latestDate !== undefined) {
        url.searchParams.set("after", this.latestDate);
      }

      console.log("%c[+]%c %s", "color: green", "color: unset", url.toString());
      const response = await fetch(url, {
        headers: { "User-Agent": "cerulea-plc-replica/1.0 (cerulea.blue)" },
      });
      if (response.status !== 200) {
        console.error(response);
        // Best-effort cleanup: release the unread body; a failure here must
        // not turn the clean exit into a rejection.
        await response.body?.cancel().catch(() => {});
        break;
      }

      const text = await response.text();
      if (this.saveRaw) {
        await Deno.writeTextFile("data/exports.jsonl", text + "\n", {
          append: true,
        });
      }

      let entry: ExportEntry | undefined;
      const promises: Promise<void>[] = [];
      const cids = new Set<string>();
      for (const line of new IterLines(text)) {
        entry = JSON.parse(line) as ExportEntry;
        // Remember every CID in this response, including the ones skipped
        // below. Otherwise a response made up only of repeats would empty the
        // set and the same entry would be processed again on the next poll.
        cids.add(entry.cid);
        if (this.lastBatchCIDs.has(entry.cid)) continue;
        promises.push(this.processRecord(entry, line));
      }
      await Promise.all(promises);
      this.lastBatchCIDs = cids;

      if (entry) {
        // Advance the cursor only now that the whole batch has been written.
        this.latestDate = entry.createdAt;
        const write = Deno.writeTextFile("./data/latest-date", this.latestDate);
        const timestamp = new Date(this.latestDate).getTime();
        const delay =
          Date.now() - timestamp > 5_000
            ? 600 // still catching up: 500 per 5 minutes
            : 2500; // caught up: sleep a little longer so that we can get more ops per request
        // Await the write together with the pause so a failed write is never
        // left as a floating, unhandled rejection while we sleep.
        await Promise.all([write, sleep(delay)]);
      } else {
        await sleep(10_000); // we got nothing! sleep way longer
      }
    }
  }
}