plc-replica/directory-tailer.ts

99 lines
2.6 KiB
TypeScript

import { IterLines } from "./util.ts";
// Payload of a single PLC operation. Kept opaque (`unknown`): the code visible
// in this file stores entries verbatim and never looks inside the operation.
type PlcOperation = unknown;
/**
 * One entry of the export feed. `fetchExports` produces these by JSON-parsing
 * each line of the response body; the shape is asserted, not validated at runtime.
 */
export interface ExportEntry {
// DID the entry belongs to. `processRecord` assumes a "did:plc:" prefix and uses
// the two characters that follow it as the shard file name.
did: string;
// The operation itself; treated as opaque data here.
operation: PlcOperation;
// Presumably the content identifier of the operation — only logged here; TODO confirm.
cid: string;
// NOTE(review): never read in this file; presumably flags an operation that was
// later invalidated — confirm against the PLC specification.
nullified: boolean;
// Also used as the `after` cursor for the next export request.
createdAt: string; // iso timestamp
}
/**
 * Returns a promise that resolves (with no value) once `timeout` milliseconds
 * have elapsed. Used to pace requests between polls.
 */
function sleep(timeout: number) {
  return new Promise((wake) => {
    setTimeout(wake, timeout);
  });
}
/**
 * Polls the plc.directory `/export` endpoint and appends every received entry
 * to files on disk.
 *
 * Each batch is (optionally) appended verbatim to `data/exports.jsonl`, every
 * entry is appended to a shard file under `./data/plc/live/`, and the
 * `createdAt` of the last entry is written to `./data/latest-date` so that a
 * later run can resume from it via the constructor's `startDate`.
 */
export class DirectoryTailer {
  /** Aborting this controller makes `fetchExports` stop at the start of its next loop iteration. */
  public abort = new AbortController();
  /** `createdAt` of the last entry of the most recent fully written batch; sent as the `after` query parameter. */
  latestDate: string | undefined;
  saveRaw: boolean = true; // set to false in production so you don't double-store plc data

  /**
   * @param startDate Optional cursor to resume from; when omitted the first
   *   request is sent without an `after` parameter.
   */
  constructor(startDate?: string) {
    this.latestDate = startDate;
  }

  /**
   * Logs a one-line summary of `entry` and appends `raw` (plus a newline) to
   * the shard file for the entry's DID.
   *
   * @param entry Parsed export entry.
   * @param raw The exact line the entry was parsed from; written unchanged.
   */
  async processRecord(entry: ExportEntry, raw: string): Promise<void> {
    console.log(
      Deno.inspect(
        {
          createdAt: entry.createdAt,
          did: entry.did,
          cid: entry.cid,
        },
        { breakLength: Infinity, compact: true, colors: true }
      )
    );
    // Shard by the first two characters after the "did:plc:" prefix.
    const didplc = "did:plc:".length;
    const prefix = entry.did.substring(didplc, didplc + 2);
    const out = "./data/plc/live/" + prefix;
    await Deno.writeTextFile(out, raw + "\n", {
      append: true,
    });
  }

  /**
   * Runs the polling loop until `abort` is signalled or the server answers
   * with a non-200 status (in which case the response is logged and the loop
   * ends).
   *
   * Rejects if a request, a JSON parse, or any file write fails. `latestDate`
   * is only advanced after every record of a batch has been written, so a
   * retry after a failure re-requests the failed batch instead of skipping it.
   */
  async fetchExports(): Promise<void> {
    const url = new URL("https://plc.directory/export");
    url.searchParams.set("count", "1000");
    while (!this.abort.signal.aborted) {
      if (this.latestDate !== undefined) {
        url.searchParams.set("after", this.latestDate);
      }
      console.log("%c[+]%c %s", "color: green", "color: unset", url.toString());
      const response = await fetch(url, {
        headers: { "User-Agent": "cerulea-plc-replica/1.0 (cerulea.blue)" },
      });
      if (response.status !== 200) {
        console.error(response);
        // The body is never read on this path; cancel it so the underlying
        // response resource is released instead of leaking.
        await response.body?.cancel();
        break;
      }
      const text = await response.text();
      // Skip the raw dump for an empty body: appending a bare "\n" on every
      // idle poll would fill the JSONL file with blank lines.
      if (this.saveRaw && text.length > 0) {
        await Deno.writeTextFile("data/exports.jsonl", text + "\n", {
          append: true,
        });
      }
      // Parse the whole batch before starting any write, so a malformed line
      // cannot abandon a set of already-started, un-awaited writes.
      const records: { entry: ExportEntry; line: string }[] = [];
      for (const line of new IterLines(text)) {
        records.push({ entry: JSON.parse(line) as ExportEntry, line });
      }
      // NOTE(review): these appends run concurrently, and several records of a
      // batch can target the same shard file; the on-disk order within a batch
      // is therefore not guaranteed to match the feed order — confirm that
      // consumers sort by createdAt or serialise per file if order matters.
      await Promise.all(
        records.map(({ entry, line }) => this.processRecord(entry, line))
      );
      const last = records.length > 0 ? records[records.length - 1] : undefined;
      if (last !== undefined) {
        this.latestDate = last.entry.createdAt;
        const age = Date.now() - new Date(this.latestDate).getTime();
        // Still catching up (newest entry older than 5 s): 600 ms between
        // requests, i.e. 500 per 5 minutes. Otherwise sleep a little longer so
        // that we can get more ops per request.
        const pause = age > 5_000 ? 600 : 2500;
        // Await the cursor write together with the pause. Starting the write
        // and awaiting it only after the sleep would leave a failure without a
        // handler for the whole sleep, surfacing as an unhandled rejection.
        await Promise.all([
          Deno.writeTextFile("./data/latest-date", this.latestDate),
          sleep(pause),
        ]);
      } else {
        await sleep(10_000); // we got nothing! sleep way longer
      }
    }
  }
}