Filter duplicate CIDs

This commit is contained in:
Charlotte Som 2024-11-30 20:17:49 +00:00
parent e9af92c00f
commit 0d2412813f
2 changed files with 16 additions and 7 deletions

View file

@ -15,6 +15,8 @@ const sleep = (timeout: number) => new Promise((r) => setTimeout(r, timeout));
export class DirectoryTailer {
public abort = new AbortController();
lastBatchCIDs = new Set<string>();
latestDate: string | undefined;
saveRaw: boolean = true; // set to false in production so you don't double-store plc data
@ -72,13 +74,19 @@ export class DirectoryTailer {
let entry: ExportEntry | undefined;
const promises = [];
const cids = new Set<string>();
for (const line of new IterLines(text)) {
entry = JSON.parse(line) as unknown as ExportEntry;
if (this.lastBatchCIDs.has(entry.cid)) continue;
this.latestDate = entry.createdAt;
cids.add(entry.cid);
promises.push(this.processRecord(entry, line));
}
await Promise.all(promises);
this.lastBatchCIDs = cids;
if (entry) {
this.latestDate = entry.createdAt;
const write = Deno.writeTextFile("./data/latest-date", this.latestDate);

View file

@ -17,14 +17,15 @@ export const catchUp = async () => {
lineReader.releaseLock();
}
let lastLine: string | undefined;
tailer.lastBatchCIDs.clear();
for await (const line of lineStream.values()) {
lastLine = line;
}
if (lastLine) {
const entry = JSON.parse(lastLine) as unknown as ExportEntry;
try {
const entry = JSON.parse(line) as unknown as ExportEntry;
tailer.latestDate = entry.createdAt;
tailer.lastBatchCIDs.add(entry.cid);
} catch (_err) {
// ignore
}
}
};