move to flat files idk
This commit is contained in:
		
							parent
							
								
									47678e1e95
								
							
						
					
					
						commit
						c539a4dc5a
					
				
					 7 changed files with 115 additions and 26 deletions
				
			
		|  | @ -1,7 +1,6 @@ | ||||||
| { | { | ||||||
|   "tasks": { |   "tasks": { | ||||||
|     "full-scan": "deno run -A --unstable-kv ./full-scan.ts", |     "scrape": "deno run -A ./scrape.ts" | ||||||
|     "scrape": "deno run -A --unstable-kv ./scrape.ts" |  | ||||||
|   }, |   }, | ||||||
|   "imports": { |   "imports": { | ||||||
|     "@std/assert": "jsr:@std/assert@1" |     "@std/assert": "jsr:@std/assert@1" | ||||||
|  |  | ||||||
							
								
								
									
										11
									
								
								deno.lock
									
									
									
									
									
								
							
							
						
						
									
										11
									
								
								deno.lock
									
									
									
									
									
								
							|  | @ -1,13 +1,24 @@ | ||||||
| { | { | ||||||
|   "version": "4", |   "version": "4", | ||||||
|   "specifiers": { |   "specifiers": { | ||||||
|  |     "jsr:@std/fs@1": "1.0.6", | ||||||
|     "jsr:@std/json@1": "1.0.1", |     "jsr:@std/json@1": "1.0.1", | ||||||
|  |     "jsr:@std/path@^1.0.8": "1.0.8", | ||||||
|     "jsr:@std/streams@1": "1.0.8" |     "jsr:@std/streams@1": "1.0.8" | ||||||
|   }, |   }, | ||||||
|   "jsr": { |   "jsr": { | ||||||
|  |     "@std/fs@1.0.6": { | ||||||
|  |       "integrity": "42b56e1e41b75583a21d5a37f6a6a27de9f510bcd36c0c85791d685ca0b85fa2", | ||||||
|  |       "dependencies": [ | ||||||
|  |         "jsr:@std/path" | ||||||
|  |       ] | ||||||
|  |     }, | ||||||
|     "@std/json@1.0.1": { |     "@std/json@1.0.1": { | ||||||
|       "integrity": "1f0f70737e8827f9acca086282e903677bc1bb0c8ffcd1f21bca60039563049f" |       "integrity": "1f0f70737e8827f9acca086282e903677bc1bb0c8ffcd1f21bca60039563049f" | ||||||
|     }, |     }, | ||||||
|  |     "@std/path@1.0.8": { | ||||||
|  |       "integrity": "548fa456bb6a04d3c1a1e7477986b6cffbce95102d0bb447c67c4ee70e0364be" | ||||||
|  |     }, | ||||||
|     "@std/streams@1.0.8": { |     "@std/streams@1.0.8": { | ||||||
|       "integrity": "b41332d93d2cf6a82fe4ac2153b930adf1a859392931e2a19d9fabfb6f154fb3" |       "integrity": "b41332d93d2cf6a82fe4ac2153b930adf1a859392931e2a19d9fabfb6f154fb3" | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | @ -16,16 +16,13 @@ export class DirectoryTailer { | ||||||
|   public abort = new AbortController(); |   public abort = new AbortController(); | ||||||
| 
 | 
 | ||||||
|   latestDate: string | undefined; |   latestDate: string | undefined; | ||||||
|  |   saveRaw: boolean = true; // set to false in production so you don't double-store plc data
 | ||||||
| 
 | 
 | ||||||
|   constructor( |   constructor(startDate?: string) { | ||||||
|     public kv: Deno.Kv, |  | ||||||
|     startDate?: string |  | ||||||
|   ) { |  | ||||||
|     this.latestDate = startDate; |     this.latestDate = startDate; | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   async processRecord(entry: ExportEntry) { |   async processRecord(entry: ExportEntry, raw: string) { | ||||||
|     await this.kv.set([entry.did, entry.cid], entry); |  | ||||||
|     console.log( |     console.log( | ||||||
|       Deno.inspect( |       Deno.inspect( | ||||||
|         { |         { | ||||||
|  | @ -36,6 +33,14 @@ export class DirectoryTailer { | ||||||
|         { breakLength: Infinity, compact: true, colors: true } |         { breakLength: Infinity, compact: true, colors: true } | ||||||
|       ) |       ) | ||||||
|     ); |     ); | ||||||
|  | 
 | ||||||
|  |     const didplc = "did:plc:".length; | ||||||
|  |     const prefix = entry.did.substring(didplc, didplc + 2); | ||||||
|  |     const out = "./data/plc/live/" + prefix; | ||||||
|  | 
 | ||||||
|  |     await Deno.writeTextFile(out, raw + "\n", { | ||||||
|  |       append: true, | ||||||
|  |     }); | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   async fetchExports() { |   async fetchExports() { | ||||||
|  | @ -58,24 +63,34 @@ export class DirectoryTailer { | ||||||
|       } |       } | ||||||
| 
 | 
 | ||||||
|       const text = await response.text(); |       const text = await response.text(); | ||||||
|       await Deno.writeTextFile("data/exports.jsonl", text + "\n", { | 
 | ||||||
|         append: true, |       if (this.saveRaw) { | ||||||
|       }); |         await Deno.writeTextFile("data/exports.jsonl", text + "\n", { | ||||||
|  |           append: true, | ||||||
|  |         }); | ||||||
|  |       } | ||||||
| 
 | 
 | ||||||
|       let entry: ExportEntry | undefined; |       let entry: ExportEntry | undefined; | ||||||
|  |       const promises = []; | ||||||
|       for (const line of new IterLines(text)) { |       for (const line of new IterLines(text)) { | ||||||
|         entry = JSON.parse(line) as unknown as ExportEntry; |         entry = JSON.parse(line) as unknown as ExportEntry; | ||||||
|         await this.processRecord(entry); |         this.latestDate = entry.createdAt; | ||||||
|  |         promises.push(this.processRecord(entry, line)); | ||||||
|       } |       } | ||||||
|  |       await Promise.all(promises); | ||||||
| 
 | 
 | ||||||
|       if (entry) { |       if (entry) { | ||||||
|         this.latestDate = entry.createdAt; |         this.latestDate = entry.createdAt; | ||||||
|  |         const write = Deno.writeTextFile("./data/latest-date", this.latestDate); | ||||||
|  | 
 | ||||||
|         const timestamp = new Date(this.latestDate).getTime(); |         const timestamp = new Date(this.latestDate).getTime(); | ||||||
|         if (Date.now() - timestamp > 5_000) { |         if (Date.now() - timestamp > 5_000) { | ||||||
|           await sleep(600); // 500 per 5 minutes
 |           await sleep(600); // 500 per 5 minutes
 | ||||||
|         } else { |         } else { | ||||||
|           await sleep(2500); // sleep a little longer so that we can get more ops per request
 |           await sleep(2500); // sleep a little longer so that we can get more ops per request
 | ||||||
|         } |         } | ||||||
|  | 
 | ||||||
|  |         await write; | ||||||
|       } else { |       } else { | ||||||
|         await sleep(10_000); // we got nothing! sleep way longer
 |         await sleep(10_000); // we got nothing! sleep way longer
 | ||||||
|       } |       } | ||||||
|  |  | ||||||
|  | @ -1,10 +0,0 @@ | ||||||
| import { ExportEntry } from "./directory-tailer.ts"; |  | ||||||
| import { kv } from "./main.ts"; |  | ||||||
| 
 |  | ||||||
| export const getDidDocument = async (did: string) => { |  | ||||||
|   const listing = kv.list<ExportEntry>({ prefix: [did] }); |  | ||||||
|   for await (const { value: operation } of listing) { |  | ||||||
|     if (operation.nullified) continue; |  | ||||||
|     console.log(operation); |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
							
								
								
									
										59
									
								
								get-operations.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								get-operations.ts
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,59 @@ | ||||||
|  | import { TextLineStream } from "jsr:@std/streams@1/text-line-stream"; | ||||||
|  | import { ExportEntry } from "./directory-tailer.ts"; | ||||||
|  | 
 | ||||||
|  | export const getOperations = async (did: string) => { | ||||||
|  |   const operations = []; | ||||||
|  | 
 | ||||||
|  |   const didplc = "did:plc:".length; | ||||||
|  |   const prefix = did.substring(didplc, didplc + 2); | ||||||
|  | 
 | ||||||
|  |   const compactedEntries = await Array.fromAsync( | ||||||
|  |     Deno.readDir("./data/plc/compacted") | ||||||
|  |   ); | ||||||
|  |   compactedEntries.sort(); | ||||||
|  |   for (const entry of compactedEntries) { | ||||||
|  |     const process = new Deno.Command("zstd", { | ||||||
|  |       args: [ | ||||||
|  |         "-d", | ||||||
|  |         `./data/plc/compacted/${entry.name}/${prefix}.zst`, | ||||||
|  |         "--stdout", | ||||||
|  |       ], | ||||||
|  |       cwd: Deno.cwd(), | ||||||
|  |       stdout: "piped", | ||||||
|  |       stdin: "null", | ||||||
|  |       stderr: "piped", | ||||||
|  |     }).spawn(); | ||||||
|  | 
 | ||||||
|  |     const lines = process.stdout | ||||||
|  |       .pipeThrough(new TextDecoderStream()) | ||||||
|  |       .pipeThrough(new TextLineStream()); | ||||||
|  | 
 | ||||||
|  |     for await (const line of lines.values()) { | ||||||
|  |       const entry = JSON.parse(line) as unknown as ExportEntry; | ||||||
|  |       if (entry.did !== did) continue; | ||||||
|  |       operations.push(entry); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     await process.status; | ||||||
|  |   } | ||||||
|  | 
 | ||||||
|  |   const f = await Deno.open(`./data/plc/live/${prefix}`, { read: true }); | ||||||
|  |   const lines = f.readable | ||||||
|  |     .pipeThrough(new TextDecoderStream()) | ||||||
|  |     .pipeThrough(new TextLineStream()); | ||||||
|  |   for await (const line of lines.values()) { | ||||||
|  |     const entry = JSON.parse(line) as unknown as ExportEntry; | ||||||
|  |     if (entry.did !== did) continue; | ||||||
|  |     operations.push(entry); | ||||||
|  |   } | ||||||
|  | 
 | ||||||
|  |   return operations; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | if (import.meta.main) { | ||||||
|  |   await getOperations(Deno.args[0]); | ||||||
|  | 
 | ||||||
|  |   const then = performance.now(); | ||||||
|  |   console.log(await getOperations(Deno.args[0])); | ||||||
|  |   console.log(performance.now() - then); | ||||||
|  | } | ||||||
							
								
								
									
										7
									
								
								main.ts
									
									
									
									
									
								
							
							
						
						
									
										7
									
								
								main.ts
									
									
									
									
									
								
							|  | @ -1,4 +1,7 @@ | ||||||
|  | import { ensureDir } from "jsr:@std/fs@1"; | ||||||
| import { DirectoryTailer } from "./directory-tailer.ts"; | import { DirectoryTailer } from "./directory-tailer.ts"; | ||||||
| 
 | 
 | ||||||
| export const kv = await Deno.openKv("./data/directory-kv.db"); | await ensureDir("./data/plc/compacted"); | ||||||
| export const tailer = new DirectoryTailer(kv); | await ensureDir("./data/plc/live"); | ||||||
|  | 
 | ||||||
|  | export const tailer = new DirectoryTailer(); | ||||||
|  |  | ||||||
|  | @ -1,16 +1,28 @@ | ||||||
| import { TextLineStream } from "jsr:@std/streams@1/text-line-stream"; | import { TextLineStream } from "jsr:@std/streams@1/text-line-stream"; | ||||||
| 
 |  | ||||||
| import { ExportEntry } from "./directory-tailer.ts"; | import { ExportEntry } from "./directory-tailer.ts"; | ||||||
| import { tailer } from "./main.ts"; | import { tailer } from "./main.ts"; | ||||||
| 
 | 
 | ||||||
| export const fullScan = async () => { | export const fullScan = async () => { | ||||||
|   using exports = await Deno.open("./data/exports.jsonl", { read: true }); |   using exports = await Deno.open("./data/exports.jsonl", { read: true }); | ||||||
|  | 
 | ||||||
|  |   // interrupted at 2024-11-12T21:33:47.118Z
 | ||||||
|  |   // byte offset
 | ||||||
|  |   await exports.seek(13526812085, Deno.SeekMode.Start); | ||||||
|  | 
 | ||||||
|   const lineStream = exports.readable |   const lineStream = exports.readable | ||||||
|     .pipeThrough(new TextDecoderStream()) |     .pipeThrough(new TextDecoderStream()) | ||||||
|     .pipeThrough(new TextLineStream()); |     .pipeThrough(new TextLineStream()); | ||||||
|  | 
 | ||||||
|  |   { | ||||||
|  |     const reader = lineStream.getReader(); | ||||||
|  |     const line = await reader.read(); | ||||||
|  |     console.log("dropping: " + line.value); | ||||||
|  |     reader.releaseLock(); | ||||||
|  |   } | ||||||
|  | 
 | ||||||
|   for await (const line of lineStream.values()) { |   for await (const line of lineStream.values()) { | ||||||
|     const entry = JSON.parse(line) as unknown as ExportEntry; |     const entry = JSON.parse(line) as unknown as ExportEntry; | ||||||
|     await tailer.processRecord(entry); |     await tailer.processRecord(entry, line); | ||||||
|   } |   } | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
		Loading…
	
		Reference in a new issue