Skip to content

Commit

Permalink
local-file-storage: scalability + race fixes
Browse files Browse the repository at this point in the history
This commit fixes race conditions and potential performance issues

## Race conditions:

- Multiple calls to `set` caused race conditions with the central metafile index.
- Not only did this cause the set to fail, but it often corrupted the metafile causing a permanent, unrecoverable error since the metafile could no longer be parsed.

## Scalability:

- The single metadata file would eventually grow large in a successful application and become both a memory issue and performance bottleneck
- Additionally, storing all files in a single directory would eventually slow down the file system’s access of files in the directory.

## Changes:

- Removes race conditions with atomic reads/writes
  - Removes the metafile and instead relies on SHA-256 hashes for file keys
  - Stores metadata alongside each file as .meta.json instead of central index
  - removes filename collision handling since it’s unnecessary with hash-based names
- Performance
  - Shards files into subdirectories using first 8 chars of SHA-256 hash to maintain file system performance
  - Without a metafile, it is no longer a memory or performance bottleneck

Should rebase/squash the commits before merging, added the failing test first to prove the bug.
  • Loading branch information
ryanflorence committed Jan 8, 2025
1 parent aba8810 commit 6281172
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 92 deletions.
5 changes: 5 additions & 0 deletions packages/file-storage/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

This is the changelog for [`file-storage`](https://github.com/mjackson/remix-the-web/tree/main/packages/file-storage). It follows [semantic versioning](https://semver.org/).

## v0.3.1

- Fixes race conditions with concurrent calls to `set`
- Shards storage directories for more scalable file systems

## v0.3.0 (2024-11-14)

- Added CommonJS build
Expand Down
141 changes: 49 additions & 92 deletions packages/file-storage/src/lib/local-file-storage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import * as fs from 'node:fs';
import * as fsp from 'node:fs/promises';
import * as path from 'node:path';
import { openFile, writeFile } from '@mjackson/lazy-file/fs';
import * as crypto from 'node:crypto';

import { type FileStorage } from './file-storage.ts';

Expand All @@ -15,7 +16,6 @@ import { type FileStorage } from './file-storage.ts';
*/
export class LocalFileStorage implements FileStorage {
#dirname: string;
#metadata: FileMetadataIndex;

/**
* @param directory The directory where files are stored
Expand All @@ -36,132 +36,89 @@ export class LocalFileStorage implements FileStorage {

fs.mkdirSync(this.#dirname, { recursive: true });
}

this.#metadata = new FileMetadataIndex(path.join(directory, '.metadata.json'));
}

has(key: string): Promise<boolean> {
return this.#metadata.has(key);
async has(key: string): Promise<boolean> {
let { metaPath } = this.#getFilePaths(key);
try {
await fsp.access(metaPath);
return true;
} catch {
return false;
}
}

async set(key: string, file: File): Promise<void> {
// Remove any existing file with the same key.
await this.remove(key);

let storedFile = await storeFile(this.#dirname, file);
let { directory, filePath, metaPath } = this.#getFilePaths(key);

// Ensure directory exists
await fsp.mkdir(directory, { recursive: true });

let handle = await fsp.open(filePath, 'w');
await writeFile(handle, file);

await this.#metadata.set(key, {
file: storedFile,
let metadata: FileMetadata = {
name: file.name,
type: file.type,
mtime: file.lastModified,
});
};
await fsp.writeFile(metaPath, JSON.stringify(metadata));
}

async get(key: string): Promise<File | null> {
let metadata = await this.#metadata.get(key);
if (metadata == null) return null;

let filename = path.join(this.#dirname, metadata.file);

return openFile(filename, {
name: metadata.name,
type: metadata.type,
lastModified: metadata.mtime,
});
}

async remove(key: string): Promise<void> {
let metadata = await this.#metadata.get(key);
if (metadata == null) return;

let filename = path.join(this.#dirname, metadata.file);
let { filePath, metaPath } = this.#getFilePaths(key);

try {
await fsp.unlink(filename);
let metadataContent = await fsp.readFile(metaPath, 'utf-8');
let metadata: FileMetadata = JSON.parse(metadataContent);

return openFile(filePath, {
name: metadata.name,
type: metadata.type,
lastModified: metadata.mtime,
});
} catch (error) {
if (!isNoEntityError(error)) {
throw error;
}
}

await this.#metadata.remove(key);
}
}

async function storeFile(dirname: string, file: File): Promise<string> {
let filename = randomFilename();

let handle: fsp.FileHandle;
try {
handle = await fsp.open(path.join(dirname, filename), 'w');
} catch (error) {
if ((error as NodeJS.ErrnoException).code === 'EEXIST') {
// Try again with a different filename
return storeFile(dirname, file);
} else {
throw error;
return null;
}
}

await writeFile(handle, file);

return filename;
}

function randomFilename(): string {
return `${new Date().getTime().toString(36)}.${Math.random().toString(36).slice(2, 6)}`;
}

interface FileMetadata {
file: string;
name: string;
type: string;
mtime: number;
}

class FileMetadataIndex {
#path: string;

constructor(path: string) {
this.#path = path;
}
async remove(key: string): Promise<void> {
let { filePath, metaPath } = this.#getFilePaths(key);

async #getAll(): Promise<Record<string, FileMetadata>> {
try {
return JSON.parse(await openFile(this.#path).text());
await Promise.all([fsp.unlink(filePath), fsp.unlink(metaPath)]);
} catch (error) {
if (!isNoEntityError(error)) {
throw error;
}

return {};
}
}

async #save(info: Record<string, FileMetadata | undefined>): Promise<void> {
await fsp.writeFile(this.#path, JSON.stringify(info));
}

async has(key: string): Promise<boolean> {
let info = await this.#getAll();
return key in info;
}

async set(key: string, metadata: FileMetadata): Promise<void> {
let info = await this.#getAll();
await this.#save({ ...info, [key]: metadata });
}

async get(key: string): Promise<FileMetadata | null> {
let info = await this.#getAll();
return info[key] ?? null;
#getFilePaths(key: string): { directory: string; filePath: string; metaPath: string } {
let hash = crypto.createHash('sha256').update(key).digest('hex');
let shardDir = hash.slice(0, 8);
let directory = path.join(this.#dirname, shardDir);
let filename = `${hash}.bin`;
let metaname = `${hash}.meta.json`;

return {
directory,
filePath: path.join(directory, filename),
metaPath: path.join(directory, metaname),
};
}
}

async remove(key: string): Promise<void> {
let info = await this.#getAll();
await this.#save({ ...info, [key]: undefined });
}
interface FileMetadata {
name: string;
type: string;
mtime: number;
}

function isNoEntityError(obj: unknown): obj is NodeJS.ErrnoException & { code: 'ENOENT' } {
Expand Down

0 comments on commit 6281172

Please sign in to comment.