From 1c5b5ff52b0e61edef4da5079e5781b3b89a0709 Mon Sep 17 00:00:00 2001
From: inference
Date: Wed, 25 Jun 2025 22:08:20 +0000
Subject: [PATCH] add: Duplicate Check

---
 duplicate_check.py | 45 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 duplicate_check.py

diff --git a/duplicate_check.py b/duplicate_check.py
new file mode 100644
index 0000000..7196a03
--- /dev/null
+++ b/duplicate_check.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+
+# Duplicate Check
+# Version: 1.0.0-beta.1
+
+# Copyright 2025 Jake Winters
+# SPDX-License-Identifier: BSD-3-Clause
+
+
+import os
+import hashlib
+import argparse
+
+
+parser = argparse.ArgumentParser(description='Scan a directory for duplicate files and delete them.')
+parser.add_argument('--dry-run', '-d', action='store_true', help='Detect duplicates without deleting them.')
+parser.add_argument('directory', type=str, help='The directory to scan for duplicate files.')
+args = parser.parse_args()
+
+
+def hash_file(file_path):
+    # Hash the file in 64 KiB blocks to avoid reading it into memory at once.
+    sha256_hash = hashlib.sha256()
+    with open(file_path, 'rb') as f:
+        for byte_block in iter(lambda: f.read(65536), b''):
+            sha256_hash.update(byte_block)
+    return sha256_hash.hexdigest()
+
+def find_and_delete_duplicates(directory, dry_run):
+    # Map each content hash to the first file seen with it; later matches are duplicates.
+    file_hashes = {}
+    for name in os.listdir(directory):
+        file_path = os.path.abspath(os.path.join(directory, name))
+        if os.path.isfile(file_path):
+            file_hash = hash_file(file_path)
+            if file_hash in file_hashes:
+                print(f"Duplicate detected: {file_path}")
+                if not dry_run:
+                    os.remove(file_path)
+                    print(f"Duplicate deleted: {file_path}")
+            else:
+                file_hashes[file_hash] = file_path
+
+
+find_and_delete_duplicates(args.directory, args.dry_run)
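
Below is a minimal smoke-test sketch for the patched script; it is not part of the patch itself. It assumes duplicate_check.py has been saved from this patch into the current working directory, and the temporary file names and contents are purely illustrative.

import subprocess
import sys
import tempfile
from pathlib import Path

# Create a scratch directory with two identical files and one distinct file.
tmpdir = Path(tempfile.mkdtemp())
(tmpdir / 'a.txt').write_text('same content')
(tmpdir / 'b.txt').write_text('same content')       # duplicate of a.txt
(tmpdir / 'c.txt').write_text('different content')

# Dry run: should report one duplicate but delete nothing.
subprocess.run([sys.executable, 'duplicate_check.py', '--dry-run', str(tmpdir)], check=True)
print(sorted(p.name for p in tmpdir.iterdir()))      # expect all three files still present

Running the same command without --dry-run should keep whichever of the two identical files is listed first and remove the one encountered second; the order depends on os.listdir.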