Add functionality for the duplicate scanner. This uses the previously implemented hashing code to compute SHA-256 digests and detect duplicate files before deleting them. Command-line arguments provide a dry-run mode and directory selection.
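Typical invocation (the filename dedupe.py is illustrative, not part of this commit):

    python3 dedupe.py --dry-run ~/Downloads    # report duplicates, delete nothing
    python3 dedupe.py ~/Downloads              # delete every duplicate found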
#!/usr/bin/env python3

# Duplicate Check
# Version: 0.15.0
#
# Copyright 2025 Jake Winters
# SPDX-License-Identifier: BSD-3-Clause
import argparse
import hashlib
import os
parser = argparse.ArgumentParser(description='Scan a directory for duplicate files and delete them.')
parser.add_argument('--dry-run', '-d', action='store_true', help='Report duplicates without deleting them.')
parser.add_argument('directory', type=str, help='The directory to scan for duplicate files.')
args = parser.parse_args()
def hash_file(file_path):
    """Return the SHA-256 hex digest of a file, read in 64 KiB chunks."""
    sha256_hash = hashlib.sha256()
    with open(file_path, 'rb') as f:
        # Read fixed-size blocks so large files are never loaded into memory at once.
        for byte_block in iter(lambda: f.read(65536), b''):
            sha256_hash.update(byte_block)
    return sha256_hash.hexdigest()
def find_and_delete_duplicates(directory, dry_run):
    """Scan a directory (non-recursively) and delete files whose content was already seen."""
    file_hashes = {}
    # Sort the listing so which copy is kept is deterministic across runs.
    for name in sorted(os.listdir(directory)):
        # Build each path explicitly rather than chdir-ing into the directory;
        # the original os.chdir('..') did not reliably restore the working directory.
        file_path = os.path.join(directory, name)
        if os.path.isfile(file_path):
            file_hash = hash_file(file_path)
            if file_hash in file_hashes:
                print(f"Duplicate detected: {file_path}")
                if not dry_run:
                    os.remove(file_path)
                    print(f"Duplicate deleted: {file_path}")
            else:
                # Map the digest to the first path seen with that content.
                file_hashes[file_hash] = file_path


find_and_delete_duplicates(args.directory, args.dry_run)