21 – Real-World Python Projects – File Organizer (Advanced)

🎯 Project Objective

To create an advanced File Organizer that automatically scans folders, classifies files by type, renames duplicates, logs actions, and can run on a schedule for continuous cleanup.


🧩 Skills You’ll Learn

  • File handling with os and shutil
  • Pattern matching using fnmatch and re
  • Logging and reporting
  • Exception handling and automation
  • Scheduling cleanups with schedule
  • Building modular, maintainable code

βš™οΈ Technology Stack

Library | Purpose
os, shutil | File and directory operations
re, fnmatch | Pattern matching for file extensions
time, datetime | Scheduling and timestamps
logging | Tracking all actions performed
pandas (optional) | Generating CSV reports
schedule (optional) | Automate periodic cleanups

🔧 Auto-Install Dependencies

import subprocess, sys

def install(package):
    """Ensure *package* is importable, installing it with pip if it is not.

    The package is first imported by name. Only when that raises
    ``ImportError`` is ``pip install`` run, through the interpreter that is
    executing this script (``sys.executable``).
    """
    try:
        __import__(package)
    except ImportError:
        pip_command = [sys.executable, "-m", "pip", "install", package]
        subprocess.check_call(pip_command)

# Install the third-party packages used later on if they are missing.
for pkg in ("pandas", "schedule"):
    install(pkg)

📂 Step 1 — Directory Setup

Create categorized folders automatically (if not present):

import os

# Folder to tidy up; change this to the directory you want organized.
TARGET_DIR = "C:/Users/YourName/Downloads"

# Category folder name -> lower-case file extensions that belong in it.
CATEGORIES = {
    "Images": [".jpg", ".jpeg", ".png", ".gif", ".bmp"],
    "Documents": [".pdf", ".docx", ".txt", ".xlsx", ".pptx"],
    "Videos": [".mp4", ".mov", ".avi", ".mkv"],
    "Music": [".mp3", ".wav", ".aac"],
    "Archives": [".zip", ".rar", ".tar"],
    "Programs": [".exe", ".msi"],
    "Scripts": [".py", ".js", ".html", ".css"],
}

# Create one sub-folder per category. exist_ok=True makes this safe to re-run
# and avoids the check-then-create race of testing os.path.exists() first.
for folder in CATEGORIES:
    path = os.path.join(TARGET_DIR, folder)
    os.makedirs(path, exist_ok=True)

✅ Creates subfolders like “Images”, “Documents”, etc.


βš™οΈ Step 2 β€” Smart File Sorting Function

import shutil, re
from datetime import datetime

def organize_files():
    """Move every regular file in TARGET_DIR into its category sub-folder.

    A file's category is the first entry of CATEGORIES whose extension list
    contains the file's lower-cased extension. Files with no matching
    category are left where they are and recorded as "Uncategorized".
    Sub-directories are ignored.

    Returns:
        A list of ``(original_file_name, category, timestamp)`` tuples, one
        per file examined. The recorded name is the original one, even when
        the file was renamed to avoid a clash in the destination.
    """
    records = []
    for entry in os.listdir(TARGET_DIR):
        source = os.path.join(TARGET_DIR, entry)
        if not os.path.isfile(source):
            continue

        suffix = os.path.splitext(entry)[1].lower()
        # First category (in dict order) that lists this extension, if any.
        matched = next(
            (name for name, known in CATEGORIES.items() if suffix in known),
            None,
        )
        if matched is None:
            records.append((entry, "Uncategorized", datetime.now()))
            continue

        folder = os.path.join(TARGET_DIR, matched)
        unique_name = rename_if_duplicate(folder, entry)
        shutil.move(source, os.path.join(folder, unique_name))
        records.append((entry, matched, datetime.now()))

    return records

🧩 Step 3 — Handle Duplicate File Names

def rename_if_duplicate(dest, file_name):
    """Return a name for *file_name* that does not already exist in *dest*.

    If ``dest/file_name`` is free the name is returned unchanged. Otherwise
    ``_1``, ``_2``, ... is inserted before the extension until an unused
    name is found.
    """
    stem, suffix = os.path.splitext(file_name)
    candidate = file_name
    attempt = 0
    while os.path.exists(os.path.join(dest, candidate)):
        attempt += 1
        candidate = f"{stem}_{attempt}{suffix}"
    return candidate

✅ Automatically renames duplicates like photo_1.jpg, photo_2.jpg, etc.


🧾 Step 4 — Logging Actions

import logging

# Send INFO-and-above records to file_organizer.log (append mode).
# The FileHandler is created explicitly so the file is always written as
# UTF-8: file names and the log messages themselves can contain non-ASCII
# characters that the platform's default encoding (e.g. a Windows code page)
# cannot represent.
logging.basicConfig(
    handlers=[logging.FileHandler("file_organizer.log", encoding="utf-8")],
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

def log_activity(entries):
    """Write one INFO log line per entry.

    Args:
        entries: iterable of ``(file_name, category, timestamp)`` tuples.
            The timestamp is not used here; the logging format supplies
            its own.

    Entries whose category is "Uncategorized" are reported as skipped rather
    than moved.
    """
    for file_name, category, _timestamp in entries:
        if category == "Uncategorized":
            logging.info("Skipped (no matching category): %s", file_name)
        else:
            logging.info("Moved: %s → %s", file_name, category)

📊 Step 5 — Generate CSV Report

import pandas as pd

def export_report(entries, path="organizer_report.csv"):
    """Write *entries* to a CSV report and print a confirmation.

    Args:
        entries: sequence of ``(file_name, category, timestamp)`` tuples.
        path: where to write the CSV; defaults to ``organizer_report.csv``
            in the current working directory. An existing file is
            overwritten.

    The CSV has the header ``File,Category,Timestamp`` and no index column.
    An empty *entries* list produces a header-only file.
    """
    df = pd.DataFrame(entries, columns=["File", "Category", "Timestamp"])
    df.to_csv(path, index=False)
    print(f"✅ Report saved as '{path}'")

πŸ” Step 6 β€” Automate Organization Periodically

import schedule, time

def job():
    """Run one organize -> log -> report pass.

    Any exception raised by a step is logged with its traceback and
    swallowed here. The schedule library does not catch exceptions raised by
    jobs, so letting one escape would end the polling loop that calls this
    function and stop all future runs.
    """
    print("Running file organizer...")
    try:
        entries = organize_files()
        log_activity(entries)
        export_report(entries)
    except Exception:
        # Top-level boundary of a scheduled run: record the failure and keep
        # the scheduler alive for the next attempt.
        logging.exception("File organizer run failed")

# Only start the endless polling loop when this file is run as a script, so
# importing it (for reuse or testing) does not block forever.
if __name__ == "__main__":
    # Run every 10 minutes
    schedule.every(10).minutes.do(job)

    try:
        # Poll once per second; run_pending() only fires jobs that are due.
        while True:
            schedule.run_pending()
            time.sleep(1)
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to stop the organizer; exit quietly
        # instead of dumping a traceback.
        print("File organizer stopped.")

✅ Your computer now auto-organizes files every 10 minutes for as long as this script keeps running.


🧱 Full Combined Code (Advanced Organizer)

import os, shutil, logging, pandas as pd, schedule, time
from datetime import datetime

# Directory whose loose files get sorted into category sub-folders.
TARGET_DIR = "C:/Users/YourName/Downloads"

# Category folder name -> lower-case extensions filed under it.
CATEGORIES = {
    "Images": [".jpg", ".jpeg", ".png", ".gif", ".bmp"],
    "Documents": [".pdf", ".docx", ".txt", ".xlsx", ".pptx"],
    "Videos": [".mp4", ".mov", ".avi", ".mkv"],
    "Music": [".mp3", ".wav", ".aac"],
    "Archives": [".zip", ".rar", ".tar"],
    "Programs": [".exe", ".msi"],
    "Scripts": [".py", ".js", ".html", ".css"],
}

# Make sure every category folder exists (no error if it already does).
for folder in CATEGORIES:
    category_dir = os.path.join(TARGET_DIR, folder)
    os.makedirs(category_dir, exist_ok=True)

# Log INFO-and-above records to file_organizer.log (append mode). The
# FileHandler is created explicitly so the file is always UTF-8; the
# platform default encoding (e.g. a Windows code page) cannot represent
# every character that appears in file names or in the log messages.
logging.basicConfig(
    handlers=[logging.FileHandler("file_organizer.log", encoding="utf-8")],
    level=logging.INFO,
    format="%(asctime)s - %(message)s",
)

def rename_if_duplicate(dest, file_name):
    """Pick a file name that is not yet taken inside *dest*.

    Returns *file_name* itself when ``dest/file_name`` does not exist;
    otherwise tries ``<base>_1<ext>``, ``<base>_2<ext>``, ... and returns the
    first one that is free.
    """
    root, extension = os.path.splitext(file_name)
    suffix_number = 0
    candidate = file_name
    while True:
        if not os.path.exists(os.path.join(dest, candidate)):
            return candidate
        suffix_number += 1
        candidate = f"{root}_{suffix_number}{extension}"

def organize_files():
    """Sort the regular files found directly in TARGET_DIR by extension.

    Each file is moved into the folder of the first CATEGORIES entry that
    lists its lower-cased extension; a clash-free name is obtained from
    rename_if_duplicate(). Files matching no category stay in place and are
    recorded as "Uncategorized". Sub-directories are skipped.

    Returns:
        A list of ``(original_file_name, category, timestamp)`` tuples.
    """
    outcome = []
    for item in os.listdir(TARGET_DIR):
        item_path = os.path.join(TARGET_DIR, item)
        if not os.path.isfile(item_path):
            continue

        extension = os.path.splitext(item)[1].lower()
        for category, known_extensions in CATEGORIES.items():
            if extension not in known_extensions:
                continue
            target_folder = os.path.join(TARGET_DIR, category)
            final_name = rename_if_duplicate(target_folder, item)
            shutil.move(item_path, os.path.join(target_folder, final_name))
            outcome.append((item, category, datetime.now()))
            break
        else:
            # Loop finished without a break: no category claimed this file.
            outcome.append((item, "Uncategorized", datetime.now()))
    return outcome

def log_activity(entries):
    """Write one INFO log line per entry.

    Args:
        entries: iterable of ``(file_name, category, timestamp)`` tuples.
            The timestamp is unused here; the logging format adds its own.

    Entries categorised as "Uncategorized" are logged as skipped rather than
    as moved.
    """
    for file_name, category, _timestamp in entries:
        if category == "Uncategorized":
            logging.info("Skipped (no matching category): %s", file_name)
        else:
            logging.info("Moved %s → %s", file_name, category)

def export_report(entries):
    """Save *entries* as ``organizer_report.csv`` in the working directory.

    Each ``(file_name, category, timestamp)`` tuple becomes one row under
    the header ``File,Category,Timestamp``; no index column is written.
    """
    report_columns = ["File", "Category", "Timestamp"]
    report = pd.DataFrame(entries, columns=report_columns)
    report.to_csv("organizer_report.csv", index=False)

def job():
    """Run one organize -> log -> report pass and print a confirmation.

    An exception raised by any step is logged with its traceback and
    swallowed here. The schedule library does not catch exceptions raised by
    jobs, so letting one escape would end the polling loop and stop all
    future runs. The confirmation is printed only when every step succeeded.
    """
    try:
        entries = organize_files()
        log_activity(entries)
        export_report(entries)
    except Exception:
        # Top-level boundary of a scheduled run: record the failure and keep
        # the scheduler alive for the next attempt.
        logging.exception("File organizer run failed")
    else:
        print(f"✅ Organized at {datetime.now()}")

# Only start the endless polling loop when this file is run as a script, so
# importing it (for reuse or testing) does not block forever.
if __name__ == "__main__":
    # Run the organizer once every 10 minutes.
    schedule.every(10).minutes.do(job)

    try:
        # Poll once per second; run_pending() only fires jobs that are due.
        while True:
            schedule.run_pending()
            time.sleep(1)
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to stop the organizer; exit quietly
        # instead of dumping a traceback.
        print("File organizer stopped.")

💡 Optional Enhancements

Feature | Description
🗑️ Trash Manager | Move deleted files to a “Recycle Bin”
🔍 Duplicate Detector | Use file hashes to detect duplicates
🧠 AI File Sorting | Use NLP to auto-categorize based on filenames
🧰 GUI Interface | Add a Tkinter dashboard for manual control
☁️ Cloud Sync | Auto-sync organized files to Google Drive / Dropbox

🧠 Learning Outcomes

✅ File handling and directory traversal
✅ Automated file management
✅ Logging and reporting
✅ Scheduling with Python
✅ Writing professional, modular code


Comments

Leave a Reply

Your email address will not be published. Required fields are marked *