1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
| import pandas as pd import os import shutil import sys import math
# --- Script configuration ---------------------------------------------------
# CSV table that lists the participants and the image file name of each one.
participant_table_path = "Participant_table.csv"
# Name of the table column that holds the .zip file name to fetch.
filename_column = "T1 structural brain images - NIFTI | Instance 2"
# Root directory that is searched, together with its sub-directories.
source_base_dir = "/mnt/project/Bulk/Brain MRI/T1/"
# Directory under which the batch_NNN sub-directories are created.
destination_base_dir = "./T1_NIFTI_Batched"
# Number of successfully copied files that go into one batch sub-directory.
batch_size = 100
# Field separator used when parsing the participant table.
file_delimiter = ","
# Make sure the root output directory exists before any other work is done.
# If it cannot be created the script reports the error and exits with
# status 1.
try:
    os.makedirs(destination_base_dir, exist_ok=True)
    print(f"基础目标目录已创建或已存在: {destination_base_dir}")
except OSError as mkdir_error:
    print(
        f"错误:无法创建基础目标目录 {destination_base_dir}: {mkdir_error}",
        file=sys.stderr,
    )
    sys.exit(1)
# Load the participant table and collect the set of file names to look for.
# Every column is read as text (dtype=str).  The script exits with status 1
# when the table or the expected column is missing or unreadable, and with
# status 0 when the column holds no file names at all.
try:
    print(f"正在读取参与者表格: {participant_table_path}")
    df = pd.read_csv(participant_table_path, delimiter=file_delimiter, dtype=str)

    if filename_column not in df.columns:
        for message in (
            f"错误: 在文件 {participant_table_path} 中未找到列 '{filename_column}'",
            f"文件中包含的列有: {list(df.columns)}",
            "请检查并更新脚本中的 'filename_column' 变量。",
        ):
            print(message, file=sys.stderr)
        sys.exit(1)

    # Missing cells are dropped; the set removes any repeated names.
    listed_names = df[filename_column].dropna().unique()
    target_files = set(listed_names)
    print(f"从表格中找到 {len(target_files)} 个唯一的目标 .zip 文件名。")

    if not target_files:
        print("警告:在指定列中未找到任何目标文件名。请检查列名和文件内容。")
        sys.exit(0)
except FileNotFoundError:
    for message in (
        f"错误: 参与者表格文件未找到,路径: {participant_table_path}",
        "请检查并更新脚本中的 'participant_table_path' 变量。",
    ):
        print(message, file=sys.stderr)
    sys.exit(1)
except Exception as read_error:
    # sys.exit() raises SystemExit, which is not an Exception subclass, so
    # the exits above are not intercepted by this handler.
    for message in (
        f"错误:读取参与者表格 {participant_table_path} 时出错: {read_error}",
        f"请检查您的文件分隔符 '{file_delimiter}' 是否设置正确。",
    ):
        print(message, file=sys.stderr)
    sys.exit(1)
def _report_walk_error(walk_error):
    """Report a directory that os.walk was unable to list.

    Without an ``onerror`` callback os.walk ignores such errors, so a missing
    or unreadable source directory would only show up as "0 files found".
    The error is printed to stderr and the walk carries on.
    """
    print(f"警告:遍历源目录时无法访问: {walk_error}", file=sys.stderr)


# Number of files copied successfully so far; it also drives batch numbering.
files_copied_count = 0
# Names of the files that were copied successfully.
files_processed = set()

print(f"\n正在搜索目标文件于: {source_base_dir} 及其子目录...")
try:
    # Map every wanted file name to the first path at which it was seen.
    found_file_paths = {}
    for root, _dirs, files_in_current_dir in os.walk(
        source_base_dir, onerror=_report_walk_error
    ):
        for file_name in files_in_current_dir:
            if file_name in target_files:
                # setdefault keeps the first location if a name occurs twice.
                found_file_paths.setdefault(file_name, os.path.join(root, file_name))
        # Only names from target_files are stored, so equal sizes mean every
        # target has been located and the rest of the tree need not be read.
        if len(found_file_paths) == len(target_files):
            break

    print(f"在源目录中初步定位到 {len(found_file_paths)} 个目标文件。开始复制并分批...")

    # Copy in file-name order so the batch assignment is reproducible.
    for filename_to_copy, source_path in sorted(found_file_paths.items()):
        # The batch number is derived from the count of successful copies, so
        # a failed copy does not use up a slot in its batch.
        batch_index = (files_copied_count // batch_size) + 1
        current_batch_subdir_name = f"batch_{batch_index:03d}"
        current_batch_dir = os.path.join(destination_base_dir, current_batch_subdir_name)
        os.makedirs(current_batch_dir, exist_ok=True)

        dest_path = os.path.join(current_batch_dir, filename_to_copy)

        # A failure on one file is reported and the loop moves on to the next.
        try:
            print(f" 目标文件: {filename_to_copy}")
            print(f" 归入批次: {current_batch_subdir_name}")
            print(" 正在复制...")
            shutil.copy2(source_path, dest_path)
            files_copied_count += 1
            files_processed.add(filename_to_copy)
            print(f" 成功复制 {filename_to_copy} 到 {current_batch_subdir_name}")
        except Exception as e:
            print(f" 错误:复制文件 {filename_to_copy} 时出错: {e}", file=sys.stderr)
except Exception as e:
    # Anything raised outside the per-file handler (for example a batch
    # directory that cannot be created) ends the search/copy phase here.
    print(f"在目录遍历或文件处理过程中发生严重错误: {e}", file=sys.stderr)
|