import zipfile
import os
import json

# Lower-cased language names that are considered "wanted". Only the keys are
# consulted in this module (to select which languages' file extensions are
# accepted); each value is a comment-style tag string for that language.
LANGUAGE_TAG = {
    "c++": "// C++",
    "cpp": "// C++",
    "c": "// C",
    "c#": "// C#",
    "c-sharp": "// C#",
    "css": "/* CSS */",
    "cuda": "// Cuda",
    "fortran": "! Fortran",
    "go": "// Go",
    "html": "",
    "java": "// Java",
    "js": "// JavaScript",
    "javascript": "// JavaScript",
    "kotlin": "// Kotlin",
    "lean": "-- Lean",
    "lua": "-- Lua",
    "objectivec": "// Objective-C",
    "objective-c": "// Objective-C",
    "objective-c++": "// Objective-C++",
    "pascal": "// Pascal",
    "php": "// PHP",
    "python": "# Python",
    "r": "# R",
    "rust": "// Rust",
    "ruby": "# Ruby",
    "scala": "// Scala",
    "shell": "# Shell",
    "sql": "-- SQL",
    "tex": "% TeX",
    "typescript": "// TypeScript",
    "vue": "",
    "assembly": "; Assembly",
    "dart": "// Dart",
    "perl": "# Perl",
    "prolog": "% Prolog",
    "swift": "// swift",
    "lisp": "; Lisp",
    "vb": "' Visual Basic",
    "visual basic": "' Visual Basic",
    "matlab": "% Matlab",
    "delphi": "{ Delphi }",
    "scheme": "; Scheme",
    "basic": "' Basic",
    "groovy": "// Groovy",
    "abap": "* Abap",
    "gdscript": "# GDScript",
    "haskell": "-- Haskell",
    "julia": "# Julia",
    "elixir": "# Elixir",
    "excel": "' Excel",
    "clojure": "; Clojure",
    "actionscript": "// ActionScript",
    "solidity": "// Solidity",
    "powershell": "# PowerShell",
    "erlang": "% Erlang",
    "cobol": "// Cobol",
    "batchfile": ":: Batch file",
    "makefile": "# Makefile",
    "dockerfile": "# Dockerfile",
    "markdown": "",
    "cmake": "# CMake",
}

# Both paths are resolved relative to the current working directory.
_EXTENSIONS_JSON = 'utils/programming-languages-to-file-extensions.json'
_KEEP_FILE = 'utils/keep.txt'


def unzip_file(zip_path: str, extract_dir: str) -> str:
    """Extract a zip archive into a new sub-directory of ``extract_dir``.

    The sub-directory is named after the archive file without its final
    extension (``/tmp/foo.zip`` -> ``<extract_dir>/foo``). Missing
    directories are created.

    Args:
        zip_path: Path of the zip archive.
        extract_dir: Directory under which the new sub-directory is created.

    Returns:
        The path of the directory the archive was extracted into.
    """
    archive_stem = os.path.splitext(os.path.basename(zip_path))[0]
    new_extract_dir = os.path.join(extract_dir, archive_stem)
    # Creates extract_dir as well when it is missing, and does not fail when
    # the directory already exists (no check-then-create race).
    os.makedirs(new_extract_dir, exist_ok=True)

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(new_extract_dir)

    return new_extract_dir


def get_project_files_with_content(project_dir: str) -> list:
    """Collect the relative path and text content of the wanted project files.

    A file is included when its name passes the same test as
    :func:`filter_data` and its relative path does not contain ``__MACOSX``.
    Files are read as UTF-8; undecodable bytes are silently dropped.

    Args:
        project_dir: Root directory of the project to walk.

    Returns:
        A list of ``{'path': <path relative to project_dir>, 'content': <text>}``
        dictionaries, in ``os.walk`` order.
    """
    files_list = []
    # Loaded lazily on the first file so the filter configuration is read once
    # per walk instead of once per file, and not at all for an empty tree.
    filter_config = None

    for root, _dirs, files in os.walk(project_dir):
        for file_name in files:
            if filter_config is None:
                filter_config = _load_filter_config()
            if not _is_wanted(file_name, *filter_config):
                continue

            file_path = os.path.join(root, file_name)
            relative_path = os.path.relpath(file_path, project_dir)
            if "__MACOSX" in relative_path:
                continue

            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()
            files_list.append({'path': relative_path, 'content': content})

    return files_list


def _load_filter_config():
    """Read the extension whitelist and the keep-list from ``utils/``.

    Returns:
        A ``(wanted_extensions, keep_files)`` tuple of two sets:
        the extensions listed in the JSON mapping for every language whose
        lower-cased name is a key of ``LANGUAGE_TAG``, and the stripped lines
        of ``utils/keep.txt``.
    """
    with open(_EXTENSIONS_JSON, encoding='utf-8') as f:
        languages_to_extensions = json.load(f)

    wanted_extensions = set()
    for language, extensions in languages_to_extensions.items():
        if language.lower() in LANGUAGE_TAG:
            wanted_extensions.update(extensions)

    with open(_KEEP_FILE, 'r') as f:
        keep_files = {line.strip() for line in f}

    return wanted_extensions, keep_files


def _is_wanted(file_name, wanted_extensions, keep_files):
    """Return True if ``file_name`` has a wanted extension or is kept by name."""
    # For a name without a dot this yields '.' + the whole name, which
    # only matches if that exact string is listed as an extension.
    ext = '.' + file_name.split('.')[-1]
    return ext in wanted_extensions or file_name in keep_files


def filter_data(obj: str) -> bool:
    """Decide whether a file name should be kept.

    Reads ``utils/programming-languages-to-file-extensions.json`` and
    ``utils/keep.txt`` (relative to the current working directory) on every
    call.

    Args:
        obj: A bare file name (not a path).

    Returns:
        True if the text after the last ``.`` in ``obj`` (prefixed with ``.``)
        is an extension of a language named in ``LANGUAGE_TAG``, or if ``obj``
        is listed verbatim in ``utils/keep.txt``; False otherwise.
    """
    wanted_extensions, keep_files = _load_filter_config()
    return _is_wanted(obj, wanted_extensions, keep_files)