From 0e4a4e3c4d602691ce1b4fc6a721c5c637ead57f Mon Sep 17 00:00:00 2001 From: Thaddeus Crews Date: Sun, 13 Oct 2024 13:59:33 -0500 Subject: [PATCH] SCons: Improve cache purging logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Implement caching via SCons arguments, rather than environment variables --- .github/actions/godot-build/action.yml | 10 +- .../actions/godot-cache-restore/action.yml | 3 +- .github/actions/godot-cache-save/action.yml | 2 +- .github/workflows/godot_cpp_test.yml | 3 - .gitignore | 6 +- SConstruct | 16 +- methods.py | 266 +++++++++--------- 7 files changed, 151 insertions(+), 155 deletions(-) diff --git a/.github/actions/godot-build/action.yml b/.github/actions/godot-build/action.yml index 93d6f076b72..ebc301dd0f4 100644 --- a/.github/actions/godot-build/action.yml +++ b/.github/actions/godot-build/action.yml @@ -18,12 +18,12 @@ inputs: required: false scons-cache: description: The SCons cache path. - default: ${{ github.workspace }}/.scons-cache/ + default: ${{ github.workspace }}/.scons_cache/ scons-cache-limit: description: The SCons cache size limit. # actions/cache has 10 GiB limit, and GitHub runners have a 14 GiB disk. # Limit to 7 GiB to avoid having the extracted cache fill the disk. - default: 7168 + default: 7 runs: using: composite @@ -32,10 +32,8 @@ runs: shell: sh env: SCONSFLAGS: ${{ inputs.sconsflags }} - SCONS_CACHE: ${{ inputs.scons-cache }} - SCONS_CACHE_LIMIT: ${{ inputs.scons-cache-limit }} run: | - echo "Building with flags:" platform=${{ inputs.platform }} target=${{ inputs.target }} tests=${{ inputs.tests }} ${{ env.SCONSFLAGS }} + echo "Building with flags:" platform=${{ inputs.platform }} target=${{ inputs.target }} tests=${{ inputs.tests }} ${{ env.SCONSFLAGS }} "cache_path=${{ inputs.scons-cache }}" cache_limit=${{ inputs.scons-cache-limit }} if [ "${{ inputs.target }}" != "editor" ]; then # Ensure we don't include editor code in export template builds. @@ -49,5 +47,5 @@ runs: export BUILD_NAME="gh" fi - scons platform=${{ inputs.platform }} target=${{ inputs.target }} tests=${{ inputs.tests }} ${{ env.SCONSFLAGS }} + scons platform=${{ inputs.platform }} target=${{ inputs.target }} tests=${{ inputs.tests }} ${{ env.SCONSFLAGS }} "cache_path=${{ inputs.scons-cache }}" cache_limit=${{ inputs.scons-cache-limit }} ls -l bin/ diff --git a/.github/actions/godot-cache-restore/action.yml b/.github/actions/godot-cache-restore/action.yml index 7abec20a282..e2a1b970191 100644 --- a/.github/actions/godot-cache-restore/action.yml +++ b/.github/actions/godot-cache-restore/action.yml @@ -6,7 +6,7 @@ inputs: default: ${{ github.job }} scons-cache: description: The SCons cache path. - default: ${{ github.workspace }}/.scons-cache/ + default: ${{ github.workspace }}/.scons_cache/ runs: using: composite @@ -29,7 +29,6 @@ runs: # 4. A partial match for the same base branch only (not ideal, matches any PR with the same base branch). restore-keys: | - ${{ inputs.cache-name }}-${{ env.GODOT_BASE_BRANCH }}-${{ github.ref }}-${{ github.sha }} ${{ inputs.cache-name }}-${{ env.GODOT_BASE_BRANCH }}-${{ github.ref }} ${{ inputs.cache-name }}-${{ env.GODOT_BASE_BRANCH }}-refs/heads/${{ env.GODOT_BASE_BRANCH }} ${{ inputs.cache-name }}-${{ env.GODOT_BASE_BRANCH }} diff --git a/.github/actions/godot-cache-save/action.yml b/.github/actions/godot-cache-save/action.yml index df877cec67b..42aa836406f 100644 --- a/.github/actions/godot-cache-save/action.yml +++ b/.github/actions/godot-cache-save/action.yml @@ -6,7 +6,7 @@ inputs: default: ${{ github.job }} scons-cache: description: The SCons cache path. - default: ${{ github.workspace }}/.scons-cache/ + default: ${{ github.workspace }}/.scons_cache/ runs: using: composite diff --git a/.github/workflows/godot_cpp_test.yml b/.github/workflows/godot_cpp_test.yml index dc82a7cb3c4..af99a4b0357 100644 --- a/.github/workflows/godot_cpp_test.yml +++ b/.github/workflows/godot_cpp_test.yml @@ -52,9 +52,6 @@ jobs: # continue-on-error: true - name: Build godot-cpp test extension - env: # Keep synced with godot-build. - SCONS_CACHE: ${{ github.workspace }}/.scons-cache/ - SCONS_CACHE_LIMIT: 7168 run: scons --directory=./godot-cpp/test target=template_debug dev_build=yes verbose=yes # - name: Save Godot build cache diff --git a/.gitignore b/.gitignore index 32a43b8c637..0dcf79c4604 100644 --- a/.gitignore +++ b/.gitignore @@ -77,6 +77,9 @@ venv __pycache__/ *.pyc +# Python modules +.*_cache/ + # Documentation doc/_build/ @@ -164,9 +167,6 @@ gmon.out # Kdevelop *.kdev4 -# Mypy -.mypy_cache - # Qt Creator *.config *.creator diff --git a/SConstruct b/SConstruct index 5a3a8f49ebd..405e87c27a4 100644 --- a/SConstruct +++ b/SConstruct @@ -271,6 +271,8 @@ opts.Add(BoolVariable("scu_build", "Use single compilation unit build", False)) opts.Add("scu_limit", "Max includes per SCU file when using scu_build (determines RAM use)", "0") opts.Add(BoolVariable("engine_update_check", "Enable engine update checks in the Project Manager", True)) opts.Add(BoolVariable("steamapi", "Enable minimal SteamAPI integration for usage time tracking (editor only)", False)) +opts.Add("cache_path", "Path to a directory where SCons cache files will be stored. No value disables the cache.", "") +opts.Add("cache_limit", "Max size (in GiB) for the SCons cache. 0 means no limit.", "0") # Thirdparty libraries opts.Add(BoolVariable("builtin_brotli", "Use the built-in Brotli library", True)) @@ -321,6 +323,9 @@ opts.Add("rcflags", "Custom flags for Windows resource compiler") # in following code (especially platform and custom_modules). opts.Update(env) +# Setup caching logic early to catch everything. +methods.prepare_cache(env) + # Copy custom environment variables if set. if env["import_env_vars"]: for env_var in str(env["import_env_vars"]).split(","): @@ -354,7 +359,9 @@ if env["platform"] == "": if env["platform"] in compatibility_platform_aliases: alias = env["platform"] platform = compatibility_platform_aliases[alias] - print_warning(f'Platform "{alias}" has been renamed to "{platform}" in Godot 4. Building for platform "{platform}".') + print_warning( + f'Platform "{alias}" has been renamed to "{platform}" in Godot 4. Building for platform "{platform}".' + ) env["platform"] = platform # Alias for convenience. @@ -1039,11 +1046,6 @@ GLSL_BUILDERS = { } env.Append(BUILDERS=GLSL_BUILDERS) -scons_cache_path = os.environ.get("SCONS_CACHE") -if scons_cache_path is not None: - CacheDir(scons_cache_path) - print("Scons cache enabled... (path: '" + scons_cache_path + "')") - if env["compiledb"]: env.Tool("compilation_db") env.Alias("compiledb", env.CompilationDatabase()) @@ -1126,5 +1128,3 @@ def purge_flaky_files(): atexit.register(purge_flaky_files) - -methods.clean_cache(env) diff --git a/methods.py b/methods.py index d89185f5858..73b49775df4 100644 --- a/methods.py +++ b/methods.py @@ -1,5 +1,7 @@ +import atexit import contextlib import glob +import math import os import re import subprocess @@ -8,7 +10,7 @@ from collections import OrderedDict from enum import Enum from io import StringIO, TextIOWrapper from pathlib import Path -from typing import Generator, List, Optional, Union +from typing import Generator, List, Optional, Union, cast # Get the "Godot" folder name ahead of time base_folder_path = str(os.path.abspath(Path(__file__).parent)) + "/" @@ -784,159 +786,159 @@ def using_emcc(env): def show_progress(env): - if env["ninja"]: - # Has its own progress/tracking tool that clashes with ours + # Progress reporting is not available in non-TTY environments since it messes with the output + # (for example, when writing to a file). Ninja has its own progress/tracking tool that clashes + # with ours. + if not env["progress"] or not sys.stdout.isatty() or env["ninja"]: return - import sys + NODE_COUNT_FILENAME = f"{base_folder_path}.scons_node_count" - from SCons.Script import AlwaysBuild, Command, Progress - - screen = sys.stdout - # Progress reporting is not available in non-TTY environments since it - # messes with the output (for example, when writing to a file) - show_progress = env["progress"] and sys.stdout.isatty() - node_count = 0 - node_count_max = 0 - node_count_interval = 1 - node_count_fname = str(env.Dir("#")) + "/.scons_node_count" - - import math - - class cache_progress: - # The default is 1 GB cache - def __init__(self, path=None, limit=pow(1024, 3)): - self.path = path - self.limit = limit - if env["verbose"] and path is not None: - screen.write( - "Current cache limit is {} (used: {})\n".format( - self.convert_size(limit), self.convert_size(self.get_size(path)) - ) - ) + class ShowProgress: + def __init__(self): + self.count = 0 + self.max = 0 + try: + with open(NODE_COUNT_FILENAME, "r", encoding="utf-8") as f: + self.max = int(f.readline()) + except OSError: + pass + if self.max == 0: + print("NOTE: Performing initial build, progress percentage unavailable!") def __call__(self, node, *args, **kw): - nonlocal node_count, node_count_max, node_count_interval, node_count_fname, show_progress - if show_progress: - # Print the progress percentage - node_count += node_count_interval - if node_count_max > 0 and node_count <= node_count_max: - screen.write("\r[%3d%%] " % (node_count * 100 / node_count_max)) - screen.flush() - elif node_count_max > 0 and node_count > node_count_max: - screen.write("\r[100%] ") - screen.flush() - else: - screen.write("\r[Initial build] ") - screen.flush() + self.count += 1 + if self.max != 0: + percent = int(min(self.count * 100 / self.max, 100)) + sys.stdout.write(f"\r[{percent:3d}%] ") + sys.stdout.flush() - def convert_size(self, size_bytes): - if size_bytes == 0: - return "0 bytes" - size_name = ("bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB") - i = int(math.floor(math.log(size_bytes, 1024))) - p = math.pow(1024, i) - s = round(size_bytes / p, 2) - return "%s %s" % (int(s) if i == 0 else s, size_name[i]) + from SCons.Script import Progress - def get_size(self, start_path="."): - total_size = 0 - for dirpath, dirnames, filenames in os.walk(start_path): - for f in filenames: - fp = os.path.join(dirpath, f) - total_size += os.path.getsize(fp) - return total_size + progressor = ShowProgress() + Progress(progressor) def progress_finish(target, source, env): - nonlocal node_count, progressor try: - with open(node_count_fname, "w", encoding="utf-8", newline="\n") as f: - f.write("%d\n" % node_count) - except Exception: + with open(NODE_COUNT_FILENAME, "w", encoding="utf-8", newline="\n") as f: + f.write(f"{progressor.count}\n") + except OSError: pass - try: - with open(node_count_fname, "r", encoding="utf-8") as f: - node_count_max = int(f.readline()) - except Exception: - pass - - cache_directory = os.environ.get("SCONS_CACHE") - # Simple cache pruning, attached to SCons' progress callback. Trim the - # cache directory to a size not larger than cache_limit. - cache_limit = float(os.getenv("SCONS_CACHE_LIMIT", 1024)) * 1024 * 1024 - progressor = cache_progress(cache_directory, cache_limit) - Progress(progressor, interval=node_count_interval) - - progress_finish_command = Command("progress_finish", [], progress_finish) - AlwaysBuild(progress_finish_command) + env.AlwaysBuild( + env.CommandNoCache( + "progress_finish", [], env.Action(progress_finish, "Building node count database .scons_node_count") + ) + ) -def clean_cache(env): - import atexit - import time +def convert_size(size_bytes: int) -> str: + if size_bytes == 0: + return "0 bytes" + SIZE_NAMES = ["bytes", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"] + index = math.floor(math.log(size_bytes, 1024)) + power = math.pow(1024, index) + size = round(size_bytes / power, 2) + return f"{size} {SIZE_NAMES[index]}" - class cache_clean: - def __init__(self, path=None, limit=pow(1024, 3)): - self.path = path - self.limit = limit - def clean(self): - self.delete(self.file_list()) +def get_size(start_path: str = ".") -> int: + total_size = 0 + for dirpath, _, filenames in os.walk(start_path): + for file in filenames: + path = os.path.join(dirpath, file) + total_size += os.path.getsize(path) + return total_size - def delete(self, files): - if len(files) == 0: - return - if env["verbose"]: - # Utter something - print("Purging %d %s from cache..." % (len(files), "files" if len(files) > 1 else "file")) - [os.remove(f) for f in files] - def file_list(self): - if self.path is None: - # Nothing to do - return [] - # Gather a list of (filename, (size, atime)) within the - # cache directory - file_stat = [(x, os.stat(x)[6:8]) for x in glob.glob(os.path.join(self.path, "*", "*"))] - if file_stat == []: - # Nothing to do - return [] - # Weight the cache files by size (assumed to be roughly - # proportional to the recompilation time) times an exponential - # decay since the ctime, and return a list with the entries - # (filename, size, weight). - current_time = time.time() - file_stat = [(x[0], x[1][0], (current_time - x[1][1])) for x in file_stat] - # Sort by the most recently accessed files (most sensible to keep) first - file_stat.sort(key=lambda x: x[2]) - # Search for the first entry where the storage limit is - # reached - sum, mark = 0, None - for i, x in enumerate(file_stat): - sum += x[1] - if sum > self.limit: - mark = i - break - if mark is None: - return [] - else: - return [x[0] for x in file_stat[mark:]] +def clean_cache(cache_path: str, cache_limit: int, verbose: bool): + files = glob.glob(os.path.join(cache_path, "*", "*")) + if not files: + return - def cache_finally(): - nonlocal cleaner + # Remove all text files, store binary files in list of (filename, size, atime). + purge = [] + texts = [] + stats = [] + for file in files: + # Failing a utf-8 decode is the easiest way to determine if a file is binary. try: - cleaner.clean() - except Exception: - pass + with open(file, encoding="utf-8") as out: + out.read(1024) + except UnicodeDecodeError: + stats.append((file, *os.stat(file)[6:8])) + except OSError: + print_error(f'Failed to access cache file "{file}"; skipping.') + else: + texts.append(file) - cache_directory = os.environ.get("SCONS_CACHE") - # Simple cache pruning, attached to SCons' progress callback. Trim the - # cache directory to a size not larger than cache_limit. - cache_limit = float(os.getenv("SCONS_CACHE_LIMIT", 1024)) * 1024 * 1024 - cleaner = cache_clean(cache_directory, cache_limit) + if texts: + count = len(texts) + for file in texts: + try: + os.remove(file) + except OSError: + print_error(f'Failed to remove cache file "{file}"; skipping.') + count -= 1 + if verbose: + print("Purging %d text %s from cache..." % (count, "files" if count > 1 else "file")) - atexit.register(cache_finally) + if cache_limit: + # Sort by most recent access (most sensible to keep) first. Search for the first entry where + # the cache limit is reached. + stats.sort(key=lambda x: x[2], reverse=True) + sum = 0 + for index, stat in enumerate(stats): + sum += stat[1] + if sum > cache_limit: + purge.extend([x[0] for x in stats[index:]]) + break + + if purge: + count = len(purge) + for file in purge: + try: + os.remove(file) + except OSError: + print_error(f'Failed to remove cache file "{file}"; skipping.') + count -= 1 + if verbose: + print("Purging %d %s from cache..." % (count, "files" if count > 1 else "file")) + + +def prepare_cache(env) -> None: + if env.GetOption("clean"): + return + + if env["cache_path"]: + cache_path = cast(str, env["cache_path"]) + elif os.environ.get("SCONS_CACHE"): + print_warning("Environment variable `SCONS_CACHE` is deprecated; use `cache_path` argument instead.") + cache_path = cast(str, os.environ.get("SCONS_CACHE")) + + if not cache_path: + return + + env.CacheDir(cache_path) + print(f'SCons cache enabled... (path: "{cache_path}")') + + if env["cache_limit"]: + cache_limit = float(env["cache_limit"]) + elif os.environ.get("SCONS_CACHE_LIMIT"): + print_warning("Environment variable `SCONS_CACHE_LIMIT` is deprecated; use `cache_limit` argument instead.") + cache_limit = float(os.getenv("SCONS_CACHE_LIMIT", "0")) / 1024 # Old method used MiB, convert to GiB + + # Convert GiB to bytes; treat negative numbers as 0 (unlimited). + cache_limit = max(0, int(cache_limit * 1024 * 1024 * 1024)) + if env["verbose"]: + print( + "Current cache limit is {} (used: {})".format( + convert_size(cache_limit) if cache_limit else "∞", + convert_size(get_size(cache_path)), + ) + ) + + atexit.register(clean_cache, cache_path, cache_limit, env["verbose"]) def dump(env):