A management command to check for missing static files?

Hi everyone,

I use ManifestStaticFilesStorage (in strict mode) in production but not in development.

This means that sometimes when deploying and running collectstatic, I get errors (ValueError) if I have CSS files referencing other missing files (like an image or another CSS file).

Are there tools (ideally a management command) that I could run in my CI pipeline (I use GitHub Actions) to check for those errors ahead of time?
Basically I want the missing file check but not the hashing part of ManifestStaticFilesStorage.

Thanks :sparkles:

For anyone running into the same issue, here’s the custom command I came up with after reading the implementation of Django’s collectstatic command and ManifestStaticFilesStorage (those two are pretty heavily tied together as it turned out):

from dataclasses import dataclass
from functools import wraps
import posixpath
from urllib.parse import urldefrag

from django.contrib.staticfiles.finders import get_finders
from django.contrib.staticfiles.storage import ManifestStaticFilesStorage, StaticFilesStorage
from django.contrib.staticfiles.utils import matches_patterns
from django.core.exceptions import ImproperlyConfigured
from django.core.management import BaseCommand, CommandError
from django.utils.functional import cached_property


class _PassthroughDict:
    """
    A fake dictionnary that always returns the requested key as the value.

    In other words, `d[k]` will always give you `k` (same with `d.get(k)`).

    Used as a pretend cache for ManifestStaticFilesStorage.url_converter().
    """
    def __getitem__(self, key):
        return key

    def get(self, key, default=None):
        return key


def deduplicate(generator_fn):
    """
    Decorate a generator function so that each distinct value it produces is
    yielded only once (the first time it appears).

    The wrapped generator stays lazy and keeps the original ordering. Yielded
    values must be hashable because they are tracked in a set.
    """
    @wraps(generator_fn)
    def unique_results(*args, **kwargs):
        already_yielded = set()
        for item in generator_fn(*args, **kwargs):
            if item not in already_yielded:
                already_yielded.add(item)
                yield item

    return unique_results


@dataclass
class StaticFile:
    """
    A static file, identified by the storage it lives in and its path
    inside that storage.
    """
    storage: StaticFilesStorage
    path: str

    @property
    def actual_path(self):
        """
        The actual path of the file, relative to the current working directory
        """
        # NOTE(review): `storage.path()` presumably returns an OS-native
        # filesystem path; `posixpath.relpath` only matches `os.path.relpath`
        # on POSIX systems -- confirm before relying on this on Windows.
        return posixpath.relpath(self.storage.path(self.path))

    @cached_property
    def content(self):
        """
        The content (str) of the file, assumed to be utf8-encoded

        The file is read at most once per instance; the decoded text is cached.
        """
        with self.storage.open(self.path) as f:
            return f.read().decode('utf8')

    @property
    @deduplicate
    def references(self):
        """
        Generate the paths of other files referenced in the current file

        Each referenced path is yielded only once, even if it appears several
        times in the file.
        """
        for extension, patterns in self._static_storage._patterns.items():
            # Only apply the patterns registered for this file's extension
            if not matches_patterns(self.path, [extension]):
                continue

            # Each entry is a (compiled regex, template) pair; the template
            # is irrelevant here since nothing gets rewritten.
            for pattern, _ in patterns:
                yield from self._gen_references_for_pattern(pattern)

    @cached_property
    def _static_storage(self):
        # Django's original ManifestStaticFilesStorage is used here for two things:
        #   1) Its list of patterns to find references in CSS files
        #   2) Its `url_converter()` method that can convert a match object
        #      into a path
        #
        # Cached so that a single storage instance is reused for every match
        # found in this file, instead of building a new one on each access
        # (`_get_path_from_matchobj()` reads this once per regex match).
        return ManifestStaticFilesStorage(location='.', base_url='/static/')

    def _relpath(self, other):
        """
        The path of the given file relative to the current file

        `other` is joined onto the directory of `self.path` and the result is
        normalized (so `..` segments are resolved).
        """
        reference_dir = posixpath.dirname(self.path)
        relative_path = posixpath.join(reference_dir, other)
        return posixpath.normpath(relative_path)

    def _get_path_from_matchobj(self, matchobj):
        """
        Convert the given match object (from re.finditer) into a path

        Return None when nothing is left of the reference once its fragment
        has been stripped.
        """
        # We don't care about the hashing part of ManifestStaticFilesStorage
        # so we use a pass-through cache as the second argument here.
        # NOTE(review): depending on the Django version, `url_converter()` may
        # hand back the matched text unchanged for URLs it chooses to ignore
        # (e.g. absolute URLs) -- confirm those are not reported as missing.
        conversion_fn = self._static_storage.url_converter(self.path, _PassthroughDict(), '%s')
        path = conversion_fn(matchobj)
        path, _ = urldefrag(path)
        if not path:  # Some CSS files have weird urls like `url(#something)`, we ignore those
            return None
        return self._relpath(path)

    def _gen_references_for_pattern(self, pattern):
        """
        Generate the paths of references matching the given pattern (compiled regex)
        in the current file
        """
        matches = pattern.finditer(self.content)
        paths = map(self._get_path_from_matchobj, matches)
        yield from filter(None, paths)  # remove empty paths


def gen_all_static_files(ignore_patterns=()):
    """
    Yield a `StaticFile` for every file reported by the configured
    staticfiles finders.

    `ignore_patterns` is forwarded unchanged to each finder's `list()`.

    Raises ImproperlyConfigured as soon as a file is found in a storage that
    has a truthy `prefix` attribute, since prefixes are not supported.
    """
    for source in get_finders():
        for relative_name, backend in source.list(ignore_patterns):
            uses_prefix = bool(getattr(backend, 'prefix', None))
            if not uses_prefix:
                yield StaticFile(storage=backend, path=relative_name)
                continue

            raise ImproperlyConfigured("checkstatic does not support prefixes in STATICFILES_DIRS")


class Command(BaseCommand):
    """
    Management command that runs a series of checks over the project's
    static files and fails if any of them reports a problem.
    """
    help = "Check the project's static files for possible errors"

    def handle(self, **options):
        """
        Collect all static files, run every check, then report the outcome.

        Raises CommandError if at least one check logged an error; otherwise
        writes a success message to stdout.
        """
        files = list(gen_all_static_files())

        # Flipped to True by `_log_error()` whenever a check finds a problem
        self._error_happened = False

        for run_check in (self._check_duplicates, self._check_missing_reference):
            run_check(files)

        if not self._error_happened:
            self.stdout.write(self.style.SUCCESS("No issues found, good job"))
            return

        raise CommandError("checkstatic found some issues")

    def _log_error(self, message):
        """
        Write the message to stderr and remember that an error occurred.
        """
        self._error_happened = True
        self.stderr.write(message)

    def _check_duplicates(self, files):
        """
        Report every file whose path was already used by an earlier file.
        """
        last_seen = {}
        for current in files:
            if current.path in last_seen:
                previous = last_seen[current.path]
                self._log_error(f"DUPLICATE NAMES {current.actual_path!r} and {previous.actual_path!r}")
            # Always remember the most recent file for this path
            last_seen[current.path] = current

    def _check_missing_reference(self, files):
        """
        Report every reference that does not match the path of a known file.
        """
        known_paths = {candidate.path for candidate in files}

        for source_file in files:
            for target in source_file.references:
                if target in known_paths:
                    continue
                self._log_error(f"MISSING REFERENCE {target!r} in file {source_file.actual_path!r}")

It can probably be improved but for now it fits my needs:

  • it detects duplicated names (I don’t use any static files overrides in my project so this check is useful in preventing accidental overwrites of static files)
  • it detects missing references in static files (using the same logic as collectstatic, so just CSS files for now)
2 Likes