#21114 Allow specifying exclude directories for Data Sources

This commit is contained in:
Arthur
2026-03-10 08:46:47 -07:00
parent b5bd8905ca
commit 83c6149e49
3 changed files with 18 additions and 14 deletions

View File

@@ -36,13 +36,16 @@ If false, synchronization will be disabled.
### Ignore Rules ### Ignore Rules
A set of rules (one per line) identifying filenames to ignore during synchronization. Some examples are provided below. See Python's [`fnmatch()` documentation](https://docs.python.org/3/library/fnmatch.html) for a complete reference. A set of rules (one per line) identifying files or paths to ignore during synchronization. Rules are matched against both the full relative path (e.g. `subdir/file.txt`) and the bare filename, so path-based patterns can be used to exclude entire directories. Some examples are provided below. See Python's [`fnmatch()` documentation](https://docs.python.org/3/library/fnmatch.html) for a complete reference.
| Rule | Description | | Rule | Description |
|----------------|------------------------------------------| |-----------------------|------------------------------------------------------|
| `README` | Ignore any files named `README` | | `README` | Ignore any files named `README` |
| `*.txt` | Ignore any files with a `.txt` extension | | `*.txt` | Ignore any files with a `.txt` extension |
| `data???.json` | Ignore e.g. `data123.json` | | `data???.json` | Ignore e.g. `data123.json` |
| `subdir/*` | Ignore all files within `subdir/`, including its subdirectories (`*` also matches `/`) |
| `subdir/*/*` | Ignore all files at least one directory level below `subdir/` |
| `*/dev/*` | Ignore files beneath any directory named `dev/` that is not at the top level (use `dev/*` for a top-level `dev/`) |
### Sync Interval ### Sync Interval

View File

@@ -43,7 +43,7 @@ class DataSourceForm(PrimaryModelForm):
attrs={ attrs={
'rows': 5, 'rows': 5,
'class': 'font-monospace', 'class': 'font-monospace',
'placeholder': '.cache\n*.txt' 'placeholder': '.cache\n*.txt\nsubdir/*'
} }
), ),
} }

View File

@@ -69,7 +69,7 @@ class DataSource(JobsMixin, PrimaryModel):
ignore_rules = models.TextField( ignore_rules = models.TextField(
verbose_name=_('ignore rules'), verbose_name=_('ignore rules'),
blank=True, blank=True,
help_text=_("Patterns (one per line) matching files to ignore when syncing") help_text=_("Patterns (one per line) matching files or paths to ignore when syncing")
) )
parameters = models.JSONField( parameters = models.JSONField(
verbose_name=_('parameters'), verbose_name=_('parameters'),
@@ -258,21 +258,22 @@ class DataSource(JobsMixin, PrimaryModel):
if path.startswith('.'): if path.startswith('.'):
continue continue
for file_name in file_names: for file_name in file_names:
if not self._ignore(file_name): file_path = os.path.join(path, file_name)
paths.add(os.path.join(path, file_name)) if not self._ignore(file_path):
paths.add(file_path)
logger.debug(f"Found {len(paths)} files") logger.debug(f"Found {len(paths)} files")
return paths return paths
def _ignore(self, filename): def _ignore(self, file_path):
""" """
Returns a boolean indicating whether the file should be ignored per the DataSource's configured Returns a boolean indicating whether the file should be ignored per the DataSource's configured
ignore rules. ignore rules. file_path is the full relative path (e.g. "subdir/file.txt").
""" """
if filename.startswith('.'): if os.path.basename(file_path).startswith('.'):
return True return True
for rule in self.ignore_rules.splitlines(): for rule in self.ignore_rules.splitlines():
if fnmatchcase(filename, rule): if fnmatchcase(file_path, rule) or fnmatchcase(os.path.basename(file_path), rule):
return True return True
return False return False