mirror of
https://github.com/eitchtee/WYGIWYH.git
synced 2026-04-25 01:58:54 +02:00
@@ -92,6 +92,20 @@ class CSVImportSettings(BaseModel):
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class ExcelImportSettings(BaseModel):
    """Settings section of an import profile whose source file is an Excel workbook."""

    # When True, a row that fails to import is logged and counted as failed
    # instead of aborting the whole run.
    skip_errors: bool = Field(
        default=False,
        description="If True, errors during import will be logged and skipped",
    )
    # Workbook flavour; the importer reads "xlsx" with openpyxl and "xls" with xlrd.
    file_type: Literal["xls", "xlsx"]
    trigger_transaction_rules: bool = True
    # Which kind of record the rows of the workbook describe.
    importing: Literal[
        "transactions", "accounts", "currencies", "categories", "tags", "entities"
    ]
    # 1-based row number of the header row; data rows start on the next row.
    # The importer indexes the sheet with this value directly (openpyxl) or
    # with ``start_row - 1`` (xlrd), so 0 or a negative number would either
    # raise or silently pick the wrong row as header. Reject it at validation.
    start_row: int = Field(
        default=1, ge=1, description="Where your header is located"
    )
    # Sheet name, list of sheet names, or "*" to process every sheet.
    sheets: list[str] | str = "*"
|
||||||
|
|
||||||
|
|
||||||
class ColumnMapping(BaseModel):
|
class ColumnMapping(BaseModel):
|
||||||
source: Optional[str] | Optional[list[str]] = Field(
|
source: Optional[str] | Optional[list[str]] = Field(
|
||||||
default=None,
|
default=None,
|
||||||
@@ -328,7 +342,7 @@ class CurrencyExchangeMapping(ColumnMapping):
|
|||||||
|
|
||||||
|
|
||||||
class ImportProfileSchema(BaseModel):
|
class ImportProfileSchema(BaseModel):
|
||||||
settings: CSVImportSettings
|
settings: CSVImportSettings | ExcelImportSettings
|
||||||
mapping: Dict[
|
mapping: Dict[
|
||||||
str,
|
str,
|
||||||
TransactionAccountMapping
|
TransactionAccountMapping
|
||||||
|
|||||||
@@ -3,13 +3,16 @@ import hashlib
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
from datetime import datetime
|
from datetime import datetime, date
|
||||||
from decimal import Decimal, InvalidOperation
|
from decimal import Decimal, InvalidOperation
|
||||||
from typing import Dict, Any, Literal, Union
|
from typing import Dict, Any, Literal, Union
|
||||||
|
|
||||||
|
import openpyxl
|
||||||
|
import xlrd
|
||||||
import yaml
|
import yaml
|
||||||
from cachalot.api import cachalot_disabled
|
from cachalot.api import cachalot_disabled
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
from openpyxl.utils.exceptions import InvalidFileException
|
||||||
|
|
||||||
from apps.accounts.models import Account, AccountGroup
|
from apps.accounts.models import Account, AccountGroup
|
||||||
from apps.currencies.models import Currency
|
from apps.currencies.models import Currency
|
||||||
@@ -39,7 +42,9 @@ class ImportService:
|
|||||||
self.import_run: ImportRun = import_run
|
self.import_run: ImportRun = import_run
|
||||||
self.profile: ImportProfile = import_run.profile
|
self.profile: ImportProfile = import_run.profile
|
||||||
self.config: version_1.ImportProfileSchema = self._load_config()
|
self.config: version_1.ImportProfileSchema = self._load_config()
|
||||||
self.settings: version_1.CSVImportSettings = self.config.settings
|
self.settings: version_1.CSVImportSettings | version_1.ExcelImportSettings = (
|
||||||
|
self.config.settings
|
||||||
|
)
|
||||||
self.deduplication: list[version_1.CompareDeduplicationRule] = (
|
self.deduplication: list[version_1.CompareDeduplicationRule] = (
|
||||||
self.config.deduplication
|
self.config.deduplication
|
||||||
)
|
)
|
||||||
@@ -74,6 +79,13 @@ class ImportService:
|
|||||||
self.import_run.logs += log_line
|
self.import_run.logs += log_line
|
||||||
self.import_run.save(update_fields=["logs"])
|
self.import_run.save(update_fields=["logs"])
|
||||||
|
|
||||||
|
if level == "info":
|
||||||
|
logger.info(log_line)
|
||||||
|
elif level == "warning":
|
||||||
|
logger.warning(log_line)
|
||||||
|
elif level == "error":
|
||||||
|
logger.error(log_line, exc_info=True)
|
||||||
|
|
||||||
def _update_totals(
|
def _update_totals(
|
||||||
self,
|
self,
|
||||||
field: Literal["total", "processed", "successful", "skipped", "failed"],
|
field: Literal["total", "processed", "successful", "skipped", "failed"],
|
||||||
@@ -163,6 +175,7 @@ class ImportService:
|
|||||||
transformed = transformed.replace(
|
transformed = transformed.replace(
|
||||||
transform.pattern, transform.replacement
|
transform.pattern, transform.replacement
|
||||||
)
|
)
|
||||||
|
|
||||||
elif transform.type == "regex":
|
elif transform.type == "regex":
|
||||||
if transform.exclusive:
|
if transform.exclusive:
|
||||||
transformed = re.sub(
|
transformed = re.sub(
|
||||||
@@ -172,10 +185,12 @@ class ImportService:
|
|||||||
transformed = re.sub(
|
transformed = re.sub(
|
||||||
transform.pattern, transform.replacement, transformed
|
transform.pattern, transform.replacement, transformed
|
||||||
)
|
)
|
||||||
|
|
||||||
elif transform.type == "date_format":
|
elif transform.type == "date_format":
|
||||||
transformed = datetime.strptime(
|
transformed = datetime.strptime(
|
||||||
transformed, transform.original_format
|
transformed, transform.original_format
|
||||||
).strftime(transform.new_format)
|
).strftime(transform.new_format)
|
||||||
|
|
||||||
elif transform.type == "merge":
|
elif transform.type == "merge":
|
||||||
values_to_merge = []
|
values_to_merge = []
|
||||||
for field in transform.fields:
|
for field in transform.fields:
|
||||||
@@ -188,12 +203,14 @@ class ImportService:
|
|||||||
):
|
):
|
||||||
values_to_merge.append(str(mapped_data[field[2:]]))
|
values_to_merge.append(str(mapped_data[field[2:]]))
|
||||||
transformed = transform.separator.join(values_to_merge)
|
transformed = transform.separator.join(values_to_merge)
|
||||||
|
|
||||||
elif transform.type == "split":
|
elif transform.type == "split":
|
||||||
parts = transformed.split(transform.separator)
|
parts = transformed.split(transform.separator)
|
||||||
if transform.index is not None:
|
if transform.index is not None:
|
||||||
transformed = parts[transform.index] if parts else ""
|
transformed = parts[transform.index] if parts else ""
|
||||||
else:
|
else:
|
||||||
transformed = parts
|
transformed = parts
|
||||||
|
|
||||||
elif transform.type in ["add", "subtract"]:
|
elif transform.type in ["add", "subtract"]:
|
||||||
try:
|
try:
|
||||||
source_value = Decimal(transformed)
|
source_value = Decimal(transformed)
|
||||||
@@ -442,7 +459,7 @@ class ImportService:
|
|||||||
|
|
||||||
def _coerce_type(
|
def _coerce_type(
|
||||||
self, value: str, mapping: version_1.ColumnMapping
|
self, value: str, mapping: version_1.ColumnMapping
|
||||||
) -> Union[str, int, bool, Decimal, datetime, list]:
|
) -> Union[str, int, bool, Decimal, datetime, list, None]:
|
||||||
if not value:
|
if not value:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -477,6 +494,11 @@ class ImportService:
|
|||||||
version_1.TransactionReferenceDateMapping,
|
version_1.TransactionReferenceDateMapping,
|
||||||
),
|
),
|
||||||
):
|
):
|
||||||
|
if isinstance(value, datetime):
|
||||||
|
return value.date()
|
||||||
|
elif isinstance(value, date):
|
||||||
|
return value
|
||||||
|
|
||||||
formats = (
|
formats = (
|
||||||
mapping.format
|
mapping.format
|
||||||
if isinstance(mapping.format, list)
|
if isinstance(mapping.format, list)
|
||||||
@@ -646,6 +668,151 @@ class ImportService:
|
|||||||
for row_number, row in enumerate(reader, start=1):
|
for row_number, row in enumerate(reader, start=1):
|
||||||
self._process_row(row, row_number)
|
self._process_row(row, row_number)
|
||||||
|
|
||||||
|
def _process_excel(self, file_path):
    """
    Import every configured sheet of an Excel workbook.

    Dispatches on ``self.settings.file_type``: ``"xlsx"`` is read with
    openpyxl, anything else is treated as ``.xls`` and read with xlrd.
    For each selected sheet the row at ``settings.start_row`` (1-based) is
    used as the header and every following row is handed to
    ``self._process_row`` as a ``{header: value}`` dict.

    Args:
        file_path: Path of the workbook to read.

    Raises:
        ValueError: If openpyxl or xlrd reject the file as an invalid
            workbook. The library error is kept as ``__cause__``.
        Exception: Any row-level error, unless ``settings.skip_errors`` is
            set, in which case the row is logged and counted as failed.
    """
    try:
        if self.settings.file_type == "xlsx":
            self._process_xlsx_workbook(file_path)
        else:  # xls
            self._process_xls_workbook(file_path)
    except (InvalidFileException, xlrd.XLRDError) as e:
        # Chain the library error so the root cause stays in the traceback.
        raise ValueError(
            f"Invalid {self.settings.file_type.upper()} file format: {str(e)}"
        ) from e


def _excel_sheets_to_process(self, available_sheets):
    """Return the sheet names requested by the settings ("*" means all of them)."""
    requested = self.settings.sheets
    if requested == "*":
        return list(available_sheets)
    return requested if isinstance(requested, list) else [requested]


def _process_xlsx_workbook(self, file_path):
    """Read an ``.xlsx`` workbook with openpyxl and process its rows."""
    workbook = openpyxl.load_workbook(file_path, read_only=True, data_only=True)
    try:
        start_row = self.settings.start_row
        available_sheets = workbook.sheetnames
        sheets_to_process = self._excel_sheets_to_process(available_sheets)

        # Calculate total rows. In read-only mode ``max_row`` is None when
        # the file does not record the sheet dimensions; count such sheets
        # as 0 rows instead of failing on ``None - int``.
        total_rows = sum(
            max(0, (workbook[sheet_name].max_row or 0) - start_row)
            for sheet_name in sheets_to_process
            if sheet_name in available_sheets
        )
        self._update_totals("total", value=total_rows)

        # Process sheets
        for sheet_name in sheets_to_process:
            if sheet_name not in available_sheets:
                self._log(
                    "warning",
                    f"Sheet '{sheet_name}' not found in the Excel file. Skipping.",
                )
                continue

            sheet = workbook[sheet_name]
            self._log("info", f"Processing sheet: {sheet_name}")
            headers = [str(cell.value or "") for cell in sheet[start_row]]

            data_rows = sheet.iter_rows(min_row=start_row + 1, values_only=True)
            for row_number, row in enumerate(data_rows, start=1):
                try:
                    # NOTE(review): every value is stringified here, including
                    # dates, whereas the xls reader passes datetime objects
                    # through. Confirm which behaviour is intended.
                    row_data = {
                        key: str(value) if value is not None else None
                        for key, value in zip(headers, row)
                    }
                    self._process_row(row_data, row_number)
                except Exception as e:
                    if self.settings.skip_errors:
                        self._log(
                            "warning",
                            f"Error processing row {row_number} in sheet '{sheet_name}': {str(e)}",
                        )
                        self._increment_totals("failed", value=1)
                    else:
                        raise
    finally:
        # Read-only workbooks keep the underlying file open until closed;
        # release it even when a row or sheet raises.
        workbook.close()


def _process_xls_workbook(self, file_path):
    """Read a legacy ``.xls`` workbook with xlrd and process its rows."""
    workbook = xlrd.open_workbook(file_path)
    start_row = self.settings.start_row
    available_sheets = workbook.sheet_names()
    sheets_to_process = self._excel_sheets_to_process(available_sheets)

    # Calculate total rows
    total_rows = sum(
        max(0, workbook.sheet_by_name(sheet_name).nrows - start_row)
        for sheet_name in sheets_to_process
        if sheet_name in available_sheets
    )
    self._update_totals("total", value=total_rows)

    # Process sheets
    for sheet_name in sheets_to_process:
        if sheet_name not in available_sheets:
            self._log(
                "warning",
                f"Sheet '{sheet_name}' not found in the Excel file. Skipping.",
            )
            continue

        sheet = workbook.sheet_by_name(sheet_name)
        self._log("info", f"Processing sheet: {sheet_name}")
        # xlrd rows are 0-based, so the 1-based header row is start_row - 1.
        headers = [
            str(sheet.cell_value(start_row - 1, col) or "")
            for col in range(sheet.ncols)
        ]

        for row_index in range(start_row, sheet.nrows):
            # 1-based position among the data rows. This is the number given
            # to _process_row, and is also used in the error log so both
            # refer to the same row (as the xlsx reader does).
            row_number = row_index - start_row + 1
            try:
                row_data = {}
                for col, key in enumerate(headers):
                    cell_type = sheet.cell_type(row_index, col)
                    cell_value = sheet.cell_value(row_index, col)

                    if cell_type == xlrd.XL_CELL_DATE:
                        # Convert Excel date to Python datetime
                        try:
                            row_data[key] = datetime(
                                *xlrd.xldate_as_tuple(cell_value, workbook.datemode)
                            )
                        except Exception:
                            # Best effort: if date conversion fails, fall
                            # back to the raw cell value as a string.
                            row_data[key] = (
                                str(cell_value) if cell_value is not None else None
                            )
                    elif cell_value is None:
                        row_data[key] = None
                    else:
                        row_data[key] = str(cell_value)

                self._process_row(row_data, row_number)
            except Exception as e:
                if self.settings.skip_errors:
                    self._log(
                        "warning",
                        f"Error processing row {row_number} in sheet '{sheet_name}': {str(e)}",
                    )
                    self._increment_totals("failed", value=1)
                else:
                    raise
|
||||||
|
|
||||||
def _validate_file_path(self, file_path: str) -> str:
|
def _validate_file_path(self, file_path: str) -> str:
|
||||||
"""
|
"""
|
||||||
Validates that the file path is within the allowed temporary directory.
|
Validates that the file path is within the allowed temporary directory.
|
||||||
@@ -668,8 +835,10 @@ class ImportService:
|
|||||||
self._log("info", "Starting import process")
|
self._log("info", "Starting import process")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if self.settings.file_type == "csv":
|
if isinstance(self.settings, version_1.CSVImportSettings):
|
||||||
self._process_csv(file_path)
|
self._process_csv(file_path)
|
||||||
|
elif isinstance(self.settings, version_1.ExcelImportSettings):
|
||||||
|
self._process_excel(file_path)
|
||||||
|
|
||||||
self._update_status("FINISHED")
|
self._update_status("FINISHED")
|
||||||
self._log(
|
self._log(
|
||||||
|
|||||||
@@ -27,3 +27,5 @@ simpleeval~=1.0.0
|
|||||||
pydantic~=2.10.5
|
pydantic~=2.10.5
|
||||||
PyYAML~=6.0.2
|
PyYAML~=6.0.2
|
||||||
mistune~=3.1.1
|
mistune~=3.1.1
|
||||||
|
openpyxl~=3.1
|
||||||
|
xlrd~=2.0
|
||||||
|
|||||||
Reference in New Issue
Block a user