Hello! I am working on a wallpaper site project. I am developing a feature where the admin can perform a “Bulk Upload” by uploading a zip file full of wallpapers. I’ve written a form that does this; below is a simplified version:
class BulkUploadForm(forms.Form):
    """Admin form that accepts a single zip archive of wallpapers.

    After validation, ``cleaned_data['zip_file']`` holds an opened
    ``zipfile.ZipFile`` wrapping the uploaded file rather than the raw upload.
    """

    zip_file = forms.FileField(
        label='Wallpapers Zip',
        required=True,
        # For a FileField this bounds the length of the file *name*,
        # not the size of the upload.
        max_length=64,
        allow_empty_file=False,
        help_text=f"Upload a zip file containing wallpapers that does not exceed {settings.MAX_BULK_UPLOAD_SIZE // mb} MB."
    )

    def clean_zip_file(self) -> zipfile.ZipFile:
        """Validate the upload and return it opened as a ``zipfile.ZipFile``.

        Checks, in order: the file extension, the presence of an underlying
        file object, the advertised size limit, that the archive can be
        opened, and that every member passes ``ZipFile.testzip()``.

        Raises:
            ValidationError: if any of the checks above fails.
        """
        uploaded_file = cast(UploadedFile, self.cleaned_data['zip_file'])

        # Compare case-insensitively so that e.g. "WALLS.ZIP" is accepted.
        if (
            uploaded_file.name is None
            or PurePath(uploaded_file.name).suffix.lower() != '.zip'
        ):
            raise ValidationError(
                "Only '.zip' extension is allowed.",
                code='invalid_zip_file_extension'
            )

        if uploaded_file.file is None:
            raise ValidationError(
                "zip file is required.",
                code='zip_file_required'
            )

        # Enforce the limit that help_text advertises.
        # NOTE(review): assumes settings.MAX_BULK_UPLOAD_SIZE is expressed in
        # bytes (help_text divides it by `mb` to display MB) - confirm.
        upload_size = uploaded_file.size
        if upload_size is not None and upload_size > settings.MAX_BULK_UPLOAD_SIZE:
            raise ValidationError(
                "Zip file exceeds the maximum allowed size.",
                code='zip_file_too_large'
            )

        try:
            zip_file = zipfile.ZipFile(uploaded_file)
        except zipfile.BadZipfile as err:
            # Chain the original error so the root cause stays in tracebacks.
            raise ValidationError(
                'Invalid zip file.',
                code='invalid_zip_file'
            ) from err

        # testzip() reads every member to verify CRCs and headers, so its cost
        # grows with the archive size; it returns the first bad member's name.
        if (bad_file := zip_file.testzip()) is not None:
            raise ValidationError(
                "Bad file found in zip: %(bad_file)s",
                code='bad_file_found_in_zip_file',
                params=dict(bad_file=bad_file)
            )

        return zip_file
This project is open source and hosted on my Github account.
I would like some suggestions from the community and optimization tips for this type of “Large File Task”, because the zip file will typically be around 300 to 400 MB in my application.
Please tell me if the above implementation contains any bugs, memory leaks, or security mistakes.
One more question: at exactly which point is the underlying uploaded file (either TemporaryUploadedFile or InMemoryUploadedFile) destroyed and its resources released, assuming that I do something in a view like the one below:
def bulk_upload(request: HttpRequest) -> HttpResponse:
    """Render the bulk-upload form and, on a valid POST, start the upload.

    A non-POST request renders an empty form. An invalid POST re-renders the
    bound form with its errors. A valid POST hands the validated archive to
    ``BulkUploadProcess.upload_procedures.bulk_upload`` and renders the
    success page.
    """
    if request.method != 'POST':
        return render(request, 'app/bulk_upload.html', dict(form=BulkUploadForm()))

    form = BulkUploadForm(request.POST, request.FILES)
    if not form.is_valid():
        return render(request, 'app/bulk_upload.html', dict(form=form))

    archive = cast(zipfile.ZipFile, form.cleaned_data['zip_file'])
    # NOTE(review): the archive wraps the request's uploaded file. Django is
    # expected to close uploaded files when the request finishes (and a
    # TemporaryUploadedFile's temp file is removed on close) - confirm that
    # anything scheduled here does not need the upload after this view returns.
    BulkUploadProcess.upload_procedures.bulk_upload(zipfile.Path(archive))
    return render(request, 'success.html')
And the bulk_upload function is the following:
def bulk_upload(self, zip_file_path: ZipPath) -> "BulkUploadProcess":
    """Dispatch one task chain per JPEG in the archive and record the group.

    Every archive entry matching a JPEG extension (searched recursively) gets
    a chain of ``save_wallpaper`` followed by
    ``generate_and_save_dummy_wallpaper``. The chains are applied as a single
    group with ``countdown=10``, the group result is saved, and a
    ``BulkUploadProcess`` row keyed by the group id is created and returned.
    """
    jpeg_suffixes = get_file_extensions_for_image_format(ImageFormat.JPEG)
    wallpaper_entries = [
        entry
        for suffix in jpeg_suffixes
        for entry in zip_file_path.glob(f'**/*{suffix}')
    ]

    # NOTE(review): `entry.root.filename` is the ZipFile's `filename`; when the
    # ZipFile was opened from a file object this is presumably that object's
    # `name`, not necessarily a path the worker can open - verify.
    per_file_chains = (
        chain(
            save_wallpaper.s(entry.at, str(entry.root.filename)),
            generate_and_save_dummy_wallpaper.s(),
        )
        for entry in wallpaper_entries
    )
    pending_group = cast(GroupResult, group(per_file_chains)(countdown=10))

    pending_group.save()  # type: ignore[attr-defined]
    group_id = cast(str, pending_group.id)  # type: ignore[attr-defined]
    return BulkUploadProcess.objects.create(uuid=uuid.UUID(group_id))
The bulk_upload function uses Celery to run a group of chains. Each chain has two tasks: save_wallpaper and generate_and_save_dummy_wallpaper. The chain first saves the wallpaper to the database, then prepares a “dummy” wallpaper to show on the frontend. The dummy wallpaper is smaller and thus faster to load; for example, the original wallpaper can be 4-5 MB while the dummy is 400 to 500 KB. When a user downloads a wallpaper, they get the original wallpaper.