I need to update a field on a model that has around 30,000 rows. I created a management command to update the value of that field for each row, but the server consumed a huge amount of resources and unfortunately went down.

So I decided to chunk my data. Here is my implementation. Please share your knowledge on what would be a better way to handle big data with limited server resources.
```python
from cursor_pagination import CursorPaginator


def chunked_queryset_iterator(queryset, size, *, ordering=("id",)):
    """Yield rows from the queryset in chunks of `size`, using cursor-based pagination."""
    pager = CursorPaginator(queryset, ordering)
    after = None
    while True:
        page = pager.page(after=after, first=size)
        if page:
            yield from page.items
        else:
            return
        if not page.has_next:
            break
        # Take the last item; the next page starts after this one.
        after = pager.cursor(instance=page[-1])
```
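For reference, a minimal sketch of how the helper is consumed (chunk size 40, matching the command below); only one page of model instances is materialized at a time:

```python
# Stream the queryset 40 rows at a time instead of loading all 30,000.
for application in chunked_queryset_iterator(JobApplication.objects.all(), 40):
    print(application.pk)
```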
And here is the management command:
```python
from django.core.management.base import BaseCommand
from django.db import transaction
from tqdm import tqdm

# App-specific imports elided: JobApplication (model) and
# get_profile_match_score (scoring helper) come from my own app.

BATCH_SIZE = 40


class Command(BaseCommand):
    help = "Calculate profile match score"

    def handle(self, *args, **options):
        with transaction.atomic():
            job_applications = chunked_queryset_iterator(
                JobApplication.objects.prefetch_related(
                    "skills", "candidate__skills", "candidate__userdetails"
                ).filter(),
                BATCH_SIZE,
            )
            applications_to_update = []
            self.stdout.write(self.style.WARNING("Applying score to candidates..."))
            for application in tqdm(job_applications):
                score = get_profile_match_score(
                    job=application.job,
                    candidate=application.candidate,
                    candidate_resume=application.resume,
                    job_application=application,
                )
                application.resume_score = score
                applications_to_update.append(application)
                # Bulk-update every BATCH_SIZE records.
                if len(applications_to_update) >= BATCH_SIZE:
                    JobApplication.objects.bulk_update(
                        applications_to_update, ["resume_score"]
                    )
                    applications_to_update = []
            # Flush the remaining records that didn't fill a full batch.
            self.stdout.write(self.style.WARNING("Updating score to database..."))
            JobApplication.objects.bulk_update(
                applications_to_update, ["resume_score"]
            )
```
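As an aside on the flushing logic: Django's `bulk_update` also accepts a `batch_size` argument, so the final flush could let Django split the UPDATE statements itself. A minimal sketch of that variant:

```python
# Django splits this call into UPDATE statements covering at most
# BATCH_SIZE objects each; the Python list itself is not chunked.
JobApplication.objects.bulk_update(
    applications_to_update, ["resume_score"], batch_size=BATCH_SIZE
)
```

Note that `batch_size` only caps the size of each SQL statement; the objects still all live in memory, so batching the list manually in the loop is what actually bounds memory use.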