i have this query that takes a lot of time(as it retrieve 10millions records). I tried to convert the sql to MV then use it in qs but this breaks the app as there are many other fields that should be loaded. I s there a way to tell the qs to load the other fields normaly beside this MV
Welcome @ahmadMousa !
Please post the models being used and the view containing the query.
(What are you referring to by the term “MV”?)
Are you really looking to process 10,000,000 records in your view? Notice the last clause in that select statement - LIMIT 25
. This query is only going to return 25 rows.
it is an admin view class im trying to stop using typesense and move to postgres totally
by MV i mean materialized view
class CapturedWorkAdmin(admin.ModelAdmin):
form = CapturedWorkAdminForm
change_form_template = 'admin/work/change_form.html'
list_display = [
'song_title', 'programme_title', 'programme_sub_title', 'episode_number', 'usage_date', 'distribution_title',
'licensee_title', 'music_type', 'status'
]
search_fields = [
'song_title', 'programme_title', 'portal_work_id', 'composers_list', 'external_publishers_list',
'payee_publishers__publisher__society_record_id', 'payee_writers__writer__society_record_id',
]
typesense_search_fields = [
'song_title', 'programme_title', 'programme_sub_title', 'composers_list', 'external_publishers_list'
]
readonly_fields = [
'cue_sheet', 'distribution', 'history', 'status', 'digital_service_provider', 'isrc', 'iswc', 'rate', 'nett',
'original_music_type', 'composers_list', 'external_publishers_list', 'artists_list', 'percentage',
'date_aired', 'total_duration', 'gross', 'country', 'artists', 'notes', 'record_company', 'minutes_original',
'seconds_original', 'tune_code', 'confidence_score', 'payee_source_link', 'has_payee', 'payee_source_details',
'total_duration_auto_populated', 'date_created', 'date_updated', 'date_payee_manually_updated',
'date_payee_automatically_updated', 'number_of_uses', 'hours_original'
]
list_filter = [
'distribution',
'digital_service_provider',
MusicTypeFilter,
'status',
ProgrammeTitleFilter,
ProgrammeSubTitleFilter,
EpisodeNumberFilter,
HasPayeeFilter,
HasUsageDateFilter,
HasPortalWorkIDFilter,
('usage_date', DateRangeFilter),
]
inlines = [PayToWriterInline, PayToPublisherInline]
fieldsets = (
('General', {
'fields': (
'date_created', 'date_updated', 'date_payee_manually_updated', 'date_payee_automatically_updated',
'status', 'has_payee', 'confidence_score', 'payee_source_link',
)
}),
('Work Details', {
'classes': ('collapse',),
'fields': (
'song_title', 'composers', 'composers_list', 'external_publishers',
'external_publishers_list', 'artists', 'iswc', 'isrc'
)
}),
('Distribution Details', {
'classes': ('collapse',),
'fields': (
'distribution', 'cue_sheet', 'digital_service_provider', 'digital_service_provider_channel', 'channel',
'episode_number', 'season_number', 'programme_title', 'programme_sub_title', 'original_music_type',
'music_type', 'country', 'date_aired', 'usage_date', 'start_date', 'end_date', 'record_company',
'library_owner', 'catalog_number', 'commissioned_owners', 'hours_original', 'minutes_original',
'seconds_original',
)
}),
('Payment Details', {
'fields': ('hours', 'minutes', 'seconds', 'total_duration', 'total_duration_auto_populated',
'number_of_uses', 'rate', 'units', 'gross', 'nett'),
}),
)
list_per_page = 25
actions = ['update_music_type_and_payee', 'approve_captured_works', 'keep_discarded_works', 'discard_works']
@admin.display(description='Distribution')
def distribution_title(self, obj):
return obj.distribution.title
@admin.display(description='Licensee')
def licensee_title(self, obj):
return obj.licensee.title
"""
def get_search_results(self, request, queryset, search_term):
Override the default search to use the materialized view.
if not search_term:
return queryset, False # Return default queryset if no search term
# Query the Materialized View
mv_results = MV_SEARCH.objects.filter(
Q(song_title__icontains=search_term) |
Q(programme_title__icontains=search_term) |
Q(composers_list_text__icontains=search_term) |
Q(external_publishers_list_text__icontains=search_term)
)
# Get matching IDs from the MV
matching_ids = list(mv_results.values_list("id", flat=True))
# Filter the main model using IDs from the MV
queryset = CapturedWork.objects.filter(id__in=matching_ids)
return queryset, False
"""
def get_queryset(self, request):
qs = super().get_queryset(request)
#qs = qs.select_related('distribution', 'digital_service_provider', 'cue_sheet')
#qs = qs.prefetch_related('payee_writers', 'payee_publishers')
qs = qs.only('song_title', 'programme_title', 'programme_sub_title', 'episode_number',
'usage_date', 'music_type', 'status')
return qs
def get_actions(self, request):
actions = super().get_actions(request)
if request.user.email.lower() in settings.SIEGE_STAFF_EMAILS:
actions['get_payees'] = (self.get_payees.__func__, 'get_payees', self.get_payees.short_description)
return actions
def has_change_permission(self, request, obj=None):
if obj and obj.status in [CapturedWork.STATUS_PROCESSED, CapturedWork.STATUS_NOT_PROCESSED]:
return False
if obj and obj.licensee_settings.block_work_updates:
return False
return super().has_change_permission(request, obj=obj)
def has_view_permission(self, request, obj=None):
if obj and obj.status == CapturedWork.STATUS_NOT_PROCESSED:
return request.user.email.lower() in settings.SIEGE_STAFF_EMAILS
return super().has_view_permission(request, obj=obj)
def has_add_permission(self, request):
return False
def has_approve_permission(self, request, obj=None):
return request.user.has_perm("works.can_approve_work")
def lookup_allowed(self, lookup, value):
if lookup in ['payee_publishers__publisher__id', 'payee_writers__writer__id']:
return True
return super().lookup_allowed(lookup, value)
def save_related(self, request, form, formsets, change):
if not change:
return super().save_related(request, form, formsets, change)
total_shares = 0
work = form.instance
payee_writers = []
payee_publishers = []
for formset in formsets:
if formset.model == PayToWriter:
total_shares += sum(
[paid_writer.get('share', 0)
for paid_writer in formset.cleaned_data if not paid_writer.get('DELETE', False)]
)
payee_writers.extend(
[paid_writer['writer']
for paid_writer in formset.cleaned_data if paid_writer and not paid_writer.get('DELETE', False)]
)
if formset.model == PayToPublisher:
total_shares += sum(
[paid_publisher.get('share', 0)
for paid_publisher in formset.cleaned_data if not paid_publisher.get('DELETE', False)]
)
payee_publishers.extend(
[paid_publisher['publisher']
for paid_publisher in formset.cleaned_data if paid_publisher and not paid_publisher.get('DELETE', False)]
)
super().save_related(request, form, formsets, change)
# m2m signals not fired so we update manually
work.refresh_from_db()
CapturedWorkCollection(work, update_fields=['publisher_payees', 'writer_payees']).update()
# Exclude Works That Should Not be updated
similar_works = work.similar_works.exclude(
status__in=[CapturedWork.STATUS_PROCESSED, CapturedWork.STATUS_NOT_PROCESSED]
).exclude(licensee_settings__block_work_updates=True)
# DELETE PAYEES FROM SIMILAR WORKS
PayToWriter.objects.filter(~Q(writer__in=payee_writers), work__in=similar_works).delete()
PayToPublisher.objects.filter(~Q(publisher__in=payee_publishers), work__in=similar_works).delete()
# We can't do this check in form validation because it spans different inline models
if not 99.99 <= total_shares <= 100.01:
message = f'The sum of the shares {total_shares} is not within the required limits (99.99 - 100.01)'
work.history = get_history_message(message) + work.history
work.status = CapturedWork.STATUS_ERROR
work.has_payee = False
work.save(update_fields=['has_payee', 'status', 'history'])
self.message_user(request, message, messages.ERROR)
else:
writer_society_record_ids = [writer.society_record_id for writer in payee_writers]
publisher_society_record_ids = [publisher.society_record_id for publisher in payee_publishers]
new_payees_society_record_ids = sorted(list(set(writer_society_record_ids + publisher_society_record_ids)))
if work._internal_payee_society_record_ids == new_payees_society_record_ids:
date_payee_manually_updated = None
else:
date_payee_manually_updated = timezone.now()
writer_payees = []
publisher_payees = []
update_payees_in_save_related(publisher_payees, work, writer_payees, date_payee_manually_updated)
# UPDATE CURRENT WORK AND ITS SIMILAR WORKS
work_ids = [work.id]
work_ids.extend(list(similar_works.values_list('id', flat=True)))
works = CapturedWork.objects.filter(id__in=work_ids)
update_status_of_works(works, final_status=CapturedWork.STATUS_PENDING)
def save_model(self, request, obj, form, change):
if 'music_type' in form.changed_data:
similar_works = obj.similar_works.exclude(
status__in=[CapturedWork.STATUS_PROCESSED, CapturedWork.STATUS_NOT_PROCESSED]
).exclude(licensee_settings__block_work_updates=True)
similar_works.update(music_type=obj.music_type)
works_ids = list(similar_works.values_list('id', flat=True))
bulk_update_work_status.apply_async(args=[works_ids])
if obj.total_duration == 0 or obj.total_duration_auto_populated:
duration_populated = False
if obj.music_type == CapturedWork.MUSIC_TYPE_PRODUCTION:
obj.seconds = 60
duration_populated = True
elif obj.music_type in [CapturedWork.MUSIC_TYPE_COMMISSIONED, CapturedWork.MUSIC_TYPE_COMMERCIAL]:
obj.seconds = 30
duration_populated = True
if duration_populated:
obj.total_duration = obj.seconds
obj.total_duration_auto_populated = True
obj.units = math.ceil(obj.total_duration / 30)
if 'composers' in form.changed_data:
obj.composers_list = clean_names(obj.composers, composers_cleaning_pattern)
if 'external_publishers' in form.changed_data:
obj.external_publishers_list = clean_names(obj.external_publishers, ",|;|:")
# This should be done after the duration auto population has been handled above
duration_changed = 'minutes' in form.changed_data or 'seconds' in form.changed_data
if duration_changed:
obj.total_duration = int(obj.minutes * 60 + obj.seconds)
obj.units = math.ceil(obj.total_duration / 30)
if form.has_changed():
has_error = False
message = ''
if obj.music_type not in CapturedWork.VALID_MUSIC_TYPES:
has_error = True
message = get_history_message('The work was put to an error state because it has no music type')
else:
obj.status = CapturedWork.STATUS_PENDING
if has_error:
obj.status = CapturedWork.STATUS_ERROR
if message:
obj.history = message + obj.history
super().save_model(request, obj, form, change)
def response_change(self, request, obj):
if obj and "_approve" in request.POST:
if not request.user.has_perm("works.can_approve_work"):
raise PermissionDenied('You are not allowed to approve works')
work_query = CapturedWork.objects.filter(id=obj.id)
work = work_query.first()
update_status_of_works(work_query)
work.refresh_from_db()
work.history = get_history_message(f'Work changes approved by: {request.user}') + work.history
work.save(update_fields=["status", "history"])
self.message_user(request, f"The work {obj} was approved")
return super().response_change(request, obj)
# pagination optimization
def changelist_view(self, request, extra_context=None):
if 'q' in request.GET:
search_term = request.GET['q']
queryset = self.get_search_results(request, self.get_queryset(request), search_term)[0]
self.queryset = queryset
return super().changelist_view(request, extra_context)
@admin.action(description='Update Music-Type and Payee', permissions=['change'])
def update_music_type_and_payee(self, request, queryset):
work_ids = queryset.values_list('id', flat=True)
work_ids = '\n'.join(map(str, work_ids))
work_bulk_update = WorkBulkUpdate.objects.create(work_ids=work_ids)
redirect_url = reverse('admin:works_workbulkupdate_change', args=(work_bulk_update.id,))
return HttpResponseRedirect(redirect_url)
@admin.action(description='Approve Captured Works', permissions=['approve'])
def approve_captured_works(self, request, queryset):
work_ids = list(queryset.filter(status=CapturedWork.STATUS_PENDING).values_list('id', flat=True))
bulk_update_work_status.apply_async(
args=[work_ids],
kwargs={'status': CapturedWork.STATUS_READY, 'message': f'Work changes approved by: {request.user}'}
)
self.message_user(
request, 'The selected works that were in a pending status have been approved.', messages.SUCCESS
)
return redirect('admin:works_capturedwork_changelist')
@admin.action(description='Get Payee(s)', permissions=['change'])
def get_payees(self, request, queryset):
if request.user.email.lower() not in settings.SIEGE_STAFF_EMAILS:
raise PermissionDenied('You cannot perform this action')
work_ids = list(queryset.values_list('id', flat=True))
get_captured_works_payees_admin_action.delay(work_ids)
self.message_user(
request,
'The task to allocated payees for the selected works running in the background. '
'Please check after some time for results.',
messages.SUCCESS
)
return redirect('admin:works_capturedwork_changelist')
@admin.action(description='Keep Discarded Work(s)', permissions=['change'])
def keep_discarded_works(self, request, queryset):
if request.user.email.lower() not in settings.SIEGE_STAFF_EMAILS:
raise PermissionDenied('You cannot perform this action')
if queryset.exclude(status=CapturedWork.STATUS_DISCARDED).exists():
self.message_user(request, 'Please only select works in a discarded status.', messages.ERROR)
return redirect('admin:works_capturedwork_changelist')
work_ids = list(queryset.values_list('id', flat=True))
message = get_history_message(f'Work was removed from discarded state by: {request.user}')
queryset.update(status=CapturedWork.STATUS_NOT_PROCESSED, history=Concat(Value(message), 'history'))
transaction.on_commit(lambda: update_status_of_works_task.delay(work_ids))
self.message_user(
request,
'The task to keep the selected discarded works is running in the background. '
'Please check after some time for results.',
messages.SUCCESS
)
return redirect('admin:works_capturedwork_changelist')
@admin.action(description='Discard Work(s)', permissions=['change'])
def discard_works(self, request, queryset):
if request.user.email.lower() not in settings.SIEGE_STAFF_EMAILS:
raise PermissionDenied('You cannot perform this action')
message = get_history_message(f'Work was put to discarded state by: {request.user}')
queryset.exclude(status=CapturedWork.STATUS_DISCARDED).update(
status=CapturedWork.STATUS_DISCARDED, history=Concat(Value(message), 'history')
)
self.message_user(request, 'All the selected works have been marked as discarded.', messages.SUCCESS)
return redirect('admin:works_capturedwork_changelist')
@admin.display(description='rate')
def formatted_rate(self, obj):
return f'{obj.rate:.2f}'
Side note: To fence off code, you need to use three backticks - `, not quotes - " or apostrophes - ' (I’ve taken the liberty of fixing your post.)
So what is this view you’re running? Are you sure the delay is in the Django ORM query and not the view?
What does the output of an explain analyze
show if you only call the view?
How does that differ from the full query being issued by the ORM?