I have an application that scrapes data from various IT infrastructure sources (VMware, storage, backup) and inventories all the different things that would be useful to have at a quick reference. I broke out everything into threads, but I’m realizing that the Python GIL is slowing things down when collections are running from more than 2 types of sources (ex: VMware and storage), and it’s making the views sluggish to load.
My thought was that if I can spin up the main collection threads as a process instead, it would workaround the Python GIL and hopefully make the web interface part a bit snappier since it wouldn’t have to wait on 300 or so other threads that are also doing stuff. What happens is that since the spawned processes aren’t attached to Django, the code that accesses the models inside of those processes throws
django.core.exceptions.AppRegistryNotReady: Apps aren't loaded yet. since it’s trying to access the models/DB without Django being started in that process.
Effectively, I want these spawned processes to be able to interact with Django, the models, and the DB since they are doing the heavy lifting of the application.
File "C:\Program Files\Python310\lib\multiprocessing\spawn.py", line 116, in spawn_main exitcode = _main(fd, parent_sentinel) File "C:\Program Files\Python310\lib\multiprocessing\spawn.py", line 126, in _main self = reduction.pickle.load(from_parent) File "C:\Users\jfort\PycharmProjects\VmInventory\VMwareInventory\Processes\Vcenter.py", line 5, in <module> from VMwareInventory.VMwareRest.VMwareRest import RESTVCenter File "C:\Users\jfort\PycharmProjects\VmInventory\VMwareInventory\VMwareRest\VMwareRest.py", line 19, in <module> from VMwareInventory.models import * File "C:\Users\jfort\PycharmProjects\VmInventory\VMwareInventory\models\__init__.py", line 2, in <module> from .Base.cost import Cost File "C:\Users\jfort\PycharmProjects\VmInventory\VMwareInventory\models\Base\cost.py", line 2, in <module> from .base import BaseModel File "C:\Users\jfort\PycharmProjects\VmInventory\VMwareInventory\models\Base\base.py", line 4, in <module> class BaseModel(models.Model): File "C:\Program Files\Python310\lib\site-packages\django\db\models\base.py", line 127, in __new__ app_config = apps.get_containing_app_config(module) File "C:\Program Files\Python310\lib\site-packages\django\apps\registry.py", line 260, in get_containing_app_config self.check_apps_ready() File "C:\Program Files\Python310\lib\site-packages\django\apps\registry.py", line 138, in check_apps_ready raise AppRegistryNotReady("Apps aren't loaded yet.") django.core.exceptions.AppRegistryNotReady: Apps aren't loaded yet.
from django.apps import AppConfig from VMwareInventory.settings_local import environment from VMwareInventory.threading.initial_settings import set_default_database_items class VmwareinventoryConfig(AppConfig): name = 'VMwareInventory' def ready(self): set_default_database_items() if environment == "prod": from .threading.scraping import TimerScrape TimerScrape()
threading\scraping.py (Where the TimerScrape() function lives):
# Python imports: from threading import Thread, enumerate from multiprocessing import Queue from datetime import datetime, timezone from time import sleep # from queue import Queue # Local imports: from VMwareInventory.models import Vcenter, StorageSystem, BackupSystem, Setting, Platform, Application, Function, \ Region, Site, Environment, TagsReport from VMwareInventory.reports.tags_report import TagGenericReport from VMwareInventory.reports.missing_tags_report import MissingTagsReport from VMwareInventory.Processes.Backup import BackupWorker from VMwareInventory.Processes.Storage import StorageWorker from VMwareInventory.Processes.Vcenter import VcenterWorker # Django imports: from django.db import connection class TimerScrape(Thread): def __init__(self): Thread.__init__(self) self.name = "timerscrape_thread" self.vcenter_queue = Queue() self.vcenter_list =  self.storage_queue = Queue() self.storage_list =  self.backup_queue = Queue() self.backup_list =  self.vcenter_worker = 0 self.storage_worker = 0 self.backup_worker = 0 self.daemon = True self.start() def run(self): sleep(60) while True: vcenters = Vcenter.objects.all() netapps = StorageSystem.objects.all() rubriks = BackupSystem.objects.all() vcenter_threads = Setting.objects.get(name='vcenter_scraping_threads') storage_threads = Setting.objects.get(name='storage_scraping_threads') backup_threads = Setting.objects.get(name='backup_scraping_threads') wait_hours = int(Setting.objects.get(name='scrape_wait_time').value) connection.close() wait_seconds = wait_hours * 3600 current_time = datetime.now(timezone.utc) # get list of current threading and their names threads = enumerate() # print(threads) thread_list =  worker_list =  for thread in threads: if thread.name == "vCenter_worker_thread": worker_list.append(thread) elif thread.name == "storage_worker_thread": worker_list.append(thread) elif thread.name == "backup_worker_thread": worker_list.append(thread) else: thread_list.append(thread.name) self.vcenter_worker = 0 self.storage_worker = 0 self.backup_worker = 0 for thread in worker_list: if thread.name == "vCenter_worker_thread": self.vcenter_worker += 1 elif thread.name == "storage_worker_thread": self.storage_worker += 1 elif thread.name == "backup_worker_thread": self.backup_worker += 1 while self.vcenter_worker < int(vcenter_threads.value): VcenterWorker(self.vcenter_queue, self.vcenter_list) self.vcenter_worker += 1 while self.storage_worker < int(storage_threads.value): StorageWorker(self.storage_queue, self.storage_list) self.storage_worker += 1 while self.backup_worker < int(backup_threads.value): BackupWorker(self.backup_queue, self.backup_list) self.backup_worker += 1
Processes\Vcenter.py (where VcenterWorker lives)
# Python imports: from multiprocessing import Process # Local imports: from VMwareInventory.VMwareRest.VMwareRest import RESTVCenter class VcenterWorker(Process): def __init__(self, queue, vcenter_list): Process.__init__(self) self.queue = queue self.list = vcenter_list self.name = "vCenter_worker_process" self.start() def run(self): while True: vcenter = self.queue.get() self.list.remove(vcenter) self.vcscrape(vcenter.name, vcenter.user, vcenter.password) self.queue.task_done() @staticmethod def vcscrape(name, user, pwd): vc_scrape = RESTVCenter(name, user, pwd) vc_scrape.join() return