I have an application that scrapes data from various IT infrastructure sources (VMware, storage, backup) and inventories all the different things that would be useful to have at a quick reference. I broke out everything into threads, but I’m realizing that the Python GIL is slowing things down when collections are running from more than 2 types of sources (ex: VMware and storage), and it’s making the views sluggish to load.
My thought was that if I can spin up the main collection threads as a process instead, it would workaround the Python GIL and hopefully make the web interface part a bit snappier since it wouldn’t have to wait on 300 or so other threads that are also doing stuff. What happens is that since the spawned processes aren’t attached to Django, the code that accesses the models inside of those processes throws django.core.exceptions.AppRegistryNotReady: Apps aren't loaded yet.
since it’s trying to access the models/DB without Django being started in that process.
Effectively, I want these spawned processes to be able to interact with Django, the models, and the DB since they are doing the heavy lifting of the application.
Code references:
Stacktrace:
File "C:\Program Files\Python310\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "C:\Program Files\Python310\lib\multiprocessing\spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
File "C:\Users\jfort\PycharmProjects\VmInventory\VMwareInventory\Processes\Vcenter.py", line 5, in <module>
from VMwareInventory.VMwareRest.VMwareRest import RESTVCenter
File "C:\Users\jfort\PycharmProjects\VmInventory\VMwareInventory\VMwareRest\VMwareRest.py", line 19, in <module>
from VMwareInventory.models import *
File "C:\Users\jfort\PycharmProjects\VmInventory\VMwareInventory\models\__init__.py", line 2, in <module>
from .Base.cost import Cost
File "C:\Users\jfort\PycharmProjects\VmInventory\VMwareInventory\models\Base\cost.py", line 2, in <module>
from .base import BaseModel
File "C:\Users\jfort\PycharmProjects\VmInventory\VMwareInventory\models\Base\base.py", line 4, in <module>
class BaseModel(models.Model):
File "C:\Program Files\Python310\lib\site-packages\django\db\models\base.py", line 127, in __new__
app_config = apps.get_containing_app_config(module)
File "C:\Program Files\Python310\lib\site-packages\django\apps\registry.py", line 260, in get_containing_app_config
self.check_apps_ready()
File "C:\Program Files\Python310\lib\site-packages\django\apps\registry.py", line 138, in check_apps_ready
raise AppRegistryNotReady("Apps aren't loaded yet.")
django.core.exceptions.AppRegistryNotReady: Apps aren't loaded yet.
apps.py:
from django.apps import AppConfig
from VMwareInventory.settings_local import environment
from VMwareInventory.threading.initial_settings import set_default_database_items
class VmwareinventoryConfig(AppConfig):
name = 'VMwareInventory'
def ready(self):
set_default_database_items()
if environment == "prod":
from .threading.scraping import TimerScrape
TimerScrape()
threading\scraping.py (Where the TimerScrape() function lives):
# Python imports:
from threading import Thread, enumerate
from multiprocessing import Queue
from datetime import datetime, timezone
from time import sleep
# from queue import Queue
# Local imports:
from VMwareInventory.models import Vcenter, StorageSystem, BackupSystem, Setting, Platform, Application, Function, \
Region, Site, Environment, TagsReport
from VMwareInventory.reports.tags_report import TagGenericReport
from VMwareInventory.reports.missing_tags_report import MissingTagsReport
from VMwareInventory.Processes.Backup import BackupWorker
from VMwareInventory.Processes.Storage import StorageWorker
from VMwareInventory.Processes.Vcenter import VcenterWorker
# Django imports:
from django.db import connection
class TimerScrape(Thread):
def __init__(self):
Thread.__init__(self)
self.name = "timerscrape_thread"
self.vcenter_queue = Queue()
self.vcenter_list = []
self.storage_queue = Queue()
self.storage_list = []
self.backup_queue = Queue()
self.backup_list = []
self.vcenter_worker = 0
self.storage_worker = 0
self.backup_worker = 0
self.daemon = True
self.start()
def run(self):
sleep(60)
while True:
vcenters = Vcenter.objects.all()
netapps = StorageSystem.objects.all()
rubriks = BackupSystem.objects.all()
vcenter_threads = Setting.objects.get(name='vcenter_scraping_threads')
storage_threads = Setting.objects.get(name='storage_scraping_threads')
backup_threads = Setting.objects.get(name='backup_scraping_threads')
wait_hours = int(Setting.objects.get(name='scrape_wait_time').value)
connection.close()
wait_seconds = wait_hours * 3600
current_time = datetime.now(timezone.utc)
# get list of current threading and their names
threads = enumerate()
# print(threads)
thread_list = []
worker_list = []
for thread in threads:
if thread.name == "vCenter_worker_thread":
worker_list.append(thread)
elif thread.name == "storage_worker_thread":
worker_list.append(thread)
elif thread.name == "backup_worker_thread":
worker_list.append(thread)
else:
thread_list.append(thread.name)
self.vcenter_worker = 0
self.storage_worker = 0
self.backup_worker = 0
for thread in worker_list:
if thread.name == "vCenter_worker_thread":
self.vcenter_worker += 1
elif thread.name == "storage_worker_thread":
self.storage_worker += 1
elif thread.name == "backup_worker_thread":
self.backup_worker += 1
while self.vcenter_worker < int(vcenter_threads.value):
VcenterWorker(self.vcenter_queue, self.vcenter_list)
self.vcenter_worker += 1
while self.storage_worker < int(storage_threads.value):
StorageWorker(self.storage_queue, self.storage_list)
self.storage_worker += 1
while self.backup_worker < int(backup_threads.value):
BackupWorker(self.backup_queue, self.backup_list)
self.backup_worker += 1
Processes\Vcenter.py (where VcenterWorker lives)
# Python imports:
from multiprocessing import Process
# Local imports:
from VMwareInventory.VMwareRest.VMwareRest import RESTVCenter
class VcenterWorker(Process):
def __init__(self, queue, vcenter_list):
Process.__init__(self)
self.queue = queue
self.list = vcenter_list
self.name = "vCenter_worker_process"
self.start()
def run(self):
while True:
vcenter = self.queue.get()
self.list.remove(vcenter)
self.vcscrape(vcenter.name, vcenter.user, vcenter.password)
self.queue.task_done()
@staticmethod
def vcscrape(name, user, pwd):
vc_scrape = RESTVCenter(name, user, pwd)
vc_scrape.join()
return