Union on two querysets mixes the data.

Hi. Below are my models and a view to display data from two models to me in one list. As the models have a lot in common I thought I would make a union and display the data. Unfortunately, a problem occurred with the display. It turns out that the data in the columns is mixed up as shown in the screenshot.

Any suggestions on how to fix this?
Best regards.

models.py

def validate_court_register_number(value: str):
    if not re.match(r"^\d{9}$|^\d{14}$", value):
        raise ValidationError(_("Numer KRS musi składać się z 9 lub 14 cyfr!"))


def validate_tax_identification_number(length: int, value: str):
    if len(value) != length:
        raise ValidationError(_("Długość numeru NIP jest niepoprawna!"))


def validate_allowed_chars(allowed_chars: str, value: str):
    if not set(value).issubset(allowed_chars):
        raise ValidationError(_("Podana wartość posiada nie niepoprawne znaki!"))


class LegalSubject(TimeStampedModel):
    address1 = models.CharField(_("Adres linia 1"), blank=True, null=True, max_length=255)
    address2 = models.CharField(_("Adres linia 2"), blank=True, null=True, max_length=255)
    tax_identification_number = models.CharField(
        _("NIP"),
        unique=True,
        blank=True,
        null=True,
        validators=[
            partial(validate_allowed_chars, string.digits),
            partial(validate_tax_identification_number, 10),
        ],
    )
    is_client = models.BooleanField(_("Mój klient"), default=False)
    notes = models.TextField(_("Notatki"))

    class Meta:
        abstract = True


class IndividualPerson(LegalSubject):
    first_name = models.CharField(_("Imię"), max_length=150)
    last_name = models.CharField(_("Nazwisko"), max_length=150)
    email = models.EmailField(_("Email"), blank=True, null=True)
    personal_identification_number = models.CharField(
        _("PESEL"),
        unique=True,
        blank=True,
        null=True,
        validators=[
            partial(validate_allowed_chars, string.digits),
            partial(validate_tax_identification_number, 11),
        ],
    )
    contacts = models.ManyToManyField("self", related_name="contacted_by", blank=True, verbose_name=_("Kontakty"))
    representatives = models.ManyToManyField(
        "self",
        related_name="represented_by",
        blank=True,
        verbose_name=_("Pełnomocnicy"),
    )
    company = models.ForeignKey(
        "LegalEntity",
        related_name="workers",
        blank=True,
        null=True,
        on_delete=models.SET_NULL,
        verbose_name=_("Firma"),
    )

    class Meta:
        ordering = ("last_name", "first_name")
        indexes = [
            models.Index(fields=["last_name"]),
            models.Index(fields=["first_name"]),
            models.Index(fields=["last_name", "first_name"]),
            models.Index(fields=["personal_identification_number"]),
        ]

    @property
    def name(self):
        return self.first_name + " " + self.last_name

    @classmethod
    def as_legal_subject(cls):
        return cls.objects.annotate(
            name=Concat("first_name", V(" "), "last_name"),
            national_court_register_number=V(None, output_field=models.CharField()),
            is_person=V(True, output_field=models.BooleanField()),
        )

    def __str__(self):
        return self.name


class LegalEntity(LegalSubject):
    name = models.CharField(_("Nazwa"), max_length=150)
    national_court_register_number = models.CharField(
        _("KRS"),
        unique=True,
        blank=True,
        null=True,
        validators=[partial(validate_allowed_chars, string.digits), validate_court_register_number],
    )
    contacts = models.ManyToManyField(
        "IndividualPerson",
        related_name="contacted_by",
        blank=True,
        verbose_name=_("Kontakty"),
    )
    representatives = models.ManyToManyField(
        "IndividualPerson",
        related_name="represented_by",
        blank=True,
        verbose_name=_("Pełnomocnicy"),
    )

    class Meta:
        ordering = ("name",)
        indexes = [
            models.Index(fields=["name"]),
            models.Index(fields=["national_court_register_number"]),
        ]

    @classmethod
    def as_legal_subject(cls):
        return cls.objects.annotate(
            first_name=V(None, output_field=models.CharField()),
            last_name=V(None, output_field=models.CharField()),
            personal_identification_number=V(None, output_field=models.CharField()),
            is_person=V(False, output_field=models.BooleanField()),
        )

    def __str__(self):
        return self.name

views.py

class LegalSubjectListView(LoginRequiredMixin, ListView):
    template_name = "tenant/legalsubject_list.html"
    paginate_by = 10
    ordering = ["created"]
    queryset_values = [
        "id",
        "name",
        "is_person",
        "address1",
        "address2",
        "tax_identification_number",
        "personal_identification_number",
        "national_court_register_number",
    ]

    def get_queryset(self):
        individual_persons = IndividualPerson.as_legal_subject().values(*self.queryset_values)
        legal_entities = LegalEntity.as_legal_subject().values(*self.queryset_values)
        combined_queryset = legal_entities.union(individual_persons).values(*self.queryset_values)
        return combined_queryset

Hi Draqun,

I tried to reproduce the code you provided. Had to do some adjustments (max_length was missing on some CharFields) and got rid of the ordering defined in Meta, as I was getting this error:

django.db.utils.DatabaseError: ORDER BY not allowed in subqueries of compound statements.

I then created one of both models and tried the query you do in your view. It produces this SQL:

SELECT "core_legalentity"."id" AS "col1",
       "core_legalentity"."name" AS "col2",
       "core_legalentity"."address1" AS "col3",
       "core_legalentity"."address2" AS "col4",
       "core_legalentity"."tax_identification_number" AS "col5",
       "core_legalentity"."national_court_register_number" AS "col6",
       False AS "is_person",
                NULL AS "personal_identification_number"
FROM "core_legalentity"
UNION
SELECT "core_individualperson"."id" AS "col1",
       "core_individualperson"."address1" AS "col2",
       "core_individualperson"."address2" AS "col3",
       "core_individualperson"."tax_identification_number" AS "col4",
       "core_individualperson"."personal_identification_number" AS "col5",
       (COALESCE("core_individualperson"."first_name",) || COALESCE((COALESCE(,) || COALESCE("core_individualperson"."last_name",)),)) AS "name",
       True AS "is_person",
               NULL AS "national_court_register_number"
FROM "core_individualperson"
LIMIT 21

The SQL seems ok, and the return for my dummy data too. Can you elaborate what exactly seems mixed up for you on your example?

Hi @wattux Thank you for a quick response. I actually forgot to specify max_length in a few places. I have already made the corrections. Thank you.
The data is mixed as I showed in the screenshot at the beginning of the post, The SQL query looks correct with me as well, but the ORM is mixing up the data. I should add that I’m using Python 3.13 and Django 5.1.5.