Union on two querysets mixes the data.

Hi. Below are my models and a view to display data from two models to me in one list. As the models have a lot in common I thought I would make a union and display the data. Unfortunately, a problem occurred with the display. It turns out that the data in the columns is mixed up as shown in the screenshot.

Any suggestions on how to fix this?
Best regards.

models.py

def validate_court_register_number(value: str):
    if not re.match(r"^\d{9}$|^\d{14}$", value):
        raise ValidationError(_("Numer KRS musi składać się z 9 lub 14 cyfr!"))


def validate_tax_identification_number(length: int, value: str):
    if len(value) != length:
        raise ValidationError(_("Długość numeru NIP jest niepoprawna!"))


def validate_allowed_chars(allowed_chars: str, value: str):
    if not set(value).issubset(allowed_chars):
        raise ValidationError(_("Podana wartość posiada nie niepoprawne znaki!"))


class LegalSubject(TimeStampedModel):
    address1 = models.CharField(_("Adres linia 1"), blank=True, null=True, max_length=255)
    address2 = models.CharField(_("Adres linia 2"), blank=True, null=True, max_length=255)
    tax_identification_number = models.CharField(
        _("NIP"),
        unique=True,
        blank=True,
        null=True,
        validators=[
            partial(validate_allowed_chars, string.digits),
            partial(validate_tax_identification_number, 10),
        ],
    )
    is_client = models.BooleanField(_("Mój klient"), default=False)
    notes = models.TextField(_("Notatki"))

    class Meta:
        abstract = True


class IndividualPerson(LegalSubject):
    first_name = models.CharField(_("Imię"), max_length=150)
    last_name = models.CharField(_("Nazwisko"), max_length=150)
    email = models.EmailField(_("Email"), blank=True, null=True)
    personal_identification_number = models.CharField(
        _("PESEL"),
        unique=True,
        blank=True,
        null=True,
        validators=[
            partial(validate_allowed_chars, string.digits),
            partial(validate_tax_identification_number, 11),
        ],
    )
    contacts = models.ManyToManyField("self", related_name="contacted_by", blank=True, verbose_name=_("Kontakty"))
    representatives = models.ManyToManyField(
        "self",
        related_name="represented_by",
        blank=True,
        verbose_name=_("Pełnomocnicy"),
    )
    company = models.ForeignKey(
        "LegalEntity",
        related_name="workers",
        blank=True,
        null=True,
        on_delete=models.SET_NULL,
        verbose_name=_("Firma"),
    )

    class Meta:
        ordering = ("last_name", "first_name")
        indexes = [
            models.Index(fields=["last_name"]),
            models.Index(fields=["first_name"]),
            models.Index(fields=["last_name", "first_name"]),
            models.Index(fields=["personal_identification_number"]),
        ]

    @property
    def name(self):
        return self.first_name + " " + self.last_name

    @classmethod
    def as_legal_subject(cls):
        return cls.objects.annotate(
            name=Concat("first_name", V(" "), "last_name"),
            national_court_register_number=V(None, output_field=models.CharField()),
            is_person=V(True, output_field=models.BooleanField()),
        )

    def __str__(self):
        return self.name


class LegalEntity(LegalSubject):
    name = models.CharField(_("Nazwa"), max_length=150)
    national_court_register_number = models.CharField(
        _("KRS"),
        unique=True,
        blank=True,
        null=True,
        validators=[partial(validate_allowed_chars, string.digits), validate_court_register_number],
    )
    contacts = models.ManyToManyField(
        "IndividualPerson",
        related_name="contacted_by",
        blank=True,
        verbose_name=_("Kontakty"),
    )
    representatives = models.ManyToManyField(
        "IndividualPerson",
        related_name="represented_by",
        blank=True,
        verbose_name=_("Pełnomocnicy"),
    )

    class Meta:
        ordering = ("name",)
        indexes = [
            models.Index(fields=["name"]),
            models.Index(fields=["national_court_register_number"]),
        ]

    @classmethod
    def as_legal_subject(cls):
        return cls.objects.annotate(
            first_name=V(None, output_field=models.CharField()),
            last_name=V(None, output_field=models.CharField()),
            personal_identification_number=V(None, output_field=models.CharField()),
            is_person=V(False, output_field=models.BooleanField()),
        )

    def __str__(self):
        return self.name

views.py

class LegalSubjectListView(LoginRequiredMixin, ListView):
    template_name = "tenant/legalsubject_list.html"
    paginate_by = 10
    ordering = ["created"]
    queryset_values = [
        "id",
        "name",
        "is_person",
        "address1",
        "address2",
        "tax_identification_number",
        "personal_identification_number",
        "national_court_register_number",
    ]

    def get_queryset(self):
        individual_persons = IndividualPerson.as_legal_subject().values(*self.queryset_values)
        legal_entities = LegalEntity.as_legal_subject().values(*self.queryset_values)
        combined_queryset = legal_entities.union(individual_persons).values(*self.queryset_values)
        return combined_queryset

Hi Draqun,

I tried to reproduce the code you provided. Had to do some adjustments (max_length was missing on some CharFields) and got rid of the ordering defined in Meta, as I was getting this error:

django.db.utils.DatabaseError: ORDER BY not allowed in subqueries of compound statements.

I then created one of both models and tried the query you do in your view. It produces this SQL:

SELECT "core_legalentity"."id" AS "col1",
       "core_legalentity"."name" AS "col2",
       "core_legalentity"."address1" AS "col3",
       "core_legalentity"."address2" AS "col4",
       "core_legalentity"."tax_identification_number" AS "col5",
       "core_legalentity"."national_court_register_number" AS "col6",
       False AS "is_person",
                NULL AS "personal_identification_number"
FROM "core_legalentity"
UNION
SELECT "core_individualperson"."id" AS "col1",
       "core_individualperson"."address1" AS "col2",
       "core_individualperson"."address2" AS "col3",
       "core_individualperson"."tax_identification_number" AS "col4",
       "core_individualperson"."personal_identification_number" AS "col5",
       (COALESCE("core_individualperson"."first_name",) || COALESCE((COALESCE(,) || COALESCE("core_individualperson"."last_name",)),)) AS "name",
       True AS "is_person",
               NULL AS "national_court_register_number"
FROM "core_individualperson"
LIMIT 21

The SQL seems ok, and the return for my dummy data too. Can you elaborate what exactly seems mixed up for you on your example?

Hi @wattux Thank you for a quick response. I actually forgot to specify max_length in a few places. I have already made the corrections. Thank you.
The data is mixed as I showed in the screenshot at the beginning of the post, The SQL query looks correct with me as well, but the ORM is mixing up the data. I should add that I’m using Python 3.13 and Django 5.1.5.

I’ve check everything once again. I see there is incorrect SQL generated by ORM and it looks lik the following way.

(
        SELECT "tenant_legalentity"."id" AS "col1",
               "tenant_legalentity"."register_number" AS "col2",
               "tenant_legalentity"."name" AS "col3",
               "tenant_legalentity"."address1" AS "col4",
               "tenant_legalentity"."address2" AS "col5",
               "tenant_legalentity"."tax_identification_number" AS "col6",
               "tenant_legalentity"."national_court_register_number" AS "col7",
               false AS "is_person",
               NULL AS "personal_identification_number"
          FROM "tenant_legalentity"
         ORDER BY "tenant_legalentity"."name" ASC
       )
 UNION (
        SELECT "tenant_individualperson"."id" AS "col1",
               "tenant_individualperson"."register_number" AS "col2",
               "tenant_individualperson"."address1" AS "col3",
               "tenant_individualperson"."address2" AS "col4",
               "tenant_individualperson"."tax_identification_number" AS "col5",
               "tenant_individualperson"."personal_identification_number" AS "col6",
               (COALESCE("tenant_individualperson"."first_name", '') || COALESCE((COALESCE(' ', '') || COALESCE("tenant_individualperson"."last_name", '')), '')) AS "name",
               true AS "is_person",
               NULL AS "national_court_register_number"
          FROM "tenant_individualperson"
         ORDER BY "tenant_individualperson"."last_name" ASC,
                  "tenant_individualperson"."first_name" ASC
       )
 ORDER BY "col3" ASC
 LIMIT 5

Any idea?

What is wrong from your point of view? That it uses for instance tenant_legalentity.name as col3 and also tenant_individualperson.address1 as col3 and now you got names and addrese in col3?

I would like to draw attention to the fact that calling the code

        combined_queryset = legal_entities.union(individual_persons).values(*self.queryset_values)

causes aliases to be added automatically and the aliases do not match the field names. For the first SELECT tax_identification_number has alias col5, for the second query col4. Any ideas on how to force union or prepare querysets to get this union to do the job correctly?

On the end the template code


                {% for legal_obj in page_obj %}
                    <tr class="table-default">
                        <td class="col w-auto">{{ legal_obj.register_number }}</td>
                        <td class="col w-auto">{{ legal_obj.name }}</td>
                        <td class="col w-auto">{{ legal_obj.address1 }}<br/>{{ legal_obj.address2 }}</td>
                        <td class="col w-auto">{{ legal_obj.tax_identification_number|default_if_none:"---" }}</td>
                        <td class="col w-auto">{{ legal_obj.personal_identification_number|default_if_none:"---" }}</td>
                        <td class="col w-auto">{{ legal_obj.national_court_register_number|default_if_none:"---" }}</td>
                    </tr>
                {% endfor %}

produces the following horror


As you can see the IndividualPerson class objects have a name field in the incorrect column.

Django < 5.2 unfortunately doesn’t respect the specified ordering of values(*select) when mixing field and annotations references.

No matter how hard you try the order will always be (*extra_fields, *fields, *annotations) so you’d have to exclusively use extra or annotate (no direct field references) if you want to completely control the order of SELECTed fields.

This is fixed in Django 5.2b1 by this PR (see ticket and exact commit) so I’d give a shot at installing the beta and reporting your findings.

2 Likes

Thank you very much. I am glad there is a fix. I can upgrade Django version in my project when it will be released. It is still under development so I just wait. Thank you once again. Best regards. Draqun