diff --git a/.gitignore b/.gitignore index 18efddd..b56eb2a 100644 --- a/.gitignore +++ b/.gitignore @@ -5,13 +5,18 @@ .project .pydevproject .settings -pprep.py + +mypoly.py + +tmp +libraries-local + pushgit pushhg pushreg -mypoly.py -tmp -poly2.py -libraries-local -README.html + +ppreadme.py +ppdocs.py +README.html +DOCS.html diff --git a/DOCS.rst b/DOCS.rst index a10c557..93b168a 100644 --- a/DOCS.rst +++ b/DOCS.rst @@ -1,22 +1,5 @@ -=============================== -Fully Polymorphic Django Models -=============================== - - -'polymorphic.py' is an add-on module that adds automatic -polymorphism to the Django model inheritance system. - -The effect is: For enabled models, objects retrieved from the -database are always delivered just as they were created and saved, -with the same type/class and fields - regardless how they are -retrieved. The resulting querysets are polymorphic, i.e. may deliver -objects of several different types in a single query result. - -Please see the examples below as they demonstrate this best. - -Please note that this module is still very experimental. See below for -current restrictions, caveats, and performance implications. - +.. contents:: Table of Contents + :depth: 1 Installation / Testing ====================== @@ -54,8 +37,8 @@ views.py files live). Defining Polymorphic Models =========================== -To make models polymorphic, use PolymorphicModel instead of Django's -models.Model as the superclass of your base model. All models +To make models polymorphic, use ``PolymorphicModel`` instead of Django's +``models.Model`` as the superclass of your base model. 
All models inheriting from your base class will be polymorphic as well:: from polymorphic import PolymorphicModel @@ -110,9 +93,9 @@ Polymorphic filtering (for fields in derived classes) For example, cherrypicking objects from multiple derived classes anywhere in the inheritance tree, using Q objects (with the - slightly enhanced syntax: exact model name + three _ + field name): + syntax: ``exact model name + three _ + field name``): - >>> ModelA.objects.filter( Q( ModelB___field2 = 'B2' ) | Q( ModelC___field3 = 'C3' ) ) + >>> ModelA.objects.filter( Q(ModelB___field2 = 'B2') | Q(ModelC___field3 = 'C3') ) . [ , ] @@ -163,7 +146,7 @@ ManyToManyField, ForeignKey, OneToOneField field1 = OneToOneField(ModelA) - then field1 may now also refer to objects of type ModelB or ModelC. + then field1 may now also refer to objects of type ``ModelB`` or ``ModelC``. A ManyToManyField example:: @@ -202,7 +185,7 @@ Using a Custom Manager ---------------------- For creating a custom polymorphic manager class, derive your manager -from PolymorphicManager instead of models.Manager. In your model +from ``PolymorphicManager`` instead of ``models.Manager``. In your model class, explicitly add the default manager first, and then your custom manager:: @@ -217,7 +200,7 @@ custom manager:: The first manager defined ('objects' in the example) is used by Django as automatic manager for several purposes, including accessing related objects. It must not filter objects and it's safest to use -the plain PolymorphicManager here. +the plain ``PolymorphicManager`` here. 
Manager Inheritance / Propagation --------------------------------- @@ -230,7 +213,7 @@ An example (inheriting from MyModel above):: class MyModel2(MyModel): pass - # Managers inherited from MyModel, delivering MyModel2 objects (including MyModel2 subclass objects) + # Managers inherited from MyModel, delivering MyModel2 (and subclass) objects >>> MyModel2.objects.all() >>> MyModel2.ordered_objects.all() @@ -242,7 +225,7 @@ managers are always fully propagated from all polymorphic base models Using a Custom Queryset Class ----------------------------- -The PolymorphicManager class accepts one initialization argument, +The ``PolymorphicManager`` class accepts one initialization argument, which is the queryset class the manager should use. A custom custom queryset class can be defined and used like this:: @@ -259,51 +242,51 @@ Performance Considerations ========================== The current implementation is pretty simple and does not use any -custom sql - it is purely based on the Django ORM. Right now the +custom SQL - it is purely based on the Django ORM. Right now the query :: result_objects = list( ModelA.objects.filter(...) ) -performs one sql query to retrieve ModelA objects and one additional +performs one SQL query to retrieve ``ModelA`` objects and one additional query for each unique derived class occurring in result_objects. The best case for retrieving 100 objects is 1 db query if all are -class ModelA. If 50 objects are ModelA and 50 are ModelB, then two +class ``ModelA``. If 50 objects are ``ModelA`` and 50 are ``ModelB``, then two queries are executed. If result_objects contains only the base model -type (ModelA), the polymorphic models are just as efficient as plain +type (``ModelA``), the polymorphic models are just as efficient as plain Django models (in terms of executed queries). The pathological worst case is 101 db queries if result_objects contains 100 different -object types (with all of them subclasses of ModelA).
+object types (with all of them subclasses of ``ModelA``). Performance ist relative: when Django users create their own -polymorphic ad-hoc solution (without a module like polymorphic.py), +polymorphic ad-hoc solution (without a module like ``polymorphic.py``), this usually results in a variation of :: result_objects = [ o.get_real_instance() for o in BaseModel.objects.filter(...) ] which of has really bad performance. Relative to this, the -performance of the current polymorphic.py is pretty good. +performance of the current ``polymorphic.py`` is pretty good. It may well be efficient enough for the majority of use cases. Chunking: The implementation always requests objects in chunks of -size Polymorphic_QuerySet_objects_per_request. This limits the +size ``Polymorphic_QuerySet_objects_per_request``. This limits the complexity/duration for each query, including the pathological cases. Possible Optimizations ====================== -PolymorphicQuerySet can be optimized to require only one SQL query +``PolymorphicQuerySet`` can be optimized to require only one SQL query for the queryset evaluation and retrieval of all objects. Basically, what ist needed is a possibility to pull in the fields -from all relevant sub-models with one sql query. However, some deeper +from all relevant sub-models with one SQL query. However, some deeper digging into the Django database layer will be required in order to make this happen. -A viable option might be to get the sql query from the QuerySet -(probably from django.db.models.sql.compiler.SQLCompiler.as_sql), +A viable option might be to get the SQL query from the QuerySet +(probably from ``django.db.models.sql.compiler.SQLCompiler.as_sql``), making sure that all necessary joins are done, and then doing a -custom SQL request from there (like in SQLCompiler.execute_sql). +custom SQL request from there (like in ``SQLCompiler.execute_sql``).
An optimized version could fall back to the current ORM-only implementation for all non-SQL databases. @@ -311,9 +294,9 @@ implementation for all non-SQL databases. SQL Complexity -------------- -With only one sql query, one sql join for each possible subclass -would be needed (BaseModel.__subclasses__(), recursively). -With two sql queries, the number of joins could be reduced to the +With only one SQL query, one SQL join for each possible subclass +would be needed (``BaseModel.__subclasses__()``, recursively). +With two SQL queries, the number of joins could be reduced to the number of actuallly occurring subclasses in the result. A final implementation might want to use one query only if the number of possible subclasses (and therefore joins) is not too large, and @@ -328,47 +311,55 @@ will actually be involved in typical use cases - the total number of classes in the inheritance tree as well as the number of distinct classes in query results. It may well turn out that the increased number of joins is no problem for the DBMS in all realistic use -cases. Alternatively, if the sql query execution time is +cases. Alternatively, if the SQL query execution time is significantly longer even in common use cases, this may still be acceptable in exchange for the added functionality. -Let's not forget that all of the above is just about optimizations. -The current simplistic implementation already works well - perhaps -well enough for the majority of applications. +General +------------------- + +Let's not forget that all of the above is just about optimization. +The current implementation already works well - and perhaps well +enough for the majority of applications. + +Also, it seems that further optimization (down to one DB request) +would be restricted to a small area of the code, straightforward +to implement, and mostly independent from the rest of the module. +So this optimization can be done at any later time (like when +it's needed). 
-Loose Ends -========== +Unsupported Methods, Restrictions & Caveats +=========================================== Currently Unsupported Queryset Methods -------------------------------------- -+ aggregate() probably makes only sense in a purely non-OO/relational ++ ``aggregate()`` probably makes only sense in a purely non-OO/relational way. So it seems an implementation would just fall back to the Django vanilla equivalent. -+ annotate(): The current '_get_real_instances' would need minor ++ ``annotate()``: The current '_get_real_instances' would need minor enhancement. -+ defer() and only(): Full support, including slight polymorphism ++ ``defer()`` and ``only()``: Full support, including slight polymorphism enhancements, seems to be straighforward (depends on '_get_real_instances'). -+ extra(): Does not really work with the current implementation of ++ ``extra()``: Does not really work with the current implementation of '_get_real_instances'. It's unclear if it should be supported. -+ select_related(): This would probably need Django core support ++ ``select_related()``: This would probably need Django core support for traversing the reverse model inheritance OneToOne relations with Django's select_related(), e.g.: - *select_related('modela__modelb__foreignkeyfield')*. + ``select_related('modela__modelb__foreignkeyfield')``. Also needs more thought/investigation. -+ distinct() needs more thought and investigation as well ++ ``distinct()`` needs more thought and investigation as well -+ values() & values_list(): Implementation seems to be mostly ++ ``values()`` & ``values_list()``: Implementation seems to be mostly straighforward - Restrictions & Caveats ---------------------- @@ -380,12 +371,11 @@ Restrictions & Caveats by subclassing it instead of modifying Django core (as we do here with PolymorphicModel). -+ The name and appname of the leaf model is stored in the base model - (the base model directly inheriting from PolymorphicModel). 
- If a model or an app is renamed, then these fields need to be - corrected too, if the db content should stay usable after the rename. - Aside from this, these two fields should probably be combined into - one field (more db/sql efficiency) ++ A reference (``ContentType``) to the real/leaf model is stored + in the base model (the base model directly inheriting from + PolymorphicModel). If a model or an app is renamed, then Django's + ContentType table needs to be corrected too, if the db content + should stay usable after the rename. + For all objects that are not instances of the base class type, but instances of a subclass, the base class fields are currently @@ -396,9 +386,9 @@ Restrictions & Caveats fields (like basemodel_ptr), as well as implicit model inheritance forward relation fields, Django internally tries to use our polymorphic manager/queryset in some places, which of course it - should not. Currently this is solved with hackish __getattribute__ - in PolymorphicModel. A minor patch to Django core would probably - get rid of that. + should not. Currently this is solved with a hacky __getattribute__ + in PolymorphicModel, which causes some overhead. A minor patch to + Django core would probably get rid of that. In General ---------- diff --git a/README.rst b/README.rst index 081f60c..55e98ba 100644 --- a/README.rst +++ b/README.rst @@ -1,47 +1,55 @@ -=============================== -Fully Polymorphic Django Models -=============================== - -News ----- - -* 2010-1-26: IMPORTANT - database schema change (more info in change log). +**2010-1-26** + IMPORTANT - database schema change (more info in change log). I hope I got this change in early enough before anyone started to use polymorphic.py in earnest. Sorry for any inconvenience. - This should be the final DB schema now! + This should be the final DB schema now.
+ + +Usage, Examples, Installation & Documentation, Links +---------------------------------------------------- + +* Documentation_ and Overview_ +* `Discussion, Questions, Suggestions`_ +* GitHub_ - Bitbucket_ - `Download as TGZ`_ or ZIP_ + +.. _Documentation: http://bserve.webhop.org/wiki/django_polymorphic/doc +.. _Discussion, Questions, Suggestions: http://django-polymorphic.blogspot.com/2010/01/messages.html +.. _GitHub: http://github.com/bconstantin/django_polymorphic +.. _Bitbucket: http://bitbucket.org/bconstantin/django_polymorphic +.. _Download as TGZ: http://github.com/bconstantin/django_polymorphic/tarball/master +.. _ZIP: http://github.com/bconstantin/django_polymorphic/zipball/master +.. _Overview: http://bserve.webhop.org/wiki/django_polymorphic What is django_polymorphic good for? ------------------------------------ -If ``ArtProject`` and ``ResearchProject`` inherit from the model ``Project``:: +If ``ArtProject`` and ``ResearchProject`` inherit from the model ``Project``: - >>> Project.objects.all() - . - [ , - , - ] +>>> Project.objects.all() +. +[ , + , + ] -In general, objects retrieved from the database are always delivered just as -they were created and saved, with the same type/class and fields. It doesn't -matter how you access these objects: be it through the model's own -managers/querysets, ForeignKey, ManyToMany or OneToOne fields. +In general, objects retrieved from the database are always returned +with the same type/class and fields they were created and saved with. +It doesn't matter how these objects are retrieved: be it through the +model's own managers/querysets, ForeignKeys, ManyToManyFields +or OneToOneFields. -The resulting querysets are polymorphic, and may deliver +The resulting querysets are polymorphic, i.e. they may deliver objects of several different types in a single query result.
``django_polymorphic`` consists of just one add-on module, ``polymorphic.py``, -that adds this kind of automatic polymorphism to Django's model -inheritance system (for models that request this behaviour). +that adds this functionality to Django's model inheritance system +(for models that request this behaviour). -Please see additional examples and the documentation here: - - http://bserve.webhop.org/wiki/django_polymorphic - -or in the DOCS.rst file in this repository. Status ------ This module is still very experimental. Please see the docs for current restrictions, caveats, and performance implications. + + diff --git a/poly/polymorphic.py b/poly/polymorphic.py index fca7b02..e4610ae 100644 --- a/poly/polymorphic.py +++ b/poly/polymorphic.py @@ -10,8 +10,7 @@ Please see the examples and documentation here: or in the included README.rst and DOCS.rst files. Copyright: -This code and affiliated files are (C) by -Bert Constantin and the individual contributors. +This code and affiliated files are (C) by Bert Constantin and individual contributors. Please see LICENSE and AUTHORS for more information. """ @@ -21,6 +20,7 @@ from django.db.models.query import QuerySet from collections import defaultdict from pprint import pprint from django.contrib.contenttypes.models import ContentType +import sys # chunk-size: maximum number of objects requested per db-request # by the polymorphic queryset.iterator() implementation @@ -48,7 +48,7 @@ class PolymorphicManager(models.Manager): return self.queryset_class(self.model) # Proxy all unknown method calls to the queryset, so that its members are - # directly accessible from PolymorphicModel.objects. + # directly accessible as PolymorphicModel.objects.* # The advantage is that not yet known member functions of derived querysets will be proxied as well. # We exclude any special functions (__) from this automatic proxying. 
def __getattr__(self, name): @@ -109,8 +109,8 @@ class PolymorphicQuerySet(QuerySet): ordered_id_list = [] # list of ids of result-objects in correct order results = {} # polymorphic dict of result-objects, keyed with their id (no order) - # dict contains one entry for the different model types occurring in result, - # in the format idlist_per_model['applabel.modelname']=[list-of-ids-for-this-model] + # dict contains one entry per unique model type occurring in result, + # in the format idlist_per_model[modelclass]=[list-of-object-ids] idlist_per_model = defaultdict(list) # - sort base_result_object ids into idlist_per_model lists, depending on their real class; @@ -307,7 +307,9 @@ def _translate_polymorphic_field_path(queryset_model, field_path): if issubclass(model, models.Model) and model != models.Model: # model name is occurring twice in submodel inheritance tree => Error if model.__name__ in result and model != result[model.__name__]: - assert model, 'PolymorphicModel: model name is ambiguous: %s.%s, %s.%s!' 
% ( + e = 'PolymorphicModel: model name alone is ambiguous: %s.%s and %s.%s!\n' + e += 'In this case, please use the syntax: applabel__ModelName___field' + assert model, e % ( model._meta.app_label, model.__name__, result[model.__name__]._meta.app_label, result[model.__name__].__name__) @@ -451,6 +453,7 @@ class PolymorphicModelBase(ModelBase): add_managers_keys.add(key) return add_managers + @classmethod def get_first_user_defined_manager(self, attrs): mgr_list = [] @@ -462,7 +465,7 @@ class PolymorphicModelBase(ModelBase): _, manager = sorted(mgr_list)[0] return manager return None - + @classmethod def validate_model_manager(self, manager, model_name, manager_name): """check if the manager is derived from PolymorphicManager @@ -617,7 +620,7 @@ class ShowFields(object): else: out += ': "' + getattr(self, f.name) + '"' if f != last: out += ', ' - return '<' + (self.__class__.__name__ + ': ').ljust(17) + out + '>' + return '<' + (self.__class__.__name__ + ': ') + out + '>' class ShowFieldsAndTypes(object):