diff --git a/DOCSTRING_STYLE.md b/DOCSTRING_STYLE.md
new file mode 100644
index 000000000..77b6dc90a
--- /dev/null
+++ b/DOCSTRING_STYLE.md
@@ -0,0 +1,499 @@
+# DataJoint Python Docstring Style Guide
+
+This document defines the canonical docstring format for datajoint-python.
+All public APIs must follow this NumPy-style format for consistency and
+automated documentation generation via mkdocstrings.
+
+## Quick Reference
+
+```python
+def function(param1, param2, *, keyword_only=None):
+ """
+ Short one-line summary (imperative mood, no period).
+
+ Extended description providing context and details. May span
+ multiple lines. Explain what the function does, not how.
+
+ Parameters
+ ----------
+ param1 : type
+ Description of param1.
+ param2 : type
+ Description of param2.
+ keyword_only : type, optional
+ Description. Default is None.
+
+ Returns
+ -------
+ type
+ Description of return value.
+
+ Raises
+ ------
+ ExceptionType
+ When and why this exception is raised.
+
+ Examples
+ --------
+ >>> result = function("value", 42)
+ >>> print(result)
+ expected_output
+
+ See Also
+ --------
+ related_function : Brief description.
+
+ Notes
+ -----
+ Additional technical notes, algorithms, or implementation details.
+ """
+```
+
+---
+
+## Module Docstrings
+
+Every module must begin with a docstring explaining its purpose.
+
+```python
+"""
+Connection management for DataJoint.
+
+This module provides the Connection class that manages database connections,
+transaction handling, and query execution. It also provides the ``conn()``
+function for accessing a persistent shared connection.
+
+Key Components
+--------------
+Connection : class
+ Manages a single database connection with transaction support.
+conn : function
+ Returns a persistent connection object shared across modules.
+
+Example
+-------
+>>> import datajoint as dj
+>>> connection = dj.conn()
+>>> connection.query("SHOW DATABASES")
+"""
+```
+
+---
+
+## Class Docstrings
+
+```python
+class Table(QueryExpression):
+ """
+ Base class for all DataJoint tables.
+
+ Table implements data manipulation (insert, delete, update) and inherits
+ query functionality from QueryExpression. Concrete table classes must
+ define the ``definition`` property specifying the table structure.
+
+ Parameters
+ ----------
+ None
+ Tables are typically instantiated via schema decoration, not directly.
+
+ Attributes
+ ----------
+ definition : str
+ DataJoint table definition string (DDL).
+ primary_key : list of str
+ Names of primary key attributes.
+ heading : Heading
+ Table heading with attribute metadata.
+
+ Examples
+ --------
+ Define a table using the schema decorator:
+
+ >>> @schema
+ ... class Mouse(dj.Manual):
+ ...     definition = '''
+ ...     mouse_id : int
+ ...     ---
+ ...     dob : date
+ ...     sex : enum("M", "F", "U")
+ ...     '''
+
+ Insert data:
+
+ >>> Mouse.insert1({"mouse_id": 1, "dob": "2024-01-15", "sex": "M"})
+
+ See Also
+ --------
+ Manual : Table for manually entered data.
+ Computed : Table for computed results.
+ QueryExpression : Query operator base class.
+ """
+```
+
+---
+
+## Method Docstrings
+
+### Standard Method
+
+```python
+def insert(self, rows, *, replace=False, skip_duplicates=False, ignore_extra_fields=False):
+ """
+ Insert one or more rows into the table.
+
+ Parameters
+ ----------
+ rows : iterable
+ Rows to insert. Each row can be:
+ - dict: ``{"attr": value, ...}``
+ - numpy.void: Record array element
+ - sequence: Values in heading order
+ - QueryExpression: Results of a query
+ - pathlib.Path: Path to CSV file
+ replace : bool, optional
+ If True, replace existing rows with matching primary keys.
+ Default is False.
+ skip_duplicates : bool, optional
+ If True, silently skip rows that would cause duplicate key errors.
+ Default is False.
+ ignore_extra_fields : bool, optional
+ If True, ignore fields not in the table heading.
+ Default is False.
+
+ Returns
+ -------
+ None
+
+ Raises
+ ------
+ DuplicateError
+ When inserting a row with an existing primary key and neither
+ ``replace`` nor ``skip_duplicates`` is True.
+ DataJointError
+ When required attributes are missing or types are incompatible.
+
+ Examples
+ --------
+ Insert a single row:
+
+ >>> Mouse.insert1({"mouse_id": 1, "dob": "2024-01-15", "sex": "M"})
+
+ Insert multiple rows:
+
+ >>> Mouse.insert([
+ ...     {"mouse_id": 2, "dob": "2024-02-01", "sex": "F"},
+ ...     {"mouse_id": 3, "dob": "2024-02-15", "sex": "M"},
+ ... ])
+
+ Insert from a query:
+
+ >>> TargetTable.insert(SourceTable & "condition > 5")
+
+ See Also
+ --------
+ insert1 : Insert exactly one row.
+ """
+```
+
+### Method with Complex Return
+
+```python
+def fetch(self, *attrs, offset=None, limit=None, order_by=None, format=None, as_dict=False):
+ """
+ Retrieve data from the table.
+
+ Parameters
+ ----------
+ *attrs : str
+ Attribute names to fetch. If empty, fetches all attributes.
+ Use "KEY" to fetch primary key as dict.
+ offset : int, optional
+ Number of rows to skip. Default is None (no offset).
+ limit : int, optional
+ Maximum number of rows to return. Default is None (no limit).
+ order_by : str or list of str, optional
+ Attribute(s) to sort by. Use "KEY" for primary key order,
+ append " DESC" for descending. Default is None (unordered).
+ format : {"array", "frame"}, optional
+ Output format when fetching all attributes:
+ - "array": numpy structured array (default)
+ - "frame": pandas DataFrame
+ as_dict : bool, optional
+ If True, return list of dicts instead of structured array.
+ Default is False.
+
+ Returns
+ -------
+ numpy.ndarray or list of dict or pandas.DataFrame
+ Query results in the requested format:
+ - Single attribute: 1D array of values
+ - Multiple attributes: tuple of 1D arrays
+ - No attributes specified: structured array, DataFrame, or list of dicts
+
+ Examples
+ --------
+ Fetch all data as structured array:
+
+ >>> data = Mouse.fetch()
+
+ Fetch specific attributes:
+
+ >>> ids, dobs = Mouse.fetch("mouse_id", "dob")
+
+ Fetch as list of dicts:
+
+ >>> rows = Mouse.fetch(as_dict=True)
+ >>> for row in rows:
+ ... print(row["mouse_id"])
+
+ Fetch with ordering and limit:
+
+ >>> recent = Mouse.fetch(order_by="dob DESC", limit=10)
+
+ See Also
+ --------
+ fetch1 : Fetch exactly one row.
+ head : Fetch first N rows ordered by key.
+ tail : Fetch last N rows ordered by key.
+ """
+```
+
+### Generator Method
+
+```python
+def make(self, key):
+ """
+ Compute and insert results for one key.
+
+ This method must be implemented by subclasses of Computed or Imported
+ tables. It is called by ``populate()`` for each key in ``key_source``
+ that is not yet in the table.
+
+ The method can be implemented in two ways:
+
+ **Simple mode** (regular method):
+ Fetch, compute, and insert within a single transaction.
+
+ **Tripartite mode** (generator method):
+ Split into ``make_fetch``, ``make_compute``, ``make_insert`` for
+ long-running computations with deferred transactions.
+
+ Parameters
+ ----------
+ key : dict
+ Primary key values identifying the entity to compute.
+
+ Yields
+ ------
+ tuple
+ In tripartite mode, yields fetched data and computed results.
+
+ Raises
+ ------
+ NotImplementedError
+ If neither ``make`` nor the tripartite methods are implemented.
+
+ Examples
+ --------
+ Simple implementation:
+
+ >>> class ProcessedData(dj.Computed):
+ ...     definition = '''
+ ...     -> RawData
+ ...     ---
+ ...     result : float
+ ...     '''
+ ...
+ ...     def make(self, key):
+ ...         raw = (RawData & key).fetch1("data")
+ ...         result = expensive_computation(raw)
+ ...         self.insert1({**key, "result": result})
+
+ See Also
+ --------
+ populate : Execute make for all pending keys.
+ key_source : Query defining keys to populate.
+ """
+```
+
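+In tripartite mode, the three steps run as separate methods. The sketch below is a
+hypothetical illustration; the exact signatures of ``make_fetch``, ``make_compute``,
+and ``make_insert`` are assumptions based on the description above.
+
+```python
+class ProcessedData(dj.Computed):
+    definition = '''
+    -> RawData
+    ---
+    result : float
+    '''
+
+    def make_fetch(self, key):
+        # Fetch inputs inside a brief transaction.
+        return ((RawData & key).fetch1("data"),)
+
+    def make_compute(self, key, raw):
+        # The long-running computation runs outside any transaction.
+        return (expensive_computation(raw),)
+
+    def make_insert(self, key, raw, result):
+        # Insert the results in a final transaction.
+        self.insert1({**key, "result": result})
+```
+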
+---
+
+## Property Docstrings
+
+```python
+@property
+def primary_key(self):
+ """
+ list of str : Names of primary key attributes.
+
+ The primary key uniquely identifies each row in the table.
+ Derived from the table definition.
+
+ Examples
+ --------
+ >>> Mouse.primary_key
+ ['mouse_id']
+ """
+ return self.heading.primary_key
+```
+
+---
+
+## Parameter Types
+
+Use these type annotations in docstrings:
+
+| Python Type | Docstring Format |
+|-------------|------------------|
+| `str` | `str` |
+| `int` | `int` |
+| `float` | `float` |
+| `bool` | `bool` |
+| `None` | `None` |
+| `list` | `list` or `list of str` |
+| `dict` | `dict` or `dict[str, int]` |
+| `tuple` | `tuple` or `tuple of (str, int)` |
+| Optional | `str or None` or `str, optional` |
+| Union | `str or int` |
+| Literal | `{"option1", "option2"}` |
+| Callable | `callable` |
+| Class | `ClassName` |
+| Any | `object` |
+
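+Applied together, these formats read as follows (a hypothetical docstring for illustration):
+
+```python
+def process(name, count, mode="fast", callback=None, options=None):
+    """
+    Process items by name (hypothetical example).
+
+    Parameters
+    ----------
+    name : str
+        Item name.
+    count : int
+        Number of repetitions.
+    mode : {"fast", "thorough"}, optional
+        Processing strategy. Default is "fast".
+    callback : callable, optional
+        Called once per processed item. Default is None.
+    options : dict or None, optional
+        Additional settings. Default is None.
+    """
+```
+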
+---
+
+## Section Order
+
+Sections must appear in this order (include only relevant sections):
+
+1. **Short Summary** (required) - One line, imperative mood
+2. **Deprecation Warning** - If applicable
+3. **Extended Summary** - Additional context
+4. **Parameters** - Input arguments
+5. **Returns** / **Yields** - Output values
+6. **Raises** - Exceptions
+7. **Warns** - Warnings issued
+8. **See Also** - Related functions/classes
+9. **Notes** - Technical details
+10. **References** - Citations
+11. **Examples** (strongly encouraged) - Usage demonstrations
+
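+The Deprecation Warning section, the only one not shown in the templates above, goes
+directly after the summary (hypothetical example):
+
+```python
+def fetch_all(self):
+    """
+    Retrieve all rows from the table.
+
+    .. deprecated:: 2.0
+        Use ``fetch`` instead.
+
+    Returns
+    -------
+    numpy.ndarray
+        Query results.
+    """
+```
+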
+---
+
+## Style Rules
+
+### Do
+
+- Use imperative mood: "Insert rows" not "Inserts rows"
+- Start with a capital letter and end the summary with a period
+- Document all public methods
+- Include at least one example for public APIs
+- Use double backticks for inline code: ``parameter``, ``ClassName``
+- Reference related items in See Also
+
+### Don't
+
+- Don't document private methods extensively (brief is fine)
+- Don't repeat the function signature in the description
+- Don't begin the summary with "This function..." or "This method..."
+- Don't include implementation details in Parameters
+- Don't use first person ("I", "we")
+
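+For example, compare a summary line that breaks these rules with one that follows them
+(hypothetical snippets):
+
+```python
+# Avoid: third person and a signature echo in the summary.
+def insert_rows(table, rows):
+    """insert_rows(table, rows) - Inserts the given rows"""
+
+# Prefer: imperative mood, capitalized, ending with a period.
+def insert_rows(table, rows):
+    """Insert one or more rows into the table."""
+```
+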
+---
+
+## Examples Section Best Practices
+
+```python
+"""
+Examples
+--------
+Basic usage:
+
+>>> table.insert1({"id": 1, "value": 42})
+
+With options:
+
+>>> table.insert(rows, skip_duplicates=True)
+
+Error handling:
+
+>>> try:
+...     table.insert1({"id": 1})  # duplicate
+... except dj.errors.DuplicateError:
+...     print("Already exists")
+Already exists
+"""
+```
+
+---
+
+## Converting from Sphinx Style
+
+Replace Sphinx-style docstrings:
+
+```python
+# Before (Sphinx style)
+def method(self, param1, param2):
+ """
+ Brief description.
+
+ :param param1: Description of param1.
+ :type param1: str
+ :param param2: Description of param2.
+ :type param2: int
+ :returns: Description of return value.
+ :rtype: bool
+ :raises ValueError: When param1 is empty.
+ """
+
+# After (NumPy style)
+def method(self, param1, param2):
+ """
+ Brief description.
+
+ Parameters
+ ----------
+ param1 : str
+ Description of param1.
+ param2 : int
+ Description of param2.
+
+ Returns
+ -------
+ bool
+ Description of return value.
+
+ Raises
+ ------
+ ValueError
+ When param1 is empty.
+ """
+```
+
+---
+
+## Validation
+
+Docstrings are validated by:
+
+1. **mkdocstrings** - Parses for API documentation
+2. **ruff** - Linting (D100-D417 rules when enabled)
+3. **pytest --doctest-modules** - Executes examples
+
+Run locally:
+
+```bash
+# Build docs to check parsing
+mkdocs build --config-file docs/mkdocs.yaml
+
+# Check docstring examples
+pytest --doctest-modules src/datajoint/
+```
+
+---
+
+## References
+
+- [NumPy Docstring Guide](https://numpydoc.readthedocs.io/en/latest/format.html)
+- [mkdocstrings Python Handler](https://mkdocstrings.github.io/python/)
+- [PEP 257 - Docstring Conventions](https://peps.python.org/pep-0257/)
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 000000000..3f8b99424
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,190 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to the Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ Copyright 2014-2026 DataJoint Inc. and contributors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/LICENSE.txt b/LICENSE.txt
deleted file mode 100644
index 90f4edaaa..000000000
--- a/LICENSE.txt
+++ /dev/null
@@ -1,504 +0,0 @@
- GNU LESSER GENERAL PUBLIC LICENSE
- Version 2.1, February 1999
-
- Copyright (C) 1991, 1999 Free Software Foundation, Inc.
- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-(This is the first released version of the Lesser GPL. It also counts
- as the successor of the GNU Library Public License, version 2, hence
- the version number 2.1.)
-
- Preamble
-
- The licenses for most software are designed to take away your
-freedom to share and change it. By contrast, the GNU General Public
-Licenses are intended to guarantee your freedom to share and change
-free software--to make sure the software is free for all its users.
-
- This license, the Lesser General Public License, applies to some
-specially designated software packages--typically libraries--of the
-Free Software Foundation and other authors who decide to use it. You
-can use it too, but we suggest you first think carefully about whether
-this license or the ordinary General Public License is the better
-strategy to use in any particular case, based on the explanations below.
-
- When we speak of free software, we are referring to freedom of use,
-not price. Our General Public Licenses are designed to make sure that
-you have the freedom to distribute copies of free software (and charge
-for this service if you wish); that you receive source code or can get
-it if you want it; that you can change the software and use pieces of
-it in new free programs; and that you are informed that you can do
-these things.
-
- To protect your rights, we need to make restrictions that forbid
-distributors to deny you these rights or to ask you to surrender these
-rights. These restrictions translate to certain responsibilities for
-you if you distribute copies of the library or if you modify it.
-
- For example, if you distribute copies of the library, whether gratis
-or for a fee, you must give the recipients all the rights that we gave
-you. You must make sure that they, too, receive or can get the source
-code. If you link other code with the library, you must provide
-complete object files to the recipients, so that they can relink them
-with the library after making changes to the library and recompiling
-it. And you must show them these terms so they know their rights.
-
- We protect your rights with a two-step method: (1) we copyright the
-library, and (2) we offer you this license, which gives you legal
-permission to copy, distribute and/or modify the library.
-
- To protect each distributor, we want to make it very clear that
-there is no warranty for the free library. Also, if the library is
-modified by someone else and passed on, the recipients should know
-that what they have is not the original version, so that the original
-author's reputation will not be affected by problems that might be
-introduced by others.
-
- Finally, software patents pose a constant threat to the existence of
-any free program. We wish to make sure that a company cannot
-effectively restrict the users of a free program by obtaining a
-restrictive license from a patent holder. Therefore, we insist that
-any patent license obtained for a version of the library must be
-consistent with the full freedom of use specified in this license.
-
- Most GNU software, including some libraries, is covered by the
-ordinary GNU General Public License. This license, the GNU Lesser
-General Public License, applies to certain designated libraries, and
-is quite different from the ordinary General Public License. We use
-this license for certain libraries in order to permit linking those
-libraries into non-free programs.
-
- When a program is linked with a library, whether statically or using
-a shared library, the combination of the two is legally speaking a
-combined work, a derivative of the original library. The ordinary
-General Public License therefore permits such linking only if the
-entire combination fits its criteria of freedom. The Lesser General
-Public License permits more lax criteria for linking other code with
-the library.
-
- We call this license the "Lesser" General Public License because it
-does Less to protect the user's freedom than the ordinary General
-Public License. It also provides other free software developers Less
-of an advantage over competing non-free programs. These disadvantages
-are the reason we use the ordinary General Public License for many
-libraries. However, the Lesser license provides advantages in certain
-special circumstances.
-
- For example, on rare occasions, there may be a special need to
-encourage the widest possible use of a certain library, so that it becomes
-a de-facto standard. To achieve this, non-free programs must be
-allowed to use the library. A more frequent case is that a free
-library does the same job as widely used non-free libraries. In this
-case, there is little to gain by limiting the free library to free
-software only, so we use the Lesser General Public License.
-
- In other cases, permission to use a particular library in non-free
-programs enables a greater number of people to use a large body of
-free software. For example, permission to use the GNU C Library in
-non-free programs enables many more people to use the whole GNU
-operating system, as well as its variant, the GNU/Linux operating
-system.
-
- Although the Lesser General Public License is Less protective of the
-users' freedom, it does ensure that the user of a program that is
-linked with the Library has the freedom and the wherewithal to run
-that program using a modified version of the Library.
-
- The precise terms and conditions for copying, distribution and
-modification follow. Pay close attention to the difference between a
-"work based on the library" and a "work that uses the library". The
-former contains code derived from the library, whereas the latter must
-be combined with the library in order to run.
-
- GNU LESSER GENERAL PUBLIC LICENSE
- TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
- 0. This License Agreement applies to any software library or other
-program which contains a notice placed by the copyright holder or
-other authorized party saying it may be distributed under the terms of
-this Lesser General Public License (also called "this License").
-Each licensee is addressed as "you".
-
- A "library" means a collection of software functions and/or data
-prepared so as to be conveniently linked with application programs
-(which use some of those functions and data) to form executables.
-
- The "Library", below, refers to any such software library or work
-which has been distributed under these terms. A "work based on the
-Library" means either the Library or any derivative work under
-copyright law: that is to say, a work containing the Library or a
-portion of it, either verbatim or with modifications and/or translated
-straightforwardly into another language. (Hereinafter, translation is
-included without limitation in the term "modification".)
-
- "Source code" for a work means the preferred form of the work for
-making modifications to it. For a library, complete source code means
-all the source code for all modules it contains, plus any associated
-interface definition files, plus the scripts used to control compilation
-and installation of the library.
-
- Activities other than copying, distribution and modification are not
-covered by this License; they are outside its scope. The act of
-running a program using the Library is not restricted, and output from
-such a program is covered only if its contents constitute a work based
-on the Library (independent of the use of the Library in a tool for
-writing it). Whether that is true depends on what the Library does
-and what the program that uses the Library does.
-
- 1. You may copy and distribute verbatim copies of the Library's
-complete source code as you receive it, in any medium, provided that
-you conspicuously and appropriately publish on each copy an
-appropriate copyright notice and disclaimer of warranty; keep intact
-all the notices that refer to this License and to the absence of any
-warranty; and distribute a copy of this License along with the
-Library.
-
- You may charge a fee for the physical act of transferring a copy,
-and you may at your option offer warranty protection in exchange for a
-fee.
-
- 2. You may modify your copy or copies of the Library or any portion
-of it, thus forming a work based on the Library, and copy and
-distribute such modifications or work under the terms of Section 1
-above, provided that you also meet all of these conditions:
-
- a) The modified work must itself be a software library.
-
- b) You must cause the files modified to carry prominent notices
- stating that you changed the files and the date of any change.
-
- c) You must cause the whole of the work to be licensed at no
- charge to all third parties under the terms of this License.
-
- d) If a facility in the modified Library refers to a function or a
- table of data to be supplied by an application program that uses
- the facility, other than as an argument passed when the facility
- is invoked, then you must make a good faith effort to ensure that,
- in the event an application does not supply such function or
- table, the facility still operates, and performs whatever part of
- its purpose remains meaningful.
-
- (For example, a function in a library to compute square roots has
- a purpose that is entirely well-defined independent of the
- application. Therefore, Subsection 2d requires that any
- application-supplied function or table used by this function must
- be optional: if the application does not supply it, the square
- root function must still compute square roots.)
-
-These requirements apply to the modified work as a whole. If
-identifiable sections of that work are not derived from the Library,
-and can be reasonably considered independent and separate works in
-themselves, then this License, and its terms, do not apply to those
-sections when you distribute them as separate works. But when you
-distribute the same sections as part of a whole which is a work based
-on the Library, the distribution of the whole must be on the terms of
-this License, whose permissions for other licensees extend to the
-entire whole, and thus to each and every part regardless of who wrote
-it.
-
-Thus, it is not the intent of this section to claim rights or contest
-your rights to work written entirely by you; rather, the intent is to
-exercise the right to control the distribution of derivative or
-collective works based on the Library.
-
-In addition, mere aggregation of another work not based on the Library
-with the Library (or with a work based on the Library) on a volume of
-a storage or distribution medium does not bring the other work under
-the scope of this License.
-
- 3. You may opt to apply the terms of the ordinary GNU General Public
-License instead of this License to a given copy of the Library. To do
-this, you must alter all the notices that refer to this License, so
-that they refer to the ordinary GNU General Public License, version 2,
-instead of to this License. (If a newer version than version 2 of the
-ordinary GNU General Public License has appeared, then you can specify
-that version instead if you wish.) Do not make any other change in
-these notices.
-
- Once this change is made in a given copy, it is irreversible for
-that copy, so the ordinary GNU General Public License applies to all
-subsequent copies and derivative works made from that copy.
-
- This option is useful when you wish to copy part of the code of
-the Library into a program that is not a library.
-
- 4. You may copy and distribute the Library (or a portion or
-derivative of it, under Section 2) in object code or executable form
-under the terms of Sections 1 and 2 above provided that you accompany
-it with the complete corresponding machine-readable source code, which
-must be distributed under the terms of Sections 1 and 2 above on a
-medium customarily used for software interchange.
-
- If distribution of object code is made by offering access to copy
-from a designated place, then offering equivalent access to copy the
-source code from the same place satisfies the requirement to
-distribute the source code, even though third parties are not
-compelled to copy the source along with the object code.
-
- 5. A program that contains no derivative of any portion of the
-Library, but is designed to work with the Library by being compiled or
-linked with it, is called a "work that uses the Library". Such a
-work, in isolation, is not a derivative work of the Library, and
-therefore falls outside the scope of this License.
-
- However, linking a "work that uses the Library" with the Library
-creates an executable that is a derivative of the Library (because it
-contains portions of the Library), rather than a "work that uses the
-library". The executable is therefore covered by this License.
-Section 6 states terms for distribution of such executables.
-
- When a "work that uses the Library" uses material from a header file
-that is part of the Library, the object code for the work may be a
-derivative work of the Library even though the source code is not.
-Whether this is true is especially significant if the work can be
-linked without the Library, or if the work is itself a library. The
-threshold for this to be true is not precisely defined by law.
-
- If such an object file uses only numerical parameters, data
-structure layouts and accessors, and small macros and small inline
-functions (ten lines or less in length), then the use of the object
-file is unrestricted, regardless of whether it is legally a derivative
-work. (Executables containing this object code plus portions of the
-Library will still fall under Section 6.)
-
- Otherwise, if the work is a derivative of the Library, you may
-distribute the object code for the work under the terms of Section 6.
-Any executables containing that work also fall under Section 6,
-whether or not they are linked directly with the Library itself.
-
- 6. As an exception to the Sections above, you may also combine or
-link a "work that uses the Library" with the Library to produce a
-work containing portions of the Library, and distribute that work
-under terms of your choice, provided that the terms permit
-modification of the work for the customer's own use and reverse
-engineering for debugging such modifications.
-
- You must give prominent notice with each copy of the work that the
-Library is used in it and that the Library and its use are covered by
-this License. You must supply a copy of this License. If the work
-during execution displays copyright notices, you must include the
-copyright notice for the Library among them, as well as a reference
-directing the user to the copy of this License. Also, you must do one
-of these things:
-
- a) Accompany the work with the complete corresponding
- machine-readable source code for the Library including whatever
- changes were used in the work (which must be distributed under
- Sections 1 and 2 above); and, if the work is an executable linked
- with the Library, with the complete machine-readable "work that
- uses the Library", as object code and/or source code, so that the
- user can modify the Library and then relink to produce a modified
- executable containing the modified Library. (It is understood
- that the user who changes the contents of definitions files in the
- Library will not necessarily be able to recompile the application
- to use the modified definitions.)
-
- b) Use a suitable shared library mechanism for linking with the
- Library. A suitable mechanism is one that (1) uses at run time a
- copy of the library already present on the user's computer system,
- rather than copying library functions into the executable, and (2)
- will operate properly with a modified version of the library, if
- the user installs one, as long as the modified version is
- interface-compatible with the version that the work was made with.
-
- c) Accompany the work with a written offer, valid for at
- least three years, to give the same user the materials
- specified in Subsection 6a, above, for a charge no more
- than the cost of performing this distribution.
-
- d) If distribution of the work is made by offering access to copy
- from a designated place, offer equivalent access to copy the above
- specified materials from the same place.
-
- e) Verify that the user has already received a copy of these
- materials or that you have already sent this user a copy.
-
- For an executable, the required form of the "work that uses the
-Library" must include any data and utility programs needed for
-reproducing the executable from it. However, as a special exception,
-the materials to be distributed need not include anything that is
-normally distributed (in either source or binary form) with the major
-components (compiler, kernel, and so on) of the operating system on
-which the executable runs, unless that component itself accompanies
-the executable.
-
- It may happen that this requirement contradicts the license
-restrictions of other proprietary libraries that do not normally
-accompany the operating system. Such a contradiction means you cannot
-use both them and the Library together in an executable that you
-distribute.
-
- 7. You may place library facilities that are a work based on the
-Library side-by-side in a single library together with other library
-facilities not covered by this License, and distribute such a combined
-library, provided that the separate distribution of the work based on
-the Library and of the other library facilities is otherwise
-permitted, and provided that you do these two things:
-
- a) Accompany the combined library with a copy of the same work
- based on the Library, uncombined with any other library
- facilities. This must be distributed under the terms of the
- Sections above.
-
- b) Give prominent notice with the combined library of the fact
- that part of it is a work based on the Library, and explaining
- where to find the accompanying uncombined form of the same work.
-
- 8. You may not copy, modify, sublicense, link with, or distribute
-the Library except as expressly provided under this License. Any
-attempt otherwise to copy, modify, sublicense, link with, or
-distribute the Library is void, and will automatically terminate your
-rights under this License. However, parties who have received copies,
-or rights, from you under this License will not have their licenses
-terminated so long as such parties remain in full compliance.
-
- 9. You are not required to accept this License, since you have not
-signed it. However, nothing else grants you permission to modify or
-distribute the Library or its derivative works. These actions are
-prohibited by law if you do not accept this License. Therefore, by
-modifying or distributing the Library (or any work based on the
-Library), you indicate your acceptance of this License to do so, and
-all its terms and conditions for copying, distributing or modifying
-the Library or works based on it.
-
- 10. Each time you redistribute the Library (or any work based on the
-Library), the recipient automatically receives a license from the
-original licensor to copy, distribute, link with or modify the Library
-subject to these terms and conditions. You may not impose any further
-restrictions on the recipients' exercise of the rights granted herein.
-You are not responsible for enforcing compliance by third parties with
-this License.
-
- 11. If, as a consequence of a court judgment or allegation of patent
-infringement or for any other reason (not limited to patent issues),
-conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot
-distribute so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you
-may not distribute the Library at all. For example, if a patent
-license would not permit royalty-free redistribution of the Library by
-all those who receive copies directly or indirectly through you, then
-the only way you could satisfy both it and this License would be to
-refrain entirely from distribution of the Library.
-
-If any portion of this section is held invalid or unenforceable under any
-particular circumstance, the balance of the section is intended to apply,
-and the section as a whole is intended to apply in other circumstances.
-
-It is not the purpose of this section to induce you to infringe any
-patents or other property right claims or to contest validity of any
-such claims; this section has the sole purpose of protecting the
-integrity of the free software distribution system which is
-implemented by public license practices. Many people have made
-generous contributions to the wide range of software distributed
-through that system in reliance on consistent application of that
-system; it is up to the author/donor to decide if he or she is willing
-to distribute software through any other system and a licensee cannot
-impose that choice.
-
-This section is intended to make thoroughly clear what is believed to
-be a consequence of the rest of this License.
-
- 12. If the distribution and/or use of the Library is restricted in
-certain countries either by patents or by copyrighted interfaces, the
-original copyright holder who places the Library under this License may add
-an explicit geographical distribution limitation excluding those countries,
-so that distribution is permitted only in or among countries not thus
-excluded. In such case, this License incorporates the limitation as if
-written in the body of this License.
-
- 13. The Free Software Foundation may publish revised and/or new
-versions of the Lesser General Public License from time to time.
-Such new versions will be similar in spirit to the present version,
-but may differ in detail to address new problems or concerns.
-
-Each version is given a distinguishing version number. If the Library
-specifies a version number of this License which applies to it and
-"any later version", you have the option of following the terms and
-conditions either of that version or of any later version published by
-the Free Software Foundation. If the Library does not specify a
-license version number, you may choose any version ever published by
-the Free Software Foundation.
-
- 14. If you wish to incorporate parts of the Library into other free
-programs whose distribution conditions are incompatible with these,
-write to the author to ask for permission. For software which is
-copyrighted by the Free Software Foundation, write to the Free
-Software Foundation; we sometimes make exceptions for this. Our
-decision will be guided by the two goals of preserving the free status
-of all derivatives of our free software and of promoting the sharing
-and reuse of software generally.
-
- NO WARRANTY
-
- 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
-WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
-EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
-OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
-KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
-LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
-THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
- 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
-WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
-AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
-FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
-CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
-LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
-RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
-FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
-SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGES.
-
- END OF TERMS AND CONDITIONS
-
- How to Apply These Terms to Your New Libraries
-
- If you develop a new library, and you want it to be of the greatest
-possible use to the public, we recommend making it free software that
-everyone can redistribute and change. You can do so by permitting
-redistribution under these terms (or, alternatively, under the terms of the
-ordinary General Public License).
-
- To apply these terms, attach the following notices to the library. It is
-safest to attach them to the start of each source file to most effectively
-convey the exclusion of warranty; and each file should have at least the
-"copyright" line and a pointer to where the full notice is found.
-
- {description}
- Copyright (C) {year} {fullname}
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
- USA
-
-Also add information on how to contact you by electronic and paper mail.
-
-You should also get your employer (if you work as a programmer) or your
-school, if any, to sign a "copyright disclaimer" for the library, if
-necessary. Here is a sample; alter the names:
-
- Yoyodyne, Inc., hereby disclaims all copyright interest in the
- library `Frob' (a library for tweaking knobs) written by James Random
- Hacker.
-
- {signature of Ty Coon}, 1 April 1990
- Ty Coon, President of Vice
-
-That's all there is to it!
diff --git a/README.md b/README.md
index f4a6f8352..85c3269e7 100644
--- a/README.md
+++ b/README.md
@@ -1,116 +1,69 @@
-# Welcome to DataJoint for Python!
+# DataJoint for Python
+
+DataJoint is a framework for scientific data pipelines that introduces the **Relational Workflow Model**—a paradigm where your database schema is an executable specification of your workflow.
+
+Traditional databases store data but don't understand how it was computed. DataJoint extends relational databases with native workflow semantics:
+
+- **Tables represent workflow steps** — Each table is a step in your pipeline where entities are created
+- **Foreign keys encode dependencies** — Parent tables must be populated before child tables
+- **Computations are declarative** — Define *what* to compute; DataJoint determines *when* and tracks *what's done*
+- **Results are immutable** — Computed results preserve full provenance and reproducibility
+
+## Object-Augmented Schemas
+
+Scientific data includes both structured metadata and large data objects (time series, images, movies, neural recordings, gene sequences), which traditionally live in separate systems. DataJoint unifies them with **Object-Augmented Schemas (OAS)**—an architecture where relational tables and object storage are managed as one system with identical guarantees for integrity, transactions, and lifecycle.
+
+## DataJoint 2.0
+
+**DataJoint 2.0** solidifies these core concepts with a modernized API, improved type system, and enhanced object storage integration. Existing users can refer to the [Migration Guide](https://docs.datajoint.com/migration/) for upgrading from earlier versions.
+
+**Documentation:** https://docs.datajoint.com
-DataJoint for Python is a framework for scientific workflow management based on
-relational principles. DataJoint is built on the foundation of the relational data
-model and prescribes a consistent method for organizing, populating, computing, and
-querying data.
-
-DataJoint was initially developed in 2009 by Dimitri Yatsenko in Andreas Tolias' Lab at
-Baylor College of Medicine for the distributed processing and management of large
-volumes of data streaming from regular experiments. Starting in 2011, DataJoint has
-been available as an open-source project adopted by other labs and improved through
-contributions from several developers.
-Presently, the primary developer of DataJoint open-source software is the company
-DataJoint ().
-
## Data Pipeline Example

diff --git a/docs/mkdocs.yaml b/docs/mkdocs.yaml
index 03c10f69b..db2ea16f9 100644
--- a/docs/mkdocs.yaml
+++ b/docs/mkdocs.yaml
@@ -1,82 +1,16 @@
# ---------------------- PROJECT SPECIFIC ---------------------------
-site_name: DataJoint Documentation
+site_name: DataJoint Python - Developer Documentation
+site_description: Developer documentation for DataJoint Python contributors
repo_url: https://github.com/datajoint/datajoint-python
repo_name: datajoint/datajoint-python
nav:
- - DataJoint Python: index.md
- - Quick Start Guide: quick-start.md
- - Concepts:
- - Principles: concepts/principles.md
- - Data Model: concepts/data-model.md
- - Data Pipelines: concepts/data-pipelines.md
- - Teamwork: concepts/teamwork.md
- - Terminology: concepts/terminology.md
- - System Administration:
- - Database Administration: sysadmin/database-admin.md
- - Bulk Storage Systems: sysadmin/bulk-storage.md
- - External Store: sysadmin/external-store.md
- - Client Configuration:
- - Install: client/install.md
- - Credentials: client/credentials.md
- - Settings: client/settings.md
- - File Stores: client/stores.md
- - Schema Design:
- - Schema Creation: design/schema.md
- - Table Definition:
- - Table Tiers: design/tables/tiers.md
- - Declaration Syntax: design/tables/declare.md
- - Primary Key: design/tables/primary.md
- - Attributes: design/tables/attributes.md
- - Lookup Tables: design/tables/lookup.md
- - Manual Tables: design/tables/manual.md
- - Blobs: design/tables/blobs.md
- - Attachments: design/tables/attach.md
- - Filepaths: design/tables/filepath.md
- - Custom Codecs: design/tables/codecs.md
- - Dependencies: design/tables/dependencies.md
- - Indexes: design/tables/indexes.md
- - Master-Part Relationships: design/tables/master-part.md
- - Schema Diagrams: design/diagrams.md
- - Entity Normalization: design/normalization.md
- - Data Integrity: design/integrity.md
- - Schema Recall: design/recall.md
- - Schema Drop: design/drop.md
- - Schema Modification: design/alter.md
- - Data Manipulations:
- - manipulation/index.md
- - Insert: manipulation/insert.md
- - Delete: manipulation/delete.md
- - Update: manipulation/update.md
- - Transactions: manipulation/transactions.md
- - Data Queries:
- - Principles: query/principles.md
- - Example Schema: query/example-schema.md
- - Fetch: query/fetch.md
- - Iteration: query/iteration.md
- - Operators: query/operators.md
- - Restrict: query/restrict.md
- - Projection: query/project.md
- - Join: query/join.md
- - Aggregation: query/aggregation.md
- - Union: query/union.md
- - Universal Sets: query/universals.md
- - Query Caching: query/query-caching.md
- - Computations:
- - Make Method: compute/make.md
- - Populate: compute/populate.md
- - Key Source: compute/key-source.md
- - Distributed Computing: compute/distributed.md
- - Publish Data: publish-data.md
- - Internals:
- - SQL Transpilation: internal/transpilation.md
- - Tutorials:
- - JSON Datatype: tutorials/json.ipynb
- - FAQ: faq.md
- - Developer Guide: develop.md
- - Citation: citation.md
- - Changelog: changelog.md
- - API: api/ # defer to gen-files + literate-nav
+ - Home: index.md
+ - Contributing: develop.md
+ - Architecture:
+ - architecture/index.md
+ - SQL Transpilation: architecture/transpilation.md
+ - API Reference: api/ # defer to gen-files + literate-nav
# ---------------------------- STANDARD -----------------------------
@@ -93,7 +27,7 @@ theme:
favicon: assets/images/company-logo-blue.png
features:
- toc.integrate
- - content.code.annotate # Add codeblock annotations
+ - content.code.annotate
palette:
- media: "(prefers-color-scheme: light)"
scheme: datajoint
@@ -113,26 +47,18 @@ plugins:
handlers:
python:
paths:
- - "."
- - /main/
+ - "../src"
options:
- filters:
- - "!^_"
- docstring_style: sphinx # Replaces google default pending docstring updates
+ docstring_style: numpy
members_order: source
group_by_category: false
line_length: 88
+ show_source: false
- gen-files:
scripts:
- ./src/api/make_pages.py
- literate-nav:
nav_file: navigation.md
- - exclude-search:
- exclude:
- - "*/navigation.md"
- - "*/archive/*md"
- - mkdocs-jupyter:
- include: ["*.ipynb"]
- section-index
markdown_extensions:
- attr_list
@@ -154,41 +80,23 @@ markdown_extensions:
- name: mermaid
class: mermaid
format: !!python/name:pymdownx.superfences.fence_code_format
- - pymdownx.magiclink # Displays bare URLs as links
- - pymdownx.tasklist: # Renders check boxes in tasks lists
+ - pymdownx.magiclink
+ - pymdownx.tasklist:
custom_checkbox: true
- md_in_html
extra:
- generator: false # Disable watermark
+ generator: false
version:
provider: mike
social:
- icon: main/company-logo
link: https://www.datajoint.com
name: DataJoint
- - icon: fontawesome/brands/slack
- link: https://datajoint.slack.com
- name: Slack
- - icon: fontawesome/brands/linkedin
- link: https://www.linkedin.com/company/datajoint
- name: LinkedIn
- - icon: fontawesome/brands/twitter
- link: https://twitter.com/datajoint
- name: Twitter
- icon: fontawesome/brands/github
link: https://github.com/datajoint
name: GitHub
- - icon: fontawesome/brands/docker
- link: https://hub.docker.com/u/datajoint
- name: DockerHub
- - icon: fontawesome/brands/python
- link: https://pypi.org/user/datajointbot
- name: PyPI
- - icon: fontawesome/brands/stack-overflow
- link: https://stackoverflow.com/questions/tagged/datajoint
- name: StackOverflow
- - icon: fontawesome/brands/youtube
- link: https://www.youtube.com/channel/UCdeCuFOTCXlVMRzh6Wk-lGg
- name: YouTube
+ - icon: fontawesome/brands/slack
+ link: https://datajoint.slack.com
+ name: Slack
extra_css:
- assets/stylesheets/extra.css
diff --git a/docs/src/architecture/index.md b/docs/src/architecture/index.md
new file mode 100644
index 000000000..953fd7962
--- /dev/null
+++ b/docs/src/architecture/index.md
@@ -0,0 +1,34 @@
+# Architecture
+
+Internal design documentation for DataJoint developers.
+
+## Query System
+
+- [SQL Transpilation](transpilation.md) — How DataJoint translates query expressions to SQL
+
+## Design Principles
+
+DataJoint's architecture follows several key principles (the first two are illustrated in the sketch below):
+
+1. **Immutable Query Expressions** — Operators never modify their operands; each returns a new expression object
+2. **Lazy Evaluation** — Queries are not executed until data is fetched
+3. **Query Optimization** — Unnecessary attributes are projected out before execution
+4. **Semantic Matching** — Joins use lineage-based attribute matching
+
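+A minimal sketch of the first two principles (the `Mouse` table is hypothetical):
+
+```python
+query = Mouse & 'sex = "M"'   # restriction builds a new expression; Mouse is unchanged
+rows = query.fetch()          # SQL is composed and executed only at fetch time
+```
+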
+## Module Overview
+
+| Module | Purpose |
+|--------|---------|
+| `expression.py` | QueryExpression base class and operators |
+| `table.py` | Table class with data manipulation |
+| `fetch.py` | Data retrieval implementation |
+| `declare.py` | Table definition parsing |
+| `heading.py` | Attribute and heading management |
+| `blob.py` | Blob serialization |
+| `codecs.py` | Type codec system |
+| `connection.py` | Database connection management |
+| `schemas.py` | Schema binding and activation |
+
+## Contributing
+
+See the [Contributing Guide](../develop.md) for development setup instructions.
diff --git a/docs/src/internal/transpilation.md b/docs/src/architecture/transpilation.md
similarity index 100%
rename from docs/src/internal/transpilation.md
rename to docs/src/architecture/transpilation.md
diff --git a/docs/src/citation.md b/docs/src/archive/citation.md
similarity index 100%
rename from docs/src/citation.md
rename to docs/src/archive/citation.md
diff --git a/docs/src/client/credentials.md b/docs/src/archive/client/credentials.md
similarity index 100%
rename from docs/src/client/credentials.md
rename to docs/src/archive/client/credentials.md
diff --git a/docs/src/client/install.md b/docs/src/archive/client/install.md
similarity index 100%
rename from docs/src/client/install.md
rename to docs/src/archive/client/install.md
diff --git a/docs/src/client/settings.md b/docs/src/archive/client/settings.md
similarity index 100%
rename from docs/src/client/settings.md
rename to docs/src/archive/client/settings.md
diff --git a/docs/src/compute/autopopulate2.0-spec.md b/docs/src/archive/compute/autopopulate2.0-spec.md
similarity index 100%
rename from docs/src/compute/autopopulate2.0-spec.md
rename to docs/src/archive/compute/autopopulate2.0-spec.md
diff --git a/docs/src/compute/distributed.md b/docs/src/archive/compute/distributed.md
similarity index 100%
rename from docs/src/compute/distributed.md
rename to docs/src/archive/compute/distributed.md
diff --git a/docs/src/compute/key-source.md b/docs/src/archive/compute/key-source.md
similarity index 100%
rename from docs/src/compute/key-source.md
rename to docs/src/archive/compute/key-source.md
diff --git a/docs/src/compute/make.md b/docs/src/archive/compute/make.md
similarity index 100%
rename from docs/src/compute/make.md
rename to docs/src/archive/compute/make.md
diff --git a/docs/src/compute/populate.md b/docs/src/archive/compute/populate.md
similarity index 100%
rename from docs/src/compute/populate.md
rename to docs/src/archive/compute/populate.md
diff --git a/docs/src/concepts/data-model.md b/docs/src/archive/concepts/data-model.md
similarity index 100%
rename from docs/src/concepts/data-model.md
rename to docs/src/archive/concepts/data-model.md
diff --git a/docs/src/concepts/data-pipelines.md b/docs/src/archive/concepts/data-pipelines.md
similarity index 100%
rename from docs/src/concepts/data-pipelines.md
rename to docs/src/archive/concepts/data-pipelines.md
diff --git a/docs/src/concepts/principles.md b/docs/src/archive/concepts/principles.md
similarity index 100%
rename from docs/src/concepts/principles.md
rename to docs/src/archive/concepts/principles.md
diff --git a/docs/src/concepts/teamwork.md b/docs/src/archive/concepts/teamwork.md
similarity index 100%
rename from docs/src/concepts/teamwork.md
rename to docs/src/archive/concepts/teamwork.md
diff --git a/docs/src/concepts/terminology.md b/docs/src/archive/concepts/terminology.md
similarity index 100%
rename from docs/src/concepts/terminology.md
rename to docs/src/archive/concepts/terminology.md
diff --git a/docs/src/design/alter.md b/docs/src/archive/design/alter.md
similarity index 100%
rename from docs/src/design/alter.md
rename to docs/src/archive/design/alter.md
diff --git a/docs/src/design/diagrams.md b/docs/src/archive/design/diagrams.md
similarity index 100%
rename from docs/src/design/diagrams.md
rename to docs/src/archive/design/diagrams.md
diff --git a/docs/src/design/drop.md b/docs/src/archive/design/drop.md
similarity index 100%
rename from docs/src/design/drop.md
rename to docs/src/archive/design/drop.md
diff --git a/docs/src/design/fetch-api-2.0-spec.md b/docs/src/archive/design/fetch-api-2.0-spec.md
similarity index 100%
rename from docs/src/design/fetch-api-2.0-spec.md
rename to docs/src/archive/design/fetch-api-2.0-spec.md
diff --git a/docs/src/design/hidden-job-metadata-spec.md b/docs/src/archive/design/hidden-job-metadata-spec.md
similarity index 100%
rename from docs/src/design/hidden-job-metadata-spec.md
rename to docs/src/archive/design/hidden-job-metadata-spec.md
diff --git a/docs/src/design/integrity.md b/docs/src/archive/design/integrity.md
similarity index 100%
rename from docs/src/design/integrity.md
rename to docs/src/archive/design/integrity.md
diff --git a/docs/src/design/normalization.md b/docs/src/archive/design/normalization.md
similarity index 100%
rename from docs/src/design/normalization.md
rename to docs/src/archive/design/normalization.md
diff --git a/docs/src/design/pk-rules-spec.md b/docs/src/archive/design/pk-rules-spec.md
similarity index 100%
rename from docs/src/design/pk-rules-spec.md
rename to docs/src/archive/design/pk-rules-spec.md
diff --git a/docs/src/design/recall.md b/docs/src/archive/design/recall.md
similarity index 100%
rename from docs/src/design/recall.md
rename to docs/src/archive/design/recall.md
diff --git a/docs/src/design/schema.md b/docs/src/archive/design/schema.md
similarity index 100%
rename from docs/src/design/schema.md
rename to docs/src/archive/design/schema.md
diff --git a/docs/src/design/semantic-matching-spec.md b/docs/src/archive/design/semantic-matching-spec.md
similarity index 100%
rename from docs/src/design/semantic-matching-spec.md
rename to docs/src/archive/design/semantic-matching-spec.md
diff --git a/docs/src/design/tables/attach.md b/docs/src/archive/design/tables/attach.md
similarity index 100%
rename from docs/src/design/tables/attach.md
rename to docs/src/archive/design/tables/attach.md
diff --git a/docs/src/design/tables/attributes.md b/docs/src/archive/design/tables/attributes.md
similarity index 100%
rename from docs/src/design/tables/attributes.md
rename to docs/src/archive/design/tables/attributes.md
diff --git a/docs/src/design/tables/blobs.md b/docs/src/archive/design/tables/blobs.md
similarity index 100%
rename from docs/src/design/tables/blobs.md
rename to docs/src/archive/design/tables/blobs.md
diff --git a/docs/src/design/tables/codec-spec.md b/docs/src/archive/design/tables/codec-spec.md
similarity index 100%
rename from docs/src/design/tables/codec-spec.md
rename to docs/src/archive/design/tables/codec-spec.md
diff --git a/docs/src/design/tables/codecs.md b/docs/src/archive/design/tables/codecs.md
similarity index 100%
rename from docs/src/design/tables/codecs.md
rename to docs/src/archive/design/tables/codecs.md
diff --git a/docs/src/design/tables/declare.md b/docs/src/archive/design/tables/declare.md
similarity index 100%
rename from docs/src/design/tables/declare.md
rename to docs/src/archive/design/tables/declare.md
diff --git a/docs/src/design/tables/dependencies.md b/docs/src/archive/design/tables/dependencies.md
similarity index 100%
rename from docs/src/design/tables/dependencies.md
rename to docs/src/archive/design/tables/dependencies.md
diff --git a/docs/src/design/tables/filepath.md b/docs/src/archive/design/tables/filepath.md
similarity index 100%
rename from docs/src/design/tables/filepath.md
rename to docs/src/archive/design/tables/filepath.md
diff --git a/docs/src/design/tables/indexes.md b/docs/src/archive/design/tables/indexes.md
similarity index 100%
rename from docs/src/design/tables/indexes.md
rename to docs/src/archive/design/tables/indexes.md
diff --git a/docs/src/design/tables/lookup.md b/docs/src/archive/design/tables/lookup.md
similarity index 100%
rename from docs/src/design/tables/lookup.md
rename to docs/src/archive/design/tables/lookup.md
diff --git a/docs/src/design/tables/manual.md b/docs/src/archive/design/tables/manual.md
similarity index 100%
rename from docs/src/design/tables/manual.md
rename to docs/src/archive/design/tables/manual.md
diff --git a/docs/src/design/tables/master-part.md b/docs/src/archive/design/tables/master-part.md
similarity index 100%
rename from docs/src/design/tables/master-part.md
rename to docs/src/archive/design/tables/master-part.md
diff --git a/docs/src/design/tables/object.md b/docs/src/archive/design/tables/object.md
similarity index 100%
rename from docs/src/design/tables/object.md
rename to docs/src/archive/design/tables/object.md
diff --git a/docs/src/design/tables/primary.md b/docs/src/archive/design/tables/primary.md
similarity index 100%
rename from docs/src/design/tables/primary.md
rename to docs/src/archive/design/tables/primary.md
diff --git a/docs/src/design/tables/storage-types-spec.md b/docs/src/archive/design/tables/storage-types-spec.md
similarity index 100%
rename from docs/src/design/tables/storage-types-spec.md
rename to docs/src/archive/design/tables/storage-types-spec.md
diff --git a/docs/src/design/tables/tiers.md b/docs/src/archive/design/tables/tiers.md
similarity index 100%
rename from docs/src/design/tables/tiers.md
rename to docs/src/archive/design/tables/tiers.md
diff --git a/docs/src/faq.md b/docs/src/archive/faq.md
similarity index 100%
rename from docs/src/faq.md
rename to docs/src/archive/faq.md
diff --git a/docs/src/images/StudentTable.png b/docs/src/archive/images/StudentTable.png
similarity index 100%
rename from docs/src/images/StudentTable.png
rename to docs/src/archive/images/StudentTable.png
diff --git a/docs/src/images/added-example-ERD.svg b/docs/src/archive/images/added-example-ERD.svg
similarity index 100%
rename from docs/src/images/added-example-ERD.svg
rename to docs/src/archive/images/added-example-ERD.svg
diff --git a/docs/src/images/data-engineering.png b/docs/src/archive/images/data-engineering.png
similarity index 100%
rename from docs/src/images/data-engineering.png
rename to docs/src/archive/images/data-engineering.png
diff --git a/docs/src/images/data-science-after.png b/docs/src/archive/images/data-science-after.png
similarity index 100%
rename from docs/src/images/data-science-after.png
rename to docs/src/archive/images/data-science-after.png
diff --git a/docs/src/images/data-science-before.png b/docs/src/archive/images/data-science-before.png
similarity index 100%
rename from docs/src/images/data-science-before.png
rename to docs/src/archive/images/data-science-before.png
diff --git a/docs/src/images/diff-example1.png b/docs/src/archive/images/diff-example1.png
similarity index 100%
rename from docs/src/images/diff-example1.png
rename to docs/src/archive/images/diff-example1.png
diff --git a/docs/src/images/diff-example2.png b/docs/src/archive/images/diff-example2.png
similarity index 100%
rename from docs/src/images/diff-example2.png
rename to docs/src/archive/images/diff-example2.png
diff --git a/docs/src/images/diff-example3.png b/docs/src/archive/images/diff-example3.png
similarity index 100%
rename from docs/src/images/diff-example3.png
rename to docs/src/archive/images/diff-example3.png
diff --git a/docs/src/images/dimitri-ERD.svg b/docs/src/archive/images/dimitri-ERD.svg
similarity index 100%
rename from docs/src/images/dimitri-ERD.svg
rename to docs/src/archive/images/dimitri-ERD.svg
diff --git a/docs/src/images/doc_1-1.png b/docs/src/archive/images/doc_1-1.png
similarity index 100%
rename from docs/src/images/doc_1-1.png
rename to docs/src/archive/images/doc_1-1.png
diff --git a/docs/src/images/doc_1-many.png b/docs/src/archive/images/doc_1-many.png
similarity index 100%
rename from docs/src/images/doc_1-many.png
rename to docs/src/archive/images/doc_1-many.png
diff --git a/docs/src/images/doc_many-1.png b/docs/src/archive/images/doc_many-1.png
similarity index 100%
rename from docs/src/images/doc_many-1.png
rename to docs/src/archive/images/doc_many-1.png
diff --git a/docs/src/images/doc_many-many.png b/docs/src/archive/images/doc_many-many.png
similarity index 100%
rename from docs/src/images/doc_many-many.png
rename to docs/src/archive/images/doc_many-many.png
diff --git a/docs/src/images/how-it-works.png b/docs/src/archive/images/how-it-works.png
similarity index 100%
rename from docs/src/images/how-it-works.png
rename to docs/src/archive/images/how-it-works.png
diff --git a/docs/src/images/install-cmd-prompt.png b/docs/src/archive/images/install-cmd-prompt.png
similarity index 100%
rename from docs/src/images/install-cmd-prompt.png
rename to docs/src/archive/images/install-cmd-prompt.png
diff --git a/docs/src/images/install-datajoint-1.png b/docs/src/archive/images/install-datajoint-1.png
similarity index 100%
rename from docs/src/images/install-datajoint-1.png
rename to docs/src/archive/images/install-datajoint-1.png
diff --git a/docs/src/images/install-datajoint-2.png b/docs/src/archive/images/install-datajoint-2.png
similarity index 100%
rename from docs/src/images/install-datajoint-2.png
rename to docs/src/archive/images/install-datajoint-2.png
diff --git a/docs/src/images/install-git-1.png b/docs/src/archive/images/install-git-1.png
similarity index 100%
rename from docs/src/images/install-git-1.png
rename to docs/src/archive/images/install-git-1.png
diff --git a/docs/src/images/install-graphviz-1.png b/docs/src/archive/images/install-graphviz-1.png
similarity index 100%
rename from docs/src/images/install-graphviz-1.png
rename to docs/src/archive/images/install-graphviz-1.png
diff --git a/docs/src/images/install-graphviz-2a.png b/docs/src/archive/images/install-graphviz-2a.png
similarity index 100%
rename from docs/src/images/install-graphviz-2a.png
rename to docs/src/archive/images/install-graphviz-2a.png
diff --git a/docs/src/images/install-graphviz-2b.png b/docs/src/archive/images/install-graphviz-2b.png
similarity index 100%
rename from docs/src/images/install-graphviz-2b.png
rename to docs/src/archive/images/install-graphviz-2b.png
diff --git a/docs/src/images/install-jupyter-1.png b/docs/src/archive/images/install-jupyter-1.png
similarity index 100%
rename from docs/src/images/install-jupyter-1.png
rename to docs/src/archive/images/install-jupyter-1.png
diff --git a/docs/src/images/install-jupyter-2.png b/docs/src/archive/images/install-jupyter-2.png
similarity index 100%
rename from docs/src/images/install-jupyter-2.png
rename to docs/src/archive/images/install-jupyter-2.png
diff --git a/docs/src/images/install-matplotlib.png b/docs/src/archive/images/install-matplotlib.png
similarity index 100%
rename from docs/src/images/install-matplotlib.png
rename to docs/src/archive/images/install-matplotlib.png
diff --git a/docs/src/images/install-pydotplus.png b/docs/src/archive/images/install-pydotplus.png
similarity index 100%
rename from docs/src/images/install-pydotplus.png
rename to docs/src/archive/images/install-pydotplus.png
diff --git a/docs/src/images/install-python-advanced-1.png b/docs/src/archive/images/install-python-advanced-1.png
similarity index 100%
rename from docs/src/images/install-python-advanced-1.png
rename to docs/src/archive/images/install-python-advanced-1.png
diff --git a/docs/src/images/install-python-advanced-2.png b/docs/src/archive/images/install-python-advanced-2.png
similarity index 100%
rename from docs/src/images/install-python-advanced-2.png
rename to docs/src/archive/images/install-python-advanced-2.png
diff --git a/docs/src/images/install-python-simple.png b/docs/src/archive/images/install-python-simple.png
similarity index 100%
rename from docs/src/images/install-python-simple.png
rename to docs/src/archive/images/install-python-simple.png
diff --git a/docs/src/images/install-run-jupyter-1.png b/docs/src/archive/images/install-run-jupyter-1.png
similarity index 100%
rename from docs/src/images/install-run-jupyter-1.png
rename to docs/src/archive/images/install-run-jupyter-1.png
diff --git a/docs/src/images/install-run-jupyter-2.png b/docs/src/archive/images/install-run-jupyter-2.png
similarity index 100%
rename from docs/src/images/install-run-jupyter-2.png
rename to docs/src/archive/images/install-run-jupyter-2.png
diff --git a/docs/src/images/install-verify-graphviz.png b/docs/src/archive/images/install-verify-graphviz.png
similarity index 100%
rename from docs/src/images/install-verify-graphviz.png
rename to docs/src/archive/images/install-verify-graphviz.png
diff --git a/docs/src/images/install-verify-jupyter.png b/docs/src/archive/images/install-verify-jupyter.png
similarity index 100%
rename from docs/src/images/install-verify-jupyter.png
rename to docs/src/archive/images/install-verify-jupyter.png
diff --git a/docs/src/images/install-verify-python.png b/docs/src/archive/images/install-verify-python.png
similarity index 100%
rename from docs/src/images/install-verify-python.png
rename to docs/src/archive/images/install-verify-python.png
diff --git a/docs/src/images/join-example1.png b/docs/src/archive/images/join-example1.png
similarity index 100%
rename from docs/src/images/join-example1.png
rename to docs/src/archive/images/join-example1.png
diff --git a/docs/src/images/join-example2.png b/docs/src/archive/images/join-example2.png
similarity index 100%
rename from docs/src/images/join-example2.png
rename to docs/src/archive/images/join-example2.png
diff --git a/docs/src/images/join-example3.png b/docs/src/archive/images/join-example3.png
similarity index 100%
rename from docs/src/images/join-example3.png
rename to docs/src/archive/images/join-example3.png
diff --git a/docs/src/images/key_source_combination.png b/docs/src/archive/images/key_source_combination.png
similarity index 100%
rename from docs/src/images/key_source_combination.png
rename to docs/src/archive/images/key_source_combination.png
diff --git a/docs/src/images/map-dataflow.png b/docs/src/archive/images/map-dataflow.png
similarity index 100%
rename from docs/src/images/map-dataflow.png
rename to docs/src/archive/images/map-dataflow.png
diff --git a/docs/src/images/matched_tuples1.png b/docs/src/archive/images/matched_tuples1.png
similarity index 100%
rename from docs/src/images/matched_tuples1.png
rename to docs/src/archive/images/matched_tuples1.png
diff --git a/docs/src/images/matched_tuples2.png b/docs/src/archive/images/matched_tuples2.png
similarity index 100%
rename from docs/src/images/matched_tuples2.png
rename to docs/src/archive/images/matched_tuples2.png
diff --git a/docs/src/images/matched_tuples3.png b/docs/src/archive/images/matched_tuples3.png
similarity index 100%
rename from docs/src/images/matched_tuples3.png
rename to docs/src/archive/images/matched_tuples3.png
diff --git a/docs/src/images/mp-diagram.png b/docs/src/archive/images/mp-diagram.png
similarity index 100%
rename from docs/src/images/mp-diagram.png
rename to docs/src/archive/images/mp-diagram.png
diff --git a/docs/src/images/op-restrict.png b/docs/src/archive/images/op-restrict.png
similarity index 100%
rename from docs/src/images/op-restrict.png
rename to docs/src/archive/images/op-restrict.png
diff --git a/docs/src/images/outer-example1.png b/docs/src/archive/images/outer-example1.png
similarity index 100%
rename from docs/src/images/outer-example1.png
rename to docs/src/archive/images/outer-example1.png
diff --git a/docs/src/images/pipeline-database.png b/docs/src/archive/images/pipeline-database.png
similarity index 100%
rename from docs/src/images/pipeline-database.png
rename to docs/src/archive/images/pipeline-database.png
diff --git a/docs/src/images/pipeline.png b/docs/src/archive/images/pipeline.png
similarity index 100%
rename from docs/src/images/pipeline.png
rename to docs/src/archive/images/pipeline.png
diff --git a/docs/src/images/python_collection.png b/docs/src/archive/images/python_collection.png
similarity index 100%
rename from docs/src/images/python_collection.png
rename to docs/src/archive/images/python_collection.png
diff --git a/docs/src/images/queries_example_diagram.png b/docs/src/archive/images/queries_example_diagram.png
similarity index 100%
rename from docs/src/images/queries_example_diagram.png
rename to docs/src/archive/images/queries_example_diagram.png
diff --git a/docs/src/images/query_object_preview.png b/docs/src/archive/images/query_object_preview.png
similarity index 100%
rename from docs/src/images/query_object_preview.png
rename to docs/src/archive/images/query_object_preview.png
diff --git a/docs/src/images/restrict-example1.png b/docs/src/archive/images/restrict-example1.png
similarity index 100%
rename from docs/src/images/restrict-example1.png
rename to docs/src/archive/images/restrict-example1.png
diff --git a/docs/src/images/restrict-example2.png b/docs/src/archive/images/restrict-example2.png
similarity index 100%
rename from docs/src/images/restrict-example2.png
rename to docs/src/archive/images/restrict-example2.png
diff --git a/docs/src/images/restrict-example3.png b/docs/src/archive/images/restrict-example3.png
similarity index 100%
rename from docs/src/images/restrict-example3.png
rename to docs/src/archive/images/restrict-example3.png
diff --git a/docs/src/images/shapes_pipeline.svg b/docs/src/archive/images/shapes_pipeline.svg
similarity index 100%
rename from docs/src/images/shapes_pipeline.svg
rename to docs/src/archive/images/shapes_pipeline.svg
diff --git a/docs/src/images/spawned-classes-ERD.svg b/docs/src/archive/images/spawned-classes-ERD.svg
similarity index 100%
rename from docs/src/images/spawned-classes-ERD.svg
rename to docs/src/archive/images/spawned-classes-ERD.svg
diff --git a/docs/src/images/union-example1.png b/docs/src/archive/images/union-example1.png
similarity index 100%
rename from docs/src/images/union-example1.png
rename to docs/src/archive/images/union-example1.png
diff --git a/docs/src/images/union-example2.png b/docs/src/archive/images/union-example2.png
similarity index 100%
rename from docs/src/images/union-example2.png
rename to docs/src/archive/images/union-example2.png
diff --git a/docs/src/images/virtual-module-ERD.svg b/docs/src/archive/images/virtual-module-ERD.svg
similarity index 100%
rename from docs/src/images/virtual-module-ERD.svg
rename to docs/src/archive/images/virtual-module-ERD.svg
diff --git a/docs/src/manipulation/delete.md b/docs/src/archive/manipulation/delete.md
similarity index 100%
rename from docs/src/manipulation/delete.md
rename to docs/src/archive/manipulation/delete.md
diff --git a/docs/src/manipulation/index.md b/docs/src/archive/manipulation/index.md
similarity index 100%
rename from docs/src/manipulation/index.md
rename to docs/src/archive/manipulation/index.md
diff --git a/docs/src/manipulation/insert.md b/docs/src/archive/manipulation/insert.md
similarity index 100%
rename from docs/src/manipulation/insert.md
rename to docs/src/archive/manipulation/insert.md
diff --git a/docs/src/manipulation/transactions.md b/docs/src/archive/manipulation/transactions.md
similarity index 100%
rename from docs/src/manipulation/transactions.md
rename to docs/src/archive/manipulation/transactions.md
diff --git a/docs/src/manipulation/update.md b/docs/src/archive/manipulation/update.md
similarity index 100%
rename from docs/src/manipulation/update.md
rename to docs/src/archive/manipulation/update.md
diff --git a/docs/src/publish-data.md b/docs/src/archive/publish-data.md
similarity index 100%
rename from docs/src/publish-data.md
rename to docs/src/archive/publish-data.md
diff --git a/docs/src/query/aggregation.md b/docs/src/archive/query/aggregation.md
similarity index 100%
rename from docs/src/query/aggregation.md
rename to docs/src/archive/query/aggregation.md
diff --git a/docs/src/query/example-schema.md b/docs/src/archive/query/example-schema.md
similarity index 100%
rename from docs/src/query/example-schema.md
rename to docs/src/archive/query/example-schema.md
diff --git a/docs/src/query/fetch.md b/docs/src/archive/query/fetch.md
similarity index 100%
rename from docs/src/query/fetch.md
rename to docs/src/archive/query/fetch.md
diff --git a/docs/src/query/iteration.md b/docs/src/archive/query/iteration.md
similarity index 100%
rename from docs/src/query/iteration.md
rename to docs/src/archive/query/iteration.md
diff --git a/docs/src/query/join.md b/docs/src/archive/query/join.md
similarity index 100%
rename from docs/src/query/join.md
rename to docs/src/archive/query/join.md
diff --git a/docs/src/query/operators.md b/docs/src/archive/query/operators.md
similarity index 100%
rename from docs/src/query/operators.md
rename to docs/src/archive/query/operators.md
diff --git a/docs/src/query/principles.md b/docs/src/archive/query/principles.md
similarity index 100%
rename from docs/src/query/principles.md
rename to docs/src/archive/query/principles.md
diff --git a/docs/src/query/project.md b/docs/src/archive/query/project.md
similarity index 100%
rename from docs/src/query/project.md
rename to docs/src/archive/query/project.md
diff --git a/docs/src/query/query-caching.md b/docs/src/archive/query/query-caching.md
similarity index 100%
rename from docs/src/query/query-caching.md
rename to docs/src/archive/query/query-caching.md
diff --git a/docs/src/query/restrict.md b/docs/src/archive/query/restrict.md
similarity index 100%
rename from docs/src/query/restrict.md
rename to docs/src/archive/query/restrict.md
diff --git a/docs/src/query/union.md b/docs/src/archive/query/union.md
similarity index 100%
rename from docs/src/query/union.md
rename to docs/src/archive/query/union.md
diff --git a/docs/src/query/universals.md b/docs/src/archive/query/universals.md
similarity index 100%
rename from docs/src/query/universals.md
rename to docs/src/archive/query/universals.md
diff --git a/docs/src/quick-start.md b/docs/src/archive/quick-start.md
similarity index 100%
rename from docs/src/quick-start.md
rename to docs/src/archive/quick-start.md
diff --git a/docs/src/sysadmin/bulk-storage.md b/docs/src/archive/sysadmin/bulk-storage.md
similarity index 100%
rename from docs/src/sysadmin/bulk-storage.md
rename to docs/src/archive/sysadmin/bulk-storage.md
diff --git a/docs/src/sysadmin/database-admin.md b/docs/src/archive/sysadmin/database-admin.md
similarity index 100%
rename from docs/src/sysadmin/database-admin.md
rename to docs/src/archive/sysadmin/database-admin.md
diff --git a/docs/src/sysadmin/external-store.md b/docs/src/archive/sysadmin/external-store.md
similarity index 100%
rename from docs/src/sysadmin/external-store.md
rename to docs/src/archive/sysadmin/external-store.md
diff --git a/docs/src/tutorials/dj-top.ipynb b/docs/src/archive/tutorials/dj-top.ipynb
similarity index 100%
rename from docs/src/tutorials/dj-top.ipynb
rename to docs/src/archive/tutorials/dj-top.ipynb
diff --git a/docs/src/tutorials/json.ipynb b/docs/src/archive/tutorials/json.ipynb
similarity index 100%
rename from docs/src/tutorials/json.ipynb
rename to docs/src/archive/tutorials/json.ipynb
diff --git a/docs/src/changelog.md b/docs/src/changelog.md
deleted file mode 120000
index 699cc9e7b..000000000
--- a/docs/src/changelog.md
+++ /dev/null
@@ -1 +0,0 @@
-../../CHANGELOG.md
\ No newline at end of file
diff --git a/docs/src/develop.md b/docs/src/develop.md
index a4a1fc534..4643683b6 100644
--- a/docs/src/develop.md
+++ b/docs/src/develop.md
@@ -1,202 +1,101 @@
-# Developer Guide
+# Contributing Guide
-## Table of Contents
-
-- [Contribute to DataJoint Python Documentation](#contribute-to-datajoint-python-documentation)
-- [Setup Development Environment](#setup-development-environment)
- - [Prerequisites](#prerequisites)
- - [With Virtual Environment](#with-virtual-environment)
- - [With DevContainer](#with-devcontainer)
- - [Extra Efficiency, Optional But Recommended](#extra-efficiency-optional-but-recommended)
- - [Pre-commit Hooks](#pre-commit-hooks)
- - [Integration Tests](#integration-tests)
- - [VSCode](#vscode)
- - [Jupyter Extension](#jupyter-extension)
- - [Debugger](#debugger)
- - [MySQL CLI](#mysql-cli)
-
-## Contribute to DataJoint Python Documentation
-
-> Contributions to documentations are equivalently important to any code for the community, please help us to resolve any confusions in documentations.
-
-[Here](https://github.com/datajoint/datajoint-python/blob/master/docs/README.md) is the instructions for contributing documentations, or you can find the same instructions at `$PROJECT_DIR/docs/README.md` in the repository.
-
-[Back to top](#table-of-contents)
-
-## Setup Development Environment
-
-> We have [DevContainer](https://containers.dev/) ready for contributors to develop without setting up their environment. If you are familiar with DevContainer, Docker or Github Codespace, this is the recommended development environment for you.
-> If you have never used Docker, it might be easier for you to use a virtual environment through `conda/mamba/venv`, it is also very straightforward to set up.
-
-### Prerequisites
-
-- Clone datajoint-python repository
+## Quick Start
```bash
-# If you have your SSH key set up with GitHub, you can clone using SSH
-git clone git@github.com:datajoint/datajoint-python.git
-# Otherwise, you can clone using HTTPS
+# Clone the repository
git clone https://github.com/datajoint/datajoint-python.git
-```
-- If you don't use DevContainer, then either install Anaconda/[Miniconda](https://www.anaconda.com/docs/getting-started/miniconda/install)/Mamba, or just use Python's built-in `venv` module without install anything else.
-
-### With Virtual Environment
+cd datajoint-python
-```bash
-# Check if you have Python 3.10 or higher, if not please upgrade
-python --version
-# Create a virtual environment with venv
+# Create virtual environment (Python 3.10+)
python -m venv .venv
-source .venv/bin/activate
-pip install -e .[dev]
+source .venv/bin/activate # On Windows: .venv\Scripts\activate
-# Or create a virtual environment with conda
-conda create -n dj python=3.13 # any 3.10+ is fine
-conda activate dj
-pip install -e .[dev]
-```
+# Install with development dependencies
+pip install -e ".[dev]"
-[Back to top](#table-of-contents)
+# Install pre-commit hooks
+pre-commit install
-### With DevContainer
+# Run tests
+pytest tests
+```
-#### Launch Environment
+## Development Environment
-Here are some options that provide a great developer experience:
+### Local Setup
-- **Cloud-based IDE**: (*recommended*)
- - Launch using [GitHub Codespaces](https://github.com/features/codespaces) using the option `Create codespace on master` in the codebase repository on your fork.
- - Build time for a 2-Core codespace is **~6m**. This is done infrequently and cached for convenience.
- - Start time for a 2-Core codespace is **~2m**. This will pull the built codespace from cache when you need it.
- - *Tip*: GitHub auto names the codespace but you can rename the codespace so that it is easier to identify later.
-- **Local IDE (VSCode - Dev Containers)**:
- - Ensure you have [Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git)
- - Ensure you have [Docker](https://docs.docker.com/get-docker/)
- - Ensure you have [VSCode](https://code.visualstudio.com/)
- - Install the [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers)
- - `git clone` the codebase repository and open it in VSCode
- - Use the `Dev Containers extension` to `Reopen in Container` (More info in the `Getting started` included with the extension)
- - You will know your environment has finished loading once you see a terminal open related to `Running postStartCommand` with a final message: `Done. Press any key to close the terminal.`.
-- **Local IDE (Docker Compose)**:
- - Ensure you have [Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git)
- - Ensure you have [Docker](https://docs.docker.com/get-docker/)
- - `git clone` the codebase repository and open it in VSCode
- - Issue the following command in the terminal to build and run the Docker container: `HOST_UID=$(id -u) PY_VER=3.11 DJ_VERSION=$(grep -oP '\d+\.\d+\.\d+' src/datajoint/version.py) docker compose --profile test run --rm -it djtest -- sh -c 'pip install -qe ".[dev]" && bash'`
- - Issue the following command in the terminal to stop the Docker compose stack: `docker compose --profile test down`
+Requirements:
-[Back to top](#table-of-contents)
+- Python 3.10 or higher
+- MySQL 8.0+ or Docker (for running tests)
-## Extra Efficiency, Optional But Recommended
+The `[dev]` extra installs all development tools: pytest, pre-commit, black, ruff, and the documentation builders.
-### Pre-commit Hooks
+### Using Docker for Database
-We recommend using [pre-commit](https://pre-commit.com/) to automatically run linters and formatters on your code before committing.
-To set up pre-commit, run the following command in your terminal:
+Tests require a MySQL database. Start one with Docker:
```bash
-pip install pre-commit
-pre-commit install
+docker compose up -d db
```
-You can manually run pre-commit on all files with the following command:
+Configure connection (or set environment variables):
```bash
-pre-commit run --all-files
+export DJ_HOST=localhost
+export DJ_USER=root
+export DJ_PASS=password
```
-This will run all the linters and formatters specified in the `.pre-commit-config.yaml` file. If all check passed, you can commit your code. Otherwise, you need to fix the failed checks and run the command again.
-> Pre-commit will automatically run the linters and formatters on all staged files before committing. However, if your code doesn't follow the linters and formatters, the commit will fail.
-> Some hooks will automatically fix your problem, and add the fixed files as git's `unstaged` files, you just need to add them(`git add .`) to git's `staged` files and commit again.
-> Some hooks will not automatically fix your problem, so you need to check the pre-commit failed log to fix them manually and include the update to your `staged` files and commit again.
+### Alternative: GitHub Codespaces
-If you really don't want to use pre-commit, or if you don't like it, you can uninstall it with the following command:
+For a pre-configured environment, use [GitHub Codespaces](https://github.com/features/codespaces):
-```bash
-pre-commit uninstall
-```
+1. Fork the repository
+2. Click "Create codespace on master"
+3. Wait for the environment to build (~6 minutes the first time, ~2 minutes from cache)
-But when you issue a pull request, the same linter and formatter check will run against your contribution, you are going to have the same failure as well. So without pre-commit, you need to **manually run these linters and formatters before committing your code**:
+## Code Quality
-- Syntax tests
+### Pre-commit Hooks
-The following will verify that there are no syntax errors.
+Pre-commit runs automatically on `git commit`. To run manually:
-```
-flake8 datajoint --count --select=E9,F63,F7,F82 --show-source --statistics
+```bash
+pre-commit run --all-files
```
-- Style tests
+Hooks include:
-The following will verify that there are no code styling errors.
+- **ruff** — Linting and import sorting
+- **black** — Code formatting
+- **mypy** — Type checking (optional)
-```
-flake8 --ignore=E203,E722,W503 datajoint --count --max-complexity=62 --max-line-length=127 --statistics
-```
-
-The following will ensure the codebase has been formatted with [black](https://black.readthedocs.io/en/stable/).
+### Running Tests
-```
-black datajoint --check -v --diff
-```
+```bash
+# Full test suite with coverage
+pytest -sv --cov=datajoint tests
-The following will ensure the test suite has been formatted with [black](https://black.readthedocs.io/en/stable/).
+# Single test file
+pytest tests/test_connection.py
+# Single test function
+pytest tests/test_connection.py::test_dj_conn -v
```
-black tests --check -v --diff
-```
-
-[Back to top](#table-of-contents)
-
-### Integration Tests
-
-The following will verify there are no regression errors by running our test suite of unit and integration tests.
-
-- Entire test suite:
- ```
- pytest -sv --cov-report term-missing --cov=datajoint tests
- ```
-
-- A single functional test:
- ```
- pytest -sv tests/test_connection.py::test_dj_conn
- ```
-- A single class test:
- ```
- pytest -sv tests/test_aggr_regressions.py::TestIssue558
- ```
-[Back to top](#table-of-contents)
+## Submitting Changes
-### VSCode
+1. Create a feature branch from `master`
+2. Make your changes
+3. Ensure tests pass and pre-commit is clean
+4. Submit a pull request
-#### Jupyter Extension
+PRs trigger CI checks automatically. All checks must pass before merge.
-Be sure to go through this documentation if you are new to [Running Jupyter Notebooks with VSCode](https://code.visualstudio.com/docs/datascience/jupyter-notebooks#_create-or-open-a-jupyter-notebook).
+## Documentation
-#### Debugger
-
-[VSCode Debugger](https://code.visualstudio.com/docs/editor/debugging) is a powerful tool that can really accelerate fixes.
-
-Try it as follows:
-
-- Create a python script of your choice
-- `import datajoint` (This will use the current state of the source)
-- Add breakpoints by adding red dots next to line numbers
-- Select the `Run and Debug` tab
-- Start by clicking the button `Run and Debug`
-
-[Back to top](#table-of-contents)
-
-### MySQL CLI
-
-> Installation instruction is in [here](https://dev.mysql.com/doc/mysql-shell/8.0/en/mysql-shell-install.html)
-
-It is often useful in development to connect to DataJoint's relational database backend directly using the MySQL CLI.
-
-Connect as follows to the database running within your developer environment:
-
-```
-mysql -hdb -uroot -ppassword
-```
+Docstrings use NumPy style. See [DOCSTRING_STYLE.md](https://github.com/datajoint/datajoint-python/blob/master/DOCSTRING_STYLE.md) for guidelines.
-[Back to top](#table-of-contents)
\ No newline at end of file
+User documentation is maintained at [docs.datajoint.com](https://docs.datajoint.com).
diff --git a/docs/src/index.md b/docs/src/index.md
index 6e3bf2a2d..63b318a1c 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -1,44 +1,44 @@
-# Welcome to DataJoint for Python!
+# DataJoint for Python
-DataJoint for Python is a framework for scientific workflow management based on
-relational principles. DataJoint is built on the foundation of the relational data
-model and prescribes a consistent method for organizing, populating, computing, and
-querying data.
+DataJoint is an open-source Python framework for building scientific data pipelines.
+It implements the **Relational Workflow Model**—a paradigm that extends relational
+databases with native support for computational workflows.
-DataJoint was initially developed in 2009 by Dimitri Yatsenko in Andreas Tolias' Lab at
-Baylor College of Medicine for the distributed processing and management of large
-volumes of data streaming from regular experiments. Starting in 2011, DataJoint has
-been available as an open-source project adopted by other labs and improved through
-contributions from several developers.
-Presently, the primary developer of DataJoint open-source software is the company [DataJoint](https://datajoint.com){:target="_blank"}.
+## Documentation
-## Data Pipeline Example
+**User documentation** is available at **[docs.datajoint.com](https://docs.datajoint.com)**, including:
-
+- Tutorials and getting started guides
+- Concepts and explanations
+- How-to guides
+- API reference
-[Yatsenko et al., bioRxiv 2021](https://doi.org/10.1101/2021.03.30.437358){:target="_blank"}
+## This Site
-## Getting Started
+This site contains **developer documentation** for contributors to the DataJoint codebase:
-- Install with Conda
+- [Contributing Guide](develop.md) — Development environment setup
+- [Architecture](architecture/index.md) — Internal design documentation
+- [API Reference](api/) — Auto-generated from source
- ```bash
- conda install -c conda-forge datajoint
- ```
+## Quick Links
-- Install with pip
+| Resource | Link |
+|----------|------|
+| User Documentation | [docs.datajoint.com](https://docs.datajoint.com) |
+| GitHub Repository | [github.com/datajoint/datajoint-python](https://github.com/datajoint/datajoint-python) |
+| PyPI Package | [pypi.org/project/datajoint](https://pypi.org/project/datajoint) |
+| Issue Tracker | [GitHub Issues](https://github.com/datajoint/datajoint-python/issues) |
+| Community | [DataJoint Slack](https://datajoint.slack.com) |
- ```bash
- pip install datajoint
- ```
+## Installation
-- [Quick Start Guide](./quick-start.md)
+```bash
+pip install datajoint
+```
-- [Interactive Tutorials](https://github.com/datajoint/datajoint-tutorials){:target="_blank"} on GitHub Codespaces
+## License
-- [DataJoint Elements](https://docs.datajoint.com/elements/) - Catalog of example pipelines for neuroscience experiments
+DataJoint is released under the [Apache 2.0 License](https://github.com/datajoint/datajoint-python/blob/master/LICENSE).
-- Contribute
- - [Development Environment](./develop)
-
- - [Guidelines](https://docs.datajoint.com/about/contribute/)
+Copyright 2024 DataJoint Inc. and contributors.
diff --git a/pyproject.toml b/pyproject.toml
index b719cdb73..f3eee2313 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,7 +39,7 @@ maintainers = [
# manually sync here: https://docs.datajoint.com/core/datajoint-python/latest/#welcome-to-datajoint-for-python
description = "DataJoint for Python is a framework for scientific workflow management based on relational principles. DataJoint is built on the foundation of the relational data model and prescribes a consistent method for organizing, populating, computing, and querying data."
readme = "README.md"
-license = {file = "LICENSE.txt"}
+license = {file = "LICENSE"}
keywords = [
"database",
"automated",
@@ -62,7 +62,7 @@ classifiers = [
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Science/Research",
"Intended Audience :: Healthcare Industry",
- "License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)",
+ "License :: OSI Approved :: Apache Software License",
"Topic :: Software Development :: Libraries :: Python Modules",
"Topic :: Scientific/Engineering",
"Topic :: Scientific/Engineering :: Bio-Informatics",
diff --git a/src/datajoint/__init__.py b/src/datajoint/__init__.py
index 14c6ebd4e..7c72b71db 100644
--- a/src/datajoint/__init__.py
+++ b/src/datajoint/__init__.py
@@ -1,18 +1,18 @@
"""
-DataJoint for Python is a framework for building data pipelines using MySQL databases
-to represent pipeline structure and bulk storage systems for large objects.
-DataJoint is built on the foundation of the relational data model and prescribes a
-consistent method for organizing, populating, and querying data.
+DataJoint for Python — a framework for scientific data pipelines.
-The DataJoint data model is described in https://arxiv.org/abs/1807.11104
+DataJoint introduces the Relational Workflow Model, where your database schema
+is an executable specification of your workflow. Tables represent workflow steps,
+foreign keys encode dependencies, and computations are declarative.
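+
+A minimal sketch (the schema and table names are hypothetical); the
+``-> Session`` line is a foreign key that encodes the dependency:
+
+>>> import datajoint as dj
+>>> schema = dj.Schema("lab")
+>>> @schema
+... class Session(dj.Manual):
+...     definition = '''
+...     session_id : int
+...     '''
+>>> @schema
+... class Analysis(dj.Computed):
+...     definition = '''
+...     -> Session
+...     ---
+...     result : float
+...     '''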
-DataJoint is free software under the LGPL License. In addition, we request
-that any use of DataJoint leading to a publication be acknowledged in the publication.
+Documentation: https://docs.datajoint.com
+Source: https://github.com/datajoint/datajoint-python
-Please cite:
+Copyright 2014-2026 DataJoint Inc. and contributors.
+Licensed under the Apache License, Version 2.0.
- - http://biorxiv.org/content/early/2015/11/14/031658
- - http://dx.doi.org/10.1101/031658
+If DataJoint contributes to a publication, please cite:
+https://doi.org/10.1101/031658
"""
__author__ = "DataJoint Contributors"
diff --git a/src/datajoint/admin.py b/src/datajoint/admin.py
index 64a91bb48..275e9a823 100644
--- a/src/datajoint/admin.py
+++ b/src/datajoint/admin.py
@@ -9,18 +9,24 @@
def kill(restriction=None, connection=None, order_by=None):
"""
- view and kill database connections.
-
- :param restriction: restriction to be applied to processlist
- :param connection: a datajoint.Connection object. Default calls datajoint.conn()
- :param order_by: order by a single attribute or the list of attributes. defaults to 'id'.
-
- Restrictions are specified as strings and can involve any of the attributes of
- information_schema.processlist: ID, USER, HOST, DB, COMMAND, TIME, STATE, INFO.
-
- Examples:
- dj.kill('HOST LIKE "%compute%"') lists only connections from hosts containing "compute".
- dj.kill('TIME > 600') lists only connections in their current state for more than 10 minutes
+ View and kill database connections interactively.
+
+ Displays a list of active connections and prompts for connections to kill.
+
+ Parameters
+ ----------
+ restriction : str, optional
+ SQL WHERE clause to filter connections. Can use any attribute from
+ information_schema.processlist: ID, USER, HOST, DB, COMMAND, TIME, STATE, INFO.
+ connection : Connection, optional
+ A datajoint.Connection object. Defaults to datajoint.conn().
+ order_by : str or list[str], optional
+ Attribute(s) to order results by. Defaults to 'id'.
+
+ Examples
+ --------
+ >>> dj.kill('HOST LIKE "%compute%"') # List connections from hosts containing "compute"
+    >>> dj.kill('TIME > 600')  # List connections in their current state for more than 10 minutes
"""
if connection is None:
@@ -61,16 +67,24 @@ def kill(restriction=None, connection=None, order_by=None):
def kill_quick(restriction=None, connection=None):
"""
- Kill database connections without prompting. Returns number of terminated connections.
-
- :param restriction: restriction to be applied to processlist
- :param connection: a datajoint.Connection object. Default calls datajoint.conn()
-
- Restrictions are specified as strings and can involve any of the attributes of
- information_schema.processlist: ID, USER, HOST, DB, COMMAND, TIME, STATE, INFO.
-
- Examples:
- dj.kill('HOST LIKE "%compute%"') terminates connections from hosts containing "compute".
+ Kill database connections without prompting.
+
+ Parameters
+ ----------
+ restriction : str, optional
+ SQL WHERE clause to filter connections. Can use any attribute from
+ information_schema.processlist: ID, USER, HOST, DB, COMMAND, TIME, STATE, INFO.
+ connection : Connection, optional
+ A datajoint.Connection object. Defaults to datajoint.conn().
+
+ Returns
+ -------
+ int
+ Number of terminated connections.
+
+ Examples
+ --------
+ >>> dj.kill_quick('HOST LIKE "%compute%"') # Kill connections from hosts with "compute"
"""
if connection is None:
connection = conn()
diff --git a/src/datajoint/autopopulate.py b/src/datajoint/autopopulate.py
index d92a1edf6..6c4539760 100644
--- a/src/datajoint/autopopulate.py
+++ b/src/datajoint/autopopulate.py
@@ -1,5 +1,7 @@
"""This module defines class dj.AutoPopulate"""
+from __future__ import annotations
+
import contextlib
import datetime
import inspect
@@ -7,6 +9,7 @@
import multiprocessing as mp
import signal
import traceback
+from typing import TYPE_CHECKING, Any, Generator
import deepdiff
from tqdm import tqdm
@@ -14,6 +17,10 @@
from .errors import DataJointError, LostConnectionError
from .expression import AndList, QueryExpression
+if TYPE_CHECKING:
+ from .jobs import Job
+ from .table import Table
+
# noinspection PyExceptionInherit,PyCallingNonCallable
logger = logging.getLogger(__name__.split(".")[0])
@@ -22,10 +29,20 @@
# --- helper functions for multiprocessing --
-def _initialize_populate(table, jobs, populate_kwargs):
+def _initialize_populate(table: Table, jobs: Job | None, populate_kwargs: dict[str, Any]) -> None:
"""
- Initialize the process for multiprocessing.
- Saves the unpickled copy of the table to the current process and reconnects.
+ Initialize a worker process for multiprocessing.
+
+    Saves the unpickled table to the current process and reconnects to the database.
+
+ Parameters
+ ----------
+ table : Table
+ Table instance to populate.
+ jobs : Job or None
+ Job management object or None for direct mode.
+ populate_kwargs : dict
+ Arguments for _populate1().
"""
process = mp.current_process()
process.table = table
@@ -34,11 +51,19 @@ def _initialize_populate(table, jobs, populate_kwargs):
table.connection.connect() # reconnect
-def _call_populate1(key):
+def _call_populate1(key: dict[str, Any]) -> bool | tuple[dict[str, Any], Any]:
"""
- Call current process' table._populate1()
- :key - a dict specifying job to compute
- :return: key, error if error, otherwise None
+ Call _populate1() for a single key in the worker process.
+
+ Parameters
+ ----------
+ key : dict
+ Primary key specifying job to compute.
+
+ Returns
+ -------
+ bool or tuple
+        Result from ``_populate1()``: a success flag, or a ``(key, error)``
+        tuple when errors are suppressed.
"""
process = mp.current_process()
return process.table._populate1(key, process.jobs, **process.populate_kwargs)
@@ -46,9 +71,22 @@ def _call_populate1(key):
class AutoPopulate:
"""
- AutoPopulate is a mixin class that adds the method populate() to a Table class.
- Auto-populated tables must inherit from both Table and AutoPopulate,
- must define the property `key_source`, and must define the callback method `make`.
+ Mixin class that adds automated population to Table classes.
+
+ Auto-populated tables (Computed, Imported) inherit from both Table and
+ AutoPopulate. They must implement the ``make()`` method that computes
+ and inserts data for one primary key.
+
+ Attributes
+ ----------
+ key_source : QueryExpression
+ Query yielding keys to be populated. Default is join of FK parents.
+        Query yielding keys to be populated. Default is the join of FK parents.
+ Job table (``~~table_name``) for distributed processing.
+
+ Notes
+ -----
+ Subclasses may override ``key_source`` to customize population scope.
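+
+    Examples
+    --------
+    A sketch of a computed table (``Recording`` is a hypothetical parent table):
+
+    >>> @schema
+    ... class FilteredSignal(dj.Computed):
+    ...     definition = '''
+    ...     -> Recording
+    ...     ---
+    ...     filtered : longblob
+    ...     '''
+    ...     def make(self, key):
+    ...         signal = (Recording & key).fetch1("signal")
+    ...         self.insert1(dict(key, filtered=signal - signal.mean()))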
"""
_key_source = None
@@ -56,15 +94,18 @@ class AutoPopulate:
_jobs = None
@property
- def jobs(self):
+ def jobs(self) -> Job:
"""
Access the job table for this auto-populated table.
- The job table (~~table_name) is created lazily on first access.
+ The job table (``~~table_name``) is created lazily on first access.
It tracks job status, priority, scheduling, and error information
for distributed populate operations.
- :return: Job object for this table
+ Returns
+ -------
+ Job
+ Job management object for this table.
"""
if self._jobs is None:
from .jobs import Job
@@ -74,7 +115,7 @@ def jobs(self):
self._jobs.declare()
return self._jobs
- def _declare_check(self, primary_key, fk_attribute_map):
+ def _declare_check(self, primary_key: list[str], fk_attribute_map: dict[str, tuple[str, str]]) -> None:
"""
Validate FK-only primary key constraint for auto-populated tables.
@@ -82,12 +123,18 @@ def _declare_check(self, primary_key, fk_attribute_map):
attributes from foreign key references. This ensures proper job granularity
for distributed populate operations.
- This validation can be bypassed by setting:
- dj.config.jobs.allow_new_pk_fields_in_computed_tables = True
-
- :param primary_key: list of primary key attribute names
- :param fk_attribute_map: dict mapping child_attr -> (parent_table, parent_attr)
- :raises DataJointError: if native PK attributes are found (unless bypassed)
+ Parameters
+ ----------
+        primary_key : list of str
+ List of primary key attribute names.
+ fk_attribute_map : dict
+ Mapping of child_attr -> (parent_table, parent_attr).
+
+ Raises
+ ------
+ DataJointError
+ If native (non-FK) PK attributes are found, unless bypassed via
+ ``dj.config.jobs.allow_new_pk_fields_in_computed_tables = True``.
"""
from .settings import config
@@ -110,13 +157,22 @@ def _declare_check(self, primary_key, fk_attribute_map):
)
@property
- def key_source(self):
+ def key_source(self) -> QueryExpression:
"""
- :return: the query expression that yields primary key values to be passed,
- sequentially, to the ``make`` method when populate() is called.
- The default value is the join of the parent tables references from the primary key.
- Subclasses may override they key_source to change the scope or the granularity
- of the make calls.
+ Query expression yielding keys to be populated.
+
+ Returns the primary key values to be passed sequentially to ``make()``
+ when ``populate()`` is called. The default is the join of parent tables
+ referenced from the primary key.
+
+ Returns
+ -------
+ QueryExpression
+ Expression yielding keys for population.
+
+ Notes
+ -----
+ Subclasses may override to change the scope or granularity of make calls.
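+
+        A sketch of an override that narrows the default scope (the table and
+        attribute are hypothetical):
+
+        >>> @property
+        ... def key_source(self):
+        ...     return Recording & 'sampling_rate >= 1000'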
"""
def _rename_attributes(table, props):
@@ -135,50 +191,40 @@ def _rename_attributes(table, props):
self._key_source *= _rename_attributes(*q)
return self._key_source
- def make(self, key):
+ def make(self, key: dict[str, Any]) -> None | Generator[Any, Any, None]:
"""
- This method must be implemented by derived classes to perform automated computation.
- The method must implement the following three steps:
-
- 1. Fetch data from tables above in the dependency hierarchy, restricted by the given key.
- 2. Compute secondary attributes based on the fetched data.
- 3. Insert the new tuple(s) into the current table.
-
- The method can be implemented either as:
- (a) Regular method: All three steps are performed in a single database transaction.
- The method must return None.
- (b) Generator method:
- The make method is split into three functions:
- - `make_fetch`: Fetches data from the parent tables.
- - `make_compute`: Computes secondary attributes based on the fetched data.
- - `make_insert`: Inserts the computed data into the current table.
-
- Then populate logic is executes as follows:
-
-
- fetched_data1 = self.make_fetch(key)
- computed_result = self.make_compute(key, *fetched_data1)
- begin transaction:
- fetched_data2 = self.make_fetch(key)
- if fetched_data1 != fetched_data2:
- cancel transaction
- else:
- self.make_insert(key, *computed_result)
- commit_transaction
-
+ Compute and insert data for one key.
- Importantly, the output of make_fetch is a tuple that serves as the input into `make_compute`.
- The output of `make_compute` is a tuple that serves as the input into `make_insert`.
+ Must be implemented by subclasses to perform automated computation.
+ The method implements three steps:
- The functionality must be strictly divided between these three methods:
- - All database queries must be completed in `make_fetch`.
- - All computation must be completed in `make_compute`.
- - All database inserts must be completed in `make_insert`.
+ 1. Fetch data from parent tables, restricted by the given key
+ 2. Compute secondary attributes based on the fetched data
+ 3. Insert the new row(s) into the current table
- DataJoint may programmatically enforce this separation in the future.
+ Parameters
+ ----------
+ key : dict
+ Primary key value identifying the entity to compute.
- :param key: The primary key value used to restrict the data fetching.
- :raises NotImplementedError: If the derived class does not implement the required methods.
+ Raises
+ ------
+ NotImplementedError
+ If neither ``make()`` nor the tripartite methods are implemented.
+
+ Notes
+ -----
+ **Simple make**: Implement as a regular method that performs all three
+ steps in a single database transaction. Must return None.
+
+ **Tripartite make**: For long-running computations, implement:
+
+ - ``make_fetch(key)``: Fetch data from parent tables
+ - ``make_compute(key, *fetched_data)``: Compute results
+ - ``make_insert(key, *computed_result)``: Insert results
+
+ The tripartite pattern allows computation outside the transaction,
+ with referential integrity checking before commit.
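+
+        A sketch of the tripartite form (``Recording`` and its ``signal``
+        attribute are hypothetical):
+
+        >>> def make_fetch(self, key):
+        ...     return ((Recording & key).fetch1("signal"),)
+        >>> def make_compute(self, key, signal):
+        ...     return (signal - signal.mean(),)
+        >>> def make_insert(self, key, filtered):
+        ...     self.insert1(dict(key, filtered=filtered))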
"""
if not (hasattr(self, "make_fetch") and hasattr(self, "make_insert") and hasattr(self, "make_compute")):
@@ -204,9 +250,19 @@ def make(self, key):
self.make_insert(key, *computed_result)
yield
- def _jobs_to_do(self, restrictions):
+ def _jobs_to_do(self, restrictions: tuple) -> QueryExpression:
"""
- :return: the query yielding the keys to be computed (derived from self.key_source)
+ Return the query yielding keys to be computed.
+
+ Parameters
+ ----------
+ restrictions : tuple
+ Conditions to filter key_source.
+
+ Returns
+ -------
+ QueryExpression
+ Keys derived from key_source that need computation.
"""
if self.restriction:
raise DataJointError(
@@ -234,44 +290,61 @@ def _jobs_to_do(self, restrictions):
def populate(
self,
- *restrictions,
- suppress_errors=False,
- return_exception_objects=False,
- reserve_jobs=False,
- max_calls=None,
- display_progress=False,
- processes=1,
- make_kwargs=None,
- priority=None,
- refresh=None,
- ):
+ *restrictions: Any,
+ suppress_errors: bool = False,
+ return_exception_objects: bool = False,
+ reserve_jobs: bool = False,
+ max_calls: int | None = None,
+ display_progress: bool = False,
+ processes: int = 1,
+ make_kwargs: dict[str, Any] | None = None,
+ priority: int | None = None,
+ refresh: bool | None = None,
+ ) -> dict[str, Any]:
"""
- ``table.populate()`` calls ``table.make(key)`` for every primary key in
- ``self.key_source`` for which there is not already a tuple in table.
-
- Two execution modes:
-
- **Direct mode** (reserve_jobs=False, default):
- Keys computed directly from: (key_source & restrictions) - target
- No job table involvement. Suitable for single-worker scenarios,
- development, and debugging.
-
- **Distributed mode** (reserve_jobs=True):
- Uses the job table (~~table_name) for multi-worker coordination.
- Supports priority, scheduling, and status tracking.
-
- :param restrictions: conditions to filter key_source
- :param suppress_errors: if True, collect errors instead of raising
- :param return_exception_objects: return error objects instead of just error messages
- :param reserve_jobs: if True, use job table for distributed processing
- :param max_calls: maximum number of make() calls (total across all processes)
- :param display_progress: if True, show progress bar
- :param processes: number of worker processes
- :param make_kwargs: keyword arguments passed to each make() call
- :param priority: (reserve_jobs only) only process jobs at this priority or more urgent
- :param refresh: (reserve_jobs only) refresh job queue before processing.
- Default from config.jobs.auto_refresh
- :return: dict with "success_count" and "error_list"
+ Populate the table by calling ``make()`` for unpopulated keys.
+
+ Calls ``make(key)`` for every primary key in ``key_source`` for which
+ there is not already a row in this table.
+
+ Parameters
+ ----------
+ *restrictions
+ Conditions to filter key_source.
+ suppress_errors : bool, optional
+ If True, collect errors instead of raising. Default False.
+ return_exception_objects : bool, optional
+ If True, return exception objects instead of messages. Default False.
+ reserve_jobs : bool, optional
+ If True, use job table for distributed processing. Default False.
+ max_calls : int, optional
+            Maximum number of ``make()`` calls (total across all processes).
+ display_progress : bool, optional
+ If True, show progress bar. Default False.
+ processes : int, optional
+ Number of worker processes. Default 1.
+ make_kwargs : dict, optional
+ Keyword arguments passed to each ``make()`` call.
+ priority : int, optional
+            (Distributed mode) Only process jobs at this priority level or more urgent.
+ refresh : bool, optional
+ (Distributed mode) Refresh job queue before processing.
+ Default from ``config.jobs.auto_refresh``.
+
+ Returns
+ -------
+ dict
+ ``{"success_count": int, "error_list": list}``.
+
+ Notes
+ -----
+ **Direct mode** (``reserve_jobs=False``): Keys computed from
+ ``(key_source & restrictions) - target``. No job table. Suitable for
+    single-worker runs, development, and debugging.
+
+ **Distributed mode** (``reserve_jobs=True``): Uses job table
+ (``~~table_name``) for multi-worker coordination with priority and
+ status tracking.
"""
if self.connection.in_transaction:
raise DataJointError("Populate cannot be called during a transaction.")
@@ -457,16 +530,35 @@ def handler(signum, frame):
finally:
signal.signal(signal.SIGTERM, old_handler)
- def _populate1(self, key, jobs, suppress_errors, return_exception_objects, make_kwargs=None):
+ def _populate1(
+ self,
+ key: dict[str, Any],
+ jobs: Job | None,
+ suppress_errors: bool,
+ return_exception_objects: bool,
+ make_kwargs: dict[str, Any] | None = None,
+ ) -> bool | tuple[dict[str, Any], Any]:
"""
- Populate table for one source key, calling self.make inside a transaction.
-
- :param key: dict specifying job to populate
- :param jobs: the Job object or None if not reserve_jobs
- :param suppress_errors: if True, errors are suppressed and returned
- :param return_exception_objects: if True, errors returned as objects
- :return: (key, error) when suppress_errors=True,
- True if successfully invoke one make() call, otherwise False
+ Populate table for one key, calling make() inside a transaction.
+
+ Parameters
+ ----------
+ key : dict
+ Primary key specifying the job to populate.
+ jobs : Job or None
+ Job object for distributed mode, None for direct mode.
+ suppress_errors : bool
+ If True, errors are suppressed and returned.
+ return_exception_objects : bool
+ If True, return exception objects instead of messages.
+ make_kwargs : dict, optional
+ Keyword arguments passed to ``make()``.
+
+ Returns
+ -------
+ bool or tuple
+ True if make() succeeded, False if skipped (already done or reserved),
+            (key, error) tuple if suppress_errors=True and an error occurred.
"""
import time
@@ -552,16 +644,24 @@ def _populate1(self, key, jobs, suppress_errors, return_exception_objects, make_
finally:
self.__class__._allow_insert = False
- def progress(self, *restrictions, display=False):
+ def progress(self, *restrictions: Any, display: bool = False) -> tuple[int, int]:
"""
Report the progress of populating the table.
Uses a single aggregation query to efficiently compute both total and
remaining counts.
- :param restrictions: conditions to restrict key_source
- :param display: if True, log the progress
- :return: (remaining, total) -- numbers of tuples to be populated
+ Parameters
+ ----------
+ *restrictions
+ Conditions to restrict key_source.
+ display : bool, optional
+ If True, log the progress. Default False.
+
+ Returns
+ -------
+ tuple
+            ``(remaining, total)`` -- numbers of keys yet to be populated and total keys.
"""
todo = self._jobs_to_do(restrictions)
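
Usage sketch for the progress report (table name hypothetical, as above):

```python
remaining, total = FilteredImage.progress(display=True)
print(f"{total - remaining} of {total} keys populated")
```
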
@@ -633,11 +733,16 @@ def _update_job_metadata(self, key, start_time, duration, version):
"""
Update hidden job metadata for the given key.
- Args:
- key: Primary key dict identifying the row(s) to update
- start_time: datetime when computation started
- duration: float seconds elapsed
- version: str code version (truncated to 64 chars)
+ Parameters
+ ----------
+ key : dict
+ Primary key identifying the row(s) to update.
+ start_time : datetime
+ When computation started.
+ duration : float
+ Computation duration in seconds.
+ version : str
+ Code version (truncated to 64 chars).
"""
from .condition import make_condition
diff --git a/src/datajoint/blob.py b/src/datajoint/blob.py
index 2ac0e62cd..8651a57af 100644
--- a/src/datajoint/blob.py
+++ b/src/datajoint/blob.py
@@ -1,8 +1,13 @@
"""
-(De)serialization methods for basic datatypes and numpy.ndarrays with provisions for mutual
-compatibility with Matlab-based serialization implemented by mYm.
+Binary serialization for DataJoint blob storage.
+
+Provides (de)serialization for Python/NumPy objects with backward compatibility
+for MATLAB mYm-format blobs. Supports arrays, scalars, structs, cells, and
+Python built-in types (dict, list, tuple, set, datetime, UUID, Decimal).
"""
+from __future__ import annotations
+
import collections
import datetime
import uuid
@@ -69,31 +74,74 @@ def len_u32(obj):
class MatCell(np.ndarray):
- """a numpy ndarray representing a Matlab cell array"""
+ """
+ NumPy ndarray subclass representing a MATLAB cell array.
+
+ Used to distinguish cell arrays from regular arrays during serialization
+ for MATLAB compatibility.
+ """
pass
class MatStruct(np.recarray):
- """numpy.recarray representing a Matlab struct array"""
+ """
+ NumPy recarray subclass representing a MATLAB struct array.
+
+ Used to distinguish struct arrays from regular recarrays during
+ serialization for MATLAB compatibility.
+ """
pass
class Blob:
- def __init__(self, squeeze=False):
+ """
+ Binary serializer/deserializer for DataJoint blob storage.
+
+ Handles packing Python objects into binary format and unpacking binary
+ data back to Python objects. Supports two protocols:
+
+ - ``mYm``: Original MATLAB-compatible format (default)
+ - ``dj0``: Extended format for Python-specific types
+
+ Parameters
+ ----------
+ squeeze : bool, optional
+ If True, remove singleton dimensions from arrays and convert
+ 0-dimensional arrays to scalars. Default False.
+
+ Attributes
+ ----------
+ protocol : bytes or None
+ Current serialization protocol (``b"mYm\\0"`` or ``b"dj0\\0"``).
+ """
+
+ def __init__(self, squeeze: bool = False) -> None:
self._squeeze = squeeze
self._blob = None
self._pos = 0
self.protocol = None
- def set_dj0(self):
+ def set_dj0(self) -> None:
+ """Switch to dj0 protocol for extended type support."""
self.protocol = b"dj0\0" # when using new blob features
- def squeeze(self, array, convert_to_scalar=True):
+ def squeeze(self, array: np.ndarray, convert_to_scalar: bool = True) -> np.ndarray:
"""
- Simplify the input array - squeeze out all singleton dimensions.
- If convert_to_scalar, then convert zero-dimensional arrays to scalars
+ Remove singleton dimensions from an array.
+
+ Parameters
+ ----------
+ array : np.ndarray
+ Input array.
+ convert_to_scalar : bool, optional
+ If True, convert 0-dimensional arrays to Python scalars. Default True.
+
+ Returns
+ -------
+ np.ndarray or scalar
+ Squeezed array or scalar value.
"""
if not self._squeeze:
return array
@@ -233,9 +281,19 @@ def read_array(self):
data = data + 1j * self.read_value(dtype, count=n_elem)
return self.squeeze(data.reshape(shape, order="F"))
- def pack_array(self, array):
+ def pack_array(self, array: np.ndarray) -> bytes:
"""
- Serialize an np.ndarray into bytes. Scalars are encoded with ndim=0.
+ Serialize a NumPy array into bytes.
+
+ Parameters
+ ----------
+ array : np.ndarray
+ Array to serialize. Scalars are encoded with ndim=0.
+
+ Returns
+ -------
+ bytes
+ Serialized array data.
"""
if "datetime64" in array.dtype.name:
self.set_dj0()
@@ -497,10 +555,60 @@ def pack(self, obj, compress):
return blob
-def pack(obj, compress=True):
+def pack(obj, compress: bool = True) -> bytes:
+ """
+ Serialize a Python object to binary blob format.
+
+ Parameters
+ ----------
+ obj : any
+ Object to serialize. Supports NumPy arrays, Python scalars,
+ collections (dict, list, tuple, set), datetime objects, UUID,
+ Decimal, and MATLAB-compatible MatCell/MatStruct.
+ compress : bool, optional
+ If True (default), compress blobs larger than 1000 bytes using zlib.
+
+ Returns
+ -------
+ bytes
+ Serialized binary data.
+
+ Raises
+ ------
+ DataJointError
+ If the object type is not supported.
+
+ Examples
+ --------
+ >>> data = np.array([1, 2, 3])
+ >>> blob = pack(data)
+ >>> unpacked = unpack(blob)
+ """
return Blob().pack(obj, compress=compress)
-def unpack(blob, squeeze=False):
+def unpack(blob: bytes, squeeze: bool = False):
+ """
+ Deserialize a binary blob to a Python object.
+
+ Parameters
+ ----------
+ blob : bytes
+ Binary data from ``pack()`` or MATLAB mYm serialization.
+ squeeze : bool, optional
+ If True, remove singleton dimensions from arrays. Default False.
+
+ Returns
+ -------
+ any
+ Deserialized Python object.
+
+ Examples
+ --------
+ >>> blob = pack({'a': 1, 'b': [1, 2, 3]})
+ >>> data = unpack(blob)
+ >>> data['b']
+ [1, 2, 3]
+ """
if blob is not None:
return Blob(squeeze=squeeze).unpack(blob)
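
A round-trip sketch using the module-level `pack`/`unpack` shown above:

```python
import numpy as np
from datajoint import blob

# Nested Python/NumPy structure through the blob serializer and back.
original = {"trace": np.arange(5), "meta": {"fs": 30.0}}
packed = blob.pack(original)      # bytes; zlib-compressed when large
restored = blob.unpack(packed)
assert np.array_equal(restored["trace"], original["trace"])
assert restored["meta"]["fs"] == 30.0
```
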
diff --git a/src/datajoint/builtin_codecs.py b/src/datajoint/builtin_codecs.py
index a55494e82..66589dc36 100644
--- a/src/datajoint/builtin_codecs.py
+++ b/src/datajoint/builtin_codecs.py
@@ -167,13 +167,19 @@ def encode(self, value: bytes, *, key: dict | None = None, store_name: str | Non
"""
Store content and return metadata.
- Args:
- value: Raw bytes to store.
- key: Primary key values (unused).
- store_name: Store to use. If None, uses default store.
-
- Returns:
- Metadata dict: {hash, store, size}
+ Parameters
+ ----------
+ value : bytes
+ Raw bytes to store.
+ key : dict, optional
+ Primary key values (unused).
+ store_name : str, optional
+ Store to use. If None, uses default store.
+
+ Returns
+ -------
+ dict
+ Metadata dict: ``{hash, store, size}``.
"""
from .content_registry import put_content
@@ -183,11 +189,16 @@ def decode(self, stored: dict, *, key: dict | None = None) -> bytes:
"""
Retrieve content by hash.
- Args:
- stored: Metadata dict with 'hash' and optionally 'store'.
- key: Primary key values (unused).
+ Parameters
+ ----------
+ stored : dict
+ Metadata dict with ``'hash'`` and optionally ``'store'``.
+ key : dict, optional
+ Primary key values (unused).
- Returns:
+ Returns
+ -------
+ bytes
Original bytes.
"""
from .content_registry import get_content
@@ -275,19 +286,20 @@ def encode(
"""
Store content and return metadata.
- Args:
- value: Content to store. Can be:
- - bytes: Raw bytes to store as file
- - str/Path: Path to local file or folder to upload
- key: Dict containing context for path construction:
- - _schema: Schema name
- - _table: Table name
- - _field: Field/attribute name
- - Other entries are primary key values
- store_name: Store to use. If None, uses default store.
-
- Returns:
- Metadata dict suitable for ObjectRef.from_json()
+ Parameters
+ ----------
+ value : bytes, str, or Path
+ Content to store: bytes (raw data), or str/Path (file/folder to upload).
+ key : dict, optional
+ Context for path construction with keys ``_schema``, ``_table``,
+ ``_field``, plus primary key values.
+ store_name : str, optional
+ Store to use. If None, uses default store.
+
+ Returns
+ -------
+ dict
+ Metadata dict suitable for ``ObjectRef.from_json()``.
"""
from datetime import datetime, timezone
from pathlib import Path
@@ -381,12 +393,17 @@ def decode(self, stored: dict, *, key: dict | None = None) -> Any:
"""
Create ObjectRef handle for lazy access.
- Args:
- stored: Metadata dict from database.
- key: Primary key values (unused).
-
- Returns:
- ObjectRef for accessing the stored content.
+ Parameters
+ ----------
+ stored : dict
+ Metadata dict from database.
+ key : dict, optional
+ Primary key values (unused).
+
+ Returns
+ -------
+ ObjectRef
+ Handle for accessing the stored content.
"""
from .objectref import ObjectRef
from .content_registry import get_store_backend
@@ -396,7 +413,7 @@ def decode(self, stored: dict, *, key: dict | None = None) -> Any:
return ObjectRef.from_json(stored, backend=backend)
def validate(self, value: Any) -> None:
- """Validate that value is bytes, path, dict metadata, or (extension, data) tuple."""
+ """Validate value is bytes, path, dict metadata, or (ext, data) tuple."""
from pathlib import Path
if isinstance(value, bytes):
@@ -463,13 +480,19 @@ def encode(self, value: Any, *, key: dict | None = None, store_name: str | None
"""
Read file and encode as filename + contents.
- Args:
- value: Path to file (str or Path).
- key: Primary key values (unused).
- store_name: Unused for internal storage.
-
- Returns:
- Bytes: filename (UTF-8) + null byte + file contents
+ Parameters
+ ----------
+ value : str or Path
+ Path to file.
+ key : dict, optional
+ Primary key values (unused).
+ store_name : str, optional
+ Unused for internal storage.
+
+ Returns
+ -------
+ bytes
+ Filename (UTF-8) + null byte + file contents.
"""
from pathlib import Path
@@ -487,12 +510,17 @@ def decode(self, stored: bytes, *, key: dict | None = None) -> str:
"""
Extract file to download path and return local path.
- Args:
- stored: Blob containing filename + null + contents.
- key: Primary key values (unused).
-
- Returns:
- Path to extracted file as string.
+ Parameters
+ ----------
+ stored : bytes
+ Blob containing filename + null + contents.
+ key : dict, optional
+ Primary key values (unused).
+
+ Returns
+ -------
+ str
+ Path to extracted file.
"""
from pathlib import Path
@@ -592,13 +620,19 @@ def encode(self, value: Any, *, key: dict | None = None, store_name: str | None
"""
Store path reference as JSON metadata.
- Args:
- value: Relative path within the store (str).
- key: Primary key values (unused).
- store_name: Store where the file exists.
-
- Returns:
- Metadata dict: {path, store}
+ Parameters
+ ----------
+ value : str
+ Relative path within the store.
+ key : dict, optional
+ Primary key values (unused).
+ store_name : str, optional
+ Store where the file exists.
+
+ Returns
+ -------
+ dict
+ Metadata dict: ``{path, store}``.
"""
from datetime import datetime, timezone
@@ -629,12 +663,17 @@ def decode(self, stored: dict, *, key: dict | None = None) -> Any:
"""
Create ObjectRef handle for lazy access.
- Args:
- stored: Metadata dict with path and store.
- key: Primary key values (unused).
-
- Returns:
- ObjectRef for accessing the file.
+ Parameters
+ ----------
+ stored : dict
+ Metadata dict with path and store.
+ key : dict, optional
+ Primary key values (unused).
+
+ Returns
+ -------
+ ObjectRef
+ Handle for accessing the file.
"""
from .objectref import ObjectRef
from .content_registry import get_store_backend
diff --git a/src/datajoint/codecs.py b/src/datajoint/codecs.py
index 840dd1593..cf2e2105f 100644
--- a/src/datajoint/codecs.py
+++ b/src/datajoint/codecs.py
@@ -54,27 +54,30 @@ class Codec(ABC):
Requires Python 3.10+.
- Attributes:
- name: Unique identifier used in ```` syntax. Must be set by subclasses.
-
- Example:
- class GraphCodec(dj.Codec):
- name = "graph"
-
- def get_dtype(self, is_external: bool) -> str:
- return ""
-
- def encode(self, graph, *, key=None, store_name=None):
- return {'nodes': list(graph.nodes()), 'edges': list(graph.edges())}
-
- def decode(self, stored, *, key=None):
- import networkx as nx
- G = nx.Graph()
- G.add_nodes_from(stored['nodes'])
- G.add_edges_from(stored['edges'])
- return G
-
- The codec can then be used in table definitions::
+ Attributes
+ ----------
+ name : str or None
+        Unique identifier used in ``<name>`` syntax. Must be set by subclasses.
+
+ Examples
+ --------
+ >>> class GraphCodec(dj.Codec):
+ ... name = "graph"
+ ...
+ ... def get_dtype(self, is_external: bool) -> str:
+    ...         return "<blob>"
+ ...
+ ... def encode(self, graph, *, key=None, store_name=None):
+ ... return {'nodes': list(graph.nodes()), 'edges': list(graph.edges())}
+ ...
+ ... def decode(self, stored, *, key=None):
+ ... import networkx as nx
+ ... G = nx.Graph()
+ ... G.add_nodes_from(stored['nodes'])
+ ... G.add_edges_from(stored['edges'])
+ ... return G
+
+ Use in table definitions::
class Connectivity(dj.Manual):
definition = '''
@@ -83,7 +86,7 @@ class Connectivity(dj.Manual):
            graph_data : <graph>
'''
- To skip auto-registration (for abstract base classes)::
+ Skip auto-registration for abstract base classes::
class ExternalOnlyCodec(dj.Codec, register=False):
'''Abstract base - not registered.'''
@@ -116,33 +119,46 @@ def __init_subclass__(cls, *, register: bool = True, **kwargs):
_codec_registry[cls.name] = cls()
logger.debug(f"Registered codec <{cls.name}> from {cls.__module__}.{cls.__name__}")
+ @abstractmethod
def get_dtype(self, is_external: bool) -> str:
"""
Return the storage dtype for this codec.
- Args:
- is_external: True if @ modifier present (external storage)
-
- Returns:
- A core type (e.g., "bytes", "json") or another codec (e.g., "")
-
- Raises:
- NotImplementedError: If not overridden by subclass.
- DataJointError: If external storage not supported but requested.
+ Parameters
+ ----------
+ is_external : bool
+ True if ``@`` modifier present (external storage).
+
+ Returns
+ -------
+ str
+ A core type (e.g., ``"bytes"``, ``"json"``) or another codec
+            (e.g., ``"<blob>"``).
+
+ Raises
+ ------
+ DataJointError
+ If external storage not supported but requested.
"""
- raise NotImplementedError(f"Codec <{self.name}> must implement get_dtype()")
+ ...
@abstractmethod
def encode(self, value: Any, *, key: dict | None = None, store_name: str | None = None) -> Any:
"""
Encode Python value for storage.
- Args:
- value: The Python object to store.
- key: Primary key values as a dict. May be needed for path construction.
- store_name: Target store name for external storage.
-
- Returns:
+ Parameters
+ ----------
+ value : any
+ The Python object to store.
+ key : dict, optional
+ Primary key values. May be needed for path construction.
+ store_name : str, optional
+ Target store name for external storage.
+
+ Returns
+ -------
+ any
Value in the format expected by the dtype.
"""
...
@@ -152,11 +168,16 @@ def decode(self, stored: Any, *, key: dict | None = None) -> Any:
"""
Decode stored value back to Python.
- Args:
- stored: Data retrieved from storage.
- key: Primary key values as a dict.
+ Parameters
+ ----------
+ stored : any
+ Data retrieved from storage.
+ key : dict, optional
+ Primary key values.
- Returns:
+ Returns
+ -------
+ any
The reconstructed Python object.
"""
...
@@ -169,12 +190,17 @@ def validate(self, value: Any) -> None:
Called automatically before ``encode()`` during INSERT operations.
The default implementation accepts any value.
- Args:
- value: The value to validate.
-
- Raises:
- TypeError: If the value has an incompatible type.
- ValueError: If the value fails domain validation.
+ Parameters
+ ----------
+ value : any
+ The value to validate.
+
+ Raises
+ ------
+ TypeError
+ If the value has an incompatible type.
+ ValueError
+ If the value fails domain validation.
"""
pass
@@ -186,19 +212,25 @@ def parse_type_spec(spec: str) -> tuple[str, str | None]:
"""
Parse a type specification into type name and optional store parameter.
- Handles formats like:
- - "" -> ("blob", None)
- - "" -> ("blob", "cold")
- - "" -> ("blob", "") # default store
- - "blob@cold" -> ("blob", "cold")
- - "blob" -> ("blob", None)
-
- Args:
- spec: Type specification string, with or without angle brackets.
-
- Returns:
- Tuple of (type_name, store_name). store_name is None if not specified,
- empty string if @ present without name (default store).
+ Parameters
+ ----------
+ spec : str
+ Type specification string, with or without angle brackets.
+
+ Returns
+ -------
+ tuple[str, str | None]
+ ``(type_name, store_name)``. ``store_name`` is None if not specified,
+ empty string if ``@`` present without name (default store).
+
+ Examples
+ --------
+    >>> parse_type_spec("<blob>")
+    ("blob", None)
+    >>> parse_type_spec("<blob@cold>")
+    ("blob", "cold")
+    >>> parse_type_spec("<blob@>")
+    ("blob", "")
"""
# Strip angle brackets
spec = spec.strip("<>").strip()
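
A minimal sketch of the parsing rule documented above (illustrative only; the actual implementation continues below):

```python
def parse_type_spec_sketch(spec: str) -> tuple[str, str | None]:
    spec = spec.strip("<>").strip()
    name, sep, store = spec.partition("@")
    # no "@" -> store is None; "@" with no name -> "" (default store)
    return name, (store if sep else None)

assert parse_type_spec_sketch("<blob>") == ("blob", None)
assert parse_type_spec_sketch("<blob@cold>") == ("blob", "cold")
assert parse_type_spec_sketch("<blob@>") == ("blob", "")
```
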
@@ -216,11 +248,15 @@ def unregister_codec(name: str) -> None:
Primarily useful for testing. Use with caution in production code.
- Args:
- name: The codec name to unregister.
+ Parameters
+ ----------
+ name : str
+ The codec name to unregister.
- Raises:
- DataJointError: If the codec is not registered.
+ Raises
+ ------
+ DataJointError
+ If the codec is not registered.
"""
name = name.strip("<>")
if name not in _codec_registry:
@@ -235,15 +271,21 @@ def get_codec(name: str) -> Codec:
Looks up the codec in the explicit registry first, then attempts
to load from installed packages via entry points.
- Args:
- name: The codec name, with or without angle brackets.
- Store parameters (e.g., "") are stripped.
+ Parameters
+ ----------
+ name : str
+ The codec name, with or without angle brackets.
+        Store parameters (e.g., ``"<blob@cold>"``) are stripped.
- Returns:
+ Returns
+ -------
+ Codec
The registered Codec instance.
- Raises:
- DataJointError: If the codec is not found.
+ Raises
+ ------
+ DataJointError
+ If the codec is not found.
"""
# Strip angle brackets and store parameter
type_name, _ = parse_type_spec(name)
@@ -267,7 +309,9 @@ def list_codecs() -> list[str]:
"""
List all registered codec names.
- Returns:
+ Returns
+ -------
+ list[str]
Sorted list of registered codec names.
"""
_load_entry_points()
@@ -278,10 +322,14 @@ def is_codec_registered(name: str) -> bool:
"""
Check if a codec name is registered.
- Args:
- name: The codec name to check (store parameters are ignored).
+ Parameters
+ ----------
+ name : str
+ The codec name to check (store parameters are ignored).
- Returns:
+ Returns
+ -------
+ bool
True if the codec is registered.
"""
type_name, _ = parse_type_spec(name)
@@ -346,31 +394,38 @@ def resolve_dtype(
"""
Resolve a dtype string, following codec chains.
- If dtype references another codec (e.g., ""), recursively
+    If dtype references another codec (e.g., ``"<blob>"``), recursively
resolves to find the ultimate storage type. Store parameters are propagated
through the chain.
- Args:
- dtype: The dtype string to resolve (e.g., "", "", "bytes").
- seen: Set of already-seen codec names (for cycle detection).
- store_name: Store name from outer type specification (propagated inward).
-
- Returns:
- Tuple of (final_storage_type, list_of_codecs_in_chain, resolved_store_name).
- The chain is ordered from outermost to innermost codec.
-
- Raises:
- DataJointError: If a circular type reference is detected.
-
- Examples:
- >>> resolve_dtype("")
- ("bytes", [BlobCodec], None)
-
- >>> resolve_dtype("")
- ("", [BlobCodec], "cold") # BlobCodec.get_dtype(True) returns ""
-
- >>> resolve_dtype("bytes")
- ("bytes", [], None)
+ Parameters
+ ----------
+ dtype : str
+        The dtype string to resolve (e.g., ``"<blob>"``, ``"<blob@cold>"``, ``"bytes"``).
+ seen : set[str], optional
+ Set of already-seen codec names (for cycle detection).
+ store_name : str, optional
+ Store name from outer type specification (propagated inward).
+
+ Returns
+ -------
+ tuple[str, list[Codec], str | None]
+ ``(final_storage_type, codec_chain, resolved_store_name)``.
+ Chain is ordered from outermost to innermost codec.
+
+ Raises
+ ------
+ DataJointError
+ If a circular type reference is detected.
+
+ Examples
+ --------
+    >>> resolve_dtype("<blob>")
+    ("bytes", [BlobCodec], None)
+    >>> resolve_dtype("<blob@cold>")
+    ("<content>", [BlobCodec], "cold")
+    >>> resolve_dtype("bytes")
+    ("bytes", [], None)
"""
if seen is None:
seen = set()
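
A simplified sketch of the chain-following logic with cycle detection; the registry mapping and names here are illustrative, and the real function also propagates store names as documented above:

```python
def follow_codec_chain(dtype: str, registry: dict[str, str]) -> tuple[str, list[str]]:
    """Resolve "<a>" -> "<b>" -> core type; raise on circular references."""
    seen: set[str] = set()
    chain: list[str] = []
    while dtype.startswith("<"):
        name = dtype.strip("<>")
        if name in seen:
            raise ValueError(f"Circular codec reference: <{name}>")
        seen.add(name)
        chain.append(name)
        dtype = registry[name]  # what this codec's get_dtype() returns
    return dtype, chain

assert follow_codec_chain("<blob>", {"blob": "bytes"}) == ("bytes", ["blob"])
```
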
@@ -420,18 +475,24 @@ def lookup_codec(codec_spec: str) -> tuple[Codec, str | None]:
"""
Look up a codec from a type specification string.
- Parses a codec specification (e.g., "") and returns
+    Parses a codec specification (e.g., ``"<blob@cold>"``) and returns
the codec instance along with any store name.
- Args:
- codec_spec: The codec specification, with or without angle brackets.
- May include store parameter (e.g., "").
-
- Returns:
- Tuple of (Codec instance, store_name or None).
-
- Raises:
- DataJointError: If the codec is not found.
+ Parameters
+ ----------
+ codec_spec : str
+ The codec specification, with or without angle brackets.
+        May include store parameter (e.g., ``"<blob@cold>"``).
+
+ Returns
+ -------
+ tuple[Codec, str | None]
+        ``(codec_instance, store_name)``; ``store_name`` is None if not specified.
+
+ Raises
+ ------
+ DataJointError
+ If the codec is not found.
"""
type_name, store_name = parse_type_spec(codec_spec)
diff --git a/src/datajoint/condition.py b/src/datajoint/condition.py
index 24c898112..8ab19ca5d 100644
--- a/src/datajoint/condition.py
+++ b/src/datajoint/condition.py
@@ -1,4 +1,11 @@
-"""methods for generating SQL WHERE clauses from datajoint restriction conditions"""
+"""
+SQL WHERE clause generation from DataJoint restriction conditions.
+
+This module provides utilities for converting various restriction formats
+(dicts, strings, QueryExpressions) into SQL WHERE clauses.
+"""
+
+from __future__ import annotations
import collections
import datetime
@@ -9,18 +16,36 @@
import re
import uuid
from dataclasses import dataclass
+from typing import TYPE_CHECKING, Any
import numpy
import pandas
from .errors import DataJointError
+if TYPE_CHECKING:
+ from .expression import QueryExpression
+
logger = logging.getLogger(__name__.split(".")[0])
JSON_PATTERN = re.compile(r"^(?P<attr>\w+)(\.(?P<path>[\w.*\[\]]+))?(:(?P<type>[\w(,\s)]+))?$")
-def translate_attribute(key):
+def translate_attribute(key: str) -> tuple[dict | None, str]:
+ """
+ Translate an attribute key, handling JSON path notation.
+
+ Parameters
+ ----------
+ key : str
+ Attribute name, optionally with JSON path (e.g., ``"attr.path.field"``).
+
+ Returns
+ -------
+ tuple
+ (match_dict, sql_expression) where match_dict contains parsed
+ components or None if no JSON path.
+ """
match = JSON_PATTERN.match(key)
if match is None:
return match, key
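
The pattern (with the named groups restored above) splits an attribute key into the attribute, an optional JSON path, and an optional type cast; a quick check:

```python
import re

JSON_PATTERN = re.compile(r"^(?P<attr>\w+)(\.(?P<path>[\w.*\[\]]+))?(:(?P<type>[\w(,\s)]+))?$")

m = JSON_PATTERN.match("params.alpha:decimal")
assert m.groupdict() == {"attr": "params", "path": "alpha", "type": "decimal"}
assert JSON_PATTERN.match("subject_id").group("attr") == "subject_id"
```
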
@@ -35,26 +60,35 @@ def translate_attribute(key):
class PromiscuousOperand:
"""
- A container for an operand to ignore join compatibility
+ Wrapper to bypass join compatibility checking.
+
+ Used when you want to force a natural join without semantic matching.
+
+ Parameters
+ ----------
+ operand : QueryExpression
+ The operand to wrap.
"""
- def __init__(self, operand):
+ def __init__(self, operand: QueryExpression) -> None:
self.operand = operand
class AndList(list):
"""
- A list of conditions to by applied to a query expression by logical conjunction: the
- conditions are AND-ed. All other collections (lists, sets, other entity sets, etc) are
- applied by logical disjunction (OR).
-
- Example:
- expr2 = expr & dj.AndList((cond1, cond2, cond3))
- is equivalent to
- expr2 = expr & cond1 & cond2 & cond3
+ List of conditions combined with logical AND.
+
+ All conditions in the list are AND-ed together. Other collections
+ (lists, sets, QueryExpressions) are OR-ed.
+
+ Examples
+ --------
+    >>> expr2 = expr & dj.AndList((cond1, cond2, cond3))
+    >>> # equivalent to: expr & cond1 & cond2 & cond3
"""
- def append(self, restriction):
+ def append(self, restriction: Any) -> None:
if isinstance(restriction, AndList):
# extend to reduce nesting
self.extend(restriction)
@@ -65,15 +99,25 @@ def append(self, restriction):
@dataclass
class Top:
"""
- A restriction to the top entities of a query.
- In SQL, this corresponds to ORDER BY ... LIMIT ... OFFSET
+ Restrict query to top N entities with ordering.
+
+ In SQL, corresponds to ``ORDER BY ... LIMIT ... OFFSET``.
+
+ Parameters
+ ----------
+ limit : int, optional
+ Maximum number of rows to return. Default 1.
+ order_by : str or list[str], optional
+ Attributes to order by. ``"KEY"`` for primary key. Default ``"KEY"``.
+ offset : int, optional
+ Number of rows to skip. Default 0.
"""
limit: int | None = 1
order_by: str | list[str] = "KEY"
offset: int = 0
- def __post_init__(self):
+ def __post_init__(self) -> None:
self.order_by = self.order_by or ["KEY"]
self.offset = self.offset or 0
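
Usage sketch (table and attribute names hypothetical):

```python
import datajoint as dj

# Ten most recent sessions: ORDER BY session_date DESC LIMIT 10
recent = Session & dj.Top(limit=10, order_by="session_date DESC")
```
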
@@ -92,30 +136,54 @@ def __post_init__(self):
class Not:
- """invert restriction"""
-
- def __init__(self, restriction):
- self.restriction = restriction
+ """
+ Invert a restriction condition.
+ Parameters
+ ----------
+ restriction : any
+ Restriction condition to negate.
-def assert_join_compatibility(expr1, expr2, semantic_check=True):
+ Examples
+ --------
+ >>> table - condition # equivalent to table & Not(condition)
"""
- Determine if expressions expr1 and expr2 are join-compatible.
- With semantic_check=True (default):
- Raises an error if there are non-homologous namesakes (same name, different lineage).
- This prevents accidental joins on attributes that share names but represent
- different entities.
+ def __init__(self, restriction: Any) -> None:
+ self.restriction = restriction
- If the ~lineage table doesn't exist for either schema, a warning is issued
- and semantic checking is disabled (join proceeds as natural join).
- With semantic_check=False:
- No lineage checking. All namesake attributes are matched (natural join behavior).
+def assert_join_compatibility(
+ expr1: QueryExpression,
+ expr2: QueryExpression,
+ semantic_check: bool = True,
+) -> None:
+ """
+ Check if two expressions are join-compatible.
+
+ Parameters
+ ----------
+ expr1 : QueryExpression
+ First expression.
+ expr2 : QueryExpression
+ Second expression.
+ semantic_check : bool, optional
+ If True (default), use semantic matching and error on non-homologous
+ namesakes (same name, different lineage). If False, use natural join.
+
+ Raises
+ ------
+ DataJointError
+ If semantic_check is True and expressions have non-homologous namesakes.
+
+ Notes
+ -----
+ With semantic_check=True:
+ Prevents accidental joins on attributes that share names but represent
+        different entities. If the ~lineage table doesn't exist for either schema,
+        a warning is issued and semantic checking is disabled.
- :param expr1: A QueryExpression object
- :param expr2: A QueryExpression object
- :param semantic_check: If True (default), use semantic matching and error on conflicts
+ With semantic_check=False:
+ All namesake attributes are matched (natural join behavior).
"""
from .expression import QueryExpression, U
@@ -151,16 +219,44 @@ def assert_join_compatibility(expr1, expr2, semantic_check=True):
)
-def make_condition(query_expression, condition, columns, semantic_check=True):
+def make_condition(
+ query_expression: QueryExpression,
+ condition: Any,
+ columns: set[str],
+ semantic_check: bool = True,
+) -> str | bool:
"""
- Translate the input condition into the equivalent SQL condition (a string)
-
- :param query_expression: a dj.QueryExpression object to apply condition
- :param condition: any valid restriction object.
- :param columns: a set passed by reference to collect all column names used in the
- condition.
- :param semantic_check: If True (default), use semantic matching and error on conflicts.
- :return: an SQL condition string or a boolean value.
+ Translate a restriction into an SQL WHERE clause condition.
+
+ Parameters
+ ----------
+ query_expression : QueryExpression
+ The expression to apply the condition to.
+ condition : any
+ Valid restriction: str, dict, bool, QueryExpression, AndList,
+ numpy.void, pandas.DataFrame, or iterable of restrictions.
+ columns : set[str]
+ Set passed by reference to collect column names used in the condition.
+ semantic_check : bool, optional
+ If True (default), use semantic matching and error on conflicts.
+
+ Returns
+ -------
+ str or bool
+        SQL condition string, or bool if the condition evaluates to a constant.
+
+ Notes
+ -----
+ Restriction types are processed as follows:
+
+ - ``str``: Used directly as SQL condition
+ - ``dict``: AND of equality conditions for matching attributes
+ - ``bool``: Returns the boolean value (possibly negated)
+ - ``QueryExpression``: Generates subquery (semijoin/antijoin)
+ - ``AndList``: AND of all conditions
+ - ``list/set/tuple``: OR of all conditions
+ - ``numpy.void``: Like dict, from record array
+ - ``pandas.DataFrame``: Converted to records, then OR-ed
"""
from .expression import Aggregation, QueryExpression, U
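
The AND/OR semantics listed in the Notes above, in usage form (table and attribute names hypothetical):

```python
# dict: AND of equality conditions on matching attributes
Session & {"subject_id": 1, "rig": "A"}

# list/set/tuple: OR of the member restrictions
Session & [{"rig": "A"}, {"rig": "B"}]

# AndList: explicit conjunction of heterogeneous restrictions
Session & dj.AndList(["session_date > '2024-01-01'", {"rig": "A"}])
```
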
@@ -296,14 +392,27 @@ def combine_conditions(negate, conditions):
return f"{'NOT ' if negate else ''} ({' OR '.join(or_list)})" if or_list else negate
-def extract_column_names(sql_expression):
- """
- extract all presumed column names from an sql expression such as the WHERE clause,
- for example.
+def extract_column_names(sql_expression: str) -> set[str]:
+ r"""
+ Extract column names from an SQL expression.
+
+ Parameters
+ ----------
+ sql_expression : str
+ SQL expression (e.g., WHERE clause) to parse.
+
+ Returns
+ -------
+ set[str]
+ Set of extracted column names.
+
+ Notes
+ -----
+ Parsing is MySQL-specific. Identifies columns by:
- :param sql_expression: a string containing an SQL expression
- :return: set of extracted column names
- This may be MySQL-specific for now.
+ 1. Names in backticks (``\`column\```)
+ 2. Bare identifiers not followed by ``(`` (excludes functions)
+ 3. Excludes SQL reserved words (IS, IN, AND, OR, etc.)
"""
assert isinstance(sql_expression, str)
result = set()
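
A rough sketch of the extraction rules from the Notes (illustrative only; the real implementation follows):

```python
import re

def extract_columns_sketch(sql: str) -> set[str]:
    reserved = {"IS", "IN", "AND", "OR", "NOT", "NULL", "LIKE", "BETWEEN"}
    # rule 1: names in backticks
    backticked = set(re.findall(r"`([a-z_][a-z0-9_]*)`", sql))
    # rule 2: bare identifiers not followed by "(" (excludes function calls)
    bare = {
        word
        for word in re.findall(r"\b([a-z_][a-z0-9_]*)\b(?!\s*\()", sql)
        if word.upper() not in reserved  # rule 3: skip reserved words
    }
    return backticked | bare

assert extract_columns_sketch("`subject_id` = 1 AND rig IN ('A')") == {"subject_id", "rig"}
```
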
diff --git a/src/datajoint/connection.py b/src/datajoint/connection.py
index 66d926694..394952886 100644
--- a/src/datajoint/connection.py
+++ b/src/datajoint/connection.py
@@ -3,12 +3,15 @@
the ``conn`` function that provides access to a persistent connection in datajoint.
"""
+from __future__ import annotations
+
import logging
import pathlib
import re
import warnings
from contextlib import contextmanager
from getpass import getpass
+from typing import Callable
import pymysql as client
@@ -26,67 +29,98 @@
cache_key = "query_cache" # the key to lookup the query_cache folder in dj.config
-def translate_query_error(client_error, query):
+def translate_query_error(client_error: Exception, query: str) -> Exception:
"""
- Take client error and original query and return the corresponding DataJoint exception.
-
- :param client_error: the exception raised by the client interface
- :param query: sql query with placeholders
- :return: an instance of the corresponding subclass of datajoint.errors.DataJointError
+ Translate client error to the corresponding DataJoint exception.
+
+ Parameters
+ ----------
+ client_error : Exception
+ The exception raised by the client interface.
+ query : str
+ SQL query with placeholders.
+
+ Returns
+ -------
+ Exception
+ An instance of the corresponding DataJoint error subclass,
+ or the original error if no mapping exists.
"""
logger.debug("type: {}, args: {}".format(type(client_error), client_error.args))
err, *args = client_error.args
- # Loss of connection errors
- if err in (0, "(0, '')"):
- return errors.LostConnectionError("Server connection lost due to an interface error.", *args)
- if err == 2006:
- return errors.LostConnectionError("Connection timed out", *args)
- if err == 2013:
- return errors.LostConnectionError("Server connection lost", *args)
- # Access errors
- if err in (1044, 1142):
- return errors.AccessError("Insufficient privileges.", args[0], query)
- # Integrity errors
- if err == 1062:
- return errors.DuplicateError(*args)
- if err == 1217: # MySQL 8 error code
- return errors.IntegrityError(*args)
- if err == 1451:
- return errors.IntegrityError(*args)
- if err == 1452:
- return errors.IntegrityError(*args)
- # Syntax errors
- if err == 1064:
- return errors.QuerySyntaxError(args[0], query)
- # Existence errors
- if err == 1146:
- return errors.MissingTableError(args[0], query)
- if err == 1364:
- return errors.MissingAttributeError(*args)
- if err == 1054:
- return errors.UnknownAttributeError(*args)
- # all the other errors are re-raised in original form
- return client_error
-
-
-def conn(host=None, user=None, password=None, *, init_fun=None, reset=False, use_tls=None):
+ match err:
+ # Loss of connection errors
+ case 0 | "(0, '')":
+ return errors.LostConnectionError("Server connection lost due to an interface error.", *args)
+ case 2006:
+ return errors.LostConnectionError("Connection timed out", *args)
+ case 2013:
+ return errors.LostConnectionError("Server connection lost", *args)
+
+ # Access errors
+ case 1044 | 1142:
+ return errors.AccessError("Insufficient privileges.", args[0], query)
+
+ # Integrity errors
+ case 1062:
+ return errors.DuplicateError(*args)
+ case 1217 | 1451 | 1452:
+ return errors.IntegrityError(*args)
+
+ # Syntax errors
+ case 1064:
+ return errors.QuerySyntaxError(args[0], query)
+
+ # Existence errors
+ case 1146:
+ return errors.MissingTableError(args[0], query)
+ case 1364:
+ return errors.MissingAttributeError(*args)
+ case 1054:
+ return errors.UnknownAttributeError(*args)
+
+ # All other errors pass through unchanged
+ case _:
+ return client_error
+
+
+def conn(
+ host: str | None = None,
+ user: str | None = None,
+ password: str | None = None,
+ *,
+ init_fun: Callable | None = None,
+ reset: bool = False,
+ use_tls: bool | dict | None = None,
+) -> Connection:
"""
- Returns a persistent connection object to be shared by multiple modules.
+ Return a persistent connection object shared by multiple modules.
+
If the connection is not yet established or reset=True, a new connection is set up.
- If connection information is not provided, it is taken from config which takes the
- information from dj_local_conf.json. If the password is not specified in that file
- datajoint prompts for the password.
-
- :param host: hostname
- :param user: mysql user
- :param password: mysql password
- :param init_fun: initialization function
- :param reset: whether the connection should be reset or not
- :param use_tls: TLS encryption option. Valid options are: True (required), False
- (required no TLS), None (TLS preferred, default), dict (Manually specify values per
- https://dev.mysql.com/doc/refman/8.0/en/connection-options.html#encrypted-connection-options).
+ If connection information is not provided, it is taken from config.
+
+ Parameters
+ ----------
+ host : str, optional
+ Database hostname.
+ user : str, optional
+ MySQL username.
+ password : str, optional
+ MySQL password. Prompts if not provided.
+ init_fun : callable, optional
+ Initialization function called after connection.
+ reset : bool, optional
+ If True, reset existing connection. Default False.
+ use_tls : bool or dict, optional
+ TLS encryption option: True (required), False (no TLS),
+ None (preferred, default), or dict for manual configuration.
+
+ Returns
+ -------
+ Connection
+ Persistent database connection.
"""
if not hasattr(conn, "connection") or reset:
host = host if host is not None else config["database.host"]
@@ -128,20 +162,43 @@ def rowcount(self):
class Connection:
"""
- A dj.Connection object manages a connection to a database server.
- It also catalogues modules, schemas, tables, and their dependencies (foreign keys).
-
- Most of the parameters below should be set in the local configuration file.
-
- :param host: host name, may include port number as hostname:port, in which case it overrides the value in port
- :param user: user name
- :param password: password
- :param port: port number
- :param init_fun: connection initialization function (SQL)
- :param use_tls: TLS encryption option
+ Manages a connection to a database server.
+
+ Catalogues schemas, tables, and their dependencies (foreign keys).
+ Most parameters should be set in the configuration file.
+
+ Parameters
+ ----------
+ host : str
+ Hostname, may include port as ``hostname:port``.
+ user : str
+ Database username.
+ password : str
+ Database password.
+ port : int, optional
+ Port number. Overridden if specified in host.
+ init_fun : str, optional
+ SQL initialization command.
+ use_tls : bool or dict, optional
+ TLS encryption option.
+
+ Attributes
+ ----------
+ schemas : dict
+ Registered schema objects.
+ dependencies : Dependencies
+ Foreign key dependency graph.
"""
- def __init__(self, host, user, password, port=None, init_fun=None, use_tls=None):
+ def __init__(
+ self,
+ host: str,
+ user: str,
+ password: str,
+ port: int | None = None,
+ init_fun: str | None = None,
+ use_tls: bool | dict | None = None,
+ ) -> None:
if ":" in host:
# the port in the hostname overrides the port argument
host, port = host.split(":")
@@ -172,8 +229,8 @@ def __repr__(self):
connected = "connected" if self.is_connected else "disconnected"
return "DataJoint connection ({connected}) {user}@{host}:{port}".format(connected=connected, **self.conn_info)
- def connect(self):
- """Connect to the database server."""
+ def connect(self) -> None:
+ """Establish or re-establish connection to the database server."""
with warnings.catch_warnings():
warnings.filterwarnings("ignore", ".*deprecated.*")
try:
@@ -198,38 +255,67 @@ def connect(self):
)
self._conn.autocommit(True)
- def set_query_cache(self, query_cache=None):
+ def set_query_cache(self, query_cache: str | None = None) -> None:
"""
- When query_cache is not None, the connection switches into the query caching mode, which entails:
- 1. Only SELECT queries are allowed.
- 2. The results of queries are cached under the path indicated by dj.config['query_cache']
- 3. query_cache is a string that differentiates different cache states.
-
- :param query_cache: a string to initialize the hash for query results
+ Enable query caching mode.
+
+ When enabled:
+ 1. Only SELECT queries are allowed
+ 2. Results are cached under ``dj.config['query_cache']``
+ 3. Cache key differentiates cache states
+
+ Parameters
+ ----------
+ query_cache : str, optional
+ String to initialize the hash for query results.
+ None disables caching.
"""
self._query_cache = query_cache
- def purge_query_cache(self):
- """Purges all query cache."""
+ def purge_query_cache(self) -> None:
+ """Delete all cached query results."""
if isinstance(config.get(cache_key), str) and pathlib.Path(config[cache_key]).is_dir():
for path in pathlib.Path(config[cache_key]).iterdir():
if not path.is_dir():
path.unlink()
- def close(self):
+ def close(self) -> None:
+ """Close the database connection."""
self._conn.close()
- def register(self, schema):
+ def register(self, schema) -> None:
+ """
+ Register a schema with this connection.
+
+ Parameters
+ ----------
+ schema : Schema
+ Schema object to register.
+ """
self.schemas[schema.database] = schema
self.dependencies.clear()
- def ping(self):
- """Ping the connection or raises an exception if the connection is closed."""
+ def ping(self) -> None:
+ """
+ Ping the server to verify connection is alive.
+
+ Raises
+ ------
+ Exception
+ If the connection is closed.
+ """
self._conn.ping(reconnect=False)
@property
- def is_connected(self):
- """Return true if the object is connected to the database server."""
+ def is_connected(self) -> bool:
+ """
+ Check if connected to the database server.
+
+ Returns
+ -------
+ bool
+ True if connected.
+ """
try:
self.ping()
except:
@@ -247,16 +333,40 @@ def _execute_query(cursor, query, args, suppress_warnings):
except client.err.Error as err:
raise translate_query_error(err, query)
- def query(self, query, args=(), *, as_dict=False, suppress_warnings=True, reconnect=None):
+ def query(
+ self,
+ query: str,
+ args: tuple = (),
+ *,
+ as_dict: bool = False,
+ suppress_warnings: bool = True,
+ reconnect: bool | None = None,
+ ):
"""
- Execute the specified query and return the tuple generator (cursor).
-
- :param query: SQL query
- :param args: additional arguments for the client.cursor
- :param as_dict: If as_dict is set to True, the returned cursor objects returns
- query results as dictionary.
- :param suppress_warnings: If True, suppress all warnings arising from underlying query library
- :param reconnect: when None, get from config, when True, attempt to reconnect if disconnected
+ Execute a SQL query and return the cursor.
+
+ Parameters
+ ----------
+ query : str
+ SQL query to execute.
+ args : tuple, optional
+ Query parameters for prepared statement.
+ as_dict : bool, optional
+ If True, return rows as dictionaries. Default False.
+ suppress_warnings : bool, optional
+ If True, suppress SQL library warnings. Default True.
+ reconnect : bool, optional
+ If True, reconnect if disconnected. None uses config setting.
+
+ Returns
+ -------
+ cursor
+ Database cursor with query results.
+
+ Raises
+ ------
+ DataJointError
+            If a non-SELECT query is attempted while query caching is enabled.
"""
# check cache first:
use_query_cache = bool(self._query_cache)
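
Query caching as described in ``set_query_cache`` above, in usage form (the cache folder path and cache tag are illustrative):

```python
import datajoint as dj

dj.config["query_cache"] = "/tmp/dj_query_cache"  # folder holding cached results
connection = dj.conn()

connection.set_query_cache(query_cache="v1")  # enable; only SELECT allowed
# ... repeated read-only queries now hit the on-disk cache ...
connection.set_query_cache(None)              # disable caching
connection.purge_query_cache()                # drop all cached results
```
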
@@ -300,24 +410,39 @@ def query(self, query, args=(), *, as_dict=False, suppress_warnings=True, reconn
return cursor
- def get_user(self):
+ def get_user(self) -> str:
"""
- :return: the user name and host name provided by the client to the server.
+ Get the current user and host.
+
+ Returns
+ -------
+ str
+ User name and host as ``'user@host'``.
"""
return self.query("SELECT user()").fetchone()[0]
# ---------- transaction processing
@property
- def in_transaction(self):
+ def in_transaction(self) -> bool:
"""
- :return: True if there is an open transaction.
+ Check if a transaction is open.
+
+ Returns
+ -------
+ bool
+ True if a transaction is in progress.
"""
self._in_transaction = self._in_transaction and self.is_connected
return self._in_transaction
- def start_transaction(self):
+ def start_transaction(self) -> None:
"""
- Starts a transaction error.
+ Start a new transaction.
+
+ Raises
+ ------
+ DataJointError
+ If a transaction is already in progress.
"""
if self.in_transaction:
raise errors.DataJointError("Nested connections are not supported.")
@@ -325,19 +450,14 @@ def start_transaction(self):
self._in_transaction = True
logger.debug("Transaction started")
- def cancel_transaction(self):
- """
- Cancels the current transaction and rolls back all changes made during the transaction.
- """
+ def cancel_transaction(self) -> None:
+ """Cancel the current transaction and roll back all changes."""
self.query("ROLLBACK")
self._in_transaction = False
logger.debug("Transaction cancelled. Rolling back ...")
- def commit_transaction(self):
- """
- Commit all changes made during the transaction and close it.
-
- """
+ def commit_transaction(self) -> None:
+ """Commit all changes and close the transaction."""
self.query("COMMIT")
self._in_transaction = False
logger.debug("Transaction committed and closed.")
@@ -347,14 +467,21 @@ def commit_transaction(self):
@contextmanager
def transaction(self):
"""
- Context manager for transactions. Opens an transaction and closes it after the with statement.
- If an error is caught during the transaction, the commits are automatically rolled back.
- All errors are raised again.
-
- Example:
- >>> import datajoint as dj
- >>> with dj.conn().transaction as conn:
- >>> # transaction is open here
+ Context manager for transactions.
+
+ Opens a transaction and automatically commits on success or rolls back
+ on exception.
+
+ Yields
+ ------
+ Connection
+ This connection object.
+
+ Examples
+ --------
+ >>> with dj.conn().transaction:
+ ... # All operations here are in one transaction
+ ... table.insert(data)
"""
try:
self.start_transaction()
diff --git a/src/datajoint/content_registry.py b/src/datajoint/content_registry.py
index abed955a0..f5da65ff5 100644
--- a/src/datajoint/content_registry.py
+++ b/src/datajoint/content_registry.py
@@ -23,11 +23,15 @@ def compute_content_hash(data: bytes) -> str:
"""
Compute SHA256 hash of content.
- Args:
- data: Content bytes
+ Parameters
+ ----------
+ data : bytes
+ Content bytes.
- Returns:
- Hex-encoded SHA256 hash (64 characters)
+ Returns
+ -------
+ str
+ Hex-encoded SHA256 hash (64 characters).
"""
return hashlib.sha256(data).hexdigest()
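
The content-addressed layout produced by these helpers, sketched end to end:

```python
import hashlib

def build_content_path_sketch(content_hash: str) -> str:
    # _content/{hash[:2]}/{hash[2:4]}/{hash}
    return f"_content/{content_hash[:2]}/{content_hash[2:4]}/{content_hash}"

h = hashlib.sha256(b"hello").hexdigest()
print(build_content_path_sketch(h))
# _content/2c/f2/2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824
```
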
@@ -39,11 +43,15 @@ def build_content_path(content_hash: str) -> str:
Content is stored in a hierarchical structure to avoid too many files
in a single directory: _content/{hash[:2]}/{hash[2:4]}/{hash}
- Args:
- content_hash: SHA256 hex hash (64 characters)
+ Parameters
+ ----------
+ content_hash : str
+ SHA256 hex hash (64 characters).
- Returns:
- Relative path within the store
+ Returns
+ -------
+ str
+ Relative path within the store.
"""
if len(content_hash) != 64:
raise DataJointError(f"Invalid content hash length: {len(content_hash)} (expected 64)")
@@ -54,12 +62,16 @@ def get_store_backend(store_name: str | None = None) -> StorageBackend:
"""
Get a StorageBackend for content storage.
- Args:
- store_name: Name of the store to use. If None, uses the default object storage
- configuration or the configured default_store.
+ Parameters
+ ----------
+ store_name : str, optional
+ Name of the store to use. If None, uses the default object storage
+ configuration or the configured default_store.
- Returns:
- StorageBackend instance
+ Returns
+ -------
+ StorageBackend
+ StorageBackend instance.
"""
# If store_name is None, check for configured default_store
if store_name is None and config.object_storage.default_store:
@@ -77,12 +89,17 @@ def put_content(data: bytes, store_name: str | None = None) -> dict[str, Any]:
If the content already exists (same hash), it is not re-uploaded.
Returns metadata including the hash, store, and size.
- Args:
- data: Content bytes to store
- store_name: Name of the store. If None, uses default store.
+ Parameters
+ ----------
+ data : bytes
+ Content bytes to store.
+ store_name : str, optional
+ Name of the store. If None, uses default store.
- Returns:
- Metadata dict with keys: hash, store, size
+ Returns
+ -------
+ dict[str, Any]
+ Metadata dict with keys: hash, store, size.
"""
content_hash = compute_content_hash(data)
path = build_content_path(content_hash)
@@ -107,16 +124,24 @@ def get_content(content_hash: str, store_name: str | None = None) -> bytes:
"""
Retrieve content by its hash.
- Args:
- content_hash: SHA256 hex hash of the content
- store_name: Name of the store. If None, uses default store.
-
- Returns:
- Content bytes
-
- Raises:
- MissingExternalFile: If content is not found
- DataJointError: If hash verification fails
+ Parameters
+ ----------
+ content_hash : str
+ SHA256 hex hash of the content.
+ store_name : str, optional
+ Name of the store. If None, uses default store.
+
+ Returns
+ -------
+ bytes
+ Content bytes.
+
+ Raises
+ ------
+ MissingExternalFile
+ If content is not found.
+ DataJointError
+ If hash verification fails.
"""
path = build_content_path(content_hash)
backend = get_store_backend(store_name)
@@ -135,12 +160,17 @@ def content_exists(content_hash: str, store_name: str | None = None) -> bool:
"""
Check if content exists in storage.
- Args:
- content_hash: SHA256 hex hash of the content
- store_name: Name of the store. If None, uses default store.
+ Parameters
+ ----------
+ content_hash : str
+ SHA256 hex hash of the content.
+ store_name : str, optional
+ Name of the store. If None, uses default store.
- Returns:
- True if content exists
+ Returns
+ -------
+ bool
+ True if content exists.
"""
path = build_content_path(content_hash)
backend = get_store_backend(store_name)
@@ -151,15 +181,24 @@ def delete_content(content_hash: str, store_name: str | None = None) -> bool:
"""
Delete content from storage.
- WARNING: This should only be called after verifying no references exist.
+ This should only be called after verifying no references exist.
Use garbage collection to safely remove unreferenced content.
- Args:
- content_hash: SHA256 hex hash of the content
- store_name: Name of the store. If None, uses default store.
+ Parameters
+ ----------
+ content_hash : str
+ SHA256 hex hash of the content.
+ store_name : str, optional
+ Name of the store. If None, uses default store.
- Returns:
- True if content was deleted, False if it didn't exist
+ Returns
+ -------
+ bool
+ True if content was deleted, False if it didn't exist.
+
+ Warnings
+ --------
+ This permanently deletes content. Ensure no references exist first.
"""
path = build_content_path(content_hash)
backend = get_store_backend(store_name)
@@ -175,12 +214,17 @@ def get_content_size(content_hash: str, store_name: str | None = None) -> int:
"""
Get the size of stored content.
- Args:
- content_hash: SHA256 hex hash of the content
- store_name: Name of the store. If None, uses default store.
-
- Returns:
- Size in bytes
+ Parameters
+ ----------
+ content_hash : str
+ SHA256 hex hash of the content.
+ store_name : str, optional
+ Name of the store. If None, uses default store.
+
+ Returns
+ -------
+ int
+ Size in bytes.
"""
path = build_content_path(content_hash)
backend = get_store_backend(store_name)
diff --git a/src/datajoint/declare.py b/src/datajoint/declare.py
index 77638d4f7..d8479b124 100644
--- a/src/datajoint/declare.py
+++ b/src/datajoint/declare.py
@@ -1,8 +1,12 @@
"""
-This module hosts functions to convert DataJoint table definitions into mysql table definitions, and to
-declare the corresponding mysql tables.
+Table definition parsing and SQL generation.
+
+This module converts DataJoint table definitions into MySQL CREATE TABLE
+statements, handling type mapping, foreign key resolution, and index creation.
"""
+from __future__ import annotations
+
import logging
import re
@@ -92,8 +96,25 @@
assert SPECIAL_TYPES <= set(TYPE_PATTERN)
-def match_type(attribute_type):
- """Match an attribute type string to a category."""
+def match_type(attribute_type: str) -> str:
+ """
+ Match an attribute type string to its category.
+
+ Parameters
+ ----------
+ attribute_type : str
+ The type string from the table definition (e.g., ``"float32"``, ``"varchar(255)"``).
+
+ Returns
+ -------
+ str
+ Category name from TYPE_PATTERN (e.g., ``"FLOAT32"``, ``"STRING"``, ``"CODEC"``).
+
+ Raises
+ ------
+ DataJointError
+ If the type string doesn't match any known pattern.
+ """
try:
return next(category for category, pattern in TYPE_PATTERN.items() if pattern.match(attribute_type))
except StopIteration:
@@ -103,7 +124,16 @@ def match_type(attribute_type):
logger = logging.getLogger(__name__.split(".")[0])
-def build_foreign_key_parser():
+def build_foreign_key_parser() -> pp.ParserElement:
+ """
+ Build a pyparsing parser for foreign key definitions.
+
+ Returns
+ -------
+ pp.ParserElement
+ Parser that extracts ``options`` and ``ref_table`` from lines like
+ ``-> [nullable] ParentTable``.
+ """
arrow = pp.Literal("->").suppress()
lbracket = pp.Literal("[").suppress()
rbracket = pp.Literal("]").suppress()
@@ -113,7 +143,16 @@ def build_foreign_key_parser():
return arrow + options + ref_table
-def build_attribute_parser():
+def build_attribute_parser() -> pp.ParserElement:
+ """
+ Build a pyparsing parser for attribute definitions.
+
+ Returns
+ -------
+ pp.ParserElement
+ Parser that extracts ``name``, ``type``, ``default``, and ``comment``
+ from attribute definition lines.
+ """
quoted = pp.QuotedString('"') ^ pp.QuotedString("'")
colon = pp.Literal(":").suppress()
attribute_name = pp.Word(pp.srange("[a-z]"), pp.srange("[a-z0-9_]")).set_results_name("name")
@@ -130,27 +169,62 @@ def build_attribute_parser():
attribute_parser = build_attribute_parser()
-def is_foreign_key(line):
+def is_foreign_key(line: str) -> bool:
"""
-
- :param line: a line from the table definition
- :return: true if the line appears to be a foreign key definition
+ Check if a definition line is a foreign key reference.
+
+ Parameters
+ ----------
+ line : str
+ A line from the table definition.
+
+ Returns
+ -------
+ bool
+ True if the line appears to be a foreign key definition (contains ``->``
+ not inside quotes or comments).
"""
arrow_position = line.find("->")
return arrow_position >= 0 and not any(c in line[:arrow_position] for c in "\"#'")
-def compile_foreign_key(line, context, attributes, primary_key, attr_sql, foreign_key_sql, index_sql, fk_attribute_map=None):
+def compile_foreign_key(
+ line: str,
+ context: dict,
+ attributes: list[str],
+ primary_key: list[str] | None,
+ attr_sql: list[str],
+ foreign_key_sql: list[str],
+ index_sql: list[str],
+ fk_attribute_map: dict[str, tuple[str, str]] | None = None,
+) -> None:
"""
- :param line: a line from a table definition
- :param context: namespace containing referenced objects
- :param attributes: list of attribute names already in the declaration -- to be updated by this function
- :param primary_key: None if the current foreign key is made from the dependent section. Otherwise it is the list
- of primary key attributes thus far -- to be updated by the function
- :param attr_sql: list of sql statements defining attributes -- to be updated by this function.
- :param foreign_key_sql: list of sql statements specifying foreign key constraints -- to be updated by this function.
- :param index_sql: list of INDEX declaration statements, duplicate or redundant indexes are ok.
- :param fk_attribute_map: dict mapping child attr -> (parent_table, parent_attr) -- to be updated by this function.
+ Parse a foreign key line and update declaration components.
+
+ Parameters
+ ----------
+ line : str
+ A foreign key line from the table definition (e.g., ``"-> Parent"``).
+ context : dict
+ Namespace containing referenced table objects.
+ attributes : list[str]
+ Attribute names already declared. Updated in place with new FK attributes.
+ primary_key : list[str] or None
+ Primary key attributes so far. None if in dependent section.
+ Updated in place with FK attributes when not None.
+ attr_sql : list[str]
+ SQL attribute definitions. Updated in place.
+ foreign_key_sql : list[str]
+ SQL FOREIGN KEY constraints. Updated in place.
+ index_sql : list[str]
+ SQL INDEX declarations. Updated in place.
+ fk_attribute_map : dict, optional
+ Mapping of ``child_attr -> (parent_table, parent_attr)``. Updated in place.
+
+ Raises
+ ------
+ DataJointError
+ If the foreign key reference cannot be resolved or options are invalid.
"""
# Parse and validate
from .expression import QueryExpression
@@ -214,7 +288,32 @@ def compile_foreign_key(line, context, attributes, primary_key, attr_sql, foreig
index_sql.append("UNIQUE INDEX ({attrs})".format(attrs=",".join("`%s`" % attr for attr in ref.primary_key)))
-def prepare_declare(definition, context):
+def prepare_declare(
+ definition: str, context: dict
+) -> tuple[str, list[str], list[str], list[str], list[str], list[str], dict[str, tuple[str, str]]]:
+ """
+ Parse a table definition into its components.
+
+ Parameters
+ ----------
+ definition : str
+ DataJoint table definition string.
+ context : dict
+ Namespace for resolving foreign key references.
+
+ Returns
+ -------
+ tuple
+ Seven-element tuple containing:
+
+ - table_comment : str
+ - primary_key : list[str]
+ - attribute_sql : list[str]
+ - foreign_key_sql : list[str]
+ - index_sql : list[str]
+ - external_stores : list[str]
+ - fk_attribute_map : dict[str, tuple[str, str]]
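+
+ Examples
+ --------
+ >>> # minimal sketch using only core types
+ >>> definition = '''
+ ... mouse_id : int32
+ ... ---
+ ... dob : date
+ ... '''
+ >>> comment, pk, attr_sql, fk_sql, idx_sql, stores, fk_map = prepare_declare(definition, {})
+ >>> pk
+ ['mouse_id']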
+ """
# split definition into lines
definition = re.split(r"\s*\n\s*", definition.strip())
# check for optional table comment
@@ -269,14 +368,35 @@ def prepare_declare(definition, context):
)
-def declare(full_table_name, definition, context):
- """
- Parse declaration and generate the SQL CREATE TABLE code
-
- :param full_table_name: full name of the table
- :param definition: DataJoint table definition
- :param context: dictionary of objects that might be referred to in the table
- :return: SQL CREATE TABLE statement, list of external stores used
+def declare(
+ full_table_name: str, definition: str, context: dict
+) -> tuple[str, list[str], list[str], dict[str, tuple[str, str]]]:
+ r"""
+ Parse a definition and generate SQL CREATE TABLE statement.
+
+ Parameters
+ ----------
+ full_table_name : str
+ Fully qualified table name (e.g., ```\`schema\`.\`table\```).
+ definition : str
+ DataJoint table definition string.
+ context : dict
+ Namespace for resolving foreign key references.
+
+ Returns
+ -------
+ tuple
+ Four-element tuple:
+
+ - sql : str - SQL CREATE TABLE statement
+ - external_stores : list[str] - External store names used
+ - primary_key : list[str] - Primary key attribute names
+ - fk_attribute_map : dict - FK attribute lineage mapping
+
+ Raises
+ ------
+ DataJointError
+ If table name exceeds max length or has no primary key.
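+
+ Examples
+ --------
+ >>> # minimal sketch; an empty context suffices when there are no foreign keys
+ >>> definition = '''
+ ... mouse_id : int32
+ ... ---
+ ... dob : date
+ ... '''
+ >>> sql, stores, pk, fk_map = declare("`lab`.`mouse`", definition, {})
+ >>> pk
+ ['mouse_id']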
"""
table_name = full_table_name.strip("`").split(".")[1]
if len(table_name) > MAX_TABLE_NAME_LENGTH:
@@ -322,12 +442,28 @@ def declare(full_table_name, definition, context):
return sql, external_stores, primary_key, fk_attribute_map
-def _make_attribute_alter(new, old, primary_key):
+def _make_attribute_alter(new: list[str], old: list[str], primary_key: list[str]) -> list[str]:
"""
- :param new: new attribute declarations
- :param old: old attribute declarations
- :param primary_key: primary key attributes
- :return: list of SQL ALTER commands
+ Generate SQL ALTER commands for attribute changes.
+
+ Parameters
+ ----------
+ new : list[str]
+ New attribute SQL declarations.
+ old : list[str]
+ Old attribute SQL declarations.
+ primary_key : list[str]
+ Primary key attribute names (cannot be altered).
+
+ Returns
+ -------
+ list[str]
+ SQL ALTER commands (ADD, MODIFY, CHANGE, DROP).
+
+ Raises
+ ------
+ DataJointError
+ If an attribute is renamed twice or renamed from non-existent attribute.
"""
# parse attribute names
name_regexp = re.compile(r"^`(?P<name>\w+)`")
@@ -391,12 +527,31 @@ def _make_attribute_alter(new, old, primary_key):
return sql
-def alter(definition, old_definition, context):
+def alter(definition: str, old_definition: str, context: dict) -> tuple[list[str], list[str]]:
"""
- :param definition: new table definition
- :param old_definition: current table definition
- :param context: the context in which to evaluate foreign key definitions
- :return: string SQL ALTER command, list of new stores used for external storage
+ Generate SQL ALTER commands for table definition changes.
+
+ Parameters
+ ----------
+ definition : str
+ New table definition.
+ old_definition : str
+ Current table definition.
+ context : dict
+ Namespace for resolving foreign key references.
+
+ Returns
+ -------
+ tuple
+ Two-element tuple:
+
+ - sql : list[str] - SQL ALTER commands
+ - new_stores : list[str] - New external stores used
+
+ Raises
+ ------
+ NotImplementedError
+ If attempting to alter primary key, foreign keys, or indexes.
"""
(
table_comment,
@@ -432,7 +587,24 @@ def alter(definition, old_definition, context):
return sql, [e for e in external_stores if e not in external_stores_]
-def compile_index(line, index_sql):
+def compile_index(line: str, index_sql: list[str]) -> None:
+ """
+ Parse an index declaration and append SQL to index_sql.
+
+ Parameters
+ ----------
+ line : str
+ Index declaration line (e.g., ``"index(attr1, attr2)"`` or
+ ``"unique index(attr)"``).
+ index_sql : list[str]
+ List of index SQL declarations. Updated in place.
+
+ Raises
+ ------
+ DataJointError
+ If the index syntax is invalid.
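+
+ Examples
+ --------
+ >>> index_sql = []
+ >>> compile_index("unique index(attr)", index_sql)  # syntax example from above
+ >>> len(index_sql)
+ 1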
+ """
+
def format_attribute(attr):
match, attr = translate_attribute(attr)
if match is None:
@@ -455,18 +627,25 @@ def format_attribute(attr):
)
-def substitute_special_type(match, category, foreign_key_sql, context):
+def substitute_special_type(match: dict, category: str, foreign_key_sql: list[str], context: dict) -> None:
"""
Substitute special types with their native SQL equivalents.
- Special types are:
- - Core DataJoint types (float32 → float, uuid → binary(16), bytes → longblob, etc.)
- - CODEC types (Codecs in angle brackets)
-
- :param match: dict containing with keys "type" and "comment" -- will be modified in place
- :param category: attribute type category from TYPE_PATTERN
- :param foreign_key_sql: list of foreign key declarations to add to
- :param context: context for looking up user-defined codecs (unused, kept for compatibility)
+ Special types include core DataJoint types (``float32`` → ``float``,
+ ``uuid`` → ``binary(16)``, ``bytes`` → ``longblob``) and codec types
+ (angle bracket syntax such as ``<codec_name>``).
+
+ Parameters
+ ----------
+ match : dict
+ Parsed attribute with keys ``"type"``, ``"comment"``, etc.
+ Modified in place with substituted type.
+ category : str
+ Type category from TYPE_PATTERN (e.g., ``"FLOAT32"``, ``"CODEC"``).
+ foreign_key_sql : list[str]
+ Foreign key declarations (unused, kept for API compatibility).
+ context : dict
+ Namespace for codec lookup (unused, kept for API compatibility).
"""
if category == "CODEC":
# Codec - resolve to underlying dtype
@@ -499,15 +678,34 @@ def substitute_special_type(match, category, foreign_key_sql, context):
assert False, f"Unknown special type: {category}"
-def compile_attribute(line, in_key, foreign_key_sql, context):
+def compile_attribute(line: str, in_key: bool, foreign_key_sql: list[str], context: dict) -> tuple[str, str, str | None]:
"""
- Convert attribute definition from DataJoint format to SQL
-
- :param line: attribution line
- :param in_key: set to True if attribute is in primary key set
- :param foreign_key_sql: the list of foreign key declarations to add to
- :param context: context in which to look up user-defined attribute type adapterss
- :returns: (name, sql, store) -- attribute name, sql code for its declaration, and optional store name
+ Convert an attribute definition from DataJoint format to SQL.
+
+ Parameters
+ ----------
+ line : str
+ Attribute definition line (e.g., ``"session_id : int32 # unique session"``).
+ in_key : bool
+ True if the attribute is part of the primary key.
+ foreign_key_sql : list[str]
+ Foreign key declarations (passed to type substitution).
+ context : dict
+ Namespace for codec lookup.
+
+ Returns
+ -------
+ tuple
+ Three-element tuple:
+
+ - name : str - Attribute name
+ - sql : str - SQL column declaration
+ - store : str or None - External store name if applicable
+
+ Raises
+ ------
+ DataJointError
+ If syntax is invalid, primary key is nullable, or blob has invalid default.
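+
+ Examples
+ --------
+ >>> name, sql, store = compile_attribute("session_id : int32  # unique session", False, [], {})
+ >>> name
+ 'session_id'
+ >>> store is None  # no external store for a core type
+ True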
"""
try:
match = attribute_parser.parse_string(line + "#", parse_all=True)
diff --git a/src/datajoint/dependencies.py b/src/datajoint/dependencies.py
index a342bf3f0..621011426 100644
--- a/src/datajoint/dependencies.py
+++ b/src/datajoint/dependencies.py
@@ -1,3 +1,13 @@
+"""
+Foreign key dependency graph for DataJoint schemas.
+
+This module provides the Dependencies class that tracks foreign key
+relationships between tables and supports topological sorting for
+proper ordering of operations like delete and drop.
+"""
+
+from __future__ import annotations
+
import itertools
import re
from collections import defaultdict
@@ -7,18 +17,37 @@
from .errors import DataJointError
-def extract_master(part_table):
- """
- given a part table name, return master part. None if not a part table
+def extract_master(part_table: str) -> str | None:
+ r"""
+ Extract master table name from a part table name.
+
+ Parameters
+ ----------
+ part_table : str
+ Full table name (e.g., ```\`schema\`.\`master__part\```).
+
+ Returns
+ -------
+ str or None
+ Master table name if part_table is a part table, None otherwise.
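+
+ Examples
+ --------
+ >>> extract_master("`lab`.`session__trial`")
+ '`lab`.`session`'
+ >>> extract_master("`lab`.`session`") is None  # not a part table
+ True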
"""
match = re.match(r"(?P<master>`\w+`.`#?\w+)__\w+`", part_table)
return match["master"] + "`" if match else None
-def topo_sort(graph):
+def topo_sort(graph: nx.DiGraph) -> list[str]:
"""
- topological sort of a dependency graph that keeps part tables together with their masters
- :return: list of table names in topological order
+ Topological sort keeping part tables with their masters.
+
+ Parameters
+ ----------
+ graph : nx.DiGraph
+ Dependency graph.
+
+ Returns
+ -------
+ list[str]
+ Table names in topological order with parts following masters.
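+
+ Examples
+ --------
+ >>> # minimal sketch with no part tables involved
+ >>> g = nx.DiGraph([("`s`.`a`", "`s`.`b`")])
+ >>> topo_sort(g)
+ ['`s`.`a`', '`s`.`b`']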
"""
graph = nx.DiGraph(graph) # make a copy
@@ -69,28 +98,52 @@ def topo_sort(graph):
class Dependencies(nx.DiGraph):
"""
- The graph of dependencies (foreign keys) between loaded tables.
+ Graph of foreign key dependencies between loaded tables.
+
+ Extends NetworkX DiGraph to track foreign key relationships and
+ support operations like cascade delete and topological ordering.
+
+ Parameters
+ ----------
+ connection : Connection, optional
+ Database connection. May be None to support NetworkX algorithms
+ that create objects with empty constructors.
+
+ Attributes
+ ----------
+ _conn : Connection or None
+ Database connection.
+ _loaded : bool
+ Whether dependencies have been loaded from the database.
- Note: the 'connection' argument should normally be supplied;
- Empty use is permitted to facilitate use of networkx algorithms which
- internally create objects with the expectation of empty constructors.
- See also: https://github.com/datajoint/datajoint-python/pull/443
+ Notes
+ -----
+ Empty constructor use is permitted to facilitate NetworkX algorithms.
+ See: https://github.com/datajoint/datajoint-python/pull/443
"""
- def __init__(self, connection=None):
+ def __init__(self, connection=None) -> None:
self._conn = connection
self._node_alias_count = itertools.count()
self._loaded = False
super().__init__(self)
- def clear(self):
+ def clear(self) -> None:
+ """Clear the graph and reset loaded state."""
self._loaded = False
super().clear()
- def load(self, force=True):
+ def load(self, force: bool = True) -> None:
"""
Load dependencies for all loaded schemas.
- This method gets called before any operation that requires dependencies: delete, drop, populate, progress.
+
+ Called before operations requiring dependencies: delete, drop,
+ populate, progress.
+
+ Parameters
+ ----------
+ force : bool, optional
+ If True (default), reload even if already loaded.
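+
+ Examples
+ --------
+ >>> dj.conn().dependencies.load()  # refresh the dependency graph explicitly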
"""
# reload from scratch to prevent duplication of renamed edges
if self._loaded and not force:
@@ -165,45 +218,90 @@ def load(self, force=True):
raise DataJointError("DataJoint can only work with acyclic dependencies")
self._loaded = True
- def topo_sort(self):
- """:return: list of tables names in topological order"""
- return topo_sort(self)
+ def topo_sort(self) -> list[str]:
+ """
+ Return table names in topological order.
- def parents(self, table_name, primary=None):
+ Returns
+ -------
+ list[str]
+ Table names sorted topologically.
"""
- :param table_name: `schema`.`table`
- :param primary: if None, then all parents are returned. If True, then only foreign keys composed of
- primary key attributes are considered. If False, the only foreign keys including at least one non-primary
- attribute are considered.
- :return: dict of tables referenced by the foreign keys of table
+ return topo_sort(self)
+
+ def parents(self, table_name: str, primary: bool | None = None) -> dict:
+ r"""
+ Get tables referenced by this table's foreign keys.
+
+ Parameters
+ ----------
+ table_name : str
+ Full table name (```\`schema\`.\`table\```).
+ primary : bool, optional
+ If None, return all parents. If True, only FK composed entirely
+ of primary key attributes. If False, only FK with at least one
+ non-primary attribute.
+
+ Returns
+ -------
+ dict
+ Mapping of parent table name to edge properties.
"""
self.load(force=False)
return {p[0]: p[2] for p in self.in_edges(table_name, data=True) if primary is None or p[2]["primary"] == primary}
- def children(self, table_name, primary=None):
- """
- :param table_name: `schema`.`table`
- :param primary: if None, then all children are returned. If True, then only foreign keys composed of
- primary key attributes are considered. If False, the only foreign keys including at least one non-primary
- attribute are considered.
- :return: dict of tables referencing the table through foreign keys
+ def children(self, table_name: str, primary: bool | None = None) -> dict:
+ r"""
+ Get tables that reference this table through foreign keys.
+
+ Parameters
+ ----------
+ table_name : str
+ Full table name (```\`schema\`.\`table\```).
+ primary : bool, optional
+ If None, return all children. If True, only FK composed entirely
+ of primary key attributes. If False, only FK with at least one
+ non-primary attribute.
+
+ Returns
+ -------
+ dict
+ Mapping of child table name to edge properties.
"""
self.load(force=False)
return {p[1]: p[2] for p in self.out_edges(table_name, data=True) if primary is None or p[2]["primary"] == primary}
- def descendants(self, full_table_name):
- """
- :param full_table_name: In form `schema`.`table_name`
- :return: all dependent tables sorted in topological order. Self is included.
+ def descendants(self, full_table_name: str) -> list[str]:
+ r"""
+ Get all dependent tables in topological order.
+
+ Parameters
+ ----------
+ full_table_name : str
+ Full table name (```\`schema\`.\`table_name\```).
+
+ Returns
+ -------
+ list[str]
+ Dependent tables in topological order. Self is included first.
"""
self.load(force=False)
nodes = self.subgraph(nx.descendants(self, full_table_name))
return [full_table_name] + nodes.topo_sort()
- def ancestors(self, full_table_name):
- """
- :param full_table_name: In form `schema`.`table_name`
- :return: all dependent tables sorted in topological order. Self is included.
+ def ancestors(self, full_table_name: str) -> list[str]:
+ r"""
+ Get all ancestor tables in reverse topological order.
+
+ Parameters
+ ----------
+ full_table_name : str
+ Full table name (```\`schema\`.\`table_name\```).
+
+ Returns
+ -------
+ list[str]
+ Ancestor tables in reverse topological order. Self is included last.
"""
self.load(force=False)
nodes = self.subgraph(nx.ancestors(self, full_table_name))
diff --git a/src/datajoint/diagram.py b/src/datajoint/diagram.py
index 3b6061102..de211df8f 100644
--- a/src/datajoint/diagram.py
+++ b/src/datajoint/diagram.py
@@ -1,3 +1,12 @@
+"""
+Diagram visualization for DataJoint schemas.
+
+This module provides the Diagram class for visualizing schema structure
+as directed acyclic graphs showing tables and their foreign key relationships.
+"""
+
+from __future__ import annotations
+
import functools
import inspect
import io
@@ -32,44 +41,58 @@
class Diagram:
"""
- Entity relationship diagram, currently disabled due to the lack of required packages: matplotlib and pygraphviz.
+ Schema diagram (disabled).
+
+ Diagram visualization requires matplotlib and pygraphviz packages.
+ Install them to enable this feature.
- To enable Diagram feature, please install both matplotlib and pygraphviz. For instructions on how to install
- these two packages, refer to https://docs.datajoint.com/core/datajoint-python/0.14/client/install/
+ See Also
+ --------
+ https://docs.datajoint.com/core/datajoint-python/0.14/client/install/
"""
- def __init__(self, *args, **kwargs):
+ def __init__(self, *args, **kwargs) -> None:
logger.warning("Please install matplotlib and pygraphviz libraries to enable the Diagram feature.")
else:
class Diagram(nx.DiGraph):
"""
- Schema diagram showing tables and foreign keys between in the form of a directed
- acyclic graph (DAG). The diagram is derived from the connection.dependencies object.
-
- Usage:
-
- >>> diag = Diagram(source)
-
- source can be a table object, a table class, a schema, or a module that has a schema.
-
+ Schema diagram as a directed acyclic graph (DAG).
+
+ Visualizes tables and foreign key relationships derived from
+ ``connection.dependencies``.
+
+ Parameters
+ ----------
+ source : Table, Schema, or module
+ A table object, table class, schema, or module with a schema.
+ context : dict, optional
+ Namespace for resolving table class names. If None, uses caller's
+ frame globals/locals.
+
+ Examples
+ --------
+ >>> diag = dj.Diagram(schema.MyTable)
>>> diag.draw()
- draws the diagram using pyplot
+ Operators:
+
+ - ``diag1 + diag2`` - union of diagrams
+ - ``diag1 - diag2`` - difference of diagrams
+ - ``diag1 * diag2`` - intersection of diagrams
+ - ``diag + n`` - expand n levels of successors (children)
+ - ``diag - n`` - expand n levels of predecessors (parents)
- diag1 + diag2 - combines the two diagrams.
- diag1 - diag2 - difference between diagrams
- diag1 * diag2 - intersection of diagrams
- diag + n - expands n levels of successors
- diag - n - expands n levels of predecessors
- Thus dj.Diagram(schema.Table)+1-1 defines the diagram of immediate ancestors and descendants of schema.Table
+ >>> dj.Diagram(schema.Table) + 1 - 1 # immediate ancestors and descendants
- Note that diagram + 1 - 1 may differ from diagram - 1 + 1 and so forth.
- Only those tables that are loaded in the connection object are displayed
+ Notes
+ -----
+ ``diagram + 1 - 1`` may differ from ``diagram - 1 + 1``.
+ Only tables loaded in the connection are displayed.
"""
- def __init__(self, source, context=None):
+ def __init__(self, source, context=None) -> None:
if isinstance(source, Diagram):
# copy constructor
self.nodes_to_show = set(source.nodes_to_show)
@@ -115,27 +138,33 @@ def __init__(self, source, context=None):
self.nodes_to_show.add(node)
@classmethod
- def from_sequence(cls, sequence):
+ def from_sequence(cls, sequence) -> "Diagram":
"""
- The join Diagram for all objects in sequence
+ Create combined Diagram from a sequence of sources.
+
+ Parameters
+ ----------
+ sequence : iterable
+ Sequence of table objects, classes, or schemas.
- :param sequence: a sequence (e.g. list, tuple)
- :return: Diagram(arg1) + ... + Diagram(argn)
+ Returns
+ -------
+ Diagram
+ Union of diagrams: ``Diagram(arg1) + ... + Diagram(argn)``.
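+
+ Examples
+ --------
+ >>> # hypothetical table classes ``Session`` and ``Trial``
+ >>> diag = dj.Diagram.from_sequence([Session, Trial])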
"""
return functools.reduce(lambda x, y: x + y, map(Diagram, sequence))
- def add_parts(self):
+ def add_parts(self) -> "Diagram":
"""
- Adds to the diagram the part tables of all master tables already in the diagram
- :return:
+ Add part tables of all masters already in the diagram.
+
+ Returns
+ -------
+ Diagram
+ New diagram with part tables included.
"""
def is_part(part, master):
- """
- :param part: `database`.`table_name`
- :param master: `database`.`table_name`
- :return: True if part is part of master.
- """
part = [s.strip("`") for s in part.split(".")]
master = [s.strip("`") for s in master.split(".")]
return master[0] == part[0] and master[1] + "__" == part[1][: len(master[1]) + 2]
@@ -144,11 +173,19 @@ def is_part(part, master):
self.nodes_to_show.update(n for n in self.nodes() if any(is_part(n, m) for m in self.nodes_to_show))
return self
- def __add__(self, arg):
+ def __add__(self, arg) -> "Diagram":
"""
- :param arg: either another Diagram or a positive integer.
- :return: Union of the diagrams when arg is another Diagram
- or an expansion downstream when arg is a positive integer.
+ Union or downstream expansion.
+
+ Parameters
+ ----------
+ arg : Diagram or int
+ Another Diagram for union, or positive int for downstream expansion.
+
+ Returns
+ -------
+ Diagram
+ Combined or expanded diagram.
"""
self = Diagram(self) # copy
try:
@@ -166,11 +203,19 @@ def __add__(self, arg):
self.nodes_to_show.update(new)
return self
- def __sub__(self, arg):
+ def __sub__(self, arg) -> "Diagram":
"""
- :param arg: either another Diagram or a positive integer.
- :return: Difference of the diagrams when arg is another Diagram or
- an expansion upstream when arg is a positive integer.
+ Difference or upstream expansion.
+
+ Parameters
+ ----------
+ arg : Diagram or int
+ Another Diagram for difference, or positive int for upstream expansion.
+
+ Returns
+ -------
+ Diagram
+ Reduced or expanded diagram.
"""
self = Diagram(self) # copy
try:
@@ -189,23 +234,43 @@ def __sub__(self, arg):
self.nodes_to_show.update(new)
return self
- def __mul__(self, arg):
+ def __mul__(self, arg) -> "Diagram":
"""
- Intersection of two diagrams
- :param arg: another Diagram
- :return: a new Diagram comprising nodes that are present in both operands.
+ Intersection of two diagrams.
+
+ Parameters
+ ----------
+ arg : Diagram
+ Another Diagram.
+
+ Returns
+ -------
+ Diagram
+ Diagram with nodes present in both operands.
"""
self = Diagram(self) # copy
self.nodes_to_show.intersection_update(arg.nodes_to_show)
return self
- def topo_sort(self):
- """return nodes in lexicographical topological order"""
+ def topo_sort(self) -> list[str]:
+ """
+ Return nodes in topological order.
+
+ Returns
+ -------
+ list[str]
+ Node names in topological order.
+ """
return topo_sort(self)
- def _make_graph(self):
+ def _make_graph(self) -> nx.DiGraph:
"""
- Make the self.graph - a graph object ready for drawing
+ Build graph object ready for drawing.
+
+ Returns
+ -------
+ nx.DiGraph
+ Graph with nodes relabeled to class names.
"""
# mark "distinguished" tables, i.e. those that introduce new primary key
# attributes
@@ -233,13 +298,14 @@ def _make_graph(self):
return graph
@staticmethod
- def _encapsulate_edge_attributes(graph):
+ def _encapsulate_edge_attributes(graph: nx.DiGraph) -> None:
"""
- Modifies the `nx.Graph`'s edge attribute `attr_map` to be a string representation
- of the attribute map, and encapsulates the string in double quotes.
- Changes the graph in place.
+ Encapsulate edge attr_map in double quotes for pydot compatibility.
+
+ Modifies graph in place.
- Implements workaround described in
+ See Also
+ --------
https://github.com/pydot/pydot/issues/258#issuecomment-795798099
"""
for u, v, *_, edgedata in graph.edges(data=True):
@@ -247,13 +313,14 @@ def _encapsulate_edge_attributes(graph):
graph.edges[u, v]["attr_map"] = '"{0}"'.format(edgedata["attr_map"])
@staticmethod
- def _encapsulate_node_names(graph):
+ def _encapsulate_node_names(graph: nx.DiGraph) -> None:
"""
- Modifies the `nx.Graph`'s node names string representations encapsulated in
- double quotes.
- Changes the graph in place.
+ Encapsulate node names in double quotes for pydot compatibility.
- Implements workaround described in
+ Modifies graph in place.
+
+ See Also
+ --------
https://github.com/datajoint/datajoint-python/pull/1176
"""
nx.relabel_nodes(
@@ -396,7 +463,22 @@ def draw(self):
else:
raise DataJointError("pyplot was not imported")
- def save(self, filename, format=None):
+ def save(self, filename: str, format: str | None = None) -> None:
+ """
+ Save diagram to file.
+
+ Parameters
+ ----------
+ filename : str
+ Output filename.
+ format : str, optional
+ File format (``'png'`` or ``'svg'``). Inferred from extension if None.
+
+ Raises
+ ------
+ DataJointError
+ If format is unsupported.
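+
+ Examples
+ --------
+ >>> diag.save("schema.png")  # format inferred from extension
+ >>> diag.save("schema", format="svg")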
+ """
if format is None:
if filename.lower().endswith(".png"):
format = "png"
diff --git a/src/datajoint/errors.py b/src/datajoint/errors.py
index aadc74caf..d2a789692 100644
--- a/src/datajoint/errors.py
+++ b/src/datajoint/errors.py
@@ -1,87 +1,74 @@
"""
-Exception classes for the DataJoint library
+Exception classes for the DataJoint library.
+
+This module defines the exception hierarchy for DataJoint errors.
"""
+from __future__ import annotations
+
# --- Top Level ---
class DataJointError(Exception):
- """
- Base class for errors specific to DataJoint internal operation.
- """
+ """Base class for errors specific to DataJoint internal operation."""
- def suggest(self, *args):
+ def suggest(self, *args) -> "DataJointError":
"""
- regenerate the exception with additional arguments
+ Regenerate the exception with additional arguments.
+
+ Parameters
+ ----------
+ *args : any
+ Additional arguments to append to the exception.
- :param args: addition arguments
- :return: a new exception of the same type with the additional arguments
+ Returns
+ -------
+ DataJointError
+ A new exception of the same type with the additional arguments.
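+
+ Examples
+ --------
+ >>> err = DataJointError("connection failed")
+ >>> err.suggest("check the port").args
+ ('connection failed', 'check the port')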
"""
return self.__class__(*(self.args + args))
# --- Second Level ---
class LostConnectionError(DataJointError):
- """
- Loss of server connection
- """
+ """Loss of server connection."""
class QueryError(DataJointError):
- """
- Errors arising from queries to the database
- """
+ """Errors arising from queries to the database."""
# --- Third Level: QueryErrors ---
class QuerySyntaxError(QueryError):
- """
- Errors arising from incorrect query syntax
- """
+ """Errors arising from incorrect query syntax."""
class AccessError(QueryError):
- """
- User access error: insufficient privileges.
- """
+ """User access error: insufficient privileges."""
class MissingTableError(DataJointError):
- """
- Query on a table that has not been declared
- """
+ """Query on a table that has not been declared."""
class DuplicateError(QueryError):
- """
- An integrity error caused by a duplicate entry into a unique key
- """
+ """Integrity error caused by a duplicate entry into a unique key."""
class IntegrityError(QueryError):
- """
- An integrity error triggered by foreign key constraints
- """
+ """Integrity error triggered by foreign key constraints."""
class UnknownAttributeError(QueryError):
- """
- User requests an attribute name not found in query heading
- """
+ """User requests an attribute name not found in query heading."""
class MissingAttributeError(QueryError):
- """
- An error arising when a required attribute value is not provided in INSERT
- """
+ """Required attribute value not provided in INSERT."""
class MissingExternalFile(DataJointError):
- """
- Error raised when an external file managed by DataJoint is no longer accessible
- """
+ """External file managed by DataJoint is no longer accessible."""
class BucketInaccessible(DataJointError):
- """
- Error raised when a S3 bucket is inaccessible
- """
+ """S3 bucket is inaccessible."""
diff --git a/src/datajoint/gc.py b/src/datajoint/gc.py
index 1ab08789e..33ede63d2 100644
--- a/src/datajoint/gc.py
+++ b/src/datajoint/gc.py
@@ -46,11 +46,15 @@ def _uses_content_storage(attr) -> bool:
- codecs that chain to a content-addressed codec
- Args:
- attr: Attribute from table heading
-
- Returns:
- True if the attribute stores content hashes
+ Parameters
+ ----------
+ attr : Attribute
+ Attribute from table heading.
+
+ Returns
+ -------
+ bool
+ True if the attribute stores content hashes.
"""
if not attr.codec:
return False
@@ -74,11 +78,15 @@ def _uses_object_storage(attr) -> bool:
"""
Check if an attribute uses path-addressed object storage.
- Args:
- attr: Attribute from table heading
+ Parameters
+ ----------
+ attr : Attribute
+ Attribute from table heading.
- Returns:
- True if the attribute stores object paths
+ Returns
+ -------
+ bool
+ True if the attribute stores object paths.
"""
if not attr.codec:
return False
@@ -91,11 +99,15 @@ def _extract_content_refs(value: Any) -> list[tuple[str, str | None]]:
"""
Extract content references from a stored value.
- Args:
- value: The stored value (could be JSON string or dict)
+ Parameters
+ ----------
+ value : Any
+ The stored value (could be JSON string or dict).
- Returns:
- List of (content_hash, store_name) tuples
+ Returns
+ -------
+ list[tuple[str, str | None]]
+ List of (content_hash, store_name) tuples.
"""
refs = []
@@ -120,11 +132,15 @@ def _extract_object_refs(value: Any) -> list[tuple[str, str | None]]:
"""
Extract object path references from a stored value.
- Args:
- value: The stored value (could be JSON string or dict)
+ Parameters
+ ----------
+ value : Any
+ The stored value (could be JSON string or dict).
- Returns:
- List of (path, store_name) tuples
+ Returns
+ -------
+ list[tuple[str, str | None]]
+ List of (path, store_name) tuples.
"""
refs = []
@@ -156,13 +172,19 @@ def scan_references(
Examines all tables in the given schemas and extracts content hashes
from columns that use content-addressed storage codecs.
- Args:
- *schemas: Schema instances to scan
- store_name: Only include references to this store (None = all stores)
- verbose: Print progress information
-
- Returns:
- Set of content hashes that are referenced
+ Parameters
+ ----------
+ *schemas : Schema
+ Schema instances to scan.
+ store_name : str, optional
+ Only include references to this store (None = all stores).
+ verbose : bool, optional
+ Print progress information.
+
+ Returns
+ -------
+ set[str]
+ Set of content hashes that are referenced.
"""
referenced: set[str] = set()
@@ -213,13 +235,19 @@ def scan_object_references(
Examines all tables in the given schemas and extracts object paths
from columns that use path-addressed storage (