942 lines
41 KiB
HTML
942 lines
41 KiB
HTML
<!DOCTYPE html>
|
|
<html><head>
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
|
|
<link href="sqlite.css" rel="stylesheet">
|
|
<title>The SQLite R*Tree Module</title>
|
|
<!-- path= -->
|
|
</head>
|
|
<body>
|
|
<div class=nosearch>
|
|
<a href="index.html">
|
|
<img class="logo" src="images/sqlite370_banner.gif" alt="SQLite" border="0">
|
|
</a>
|
|
<div><!-- IE hack to prevent disappearing logo --></div>
|
|
<div class="tagline desktoponly">
|
|
Small. Fast. Reliable.<br>Choose any three.
|
|
</div>
|
|
<div class="menu mainmenu">
|
|
<ul>
|
|
<li><a href="index.html">Home</a>
|
|
<li class='mobileonly'><a href="javascript:void(0)" onclick='toggle_div("submenu")'>Menu</a>
|
|
<li class='wideonly'><a href='about.html'>About</a>
|
|
<li class='desktoponly'><a href="docs.html">Documentation</a>
|
|
<li class='desktoponly'><a href="download.html">Download</a>
|
|
<li class='wideonly'><a href='copyright.html'>License</a>
|
|
<li class='desktoponly'><a href="support.html">Support</a>
|
|
<li class='desktoponly'><a href="prosupport.html">Purchase</a>
|
|
<li class='search' id='search_menubutton'>
|
|
<a href="javascript:void(0)" onclick='toggle_search()'>Search</a>
|
|
</ul>
|
|
</div>
|
|
<div class="menu submenu" id="submenu">
|
|
<ul>
|
|
<li><a href='about.html'>About</a>
|
|
<li><a href='docs.html'>Documentation</a>
|
|
<li><a href='download.html'>Download</a>
|
|
<li><a href='support.html'>Support</a>
|
|
<li><a href='prosupport.html'>Purchase</a>
|
|
</ul>
|
|
</div>
|
|
<div class="searchmenu" id="searchmenu">
|
|
<form method="GET" action="search">
|
|
<select name="s" id="searchtype">
|
|
<option value="d">Search Documentation</option>
|
|
<option value="c">Search Changelog</option>
|
|
</select>
|
|
<input type="text" name="q" id="searchbox" value="">
|
|
<input type="submit" value="Go">
|
|
</form>
|
|
</div>
|
|
</div>
|
|
<script>
|
|
function toggle_div(nm) {
|
|
var w = document.getElementById(nm);
|
|
if( w.style.display=="block" ){
|
|
w.style.display = "none";
|
|
}else{
|
|
w.style.display = "block";
|
|
}
|
|
}
|
|
function toggle_search() {
|
|
var w = document.getElementById("searchmenu");
|
|
if( w.style.display=="block" ){
|
|
w.style.display = "none";
|
|
} else {
|
|
w.style.display = "block";
|
|
setTimeout(function(){
|
|
document.getElementById("searchbox").focus()
|
|
}, 30);
|
|
}
|
|
}
|
|
function div_off(nm){document.getElementById(nm).style.display="none";}
|
|
window.onbeforeunload = function(e){div_off("submenu");}
|
|
/* Disable the Search feature if we are not operating from CGI, since */
|
|
/* Search is accomplished using CGI and will not work without it. */
|
|
if( !location.origin || !location.origin.match || !location.origin.match(/http/) ){
|
|
document.getElementById("search_menubutton").style.display = "none";
|
|
}
|
|
/* Used by the Hide/Show button beside syntax diagrams, to toggle the */
|
|
function hideorshow(btn,obj){
|
|
var x = document.getElementById(obj);
|
|
var b = document.getElementById(btn);
|
|
if( x.style.display!='none' ){
|
|
x.style.display = 'none';
|
|
b.innerHTML='show';
|
|
}else{
|
|
x.style.display = '';
|
|
b.innerHTML='hide';
|
|
}
|
|
return false;
|
|
}
|
|
var antiRobot = 0;
|
|
function antiRobotGo(){
|
|
if( antiRobot!=3 ) return;
|
|
antiRobot = 7;
|
|
var j = document.getElementById("mtimelink");
|
|
if(j && j.hasAttribute("data-href")) j.href=j.getAttribute("data-href");
|
|
}
|
|
function antiRobotDefense(){
|
|
document.body.onmousedown=function(){
|
|
antiRobot |= 2;
|
|
antiRobotGo();
|
|
document.body.onmousedown=null;
|
|
}
|
|
document.body.onmousemove=function(){
|
|
antiRobot |= 2;
|
|
antiRobotGo();
|
|
document.body.onmousemove=null;
|
|
}
|
|
setTimeout(function(){
|
|
antiRobot |= 1;
|
|
antiRobotGo();
|
|
}, 100)
|
|
antiRobotGo();
|
|
}
|
|
antiRobotDefense();
|
|
</script>
|
|
<div class=fancy>
|
|
<div class=nosearch>
|
|
<div class="fancy_title">
|
|
The SQLite R*Tree Module
|
|
</div>
|
|
<div class="fancy_toc">
|
|
<a onclick="toggle_toc()">
|
|
<span class="fancy_toc_mark" id="toc_mk">►</span>
|
|
Table Of Contents
|
|
</a>
|
|
<div id="toc_sub"><div class="fancy-toc1"><a href="#overview">1. Overview</a></div>
|
|
<div class="fancy-toc1"><a href="#compiling_the_r_tree_module">2. Compiling The R*Tree Module</a></div>
|
|
<div class="fancy-toc1"><a href="#using_the_r_tree_module">3. Using the R*Tree Module</a></div>
|
|
<div class="fancy-toc2"><a href="#creating_an_r_tree_index">3.1. Creating An R*Tree Index</a></div>
|
|
<div class="fancy-toc3"><a href="#column_naming_details">3.1.1. Column naming details</a></div>
|
|
<div class="fancy-toc2"><a href="#populating_an_r_tree_index">3.2. Populating An R*Tree Index</a></div>
|
|
<div class="fancy-toc2"><a href="#querying_an_r_tree_index">3.3. Querying An R*Tree Index</a></div>
|
|
<div class="fancy-toc2"><a href="#roundoff_error">3.4. Roundoff Error</a></div>
|
|
<div class="fancy-toc2"><a href="#reading_and_writing_at_the_same_time">3.5. Reading And Writing At The Same Time</a></div>
|
|
<div class="fancy-toc1"><a href="#using_r_trees_effectively">4. Using R*Trees Effectively</a></div>
|
|
<div class="fancy-toc2"><a href="#auxiliary_columns">4.1. Auxiliary Columns</a></div>
|
|
<div class="fancy-toc3"><a href="#limitations">4.1.1. Limitations</a></div>
|
|
<div class="fancy-toc1"><a href="#integer_valued_r_trees">5. Integer-Valued R-Trees</a></div>
|
|
<div class="fancy-toc1"><a href="#custom_r_tree_queries">6. Custom R-Tree Queries</a></div>
|
|
<div class="fancy-toc2"><a href="#the_legacy_xgeom_callback">6.1. The Legacy xGeom Callback</a></div>
|
|
<div class="fancy-toc2"><a href="#the_new_xqueryfunc_callback">6.2. The New xQueryFunc Callback</a></div>
|
|
<div class="fancy-toc2"><a href="#additional_considerations_for_custom_queries">6.3. Additional Considerations for Custom Queries</a></div>
|
|
<div class="fancy-toc1"><a href="#implementation_details">7. Implementation Details</a></div>
|
|
<div class="fancy-toc2"><a href="#shadow_tables">7.1. Shadow Tables</a></div>
|
|
<div class="fancy-toc2"><a href="#integrity_check_using_the_rtreecheck_sql_function">7.2. Integrity Check using the rtreecheck() SQL function</a></div>
|
|
</div>
|
|
</div>
|
|
<script>
|
|
function toggle_toc(){
|
|
var sub = document.getElementById("toc_sub")
|
|
var mk = document.getElementById("toc_mk")
|
|
if( sub.style.display!="block" ){
|
|
sub.style.display = "block";
|
|
mk.innerHTML = "▼";
|
|
} else {
|
|
sub.style.display = "none";
|
|
mk.innerHTML = "►";
|
|
}
|
|
}
|
|
</script>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<h1 id="overview"><span>1. </span>Overview</h1>
|
|
|
|
<p>
|
|
An <a href="http://en.wikipedia.org/wiki/R-tree">R-Tree</a> is a special
|
|
index that is designed for doing range queries. R-Trees are most commonly
|
|
used in geospatial systems where each entry is a rectangle with minimum and
|
|
maximum X and Y coordinates. Given a query rectangle, an R-Tree is able
|
|
to quickly find all entries that are contained within the query rectangle
|
|
or which overlap the query rectangle. This idea is easily extended to
|
|
three dimensions for use in CAD systems. R-Trees also find use in time-domain
|
|
range look-ups. For example, suppose a database records the starting and
|
|
ending times for a large number of events. A R-Tree is able to quickly
|
|
find all events that were active at any time during a given
|
|
time interval, or all events that started during a particular time interval,
|
|
or all events that both started and ended within a given time interval.
|
|
And so forth.
|
|
</p>
|
|
|
|
<p>
|
|
The R-Tree concept originated with
|
|
<a href="https://link.springer.com/referenceworkentry/10.1007/978-0-387-35973-1_1151">Toni Guttman</a>:
|
|
<em>R-Trees: A Dynamic Index Structure for Spatial Searching</em>,
|
|
Proc. 1984 ACM SIGMOD International Conference on Management of Data,
|
|
pp. 47-57.
|
|
The implementation found in SQLite is a refinement of Guttman's original
|
|
idea, commonly called "R*Trees", that was described by
|
|
Norbert Beckmann, Hans-Peter Kriegel, Ralf Schneider, Bernhard Seeger:
|
|
<em>The R*-Tree: An Efficient and Robust Access Method for Points
|
|
and Rectangles.</em> SIGMOD Conference 1990: 322-331.
|
|
</p>
|
|
|
|
<h1 id="compiling_the_r_tree_module"><span>2. </span>Compiling The R*Tree Module</h1>
|
|
|
|
<p>
|
|
The source code to the SQLite R*Tree module is included as part
|
|
of the <a href="amalgamation.html">amalgamation</a>. However, depending on configuration options
|
|
and the particular version of SQLite you are using, it may or may not
|
|
be enabled by default. To ensure that the R*Tree module is enabled,
|
|
simply compile with the <a href="compile.html#enable_rtree">SQLITE_ENABLE_RTREE</a>
|
|
C-preprocessor macro defined. With many compilers, this is accomplished
|
|
by adding the option "-DSQLITE_ENABLE_RTREE=1" to the compiler
|
|
command-line.
|
|
</p>
|
|
|
|
<h1 id="using_the_r_tree_module"><span>3. </span>Using the R*Tree Module</h1>
|
|
|
|
<p>
|
|
The SQLite R*Tree module is implemented as a
|
|
<a href="c3ref/create_module.html">virtual table</a>. Each R*Tree index is a
|
|
virtual table with an odd number of columns between 3 and 11.
|
|
The first column is always a 64-bit signed integer primary key.
|
|
The other columns are pairs, one pair per dimension, containing the
|
|
minimum and maximum values for that dimension, respectively.
|
|
A 1-dimensional R*Tree thus has 3 columns.
|
|
A 2-dimensional R*Tree has 5 columns.
|
|
A 3-dimensional R*Tree has 7 columns.
|
|
A 4-dimensional R*Tree has 9 columns.
|
|
And a 5-dimensional R*Tree has 11 columns. The SQLite R*Tree implementation
|
|
does not support R*Trees wider than 5 dimensions.
|
|
</p>
|
|
|
|
<p>
|
|
The first column of an SQLite R*Tree is similar to an integer primary
|
|
key column of a normal SQLite table. It may only store a 64-bit signed
|
|
integer value. Inserting a NULL value into this column causes SQLite
|
|
to automatically generate a new unique primary key value. If an attempt
|
|
is made to insert any other non-integer value into this column,
|
|
the r-tree module silently converts it to an integer before writing it
|
|
into the database.
|
|
</p><p>
|
|
The min/max-value pair columns are stored as 32-bit floating point values for
|
|
"rtree" virtual tables or as 32-bit signed integers in "rtree_i32" virtual
|
|
tables. Unlike regular SQLite tables which can store data in a variety of
|
|
datatypes and formats, the R*Tree rigidly enforce these storage types.
|
|
If any other type of value is inserted into such a column, the r-tree
|
|
module silently converts it to the required type before writing the
|
|
new record to the database.
|
|
|
|
</p><h2 id="creating_an_r_tree_index"><span>3.1. </span>Creating An R*Tree Index</h2>
|
|
|
|
<p>
|
|
A new R*Tree index is created as follows:
|
|
</p>
|
|
|
|
<div class="codeblock"><pre>CREATE VIRTUAL TABLE <em><name></em> USING rtree(<em><column-names></em>);
|
|
</pre></div>
|
|
|
|
<p>
|
|
The <em><name></em> is the name your application chooses for the
|
|
R*Tree index and <em><column-names></em> is a comma separated list
|
|
of between 3 and 11 columns.
|
|
The virtual <name> table creates three <a href="vtab.html#xshadowname">shadow tables</a> to actually
|
|
store its content. The names of these shadow tables are:
|
|
</p>
|
|
|
|
<div class="codeblock"><pre><em><name></em><strong>_node</strong><br>
|
|
<em><name></em><strong>_rowid</strong><br>
|
|
<em><name></em><strong>_parent</strong>
|
|
</pre></div>
|
|
|
|
|
|
<p>
|
|
The shadow tables are ordinary SQLite data tables. You can query them
|
|
directly if you like, though this unlikely to reveal anything particularly
|
|
useful.
|
|
And you can <a href="lang_update.html">UPDATE</a>, <a href="lang_delete.html">DELETE</a>, <a href="lang_insert.html">INSERT</a> or even <a href="lang_droptable.html">DROP</a>
|
|
the shadow tables, though doing so will corrupt your R*Tree index.
|
|
So it is best to simply ignore the shadow tables. Recognize that they
|
|
hold your R*Tree index information and let it go as that.
|
|
</p>
|
|
|
|
<p>
|
|
As an example, consider creating a two-dimensional R*Tree index for use in
|
|
spatial queries:
|
|
</p>
|
|
|
|
<div class="codeblock"><pre>CREATE VIRTUAL TABLE demo_index USING rtree(
|
|
id, -- Integer primary key
|
|
minX, maxX, -- Minimum and maximum X coordinate
|
|
minY, maxY -- Minimum and maximum Y coordinate
|
|
);
|
|
</pre></div>
|
|
|
|
|
|
<h3 id="column_naming_details"><span>3.1.1. </span>Column naming details</h3>
|
|
|
|
<p>
|
|
In the argments to "rtree" in the CREATE VIRTUAL TABLE statement, the
|
|
names of the columns are taken from the first token of each argument.
|
|
All subsequent tokens within each argument are silently ignored.
|
|
This means, for example, that if you try to give a column a
|
|
<a href="datatype3.html#affinity">type affinity</a> or add a constraint such as UNIQUE or NOT NULL or DEFAULT to
|
|
a column, those extra tokens are accepted as valid, but they do not change
|
|
the behavior of the rtree.
|
|
In an RTREE virtual table, the first column always has a
|
|
<a href="datatype3.html#affinity">type affinity</a> of INTEGER and all other data columns have a
|
|
<a href="datatype3.html#affinity">type affinity</a> of REAL.
|
|
In an RTREE_I32 virtual table, all columns have type affinity of INTEGER.
|
|
|
|
</p><p>
|
|
Recommended practice is to omit any extra tokens in the rtree specification.
|
|
Let each argument to "rtree" be a single ordinary label that is the name of
|
|
the corresponding column, and omit all other tokens from the argument list.
|
|
|
|
</p><h2 id="populating_an_r_tree_index"><span>3.2. </span>Populating An R*Tree Index</h2>
|
|
|
|
<p>
|
|
The usual <a href="lang_insert.html">INSERT</a>, <a href="lang_update.html">UPDATE</a>, and <a href="lang_delete.html">DELETE</a> commands work on an R*Tree
|
|
index just like on regular tables. So to insert some data into our sample
|
|
R*Tree index, we can do something like this:
|
|
</p>
|
|
|
|
<div class="codeblock"><pre>INSERT INTO demo_index VALUES
|
|
(28215, -80.781227, -80.604706, 35.208813, 35.297367),
|
|
(28216, -80.957283, -80.840599, 35.235920, 35.367825),
|
|
(28217, -80.960869, -80.869431, 35.133682, 35.208233),
|
|
(28226, -80.878983, -80.778275, 35.060287, 35.154446),
|
|
(28227, -80.745544, -80.555382, 35.130215, 35.236916),
|
|
(28244, -80.844208, -80.841988, 35.223728, 35.225471),
|
|
(28262, -80.809074, -80.682938, 35.276207, 35.377747),
|
|
(28269, -80.851471, -80.735718, 35.272560, 35.407925),
|
|
(28270, -80.794983, -80.728966, 35.059872, 35.161823),
|
|
(28273, -80.994766, -80.875259, 35.074734, 35.172836),
|
|
(28277, -80.876793, -80.767586, 35.001709, 35.101063),
|
|
(28278, -81.058029, -80.956375, 35.044701, 35.223812),
|
|
(28280, -80.844208, -80.841972, 35.225468, 35.227203),
|
|
(28282, -80.846382, -80.844193, 35.223972, 35.225655);
|
|
</pre></div>
|
|
|
|
|
|
<p>
|
|
The entries above are bounding boxes (longitude and latitude) for 14
|
|
zipcodes near Charlotte, NC. A real database would have many thousands,
|
|
millions, or billions of such entries, but this small 14-row sample will
|
|
be sufficient to illustrate the ideas.
|
|
</p>
|
|
|
|
<h2 id="querying_an_r_tree_index"><span>3.3. </span>Querying An R*Tree Index</h2>
|
|
|
|
<p>
|
|
Any valid query will work against an R*Tree index. The R*Tree
|
|
implementation just makes some kinds of queries especially
|
|
efficient. Queries against the primary key are efficient:
|
|
</p>
|
|
|
|
<div class="codeblock"><pre>SELECT * FROM demo_index WHERE id=28269;
|
|
</pre></div>
|
|
|
|
|
|
<p>
|
|
Of course, an ordinary SQLite table will also do a query against its
|
|
integer primary key efficiently, so the previous is not important.
|
|
The big reason for using an R*Tree is so that
|
|
you can efficiently do range queries against the coordinate
|
|
ranges. For example, the main office of the SQLite project is
|
|
located at 35.37785, -80.77470.
|
|
To find which zipcodes might service that office, one could right:
|
|
</p>
|
|
|
|
<div class="codeblock"><pre>SELECT id FROM demo_index
|
|
WHERE minX<=-80.77470 AND maxX>=-80.77470
|
|
AND minY<=35.37785 AND maxY>=35.37785;
|
|
</pre></div>
|
|
|
|
|
|
<p>
|
|
The query above will quickly locate all zipcodes that contain
|
|
the SQLite main office in their bounding box, even if the
|
|
R*Tree contains many entries. The previous is an example
|
|
of a "contained-within" query. The R*Tree also supports "overlapping"
|
|
queries. For example, to find all zipcode bounding boxes that overlap
|
|
with the 28269 zipcode:
|
|
</p>
|
|
|
|
<div class="codeblock"><pre>SELECT A.id FROM demo_index AS A, demo_index AS B
|
|
WHERE A.maxX>=B.minX AND A.minX<=B.maxX
|
|
AND A.maxY>=B.minY AND A.minY<=B.maxY
|
|
AND B.id=28269;
|
|
</pre></div>
|
|
|
|
|
|
<p>
|
|
This second query will find both 28269 entry (since every bounding box
|
|
overlaps with itself) and also other zipcode that is close enough to
|
|
28269 that their bounding boxes overlap.
|
|
</p>
|
|
|
|
<p>
|
|
Note that it is not necessary for all coordinates in an R*Tree index
|
|
to be constrained in order for the index search to be efficient.
|
|
One might, for example, want to query all objects that overlap with
|
|
the 35th parallel:
|
|
</p>
|
|
|
|
<div class="codeblock"><pre>SELECT id FROM demo_index
|
|
WHERE maxY>=35.0 AND minY<=35.0;
|
|
</pre></div>
|
|
|
|
|
|
<p>
|
|
But, generally speaking, the more constraints that the R*Tree module
|
|
has to work with, and the smaller the bounding box, the faster the
|
|
results will come back.
|
|
</p>
|
|
|
|
<h2 id="roundoff_error"><span>3.4. </span>Roundoff Error</h2>
|
|
|
|
<p>
|
|
By default, coordinates are stored in an R*Tree using 32-bit floating
|
|
point values. When a coordinate cannot be exactly represented by a
|
|
32-bit floating point number, the lower-bound coordinates are rounded down
|
|
and the upper-bound coordinates are rounded up. Thus, bounding boxes might
|
|
be slightly larger than specified, but will never be any smaller. This
|
|
is exactly what is desired for doing the more common "overlapping" queries
|
|
where the application wants to find every entry in the R*Tree that overlaps
|
|
a query bounding box. Rounding the entry bounding boxes outward might cause a
|
|
few extra entries to appears in an overlapping query if the edge of the
|
|
entry bounding box corresponds to an edge of the query bounding box. But
|
|
the overlapping query will never miss a valid table entry.
|
|
|
|
</p><p>However, for a "contained-within" style query, rounding the bounding
|
|
boxes outward might cause some entries to be excluded from the result set
|
|
if the edge of the entry bounding box corresponds to the edge of the query
|
|
bounding box. To guard against this, applications should expand their
|
|
contained-within query boxes slightly (by 0.000012%) by rounding down the
|
|
lower coordinates and rounding up the top coordinates, in each dimension.
|
|
|
|
<a name="readwrite"></a>
|
|
|
|
</p><h2 id="reading_and_writing_at_the_same_time"><span>3.5. </span>Reading And Writing At The Same Time</h2>
|
|
|
|
<p>
|
|
It is the nature of the Guttman R-Tree algorithm that any write might
|
|
radically restructure the tree, and in the process change the scan order
|
|
of the nodes. For this reason, it is not generally possible to modify
|
|
the R-Tree in the middle of a query of the R-Tree. Attempts to do so
|
|
will fail with a <a href="rescode.html#locked">SQLITE_LOCKED</a> "database table is locked" error.
|
|
|
|
</p><p>
|
|
So, for example, suppose an application runs one query against an R-Tree like
|
|
this:
|
|
|
|
</p><div class="codeblock"><pre>SELECT id FROM demo_index
|
|
WHERE maxY>=35.0 AND minY<=35.0;
|
|
</pre></div>
|
|
|
|
<p>
|
|
Then for each "id" value returned, suppose the application creates an
|
|
UPDATE statement like the following and binds the "id" value returned against
|
|
the "?1" parameter:
|
|
|
|
</p><div class="codeblock"><pre>UPDATE demo_index SET maxY=maxY+0.5 WHERE id=?1;
|
|
</pre></div>
|
|
|
|
<p>
|
|
Then the UPDATE might fail with an SQLITE_LOCKED error. The reason is that
|
|
the initial query has not run to completion. It is remembering its place
|
|
in the middle of a scan of the R-Tree. So an update to the R-Tree cannot
|
|
be tolerated as this would disrupt the scan.
|
|
|
|
</p><p>
|
|
This is a limitation of the R-Tree extension only. Ordinary tables in
|
|
SQLite are able to read and write at the same time. Other virtual tables
|
|
might (or might not) also that capability. And R-Tree can appear to read
|
|
and write at the same time in some circumstances, if it can figure out how
|
|
to reliably run the query to completion before starting the update. But
|
|
you shouldn't count on that for every query. Generally speaking, it is
|
|
best to avoid running queries and updates to the same R-Tree at the same
|
|
time.
|
|
|
|
</p><p>
|
|
If you really need to update an R-Tree based on complex queries against
|
|
the same R-Tree, it is best to run the complex queries first and store
|
|
the results in a temporary table, then update the R-Tree based on the values
|
|
stored in the temporary table.
|
|
|
|
</p><h1 id="using_r_trees_effectively"><span>4. </span>Using R*Trees Effectively</h1>
|
|
|
|
<p>
|
|
For SQLite versions prior to 3.24.0 (2018-06-04),
|
|
the only information that an R*Tree index stores about an object is
|
|
its integer ID and its bounding box. Additional information needs to
|
|
be stored in separate tables and related to the R*Tree index using
|
|
the primary key. For the example above, one might create an auxiliary
|
|
table as follows:
|
|
</p>
|
|
|
|
<div class="codeblock"><pre>CREATE TABLE demo_data(
|
|
id INTEGER PRIMARY KEY, -- primary key
|
|
objname TEXT, -- name of the object
|
|
objtype TEXT, -- object type
|
|
boundary BLOB -- detailed boundary of object
|
|
);
|
|
</pre></div>
|
|
|
|
<p>
|
|
In this example, the demo_data.boundary field is intended to hold some
|
|
kind of binary representation of the precise boundaries of the object.
|
|
The R*Tree index only holds an axis-aligned rectangular boundary for the
|
|
object. The R*Tree boundary is just an approximation of the true object
|
|
boundary. So what typically happens is that the R*Tree index is used to
|
|
narrow a search down to a list of candidate objects and then more detailed
|
|
and expensive computations are done on each candidate to find if the
|
|
candidate truly meets the search criteria.
|
|
</p>
|
|
|
|
<blockquote><p>
|
|
<strong>Key Point:</strong>
|
|
An R*Tree index does not normally provide the exact answer but merely
|
|
reduces the set of potential answers from millions to dozens.
|
|
</p></blockquote>
|
|
|
|
<p>
|
|
Suppose the demo_data.boundary field holds some proprietary data description
|
|
of a complex two-dimensional boundary for a zipcode and suppose that the
|
|
application has used the <a href="c3ref/create_function.html">sqlite3_create_function()</a> interface to
|
|
created an application-defined function "contained_in(boundary,lat,long)"
|
|
that accepts the demo_data.boundary object and a latitute and longitude
|
|
and returns return true or false if the lat/long is contained within
|
|
the boundary.
|
|
One may assume that "contained_in()" is a relatively slow
|
|
functions that we do not want to invoke too frequently.
|
|
Then an efficient way to find the specific ZIP code for the main
|
|
SQLite office would be to run a query like this:
|
|
</p>
|
|
|
|
<a name="diquery"></a>
|
|
<div class="codeblock"><pre>SELECT objname FROM demo_data, demo_index
|
|
WHERE demo_data.id=demo_index.id
|
|
AND contained_in(demo_data.boundary, 35.37785, -80.77470)
|
|
AND minX<=-80.77470 AND maxX>=-80.77470
|
|
AND minY<=35.37785 AND maxY>=35.37785;
|
|
</pre></div>
|
|
|
|
<p>Notice how the query above works: The R*Tree index runs in the outer
|
|
loop to find entries that contain the SQLite main office in their
|
|
boundary box.
|
|
For each row found, SQLite looks up
|
|
the corresponding entry in the demo_data table. It then uses the boundary
|
|
field from the demo_data table as a parameter to the contained_in()
|
|
function and if that function returns true, then we know the sought after
|
|
coordinate is in that ZIP code boundary.</p>
|
|
|
|
<p>One would get the same answer without the use of the R*Tree index
|
|
using the following simpler query:</p>
|
|
|
|
<div class="codeblock"><pre>SELECT objname FROM demo_data
|
|
WHERE contained_in(demo_data.boundary, 35.37785, -80.77470);
|
|
</pre></div>
|
|
|
|
<p>The problem with this latter query is that it must apply the
|
|
contained_in() function to all entries in the demo_data table.
|
|
The use of the R*Tree in the penultimate query reduces the number of
|
|
calls to contained_in() function to a small subset of the entire table.
|
|
The R*Tree index did not find the exact answer itself, it merely
|
|
limited the search space.</p>
|
|
|
|
<a name="auxcol"></a>
|
|
|
|
<h2 id="auxiliary_columns"><span>4.1. </span>Auxiliary Columns</h2>
|
|
|
|
<p>
|
|
Beginning with SQLite version 3.24.0 (2018-06-04), r-tree tables
|
|
can have auxiliary columns that store arbitrary data.
|
|
Auxiliary columns can be used in place of
|
|
secondary tables such as "demo_data".
|
|
|
|
</p><p>
|
|
Auxiliary columns are marked with a "+" symbol before the column name.
|
|
Auxiliary columns must come after all of the coordinate boundary columns.
|
|
An RTREE table can have no more than 100 columns total. In other words,
|
|
the count of columns including the integer primary key column,
|
|
the coordinate boundary columns, and all auxiliary columns must be 100 or less.
|
|
The following example shows an r-tree table with auxiliary columns that
|
|
is equivalent to the two tables "demo_index" and "demo_data" above:
|
|
|
|
</p><div class="codeblock"><pre>CREATE VIRTUAL TABLE demo_index2 USING rtree(
|
|
id, -- Integer primary key
|
|
minX, maxX, -- Minimum and maximum X coordinate
|
|
minY, maxY, -- Minimum and maximum Y coordinate
|
|
+objname TEXT, -- name of the object
|
|
+objtype TEXT, -- object type
|
|
+boundary BLOB -- detailed boundary of object
|
|
);
|
|
</pre></div>
|
|
|
|
<p>
|
|
By combining location data and related information into the same
|
|
table, auxiliary columns can provide a cleaner model
|
|
and reduce the need to joins.
|
|
For example, the earlier
|
|
<a href="#diquery">join between demo_index and demo_data</a> can now
|
|
be written as a simple query, like this:
|
|
|
|
</p><div class="codeblock"><pre>SELECT objname FROM demo_index2
|
|
WHERE contained_in(boundary, 35.37785, -80.77470)
|
|
AND minX<=-80.77470 AND maxX>=-80.77470
|
|
AND minY<=35.37785 AND maxY>=35.37785;
|
|
</pre></div>
|
|
|
|
<h3 id="limitations"><span>4.1.1. </span>Limitations</h3>
|
|
|
|
<p>
|
|
For auxiliary columns, only the name of the column matters.
|
|
The <a href="datatype3.html#affinity">type affinity</a> is ignored.
|
|
Constraints such as NOT NULL, UNIQUE, REFERENCES, or CHECK
|
|
are also ignored. However, future versions
|
|
of SQLite might start paying attention to the type affinity and
|
|
constraints, so users of auxiliary columns are advised to leave
|
|
both blank, to avoid future compatibility problems.
|
|
|
|
<a name="intrtree"></a>
|
|
|
|
</p><h1 id="integer_valued_r_trees"><span>5. </span>Integer-Valued R-Trees</h1>
|
|
|
|
<p>
|
|
The default virtual table ("rtree") stores coordinates as
|
|
single-precision (4-byte) floating point numbers. If integer coordinates
|
|
are desired, declare the table using "rtree_i32" instead:
|
|
|
|
</p><div class="codeblock"><pre>CREATE VIRTUAL TABLE intrtree USING rtree_i32(id,x0,x1,y0,y1,z0,z1);
|
|
</pre></div>
|
|
|
|
<p>
|
|
An rtree_i32 stores coordinates as 32-bit signed integers.
|
|
Even though it stores values using integer, the rtree_i32 virtual
|
|
table still uses floating point computations internally as part of
|
|
the r-tree algorithm.
|
|
|
|
<a name="customquery"></a>
|
|
|
|
</p><h1 id="custom_r_tree_queries"><span>6. </span>Custom R-Tree Queries</h1>
|
|
|
|
<p>By using standard SQL expressions in the WHERE clause of a SELECT query,
|
|
a programmer can query for all R*Tree entries that
|
|
intersect with or are contained within a particular bounding-box.
|
|
Custom R*Tree queries, using the MATCH
|
|
operator in the WHERE clause of a SELECT, allow the programmer to query for
|
|
the set of R*Tree entries that intersect any arbitrary region or shape, not
|
|
just a box. This capability is useful, for example, in computing the
|
|
subset of objects in the R*Tree that are visible from a camera positioned
|
|
in 3-D space.
|
|
|
|
</p><p>Regions for custom R*Tree queries are defined by R*Tree geometry callbacks
|
|
implemented by the application and registered with SQLite via a call to one
|
|
of the following two APIs:
|
|
|
|
</p><div class="codeblock"><pre>int sqlite3_rtree_query_callback(
|
|
sqlite3 *db,
|
|
const char *zQueryFunc,
|
|
int (*xQueryFunc)(sqlite3_rtree_query_info*),
|
|
void *pContext,
|
|
void (*xDestructor)(void*)
|
|
);
|
|
int sqlite3_rtree_geometry_callback(
|
|
sqlite3 *db,
|
|
const char *zGeom,
|
|
int (*xGeom)(sqlite3_rtree_geometry *, int nCoord, double *aCoord, int *pRes),
|
|
void *pContext
|
|
);
|
|
</pre></div>
|
|
|
|
<p>The sqlite3_rtree_query_callback() became available with SQLite
|
|
<a href="releaselog/3_8_5.html">version 3.8.5</a> (2014-06-04) and is the preferred interface.
|
|
The sqlite3_rtree_geometry_callback() is an older and less flexible
|
|
interface that is supported for backwards compatibility.
|
|
|
|
</p><p>A call to one of the above APIs creates a new SQL function named by the
|
|
second parameter (zQueryFunc or zGeom). When that SQL function appears
|
|
on the right-hand side of the MATCH operator and the left-hand side of the
|
|
MATCH operator is any column in the R*Tree virtual table, then the callback
|
|
defined by the third argument (xQueryFunc or xGeom) is invoked to determine
|
|
if a particular object or subtree overlaps the desired region.
|
|
|
|
</p><p>For example, a query like the following might be used to find all
|
|
R*Tree entries that overlap with a circle centered a 45.3,22.9 with a
|
|
radius of 5.0:
|
|
|
|
</p><div class="codeblock"><pre>SELECT id FROM demo_index WHERE id MATCH circle(45.3, 22.9, 5.0)
|
|
</pre></div>
|
|
|
|
<p>The SQL syntax for custom queries is the same regardless of which
|
|
interface, sqlite3_rtree_geometry_callback() or sqlite3_rtree_query_callback(),
|
|
is used to register the SQL function. However, the newer query-style
|
|
callbacks give the application greater control over how the query proceeds.
|
|
|
|
</p><h2 id="the_legacy_xgeom_callback"><span>6.1. </span>The Legacy xGeom Callback</h2>
|
|
|
|
<p>The legacy xGeom callback is invoked with four arguments. The first
|
|
argument is a pointer to an sqlite3_rtree_geometry structure which provides
|
|
information about how the SQL function was invoked. The second argument
|
|
is the number of coordinates in each r-tree entry, and is always the same
|
|
for any given R*Tree. The number of coordinates is 2 for a 1-dimensional R*Tree,
|
|
4 for a 2-dimensional R*Tree, 6 for a 3-dimensional R*Tree, and so forth.
|
|
The third argument, aCoord[], is an array of nCoord coordinates that defines
|
|
a bounding box to be tested. The last argument is a pointer into which
|
|
the callback result should be written. The result is zero
|
|
if the bounding-box defined by aCoord[] is completely outside
|
|
the region defined by the xGeom callback and the result is non-zero if
|
|
the bounding-box is inside or overlaps with the xGeom region. The xGeom
|
|
callback should normally return SQLITE_OK. If xGeom returns anything other
|
|
than SQLITE_OK, then the r-tree query will abort with an error.
|
|
|
|
</p><p>The sqlite3_rtree_geometry structure that the first argument to the
|
|
xGeom callback points to has a structure shown below. The exact same
|
|
sqlite3_rtree_geometry
|
|
structure is used for every callback for same MATCH operator in the same
|
|
query. The contents of the sqlite3_rtree_geometry
|
|
structure are initialized by SQLite but are
|
|
not subsequently modified. The callback is free to make changes to the
|
|
pUser and xDelUser elements of the structure if desired.
|
|
|
|
</p><div class="codeblock"><pre>typedef struct sqlite3_rtree_geometry sqlite3_rtree_geometry;
|
|
struct sqlite3_rtree_geometry {
|
|
void *pContext; /* Copy of pContext passed to s_r_g_c() */
|
|
int nParam; /* Size of array aParam */
|
|
double *aParam; /* Parameters passed to SQL geom function */
|
|
void *pUser; /* Callback implementation user data */
|
|
void (*xDelUser)(void *); /* Called by SQLite to clean up pUser */
|
|
};
|
|
</pre></div>
|
|
|
|
<p>The pContext member of the sqlite3_rtree_geometry
|
|
structure is always set to a copy of the pContext
|
|
argument passed to sqlite3_rtree_geometry_callback() when the
|
|
callback is registered. The aParam[] array (size nParam) contains the parameter
|
|
values passed to the SQL function on the right-hand side of the MATCH operator.
|
|
In the example "circle" query above, nParam would be set to 3 and the aParam[]
|
|
array would contain the three values 45.3, 22.9 and 5.0.
|
|
|
|
</p><p>The pUser and xDelUser members of the sqlite3_rtree_geometry structure are
|
|
initially set to NULL. The pUser variable may be set by the callback
|
|
implementation to any arbitrary value that may be useful to subsequent
|
|
invocations of the callback within the same query (for example, a
|
|
pointer to a complicated data structure used to test for region intersection).
|
|
If the xDelUser variable is set to a non-NULL value, then after the
|
|
query has finished running SQLite automatically invokes it with the
|
|
value of the pUser variable as the only argument. In other words, xDelUser
|
|
may be set to a destructor function for the pUser value.
|
|
|
|
</p><p>The xGeom callback always does a depth-first search of the r-tree.
|
|
|
|
<a name="xquery"></a>
|
|
|
|
</p><h2 id="the_new_xqueryfunc_callback"><span>6.2. </span>The New xQueryFunc Callback</h2>
|
|
|
|
<p>The newer xQueryFunc callback receives more information from the r-tree
|
|
query engine on each call, and it sends more information back to the query engine
|
|
before it returns.
|
|
To help keep the interface manageable, the xQueryFunc callback sends and receives
|
|
information from the query engine as fields in the
|
|
sqlite3_rtree_query_info structure:
|
|
|
|
</p><div class="codeblock"><pre>struct sqlite3_rtree_query_info {
|
|
void *pContext; /* pContext from when function registered */
|
|
int nParam; /* Number of function parameters */
|
|
sqlite3_rtree_dbl *aParam; /* value of function parameters */
|
|
void *pUser; /* callback can use this, if desired */
|
|
void (*xDelUser)(void*); /* function to free pUser */
|
|
sqlite3_rtree_dbl *aCoord; /* Coordinates of node or entry to check */
|
|
unsigned int *anQueue; /* Number of pending entries in the queue */
|
|
int nCoord; /* Number of coordinates */
|
|
int iLevel; /* Level of current node or entry */
|
|
int mxLevel; /* The largest iLevel value in the tree */
|
|
sqlite3_int64 iRowid; /* Rowid for current entry */
|
|
sqlite3_rtree_dbl rParentScore; /* Score of parent node */
|
|
int eParentWithin; /* Visibility of parent node */
|
|
int eWithin; /* OUT: Visiblity */
|
|
sqlite3_rtree_dbl rScore; /* OUT: Write the score here */
|
|
/* The following fields are only available in 3.8.11 and later */
|
|
sqlite3_value **apSqlParam; /* Original SQL values of parameters */
|
|
};
|
|
</pre></div>
|
|
|
|
<p>The first five fields of the sqlite3_rtree_query_info structure are identical
|
|
to the sqlite3_rtree_geometry structure, and have exactly the same meaning.
|
|
The sqlite3_rtree_query_info structure also contains nCoord and aCoord fields
|
|
which have the same meaning as the parameter of the same name in the xGeom callback.
|
|
|
|
</p><p>The xQueryFunc must set the eWithin field of sqlite3_rtree_query_info to
|
|
one of the values NOT_WITHIN, PARTLY_WITHIN, or FULLY_WITHIN depending on whether
|
|
or not the bounding box defined by aCoord[] is completely outside the region,
|
|
overlaps the region, or is completely inside the region, respectively. In
|
|
addition, the xQueryFunc must set the rScore field to a non-negative value that
|
|
indicates the order in which subtrees and entries of the query should be analyzed
|
|
and returned. Smaller scores are processed first.
|
|
|
|
</p><p>As its name implies, an R*Tree is organized as a tree. Each node of the
|
|
tree is a bounding box. The root of the tree is a bounding box that encapsulates
|
|
all elements of the tree. Beneath the root are a number of subtrees (typically
|
|
20 or more) each with their own smaller bounding boxes and each containing some
|
|
subset of the R*Tree entries. The subtrees may have sub-subtrees, and so forth
|
|
until finally one reaches the leaves of the tree which are the actual R*Tree
|
|
entries.
|
|
|
|
</p><p>An R*Tree query is initialized by making the root node the only entry
|
|
in a priority queue sorted by rScore.
|
|
The query proceeds by extracting the entry from the priority queue that has
|
|
the lowest score. If that entry is a leaf (meaning that it is an actual
|
|
R*Tree entry and not a subtree) then that entry
|
|
is returned as one row of the query result.
|
|
If the extracted priority queue entry is a node (a subtree),
|
|
then the next child of that node is passed to the xQueryFunc callback.
|
|
If the node has more children then it is returned to the priority queue.
|
|
Otherwise it is discarded. Those subelements for which the xQueryFunc
|
|
callback sets eWithin to PARTLY_WITHIN or FULLY_WITHIN are added to the
|
|
priority queue using the score supplied by the callback. Subelements that
|
|
return NOT_WITHIN are discarded. The query runs until the priority queue is
|
|
empty.
|
|
|
|
</p><p>Every leaf entry and node (subtree) within the R*Tree has an integer "level".
|
|
The leaves have a level of 0. The first containing subtree of the leaves has
|
|
a level of 1. The root of the R*Tree has the largest level value. The
|
|
mxLevel entry in the sqlite3_rtree_query_info structure is the level value for
|
|
the root of the R*Tree. The iLevel entry in sqlite3_rtree_query_info gives the
|
|
level for the object being interrogated.
|
|
|
|
</p><p>Most R*Tree queries use a depth-first search. This is accomplished by setting
|
|
the rScore equal to iLevel. A depth-first search is usually preferred since it
|
|
minimizes the number of elements in the priority queue, which reduces memory
|
|
requirements and speeds processing. However, some application may prefer a
|
|
breadth-first search, which can be accomplished by setting rScore to mxLevel-iLevel.
|
|
By creating more complex formulas for rScore, applications can exercise
|
|
detailed control over the order in which subtree are searched and leaf
|
|
R*Tree entries are returned. For example, in an application with many
|
|
millions of R*Tree entries, the rScore might be arranged so that the
|
|
largest or most significant entries are returned first, allowing the
|
|
application to display the most important information quickly, and
|
|
filling in smaller and less important details as they become available.
|
|
|
|
</p><p>Other information fields of the sqlite3_rtree_query_info structure are
|
|
available for use by the xQueryFunc callback, if desired. The iRowid field
|
|
is the rowid (the first of the 3 to 11 columns in the R*Tree) for the element
|
|
being considered. iRowid is only valid for leaves. The eParentWithin and
|
|
rParentScore values are copies of the eWithin and rScore values from the
|
|
containing subtree of the current row. The anQueue field is an array
|
|
of mxLevel+1 unsigned integers that tell the current number of elements in
|
|
the priority queue at each level.
|
|
|
|
</p><h2 id="additional_considerations_for_custom_queries"><span>6.3. </span>Additional Considerations for Custom Queries</h2>
|
|
|
|
<p>
|
|
The MATCH operator of a custom R*Tree query function must be a top-level
|
|
AND-connected term of the WHERE clause, or else it will not be usable
|
|
by the R*Tree query optimizer and the query will not be runnable.
|
|
If the MATCH operator is connected to other terms of the WHERE clause
|
|
via an OR operator, for example, the query will fail with an error.
|
|
|
|
</p><p>
|
|
Two or more MATCH operators are allowed in the same WHERE clause, as long
|
|
as they are connected by AND operators. However,
|
|
the R*Tree query engine only contains a single priority queue. The priority
|
|
assigned to each node in the search is the lowest priority returned by any
|
|
of the MATCH operators.
|
|
|
|
</p><h1 id="implementation_details"><span>7. </span>Implementation Details</h1>
|
|
|
|
<p>
|
|
The following sections describe some low-level details of the R*Tree implementation,
|
|
that might be useful for trouble-shooting or performance analysis.
|
|
|
|
<a name="xshadow"></a>
|
|
|
|
</p><h2 id="shadow_tables"><span>7.1. </span>Shadow Tables</h2>
|
|
|
|
<p>The content of an R*Tree index is actually stored in three ordinary
|
|
SQLite tables with names derived from the name of the R*Tree. These
|
|
three tables are called "<a href="vtab.html#xshadowname">shadow tables</a>". This is their schema:
|
|
|
|
</p><div class="codeblock"><pre>CREATE TABLE %_node(nodeno INTEGER PRIMARY KEY, data)
|
|
CREATE TABLE %_parent(nodeno INTEGER PRIMARY KEY, parentnode)
|
|
CREATE TABLE %_rowid(rowid INTEGER PRIMARY KEY, nodeno)
|
|
</pre></div>
|
|
|
|
<p>The "%" in the name of each shadow table is replaced by the name of the
|
|
R*Tree virtual table. So, if the name of the R*Tree table is "xyz" then
|
|
the three shadow tables would be "xyz_node", "xyz_parent", and "xyz_rowid".
|
|
|
|
</p><p>There is one entry in the %_node table for each R*Tree node. An
|
|
R*Tree node consists of one or more entries that are proximate to one another.
|
|
The nodes of an R*Tree for a tree. All nodes other than the root have an
|
|
entry in the %_parent shadow table that identifies the parent node.
|
|
Each entry in an R*Tree has a rowid. The %_rowid shadow table maps entry
|
|
rowids to the node that contains that entry.
|
|
|
|
</p><p>Extra columns appended to the %_rowid table hold the
|
|
content of <a href="rtree.html#auxcol">auxiliary columns</a>. The names of these extra
|
|
%_rowid columns are probably not the same as the
|
|
actual auxiliary column names.
|
|
|
|
<a name="rtreecheck"></a>
|
|
|
|
</p><h2 id="integrity_check_using_the_rtreecheck_sql_function"><span>7.2. </span>Integrity Check using the rtreecheck() SQL function</h2>
|
|
|
|
<p>The scalar SQL function rtreecheck(R) or rtreecheck(S,R) runs an
|
|
integrity check on the rtree table named R contained within database S.
|
|
The function returns a human-language description of any problems found,
|
|
or the string 'ok' if everything is ok. Running rtreecheck() on an R*Tree
|
|
virtual table is similar to running <a href="pragma.html#pragma_integrity_check">PRAGMA integrity_check</a> on a
|
|
database.
|
|
|
|
</p><p>Example: To verify that an R*Tree named "demo_index" is well-formed
|
|
and internally consistent, run:
|
|
|
|
</p><div class="codeblock"><pre>SELECT rtreecheck('demo_index');
|
|
</pre></div>
|
|
|
|
<p>The rtreecheck() function performs the following checks:
|
|
|
|
</p><ol>
|
|
<li><p>
|
|
For each cell in the r-tree structure (%_node table), that:
|
|
</p><ol type="a">
|
|
<li><p> for each dimension, (coord1 <= coord2).
|
|
</p></li><li><p> unless the cell is on the root node, that the cell is bounded
|
|
by the parent cell on the parent node.
|
|
</p></li><li><p> for leaf nodes, that there is an entry in the %_rowid
|
|
table corresponding to the cell's rowid value that
|
|
points to the correct node.
|
|
</p></li><li><p> for cells on non-leaf nodes, that there is an entry in the
|
|
%_parent table mapping from the cell's child node to the
|
|
node that it resides on.
|
|
</p></li></ol>
|
|
</li><li><p>
|
|
That there are the same number of entries in the %_rowid table
|
|
as there are leaf cells in the r-tree structure, and that there
|
|
is a leaf cell that corresponds to each entry in the %_rowid table.
|
|
</p></li><li><p>
|
|
That there are the same number of entries in the %_parent table
|
|
as there are non-leaf cells in the r-tree structure, and that
|
|
there is a non-leaf cell that corresponds to each entry in the
|
|
%_parent table.
|
|
</p></li></ol>
|
|
<p align="center"><small><i>This page last modified on <a href="https://sqlite.org/docsrc/honeypot" id="mtimelink" data-href="https://sqlite.org/docsrc/finfo/pages/rtree.in?m=703d907909">2023-02-20 00:00:42</a> UTC </small></i></p>
|
|
|