Source code for sqliteanalyzer.header
# Copyright 2018 Santiago Gil
# (github.com/santigl)
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""Class to parse an SQLite header from a database file."""
from math import log
import sys
[docs]class SQLiteHeader:
"""Read an SQLite database file and extract the information
contained in its header (first ``HEADER_SIZE_BYTES`` = 100 bytes).
Reference:
https://sqlite.org/fileformat.html#the_database_header
"""
HEADER_SIZE_BYTES = 100
TEXT_ENCODINGS = ['UTF-8', 'UTF-16le', 'UTF-16be']
def __init__(self, db_path: str):
"""Create an SQLiteHeader instance.
Arguments:
db_path: path to an SQLite database file
Raises:
OSError, if the database file cannot be opened
"""
with open(db_path, 'rb') as db_file:
self._raw_header = bytes(db_file.read(self.HEADER_SIZE_BYTES))
print(self._raw_header)
#: str: header string (``'SQLite format 3\x00'``)
self.header_string = self._raw_header[0:15].decode()
#: int: page size in bytes
self.page_size = self._read_int(16, 2)
if self.page_size == 1:
self.page_size = 65536
#: int: SQLite version used in the last read
self.format_read_version = self._read_int(18)
#: int: SQLite version used in the last write
self.format_write_version = self._read_int(19)
#: int: Unused bytes reserved at the end of a page
self.reserved_space = self._read_int(20)
#: int: Maximum embedded payload fraction (must be 64)
self.max_embedded_payload = self._read_int(21)
#: int: Minimum embedded payload fraction (must be 32)
self.min_embedded_payload = self._read_int(22)
#: int: Leaf payload fraction. Must be 32.
self.leaf_payload = self._read_int(23)
#: int: File change counter
self.change_counter = self._read_int(24, 4)
#: int: Size of the DB in pages
self.page_count = self._read_int(28, 4)
#: int: Page# of the first freelist trunk page
self.freelist_start = self._read_int(32, 4)
#: int: Total number of freelist pages
self.freelist_count = self._read_int(36, 4)
#: int: Schema cookie
self.schema_cookie = self._read_int(40, 4)
#: int: Schema format number
self.schema_format = self._read_int(44, 4)
#: int: Default page cache size
self.page_cache_size = self._read_int(48, 4)
self.largest_root_page = self._read_int(52, 4)
"""int: Page# of the largest root b-tree page when in
autovacuum or incremental vacuum modes
"""
encoding_value = self._read_int(56, 4)
"""str: Text encoding
``'UTF-8'``, ``'UTF-16le'``, ``'UTF-16be'`` or ``None`` if
not valid
"""
if encoding_value in [1, 2, 3]:
self.text_encoding = self.TEXT_ENCODINGS[encoding_value-1]
else:
self.text_encoding = None
#: int: User version as set by the ``user_version`` pragma
self.user_version = self._read_int(60, 4)
#: bool: Incremental vacuum mode enabled
self.incremental_vacuum_mode = bool(self._read_int(64, 4))
#: int: Application ID as set by the ``application_id`` pragma
self.application_id = self._read_int(68, 4)
#: int: Bytes reserved by SQLite for expansion. (Must be 0.)
self.reserved = self._read_int(72, 20)
#: int: ``version-valid-for`` number
self.version_valid_for = self._read_int(92, 4)
#: int: SQLite version number
self.sqlite_version_number = self._read_int(96, 4)
[docs] def header_seems_valid(self) -> bool:
"""Returns whether the values in fields that have constraints
do respect them (i.e. the header appears to be valid)."""
page_size_is_power_of_two = log(self.page_size, 2).is_integer()
page_size_value_is_valid = self.page_size in range(512, 32769) \
or self.page_size == 65535
return page_size_is_power_of_two \
and page_size_value_is_valid \
and self.format_read_version in [1, 2] \
and self.format_write_version in [1, 2] \
and self.max_embedded_payload == 64 \
and self.min_embedded_payload == 32 \
and self.leaf_payload == 32 \
and self.schema_format in [1, 2, 3, 4] \
and self.text_encoding is not None \
and self.reserved == 0
def _read_int(self, start: int, length=1) -> int:
"""Reads length bytes from the start of the header
and returns the interpreted value as an integer.
"""
field = self._raw_header[start:start+length]
return int.from_bytes(field, byteorder='big')
def __str__(self):
"""Returns a string representation of an SQLite3Header."""
lines = [
'Header seems valid? {}'.format(self.header_seems_valid()),
'Header string: {}'.format(self.header_string),
'Page size: {}'.format(self.page_size),
'Format read version: {}'.format(self.format_read_version),
'Format write version: {}'.format(self.format_write_version),
'Reserved space: {}'.format(self.reserved_space),
'Max. embeded payload: {}'.format(self.max_embedded_payload),
'Min. embeded payload: {}'.format(self.min_embedded_payload),
'Leaf payload: {}'.format(self.leaf_payload),
'Change counter: {}'.format(self.change_counter),
'Page count: {}'.format(self.page_count),
'Freelist start page: {}'.format(self.freelist_start),
'Freelist size: {}'.format(self.freelist_count),
'Schema cookie: {}'.format(self.schema_cookie),
'Schema format: {}'.format(self.schema_format),
'Page cache size: {}'.format(self.page_cache_size),
'Largest b-tree-root page #: {}'.format(self.largest_root_page),
'Text encoding: {}'.format(self.text_encoding),
'User version: {}'.format(self.user_version),
'Incremental vacuum mode: {}'.format(self.incremental_vacuum_mode),
'Application id.: {}'.format(self.application_id),
'Version valid for: {}'.format(self.version_valid_for),
'SQLite version number: {}'.format(self.sqlite_version_number)
]
return '\n'.join(lines)