# -*- coding: utf-8 -*-
# ###
# Copyright (c) 2013, Rice University
# This software is subject to the provisions of the GNU Affero General
# Public License version 3 (AGPLv3).
# See LICENCE.txt for details.
# ###
"""Database models and utilities."""
import contextlib
import os
import json
import psycopg2
import logging
import cnxdb
from pyramid.threadlocal import get_current_registry
from cnxtransforms import (
produce_cnxml_for_module,
produce_html_for_module,
transform_abstract_to_cnxml,
transform_abstract_to_html,
)
from . import config
from .utils import split_ident_hash, IdentHashMissingVersion
# Absolute path of the directory containing this module.
here = os.path.abspath(os.path.dirname(__file__))
# Absolute path of the directory the ``cnxdb`` package was imported from.
CNXDB_DIRECTORY = os.path.abspath(os.path.dirname(cnxdb.__file__))
# Directory inside ``cnxdb`` that holds the ``.sql`` files read by
# ``_read_sql_file``.
SQL_DIRECTORY = os.path.join(CNXDB_DIRECTORY, 'archive-sql')
# Module-level logger registered under the name 'cnxarchive'.
logger = logging.getLogger('cnxarchive')
class ContentNotFound(Exception):
    """Raised when a database retrieval finds no matching content."""
def _read_sql_file(name):
    """Return the text of ``<name>.sql`` found in ``SQL_DIRECTORY``."""
    filename = '{}.sql'.format(name)
    with open(os.path.join(SQL_DIRECTORY, filename), 'r') as sql_file:
        contents = sql_file.read()
    return contents
# Every statement is read once, at import time, and stored under the base
# name (without the ``.sql`` extension) of the file it came from.
SQL = {
    statement_name: _read_sql_file(statement_name)
    for statement_name in (
        'get-available-languages-and-count',
        'get-module',
        'get-content-from-legacy-id',
        'get-content-from-legacy-id-ver',
        'get-module-metadata',
        'get-resource',
        'get-resource-by-filename',
        'get-resourceid-by-filename',
        'get-tree-by-uuid-n-version',
        'get-module-latest-version',
        'get-module-head-version',
        'get-module-versions',
        'get-module-uuid',
        'get-subject-list',
        'get-featured-links',
        'get-users-by-ids',
        'get-service-state-messages',
        'get-license-info-as-json',
        'get-in-book-search',
        'get-in-book-search-full-page',
        'get-in-collated-book-search',
        'get-in-collated-book-search-full-page',
        'get-collated-content',
        'get-collated-state',
        'get-core-info',
        'get-book-core-info',
        'query-module_files-by-xpath',
        'query-collated_file_associations-by-xpath',
        'get-books-containing-page',
        'get-book-latest-version-with-page',
    )
}
@contextlib.contextmanager
def db_connect(connection_string=None):
    """Yield a psycopg2 connection and close it when the block exits.

    When ``connection_string`` is omitted, it is looked up in the current
    pyramid registry settings under ``config.CONNECTION_STRING``.

    The connection is itself entered as a context manager around the
    ``yield``, and is closed afterwards whether or not the body raised.
    """
    if connection_string is None:
        registry = get_current_registry()
        connection_string = registry.settings[config.CONNECTION_STRING]
    with contextlib.closing(psycopg2.connect(connection_string)) as conn:
        with conn:
            yield conn
def get_module_ident_from_ident_hash(ident_hash, cursor):
    """Look up the ``module_ident`` that ``ident_hash`` refers to.

    An ident hash without a version is resolved against ``latest_modules``;
    otherwise ``modules`` is filtered by the major version and, when one is
    present, the minor version.

    Returns ``None`` when the query yields no row.
    """
    try:
        uuid, (major, minor) = split_ident_hash(
            ident_hash, split_version=True)
    except IdentHashMissingVersion as exc:
        uuid, major, minor = exc.id, None, None

    params = [uuid]
    conditions = ["uuid = %s"]
    if major is None:
        table = 'latest_modules'
    else:
        table = 'modules'
        params.append(major)
        conditions.append("major_version = %s")
    if minor is not None:
        params.append(minor)
        conditions.append("minor_version = %s")

    query = "SELECT module_ident FROM {} WHERE ".format(table)
    query += " AND ".join(conditions)
    cursor.execute(query, params)
    try:
        return cursor.fetchone()[0]
    except TypeError:  # fetchone() returned None
        return None
def get_tree(ident_hash, cursor, as_collated=False):
    """Return the binder tree for ``ident_hash``.

    Runs the ``get-tree-by-uuid-n-version`` statement with the uuid and
    version split out of ``ident_hash`` plus the ``as_collated`` flag.
    If the database hands the tree back as a string it is decoded with
    ``json.loads``; any other value is returned unchanged.

    Raises ``ContentNotFound`` when the query yields no row.
    """
    uuid, version = split_ident_hash(ident_hash)
    cursor.execute(SQL['get-tree-by-uuid-n-version'],
                   (uuid, version, as_collated,))
    try:
        tree = cursor.fetchone()[0]
    except TypeError:  # fetchone() returned None
        raise ContentNotFound()
    # ``type('')`` and ``type(u'')`` are the native and unicode string
    # types; isinstance (rather than an exact type comparison) also
    # accepts subclasses of either.
    if isinstance(tree, (type(''), type(u''))):
        return json.loads(tree)
    return tree
def get_collated_content(ident_hash, context_ident_hash, cursor):
    """Return collated content for ``ident_hash``.

    Executes the ``get-collated-content`` statement with ``ident_hash`` and
    ``context_ident_hash`` and returns the first column of the first row,
    or ``None`` when there is no row.
    """
    params = (ident_hash, context_ident_hash,)
    cursor.execute(SQL['get-collated-content'], params)
    try:
        content = cursor.fetchone()[0]
    except TypeError:  # fetchone() returned None
        content = None
    return content
def get_module_uuid(plpy, moduleid):
    """Return the ``uuid`` of a ``modules`` row with legacy ``moduleid``.

    At most one row is requested; ``None`` is returned when nothing matches.
    """
    plan = plpy.prepare("SELECT uuid FROM modules WHERE moduleid = $1;",
                        ('text',))
    rows = plpy.execute(plan, (moduleid,), 1)
    if not rows:
        return None
    return rows[0]['uuid']
def get_current_module_ident(moduleid, plpy):
    """Return the most recently revised ``module_ident`` for ``moduleid``.

    Note that module_ident is used only for internal database relational
    associations, and is equivalent to a uuid@version for a given document.

    Rows are ordered by ``revised`` descending and only the first one is
    requested; ``None`` is returned when no row matches.
    """
    plan = plpy.prepare('''\
SELECT m.module_ident FROM modules m
WHERE m.moduleid = $1 ORDER BY revised DESC''', ('text',))
    rows = plpy.execute(plan, (moduleid,), 1)
    if not rows:
        return None
    return rows[0]['module_ident']
def get_minor_version(module_ident, plpy):
    """Return the highest stored minor version for ``module_ident``'s major.

    The maximum is taken over every ``modules`` row sharing the uuid and
    major version of ``module_ident``, so the answer is still the latest
    minor version when ``module_ident`` itself is not the latest row.
    """
    plan = plpy.prepare('''\
WITH t AS (
SELECT uuid, major_version
FROM modules
WHERE module_ident = $1
)
SELECT MAX(m.minor_version) AS minor_version
FROM modules m, t
WHERE m.uuid = t.uuid AND m.major_version = t.major_version
''', ('integer',))
    rows = plpy.execute(plan, (module_ident,), 1)
    first_row = rows[0]
    return first_row['minor_version']
def next_version(module_ident, plpy):
    """Return the minor version that follows the current highest one.

    Note potential race condition: the current value is read and
    incremented here without any locking.
    """
    current_minor = get_minor_version(module_ident, plpy)
    following_minor = current_minor + 1
    return following_minor
def get_collections(module_ident, plpy):
    """Yield the ``module_ident`` of each collection containing the module.

    The non-collated tree is walked upwards from the nodes holding
    ``module_ident``. A collection is only reported when it is the most
    recently revised row for its uuid (which may not be the same as what
    is in ``latest_modules``).
    """
    plan = plpy.prepare('''
WITH RECURSIVE t(node, parent, path, document) AS (
SELECT tr.nodeid, tr.parent_id, ARRAY[tr.nodeid], tr.documentid
FROM trees tr
WHERE tr.documentid = $1 and tr.is_collated = 'False'
UNION ALL
SELECT c.nodeid, c.parent_id, path || ARRAY[c.nodeid], c.documentid
FROM trees c JOIN t ON (c.nodeid = t.parent)
WHERE not c.nodeid = ANY(t.path)
),
latest(module_ident) AS (
SELECT module_ident FROM (
SELECT m.module_ident, m.revised,
MAX(m.revised) OVER (PARTITION BY m.uuid) as latest
FROM modules m where m.portal_type = 'Collection'
) r
WHERE r.revised = r.latest
)
SELECT module_ident FROM t, latest
WHERE latest.module_ident = t.document
''', ('integer',))
    rows = plpy.execute(plan, (module_ident,))
    for row in rows:
        yield row['module_ident']
def get_subcols(module_ident, plpy):
    """Yield the ``module_ident`` of each sub-collection above the module.

    The non-collated tree is walked upwards from the nodes holding
    ``module_ident``; every distinct ancestor document whose portal type
    is ``SubCollection`` is yielded, ordered by ``module_ident``.
    """
    plan = plpy.prepare('''
WITH RECURSIVE t(node, parent, path, document) AS (
SELECT tr.nodeid, tr.parent_id, ARRAY[tr.nodeid], tr.documentid
FROM trees tr
WHERE tr.documentid = $1 and tr.is_collated = 'False'
UNION ALL
SELECT c.nodeid, c.parent_id, path || ARRAY[c.nodeid], c.documentid
FROM trees c JOIN t ON (c.nodeid = t.parent)
WHERE not c.nodeid = ANY(t.path)
)
SELECT DISTINCT m.module_ident
FROM t JOIN modules m ON (t.document = m.module_ident)
WHERE m.portal_type = 'SubCollection'
ORDER BY m.module_ident
''', ('integer',))
    rows = plpy.execute(plan, (module_ident,))
    for row in rows:
        yield row['module_ident']
def rebuild_collection_tree(old_collection_ident, new_document_id_map, plpy):
    """Copy the tree of ``old_collection_ident`` into new ``trees`` rows.

    Every node of the old non-collated tree is inserted again, a parent
    always before its children. A node's document id is swapped for the
    value in ``new_document_id_map`` when the map has an entry for it and
    is kept as-is otherwise.
    """
    select_plan = plpy.prepare('''
WITH RECURSIVE t(node, parent, document, title, childorder, latest, path)
AS (SELECT tr.nodeid, tr.parent_id, tr.documentid, tr.title,
tr.childorder, tr.latest, ARRAY[tr.nodeid]
FROM trees tr
WHERE tr.documentid = $1 AND tr.is_collated = 'False'
UNION ALL
SELECT c.nodeid, c.parent_id, c.documentid, c.title,
c.childorder, c.latest, path || ARRAY[c.nodeid]
FROM trees c JOIN t ON (c.parent_id = t.node)
WHERE not c.nodeid = ANY(t.path)
)
SELECT * FROM t
''', ('integer',))

    rows_by_node = {}     # old nodeid -> row of the old tree
    child_nodes = {}      # old parent nodeid (None for the root) -> children
    for row in plpy.execute(select_plan, (old_collection_ident,)):
        rows_by_node[row['node']] = row
        child_nodes.setdefault(row['parent'], []).append(row['node'])

    insert_plan = plpy.prepare('''
INSERT INTO trees (nodeid, parent_id, documentid,
title, childorder, latest)
VALUES (DEFAULT, $1, $2, $3, $4, $5)
RETURNING nodeid
''', ('integer', 'integer', 'text', 'integer', 'boolean'))

    def copy_subtree(old_nodeid, new_parent_id):
        """Insert a copy of ``old_nodeid``, then copies of its children."""
        row = rows_by_node[old_nodeid]
        old_document = row['document']
        values = [
            new_parent_id,
            new_document_id_map.get(old_document, old_document),
            row['title'],
            row['childorder'],
            row['latest'],
        ]
        new_nodeid = plpy.execute(insert_plan, values, 1)[0]['nodeid']
        for child in child_nodes.get(old_nodeid, []):
            copy_subtree(child, new_nodeid)

    # The root is the first node reported without a parent.
    copy_subtree(child_nodes[None][0], None)
def republish_collection(submitter, submitlog, next_minor_version,
                         collection_ident, plpy, revised=None):
    """Insert a copy of ``collection_ident``'s row with a new minor version.

    The new ``modules`` row takes its columns from the existing row, except
    for ``submitter``, ``submitlog``, ``minor_version`` and ``revised``.
    ``revised`` is set to ``CURRENT_TIMESTAMP`` unless a value is passed in.
    Keywords, tags and files (other than ``collection.xml``) of the old row
    are then associated with the new one.

    Returns the module_ident of the row inserted.
    """
    insert_template = '''
INSERT INTO modules (portal_type, moduleid, uuid, version, name, created,
revised, abstractid, licenseid, doctype, submitter, submitlog,
stateid, parent, language, authors, maintainers, licensors,
parentauthors, google_analytics, buylink, print_style,
major_version, minor_version)
SELECT m.portal_type, m.moduleid, m.uuid, m.version, m.name, m.created,
{}, m.abstractid, m.licenseid, m.doctype, $3, $4,
m.stateid, m.parent, m.language, m.authors, m.maintainers, m.licensors,
m.parentauthors, m.google_analytics, m.buylink, m.print_style,
m.major_version, $1
FROM modules m
WHERE m.module_ident = $2
RETURNING module_ident
'''
    types = ['integer', 'integer', 'text', 'text']
    params = [next_minor_version, collection_ident, submitter, submitlog]
    if revised is None:
        revised_expression = 'CURRENT_TIMESTAMP'
    else:
        # An explicit timestamp travels as a fifth statement parameter.
        revised_expression = '$5'
        types.append('timestamp')
        params.append(revised)

    plan = plpy.prepare(insert_template.format(revised_expression),
                        tuple(types))
    new_ident = plpy.execute(plan, tuple(params), 1)[0]['module_ident']

    # Each statement copies one kind of association from the old row
    # ($2) to the new row ($1).
    copy_statements = (
        """\
INSERT INTO modulekeywords (module_ident, keywordid)
SELECT $1, keywordid
FROM modulekeywords
WHERE module_ident = $2""",
        """\
INSERT INTO moduletags (module_ident, tagid)
SELECT $1, tagid
FROM moduletags
WHERE module_ident = $2""",
        """\
INSERT INTO module_files (module_ident, fileid, filename)
SELECT $1, fileid, filename
FROM module_files
WHERE module_ident = $2 and filename != 'collection.xml'""",
    )
    for statement in copy_statements:
        plan = plpy.prepare(statement, ('integer', 'integer'))
        plpy.execute(plan, (new_ident, collection_ident,))
    return new_ident
def set_version(portal_type, legacy_version, td):
    """Derive ``major_version``/``minor_version`` from a legacy version.

    ``legacy_version`` is a ``"<major>.<minor>"`` string and ``td['new']``
    is updated in place. Returns ``'MODIFY'`` for collections and modules
    and ``'OK'`` (row untouched) for any other portal type.
    """
    legacy_major, legacy_minor = legacy_version.split('.')

    if portal_type == 'Collection':
        # For collections, both major and minor need to be set.
        new_row = td['new']
        new_row['major_version'] = int(legacy_minor)
        if new_row['minor_version'] is None:
            new_row['minor_version'] = 1
        return 'MODIFY'

    if portal_type == 'Module':
        # For modules, major should be set and minor should be None.
        # N.B. a very few older modules had major=2 and minor zero-based.
        # Adding (legacy major - 1) bumps the result by one for those.
        new_row = td['new']
        new_row['major_version'] = int(legacy_minor) + (int(legacy_major) - 1)
        new_row['minor_version'] = None
        return 'MODIFY'

    return 'OK'
def republish_module(td, plpy):
    """When a module is republished, create new minor versions of collections.

    All collections (including subcollections) that contain this module
    need a minor update.

    e.g. there is a collection c1 v2.1, which contains a chapter sc1 v2.1,
    which contains a module m1 v3. When m1 is updated, there is a new row
    in the modules table with m1 v4.

    This increments the minor versions of c1 and sc1, giving c1 v2.2 and
    sc1 v2.2. Another chapter sc2, not containing m1, stays at v2.1.

    A collection tree is then created for c1 v2.2 which is exactly the same
    as that of c1 v2.1, but with m1 v4 instead of m1 v3, and sc1 v2.2 and
    c1 v2.2 instead of sc1 v2.1 and c1 v2.1.

    Returns ``'MODIFY'`` when ``td['new']`` was changed, otherwise ``'OK'``.
    """
    new_row = td['new']
    portal_type = new_row['portal_type']
    moduleid = new_row['moduleid']
    submitter = new_row['submitter']
    submitlog = new_row['submitlog']

    modified = set_version(portal_type, new_row['version'], td)

    current_module_ident = get_current_module_ident(moduleid, plpy)
    if not current_module_ident:
        # Nothing to do if the module/collection is new.
        return modified

    # Override the autogenerated uuid to keep it constant per moduleid.
    new_row['uuid'] = get_module_uuid(plpy, moduleid)
    modified = 'MODIFY'

    if portal_type != 'Module':
        # Nothing else to do if something else is being published.
        return modified

    # A module is being republished: old module_ident -> new module_ident.
    replace_map = {current_module_ident: new_row['module_ident']}

    # Republish every sub-collection the module is nested in and add it to
    # the map. The map covers all sub-collections, regardless of which
    # collection they belong to.
    for subcol_ident in get_subcols(current_module_ident, plpy):
        replace_map[subcol_ident] = republish_collection(
            submitter, submitlog, next_version(subcol_ident, plpy),
            subcol_ident, plpy)

    # Now republish each collection containing the module and rebuild its
    # tree using the replacements gathered so far.
    for collection_ident in get_collections(current_module_ident, plpy):
        replace_map[collection_ident] = republish_collection(
            submitter, submitlog, next_version(collection_ident, plpy),
            collection_ident, plpy)
        rebuild_collection_tree(collection_ident, replace_map, plpy)

    return modified
def republish_module_trigger(plpy, td):
    """Trigger called from postgres database when republishing a module.

    When a module is republished, the versions of the collections that it
    is part of need to be updated (a minor update).

    e.g. there is a collection c1 v2.1, which contains module m1 v3.
    m1 is updated, so there is a new row in the modules table with m1 v4.
    This trigger increments the minor version of c1, giving c1 v2.2.
    A collection tree is created for c1 v2.2 which is exactly the same as
    that of c1 v2.1, but with m1 v4 instead of m1 v3, and c1 v2.2 instead
    of c1 v2.1.
    """
    new_row = td['new']
    # Legacy always supplies the version; without one this is not a legacy
    # publication and the trigger does not apply.
    if new_row['version'] is None:
        return "OK"

    plpy.log('Trigger fired on %s' % (new_row['moduleid'],))
    modified = republish_module(td, plpy)
    plpy.log('modified: {}'.format(modified))

    field_lines = ['{}: {}'.format(key, value)
                   for key, value in new_row.items()]
    plpy.log('insert values:\n{}\n'.format('\n'.join(field_lines)))
    return modified
def assign_moduleid_default_trigger(plpy, td):
    """Trigger to fill in legacy ``moduleid`` when publishing.

    This correctly assigns ``moduleid`` value to
    cnx-publishing publications. This does NOT include
    matching the ``moduleid`` to previous revision by way of ``uuid``.

    This correctly updates the sequence values when a legacy publication
    specifies the ``moduleid`` value. This is because legacy does not know
    about nor use the sequence values when setting legacy ``moduleid``.

    Returns ``"MODIFY"`` when a ``moduleid`` was assigned to ``td['new']``
    and ``"OK"`` otherwise.
    """
    modified_state = "OK"
    new_row = td['new']
    portal_type = new_row['portal_type']
    uuid = new_row['uuid']
    moduleid = new_row['moduleid']
    version = new_row['version']

    # Is this an insert from legacy? Legacy always supplies the version.
    is_legacy_publication = version is not None

    if moduleid is None:
        # If the moduleid is not supplied, it is a new publication.
        if portal_type in ("Collection", "SubCollection"):
            prefix, sequence_name = 'col', "collectionid_seq"
        else:
            prefix, sequence_name = 'm', "moduleid_seq"
        plan = plpy.prepare("SELECT $1 || nextval($2)::text AS moduleid",
                            ['text', 'text'])
        row = plpy.execute(plan, (prefix, sequence_name,), 1)
        moduleid = row[0]['moduleid']
        modified_state = "MODIFY"
        new_row['moduleid'] = moduleid
    elif is_legacy_publication:
        # Set the sequence value based on what legacy gave us.
        # Per portal type: sequence name, 1-based position of the first
        # digit in the moduleid ('col...' -> 4, 'm...' -> 2), and the two
        # portal types that draw their ids from that sequence.
        sequence_args = {
            'Collection': ['collectionid_seq', 4,
                           'Collection', 'SubCollection'],
            'Module': ['moduleid_seq', 2,
                       'Module', 'CompositeModule'],
        }.get(portal_type)
        if sequence_args is not None:
            # Five parameters: the incoming moduleid is $5. It used to be
            # written as $4 with only four declared types while five
            # arguments were collected, so the statement was never run.
            plan = plpy.prepare("""\
SELECT setval($1, max(substr(moduleid, $2)::int))
FROM (
SELECT moduleid from modules where portal_type in ($3,$4)
UNION ALL
SELECT $5) AS all_together""", ['text', 'int', 'text', 'text', 'text'])
            plpy.execute(plan, sequence_args + [moduleid])

    plpy.log("Fixed identifier and version for publication at '{}' "
             "with the following values: {} and {}"
             .format(uuid, moduleid, version))
    return modified_state
def assign_version_default_trigger(plpy, td):
    """Trigger to fill in legacy data fields.

    A compatibility trigger to fill in legacy data fields that are not
    populated when inserting publications from cnx-publishing.

    If this is not a legacy publication the ``version`` will be set
    based on the ``major_version`` value.

    Returns ``"MODIFY"`` when ``td['new']`` was changed, else ``"OK"``.
    """
    new_row = td['new']
    portal_type = new_row['portal_type']
    version = new_row['version']
    minor_version = new_row['minor_version']
    changed = False

    # Collections need a minor version; the None/Null default is only
    # correct for modules.
    is_collection = portal_type in ('Collection', 'SubCollection')
    if is_collection and minor_version is None:
        new_row['minor_version'] = 1
        changed = True

    # Derive the legacy version field from the major version.
    if version is None:
        new_row['version'] = "1.{}".format(new_row['major_version'])
        changed = True

    return "MODIFY" if changed else "OK"
def assign_document_controls_default_trigger(plpy, td):
    """Trigger to fill in document_controls when legacy publishes.

    A compatibility trigger to fill in ``uuid`` and ``licenseid`` columns
    of the ``document_controls`` table that are not
    populated when inserting publications from legacy.

    This uuid default is not on ``modules.uuid`` column itself,
    because the value needs to be loosely associated
    with the ``document_controls`` entry
    to prevent uuid collisions and bridge the pending publications gap.

    Returns ``"MODIFY"`` when a uuid was assigned, else ``"OK"``.
    """
    new_row = td['new']
    # A uuid is already present: not treated as a legacy publication.
    if new_row['uuid'] is not None:
        return "OK"

    plan = plpy.prepare("""\
INSERT INTO document_controls (uuid, licenseid) VALUES (DEFAULT, $1)
RETURNING uuid""", ('integer',))
    inserted = plpy.execute(plan, (new_row['licenseid'],))
    new_row['uuid'] = inserted[0]['uuid']
    return "MODIFY"
def upsert_document_acl_trigger(plpy, td):
    """Trigger for filling in acls when legacy publishes.

    A compatibility trigger to upsert authorization control entries (ACEs)
    for legacy publications: every author and maintainer of the new row
    that does not yet have a ``publish`` entry in ``document_acl`` for the
    row's uuid gets one.

    The row itself is never changed, so ``"OK"`` is always returned.
    """
    modified_state = "OK"
    new_row = td['new']
    uuid_ = new_row['uuid']
    authors = new_row['authors'] or []
    maintainers = new_row['maintainers'] or []

    # Legacy always supplies the version; nothing to do otherwise.
    if new_row['version'] is None:
        return modified_state

    # All authors and maintainers are to be given publish permission.
    permissibles = set()
    for group in (authors, maintainers):
        for uid in group:
            permissibles.add((uid, 'publish',))

    plan = plpy.prepare("""\
SELECT user_id, permission FROM document_acl WHERE uuid = $1""",
                        ['uuid'])
    existing_permissibles = set()
    for row in plpy.execute(plan, (uuid_,)):
        existing_permissibles.add((row['user_id'], row['permission'],))

    # Sorted so that the inserts happen in a reproducible order.
    new_permissibles = sorted(permissibles - existing_permissibles)
    if new_permissibles:
        # The statement is the same for every entry: prepare it once.
        insert_plan = plpy.prepare("""\
INSERT INTO document_acl (uuid, user_id, permission)
VALUES ($1, $2, $3)""", ['uuid', 'text', 'permission_type'])
        for uid, permission in new_permissibles:
            plpy.execute(insert_plan, (uuid_, uid, permission,))
    return modified_state
def upsert_users_from_legacy_publication_trigger(plpy, td):
    """A compatibility trigger to upsert users from legacy persons table.

    For a legacy publication, every author, maintainer and licensor of the
    new row that has no ``users`` entry yet gets one, copied from the
    ``persons`` row with the same id.

    The row itself is never changed, so ``"OK"`` is always returned.
    """
    modified_state = "OK"
    new_row = td['new']
    authors = new_row['authors'] or []
    maintainers = new_row['maintainers'] or []
    licensors = new_row['licensors'] or []

    # Legacy always supplies the version; nothing to do otherwise.
    if new_row['version'] is None:
        return modified_state

    # Collect every role holder once, in a reproducible order.
    users = set()
    for group in (authors, maintainers, licensors):
        users.update(group)
    users = sorted(users)

    plan = plpy.prepare("""\
SELECT username FROM users WHERE username = any($1)""",
                        ['text[]'])
    existing_users = set()
    for row in plpy.execute(plan, (users,)):
        existing_users.add(row['username'])

    new_users = [name for name in users if name not in existing_users]
    if new_users:
        # The statement is the same for every user: prepare it once.
        insert_plan = plpy.prepare("""\
INSERT INTO users (username, first_name, last_name, full_name, title)
SELECT personid, firstname, surname, fullname, honorific
FROM persons where personid = $1""", ['text'])
        for username in new_users:
            plpy.execute(insert_plan, (username,))
    return modified_state
def insert_users_for_optional_roles_trigger(plpy, td):
    """Trigger to update users from optional roles entries.

    A compatibility trigger to insert users from moduleoptionalroles
    records. This is primarily for legacy compatibility, but it is not
    possible to tell whether the entry came from legacy or cnx-publishing.
    Therefore, we only insert into users.

    Every id in the new row's ``personids`` that has no ``users`` entry
    yet gets one, copied from the ``persons`` row with the same id.
    The row itself is never changed, so ``"OK"`` is always returned.
    """
    modified_state = "OK"
    users = td['new']['personids'] or []

    plan = plpy.prepare("""\
SELECT username FROM users WHERE username = any($1)""",
                        ['text[]'])
    existing_users = set()
    for row in plpy.execute(plan, (users,)):
        existing_users.add(row['username'])

    # Sorted so that the inserts happen in a reproducible order.
    new_users = sorted(set(users) - existing_users)
    if new_users:
        # The statement is the same for every user: prepare it once.
        insert_plan = plpy.prepare("""\
INSERT INTO users (username, first_name, last_name, full_name, title)
SELECT personid, firstname, surname, fullname, honorific
FROM persons where personid = $1""", ['text'])
        for username in new_users:
            plpy.execute(insert_plan, (username,))
    return modified_state
def add_module_file(plpy, td):
    """Database trigger for adding a module file.

    When a legacy ``index.cnxml`` is added, this trigger
    transforms it into html and stores it as ``index.cnxml.html``,
    unless a file of that name already exists for the module.

    When a cnx-publishing ``index.cnxml.html`` is added, this trigger
    transforms it into cnxml and stores it as ``index.html.cnxml``,
    unless ``index.html.cnxml`` or ``index.cnxml`` already exists for the
    module.

    Note, we do not use ``index.html`` over ``index.cnxml.html``, because
    legacy allows users to name files ``index.html``.

    For either of those two file names the module's abstract is
    transformed as well; any other file name is ignored.
    """
    module_ident = td['new']['module_ident']
    filename = td['new']['filename']
    msg = "produce {}->{} for module_ident = {}"

    def any_file_exists(filenames):
        """Tell whether the module already has a file named in filenames."""
        stmt = plpy.prepare("""\
SELECT TRUE AS exists FROM module_files
WHERE filename = $1 AND module_ident = $2""", ['text', 'integer'])
        for name in filenames:
            rows = plpy.execute(stmt, [name, module_ident])
            if rows and rows[0]['exists']:
                return True
        return False

    # Stays None when the transform target already exists.
    producer_func = None
    if filename == 'index.cnxml':
        # Transform content to html.
        new_filenames = ('index.cnxml.html',)
        if not any_file_exists(new_filenames):
            msg = msg.format('cnxml', 'html', module_ident)
            producer_func = produce_html_for_module
    elif filename == 'index.cnxml.html':
        # Transform content to cnxml.
        new_filenames = ('index.html.cnxml', 'index.cnxml',)
        if not any_file_exists(new_filenames):
            msg = msg.format('html', 'cnxml', module_ident)
            producer_func = produce_cnxml_for_module
    else:
        # Not one of the special named files.
        return  # skip

    if producer_func is not None:
        # Only announce a transform that is actually going to happen;
        # otherwise ``msg`` is still the unformatted template.
        plpy.info(msg)
        producer_func(plpy,
                      module_ident,
                      source_filename=filename,
                      destination_filenames=new_filenames)

    _transform_abstract(plpy, module_ident)
    return
def _transform_abstract(plpy, module_ident):
"""Transform abstract, bi-directionally.
Transforms an abstract using one of content columns
('abstract' or 'html') to determine which direction the transform
will go (cnxml->html or html->cnxml).
A transform is done on either one of them to make
the other value. If no value is supplied, the trigger raises an error.
If both values are supplied, the trigger will skip.
"""
plan = plpy.prepare("""\
SELECT a.abstractid, a.abstract, a.html
FROM modules AS m NATURAL JOIN abstracts AS a
WHERE m.module_ident = $1""", ('integer',))
result = plpy.execute(plan, (module_ident,), 1)[0]
abstractid, cnxml, html = (
result['abstractid'], result['abstract'], result['html'])
if cnxml is not None and html is not None:
return # skip
# TODO Prevent blank abstracts (abstract = null & html = null).
msg = "produce {}->{} for abstractid={}"
if cnxml is None:
# Transform html->cnxml
msg = msg.format('html', 'cnxml', abstractid)
content = html
column = 'abstract'
transform_func = transform_abstract_to_cnxml
else:
# Transform cnxml->html
msg = msg.format('cnxml', 'html', abstractid)
content = cnxml
column = 'html'
transform_func = transform_abstract_to_html
content, messages = transform_func(content, module_ident, plpy)
plan = plpy.prepare(
"UPDATE abstracts SET {} = $1 WHERE abstractid = $2".format(column),
('text', 'integer'))
plpy.execute(plan, (content, abstractid,))
return msg
def get_collection_tree(collection_ident, cursor):
    """Yield one row per document in the tree of ``collection_ident``.

    The non-collated tree is walked downwards from the nodes holding
    ``collection_ident``. Each row yielded carries the document's
    ``module_ident`` followed by its ``portal_type``.
    """
    cursor.execute('''
WITH RECURSIVE t(node, parent, document, path) AS (
SELECT tr.nodeid, tr.parent_id, tr.documentid, ARRAY[tr.nodeid]
FROM trees tr
WHERE tr.documentid = %s and tr.is_collated = 'False'
UNION ALL
SELECT c.nodeid, c.parent_id, c.documentid, path || ARRAY[c.nodeid]
FROM trees c JOIN t ON c.parent_id = t.node
WHERE NOT c.nodeid = ANY(t.path)
)
SELECT t.document, m.portal_type
FROM t JOIN modules m
ON t.document = m.module_ident''', [collection_ident])
    rows = cursor.fetchall()
    for row in rows:
        yield row
def get_module_can_publish(cursor, id):
    """Return the user ids holding ``publish`` permission on uuid ``id``.

    The distinct ``user_id`` values are read from ``document_acl`` and
    returned as a list.
    """
    query = """
SELECT DISTINCT user_id
FROM document_acl
WHERE uuid = %s AND permission = 'publish'"""
    cursor.execute(query, (id,))
    rows = cursor.fetchall()
    return [row[0] for row in rows]