class Manager:
    """
    Registry of migration functions, plus the target version that
    documents should be migrated to.
    """

    # key under which a document stores its version
    version_attribute_name = 'version'
    # every function passed to ``register``; lives on the class, so it is
    # shared by all instances
    _upgrade_funcs = set()

    def __init__(self, target_version):
        self.target_version = target_version

    @classmethod
    def register(cls, func):
        """
        Decorator for migration functions.

        Runs ``cls._add_version_info(func)``, adds ``func`` to the
        class-level set of upgrade functions, and returns ``func`` so
        the decorated name stays bound to it.
        """
        cls._add_version_info(func)
        cls._upgrade_funcs.add(func)
        return func
Implementation (2)
class Manager:
    def migrate_doc(self, doc):
        """
        Bring ``doc`` from the version it records up (or down) to
        ``self.target_version`` and return it.

        The starting version is read from ``doc`` under
        ``self.version_attribute_name``; a doc without that key is
        treated as version 0. Each migration function is invoked as
        ``func(self, doc)``, after which the doc's version is set to
        ``func.target``. The doc is modified in place and the same
        object is returned.
        """
        start = doc.get(self.version_attribute_name, 0)
        for step in self._get_migrate_funcs(start, self.target_version):
            step(self, doc)
            doc[self.version_attribute_name] = step.target
        return doc

    @classmethod
    def _get_migrate_funcs(cls, orig_version, target_version):
        """
        Return a lazy iterator of migration functions leading from
        ``orig_version`` to ``target_version``.

        Versions are walked one at a time, counting up when the target
        is greater than the origin and down otherwise. Each adjacent
        (from, to) pair is resolved with ``cls._get_func(from, to)``.
        """
        if target_version > orig_version:
            step = 1
        else:
            step = -1
        # the stop value is pushed one step further so that the range
        # includes target_version itself
        stops = range(orig_version, target_version + step, step)
        return itertools.starmap(cls._get_func, recipes.pairwise(stops))
// Step 4: Tag each shard key range.
// Each region gets a single range covering every survey_id
// (MinKey through MaxKey), tagged with the region's own name.
["EMEA", "US"].forEach(function (region) {
    sh.addTagRange(
        "yougov.interviews",                        // collection namespace
        {"region": region, "survey_id": MinKey},    // min value
        {"region": region, "survey_id": MaxKey},    // max value
        region                                      // tag
    );
});
(Mongo) Database-as-a-Service
mongos instance running on each app server
applications connect to mongodb://localhost/
Read Preference "nearest"
provides low-latency reads when using geographically distributed replica sets
from pymongo import MongoClient, ReadPreference
# Option 1: pass the read preference when constructing the client
mongo = MongoClient(
    host='localhost',
    read_preference=ReadPreference.NEAREST,
)
# Option 2: build a plain client and request the read preference only
# for one database
mongo = MongoClient(host='localhost')
db = mongo.get_database(
    'dragoman',
    read_preference=ReadPreference.NEAREST,
)
Database Creation
def create_db_in_shard(db_name, shard, client=None):
    """
    In a sharded cluster, create a database in a particular shard.

    :param db_name: name of the database to create; it must not already
        be listed in ``config.databases``.
    :param shard: id of the shard that should hold the database; it must
        be listed in ``config.shards``.
    :param client: client used for all commands. When omitted (or
        falsy), a default ``pymongo.MongoClient()`` is created for the
        duration of the call and closed before returning; a client
        supplied by the caller is never closed here.
    :returns: a success message naming the database and shard.
    :raises RuntimeError: if ``flushRouterConfig`` or ``movePrimary``
        reports a result without a truthy ``ok``.
    :raises ValueError: if the shard is unknown, the database already
        exists, or the new database still has collections after the
        placeholder collection is dropped.
    """
    # Only a client opened here is closed here; closing one passed in by
    # the caller would break the caller's later use of it.
    owns_client = not client
    if owns_client:
        client = pymongo.MongoClient()
    try:
        # flush the router config to ensure it's not stale
        res = client.admin.command('flushRouterConfig')
        if not res.get('ok'):
            raise RuntimeError("unable to flush router config")
        # NOTE(review): get_ids is defined elsewhere; presumably it
        # yields the _id of each document in the collection - confirm.
        if shard not in get_ids(client.config.shards):
            # NOTE(review): nf appears to format from the caller's
            # namespace, so it must be called directly in this frame
            # with ``shard`` and ``db_name`` as locals - confirm.
            raise ValueError(nf("Unknown shard {shard}"))
        if db_name in get_ids(client.config.databases):
            raise ValueError("database already exists")
        # MongoDB doesn't have a 'create database' command, so insert an
        # item into a collection and then drop the collection.
        client[db_name].foo.insert({'foo': 1})
        client[db_name].foo.drop()
        if client[db_name].collection_names():
            raise ValueError("database has collections")
        res = client.admin.command('movePrimary', value=db_name, to=shard)
        if not res.get('ok'):
            raise RuntimeError(str(res))
        # ``hostname`` is not a local of this function; presumably nf
        # also resolves module-level names - TODO confirm.
        return nf("Successfully created {db_name} in {shard} via {hostname}")
    finally:
        if owns_client:
            client.close()
Multi-Dimensional Partitioning
still partition for archival
managed programmatically
seek additional layers of partitioning
Partitioning Motives
locality
distribution (size)
distribution (performance)
storage
Predictions
migrate with whimsy
all apps in global cluster
Wishes
Supply shard hint, independent from query SERVER-11991
Generalized custom shard logic, independent from data in records
Oplog-only replica for creating new replicas SERVER-14539
Embedded solution (sqlite)
Conclusions
If you have dynamically changing data, choose a schemaless database. Take advantage of the schemaless aspects and migrate with whimsy.
When your company is successful and international, use tag-aware sharding to achieve partitioning of your data without losing control, but let MongoDB manage the abstraction.