From de800fa4e243b7e3b9e895be778364ff0e5dff5f Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Thu, 9 Dec 2021 13:31:36 -0600 Subject: [PATCH] initial try for binder setup --- binder/PORT | 1 + binder/VERSION | 1 + binder/environment.yml | 16 ++ binder/init | 13 + binder/mongo | 8 + binder/postBuild | 14 ++ notebooks/MongoClient.ipynb | 463 ++++++++++++++++++++++++++++++++++++ 7 files changed, 516 insertions(+) create mode 100644 binder/PORT create mode 100644 binder/VERSION create mode 100644 binder/environment.yml create mode 100644 binder/init create mode 100644 binder/mongo create mode 100644 binder/postBuild create mode 100644 notebooks/MongoClient.ipynb diff --git a/binder/PORT b/binder/PORT new file mode 100644 index 000000000..b1dc6500a --- /dev/null +++ b/binder/PORT @@ -0,0 +1 @@ +27017 \ No newline at end of file diff --git a/binder/VERSION b/binder/VERSION new file mode 100644 index 000000000..64b5ae393 --- /dev/null +++ b/binder/VERSION @@ -0,0 +1 @@ +4.4.0 \ No newline at end of file diff --git a/binder/environment.yml b/binder/environment.yml new file mode 100644 index 000000000..6374bd498 --- /dev/null +++ b/binder/environment.yml @@ -0,0 +1,16 @@ +name: pymongo + +channels: + - conda-forge + - nodefaults + +dependencies: + # runtime dependencies + - python >=3.6,<3.11.0 + - jupyterlab >=3.0.0,<4.0.0 + - pip + - pymongo >=4 + - mongodb + - nodejs + - pip: + - mtools \ No newline at end of file diff --git a/binder/init b/binder/init new file mode 100644 index 000000000..c381fca4e --- /dev/null +++ b/binder/init @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# Start a 3-node replica set (repl0) with mlaunch, then open a mongo shell on it. +# Run from the binder/ directory so the relative VERSION and PORT files resolve. +VERSION=$(cat VERSION) +PORT=$(cat PORT) +MONGODB_BIN=$(m bin "$VERSION") +mlaunch init --replicaset --name repl0 --nodes 3 --binarypath \ +"$MONGODB_BIN" --port "$PORT" --hostname localhost --setParameter \ +enableTestCommands=1 +# Connect via the m-managed shell binary; $CMD is left unquoted so it word-splits. +CMD="$MONGODB_BIN/mongo mongodb://localhost:$PORT/?replicaSet=repl0" +echo "$CMD" +$CMD \ No newline at end of file diff
--git a/binder/mongo b/binder/mongo new file mode 100644 index 000000000..37e43d292 --- /dev/null +++ b/binder/mongo @@ -0,0 +1,8 @@ +VERSION=`cat VERSION` +PORT=`cat PORT` +MONGODB_BIN=`m bin $VERSION` +CMD="$MONGODB_BIN/mongo --ssl --sslCAFile \ +--sslPEMKeyFile \ +mongodb://localhost:$PORT/?replicaSet=repl0" +echo "$CMD" +$CMD diff --git a/binder/postBuild b/binder/postBuild new file mode 100644 index 000000000..5786d6827 --- /dev/null +++ b/binder/postBuild @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +set -eux + +npm install -g m + +pip install -e . + +m 4.4.0 + +mkdir 440_psa_tls + +cd binder + +./init & \ No newline at end of file diff --git a/notebooks/MongoClient.ipynb b/notebooks/MongoClient.ipynb new file mode 100644 index 000000000..d5a928c9d --- /dev/null +++ b/notebooks/MongoClient.ipynb @@ -0,0 +1,463 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "

MongoClient

" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "········\n" + ] + } + ], + "source": [ + "from pymongo import MongoClient\n", + "from getpass import getpass\n", + "uri = \"mongodb+srv://silvester@mflix.qu46j.mongodb.net/test\"\n", + "password = getpass()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "notes" + } + }, + "source": [ + "The MongoClient constructor accepts many different arguments to configure how the driver connects to MongoDB and how many operations will be performed. We'll look at the most basic configuration first, which is passing the SRV string of our Atlas cluster to MongoClient." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "client = MongoClient(uri, password=password)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Database(MongoClient(host=['mflix-shard-00-00.qu46j.mongodb.net:27017', 'mflix-shard-00-01.qu46j.mongodb.net:27017', 'mflix-shard-00-02.qu46j.mongodb.net:27017'], document_class=dict, tz_aware=False, connect=True, authsource='admin', replicaset='atlas-13a6ky-shard-0', ssl=True), 'stats')" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "client.stats" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "notes" + } + }, + "source": [ + "Note that because we're using an Atlas SRV string, we got an SSL connection for free! It also defaults the **authSource** to the **admin** database.\n", + "\n", + "Now that we've connected to our **mongod**, we can create a database handle. 
Let's look at the available databases." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['sample_airbnb',\n", + " 'sample_analytics',\n", + " 'sample_geospatial',\n", + " 'sample_mflix',\n", + " 'sample_restaurants',\n", + " 'sample_supplies',\n", + " 'sample_training',\n", + " 'sample_weatherdata',\n", + " 'admin',\n", + " 'local']" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "client.list_database_names()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "notes" + } + }, + "source": [ + "Let's use the **sample_mflix** database. One useful property of a MongoClient object is we can use property accessors" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['sessions', 'comments', 'movies', 'theaters', 'users']" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mflix = client.sample_mflix\n", + "mflix.list_collection_names()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "notes" + } + }, + "source": [ + "or we can use dictionary accessors" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['sessions', 'comments', 'movies', 'theaters', 'users']" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mflix = client['sample_mflix']\n", + "mflix.list_collection_names()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "notes" + } + }, + "source": [ + "Now that we have a 
database object and have listed available collections, let's create a collection object. As with the database object, we can use either property or dictionary accessors." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "movies = mflix.movies" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "notes" + } + }, + "source": [ + "And let's perform a query on our movies collection. We'll just get the count of documents in the collection." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "scrolled": true, + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "23530" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies.count_documents({})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " {\n", + " \"title\": \"Roadracers\"\n", + " },\n", + " {\n", + " \"title\": \"Midaq Alley\"\n", + " },\n", + " {\n", + " \"title\": \"Desperado\"\n", + " },\n", + " {\n", + " \"title\": \"Fools Rush In\"\n", + " },\n", + " {\n", + " \"title\": \"The Hunchback\"\n", + " },\n", + " {\n", + " \"title\": \"54\"\n", + " },\n", + " {\n", + " \"title\": \"Frida\"\n", + " },\n", + " {\n", + " \"title\": \"Wild Wild West\"\n", + " },\n", + " {\n", + " \"title\": \"No One Writes to the Colonel\"\n", + " },\n", + " {\n", + " \"title\": \"54\"\n", + " },\n", + " {\n", + " \"title\": \"In the Time of the Butterflies\"\n", + " },\n", + " {\n", + " \"title\": \"Once Upon a Time in Mexico\"\n", + " },\n", + " {\n", + " \"title\": \"After the Sunset\"\n", + " },\n", + " {\n", + " 
\"title\": \"Ask the Dust\"\n", + " },\n", + " {\n", + " \"title\": \"Lonely Hearts\"\n", + " },\n", + " {\n", + " \"title\": \"Puss in Boots\"\n", + " },\n", + " {\n", + " \"title\": \"The Prophet\"\n", + " },\n", + " {\n", + " \"title\": \"Here Comes the Boom\"\n", + " },\n", + " {\n", + " \"title\": \"Here Comes the Boom\"\n", + " },\n", + " {\n", + " \"title\": \"As Luck Would Have It\"\n", + " },\n", + " {\n", + " \"title\": \"Tale of Tales\"\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "cursor = movies.find( { \"cast\": \"Salma Hayek\"}, { \"title\": 1, \"_id\": 0 })\n", + "import bson\n", + "from bson.json_util import dumps\n", + "print(dumps(cursor, indent=2))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "notes" + } + }, + "source": [ + "The MongoClient constructor also accepts many optional keyword parameters. We can set the maximum connection pool size, default read and write concerns, whether to retry writes, SSL configuration, authentication, and much more.\n", + "\n", + "A full list and how to use MongoClient for more advanced use cases is available [here](http://api.mongodb.com/python/current/api/pymongo/mongo_client.html)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Here is an example setting the **connectTimeoutMS** to 200 milliseconds, how long the driver will attempt to connect before erroring, and setting **retryWrites** to True, signaling to the driver to retry a write in the event of a network error."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "client = MongoClient(uri, password=password, connectTimeoutMS=200, retryWrites=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "client.stats" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Summary\n", + "\n", + "* MongoClient accepts many optional keyword arguments to fine-tune your connection.\n", + "* After instantiating the client, database handles can be created via property or dictionary accessors on the client object.\n", + "* Collection handles are referenced from the database object.\n", + "* Collection-specific operations like querying or updating documents are performed on the collection object." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}