2. HTTP using stdlib

2.1. http.HTTPStatus

2.1.1. Using statuses

from http import HTTPStatus

HTTPStatus.OK
HTTPStatus.OK == 200

HTTPStatus.OK.value
HTTPStatus.OK.phrase
HTTPStatus.OK.description

list(HTTPStatus)

2.1.2. Most common statuses

from http import HTTPStatus


HTTPStatus.OK                       # 200
HTTPStatus.CREATED                  # 201
HTTPStatus.MOVED_PERMANENTLY        # 301
HTTPStatus.FOUND                    # 302
HTTPStatus.BAD_REQUEST              # 400
HTTPStatus.UNAUTHORIZED             # 401
HTTPStatus.FORBIDDEN                # 403
HTTPStatus.METHOD_NOT_ALLOWED       # 405
HTTPStatus.NOT_FOUND                # 404
HTTPStatus.INTERNAL_SERVER_ERROR    # 500

2.1.3. All statuses

Table 2.12. http.HTTPStatus

Code

Description

100

CONTINUE

101

SWITCHING_PROTOCOLS

102

PROCESSING

200

OK

201

CREATED

202

ACCEPTED

203

NON_AUTHORITATIVE_INFORMATION

204

NO_CONTENT

205

RESET_CONTENT

206

PARTIAL_CONTENT

207

MULTI_STATUS

208

ALREADY_REPORTED

226

IM_USED

300

MULTIPLE_CHOICES

301

MOVED_PERMANENTLY

302

FOUND

303

SEE_OTHER

304

NOT_MODIFIED

305

USE_PROXY

307

TEMPORARY_REDIRECT

308

PERMANENT_REDIRECT

400

BAD_REQUEST

401

UNAUTHORIZED

402

PAYMENT_REQUIRED

403

FORBIDDEN

404

NOT_FOUND

405

METHOD_NOT_ALLOWED

406

NOT_ACCEPTABLE

407

PROXY_AUTHENTICATION_REQUIRED

408

REQUEST_TIMEOUT

409

CONFLICT

410

GONE

411

LENGTH_REQUIRED

412

PRECONDITION_FAILED

413

REQUEST_ENTITY_TOO_LARGE

414

REQUEST_URI_TOO_LONG

415

UNSUPPORTED_MEDIA_TYPE

416

REQUEST_RANGE_NOT_SATISFIABLE

417

EXPECTATION_FAILED

421

MISDIRECTED_REQUEST

422

UNPROCESSABLE_ENTITY

423

LOCKED

424

FAILED_DEPENDENCY

426

UPGRADE_REQUIRED

428

PRECONDITION_REQUIRED

429

TOO_MANY_REQUESTS

431

REQUEST_HEADER_FIELDS_TOO_LARGE

500

INTERNAL_SERVER_ERROR

501

NOT_IMPLEMENTED

502

BAD_GATEWAY

503

SERVICE_UNAVAILABLE

504

GATEWAY_TIMEOUT

505

HTTP_VERSION_NOT_SUPPORTED

506

VARIANT_ALSO_NEGOTIATES

507

INSUFFICIENT_STORAGE

508

LOOP_DETECTED

510

NOT_EXTENDED

511

NETWORK_AUTHENTICATION_REQUIRED

2.2. urllib

Listing 2.83. ściąganie danych z internetu, które trzeba rozpakować, Dane są w formacie TSV (tab separator values), można je rozpakować modułem CSV i podać jako delimiter='\t'
import os
import urllib.request
import zipfile

data_path = 'data'

os.makedirs(data_path, exist_ok=True)

url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip'
file_name = url.split('/')[-1]
dest_file = os.path.join(data_path, file_name)

data_file = 'SMSSpamCollection'
data_full = os.path.join(data_path, data_file)

urllib.request.urlretrieve(url, dest_file)

with zipfile.ZipFile(dest_file) as zip_file:
    zip_file.extract(data_file, path=data_path)

2.3. http.server

Warning

http.server is not recommended for production. It only implements basic security checks.

2.3.1. Simple HTTP Server

$ python -m http.server 8000 --bind 127.0.0.1

2.3.2. Own HTTP Sever

import re
from http import HTTPStatus
from http.server import BaseHTTPRequestHandler
from http.server import HTTPServer

SERVER = ('localhost', 8080)


class RequestHandler(BaseHTTPRequestHandler):
    def do_HEAD(self):
        self.send_response(HTTPStatus.OK)
        self.send_header('Content-type', 'text/html')
        self.end_headers()

    def do_GET(self):
        self.send_response(HTTPStatus.OK)
        self.send_header('Content-type','text/html')
        self.end_headers()
        self.wfile.write('<html>')
        self.wfile.write('<body>Hello World!</body>')
        self.wfile.write('</html>')

    def do_POST(self):
        if re.search('/api/v1/*', self.path):
            content_length = int(self.headers['Content-Length'])
            post_data = self.rfile.read(content_length)

            self.send_response(HTTPStatus.OK)
            self.send_header('Content-type','text/html')
            self.end_headers()
            self.wfile.write('<html>')
            self.wfile.write('<body>Hello World!</body>')
            self.wfile.write('</html>')


try:
    print('Starting server {SERVER}, use <Ctrl-C> to stop')
    httpd = HTTPServer(SERVER, RequestHandler)
    httpd.serve_forever()

except KeyboardInterrupt:
    print ('^C received, shutting down the web server...')
    httpd.socket.close()

2.3.3. Threaded server with JSON response

  • http.server.ThreadingHTTPServer since Python 3.7

import json
from http import HTTPStatus
from http.server import ThreadingHTTPServer
from http.server import BaseHTTPRequestHandler


class RequestHandler(BaseHTTPRequestHandler):
    def do_GET(self):
        data = {
            'first_name': 'Jose',
            'last_name': 'Jimenez'
        }
        response = bytes(json.dumps(data), 'UTF-8')

        self.send_response(HTTPStatus.OK)
        self.send_header('Content-Type', 'application/json; charset=utf-8')
        self.end_headers()
        self.wfile.write(response)
        self.server.path = self.path


def run(host='127.0.0.1', port=8080):
    print(f'Starting server on {host}:{port}, use <Ctrl-C> to stop')
    httpd = ThreadingHTTPServer((host, port), RequestHandler)
    httpd.serve_forever()


if __name__ == '__main__':
    run()

2.4. http.client

2.4.1. Connecting

h1 = http.client.HTTPConnection('www.python.org')
h2 = http.client.HTTPConnection('www.python.org:80')
h3 = http.client.HTTPConnection('www.python.org', 80)
h4 = http.client.HTTPConnection('www.python.org', 80, timeout=10)

2.4.2. GET Request

import http.client


conn = http.client.HTTPSConnection("www.python.org")
conn.request("GET", "/")
response = conn.getresponse()

response.status         # 200
response.reason         # OK

data = response.read()  # This will return entire content.
conn.close()

2.4.3. GET Request in chunks

import http.client


conn = http.client.HTTPSConnection("www.python.org")
conn.request("GET", "/")
response = conn.getresponse()

# The following example demonstrates reading data in chunks.
while not response.closed:
    print(response.read(200))   # 200 bytes

2.4.4. GET Request to Not Existing Resource

import http.client


conn = http.client.HTTPSConnection("www.python.org")

conn.request("GET", "/parrot.spam")
response = conn.getresponse()

response.status  # 404
response.reason  # Not Found

data = response.read()
print(data)
# b'<!doctype html> ... </body>\n</html>\n'

conn.close()

2.4.5. HEAD Request

import http.client

conn = http.client.HTTPSConnection("www.python.org")

conn.request("HEAD", "/")
response = conn.getresponse()

response.status         # 200
response.reason         # OK

print(response.headers)
# Server: nginx
# Content-Type: text/html; charset=utf-8
# X-Frame-Options: SAMEORIGIN
# x-xss-protection: 1; mode=block
# X-Clacks-Overhead: GNU Terry Pratchett
# Via: 1.1 varnish
# Content-Length: 48925
# Accept-Ranges: bytes
# Date: Tue, 06 Nov 2018 11:06:52 GMT
# Via: 1.1 varnish
# Age: 599
# Connection: keep-alive
# X-Served-By: cache-iad2142-IAD, cache-fra19148-FRA
# X-Cache: HIT, HIT
# X-Cache-Hits: 1, 2
# X-Timer: S1541502413.680933,VS0,VE0
# Vary: Cookie
# Strict-Transport-Security: max-age=63072000; includeSubDomains

conn.close()

2.4.6. POST Request

import http.client
import urllib.parse


params = urllib.parse.urlencode({
    '@number': 12524,
    '@type': 'issue',
    '@action': 'show'
})

headers = {
    "Content-type": "application/x-www-form-urlencoded",
    "Accept": "text/plain"
}

conn = http.client.HTTPSConnection("bugs.python.org")
conn.request("POST", "/", params, headers)

response = conn.getresponse()

response.status  # 302
response.reason  # 'Found'

data = response.read()
print(data)
# b'Redirecting to <a href="https://bugs.python.org/issue12524">https://bugs.python.org/issue12524</a>'

conn.close()

2.4.7. Basic Auth

import http.client
import json
from http import HTTPStatus
from base64 import b64encode


USERNAME = 'my_username'
TOKEN = 'my_token'


auth = bytes(f'{USERNAME}:{TOKEN}', 'utf-8')
auth = b64encode(auth).decode('ascii')

headers = {
    'Authorization': f'Basic {auth}',
    'User-Agent': 'Python http.client',
    'Accept': 'application/json'
}

conn = http.client.HTTPSConnection(host="api.github.com", port=443)
conn.request("GET", "/users", headers=headers)

response = conn.getresponse()

if response.status == HTTPStatus.OK:
    body = response.read().decode()
    data = json.loads(body)
    print(data)

conn.close()

2.5. Assignments

2.5.1. REST API

  • Filename: http_github.py

  • Lines of code to write: 60 lines

  • Estimated time of completion: 60 min

  1. Załóż darmowe konto na Github i potwierdź email

  2. W przeglądarce internetowej wygeneruj w swoim profilu token https://github.com/settings/tokens

  3. Używając biblioteki standardowej w Pythonie

  4. Zaciągnij informacje o repozytoriach użytkownika Django na https://github.com

  5. Każdy request uwierzytelnij za pomocą Basic Auth i swojego Access Tokena

  6. Następnie z przeglądnij listę z poziomu Pythona i znajdź URL dla repozytorium django

  7. Przeglądnij to repozytorium i jego listę commitów

  8. Podaj datę i opis ostatniego commita

  9. Znajdź numery ID ticketów (Fixed #...) z issue trackera, które zostały rozwiązane w ostatnim miesiącu

The whys and wherefores
  • Komunikacja HTTP (request, response)

  • Parsowanie odpowiedzi HTTP

  • Sprawdzanie stanu połączenia

  • Serializacja i parsowanie JSON

  • Korzystanie z API i dokumentacji

  • Regexpy

  • Używanie biblioteki standardowej i bibliotek zewnętrznych

Hints
"name": "django",
"full_name": "django/django",

# wyszukaj "commits_url": ???
https://api.github.com/

GET /orgs/django/repos
GET /repos/django/django/commits
$ curl https://api.github.com/orgs/django/repos
$ curl https://api.github.com/repos/django/django/commits