python-tools/advancedhttpserver.py

#!/usr/bin/env python
"""
Advanced HTTP server with upload, ranged GET requests, POST requests, basic auth and TLS.
This builds on the standard library's simple HTTP server.

Author: jens (gitea.caret.be)

you'll need `pip install pyOpenSSL` for now if you want the ssl feature

This might depend on the Rust compiler and the wheel package:

pip3 install wheel
sudo dnf install rust


license: gpl-v3
"""


import os
import posixpath
import http.server
from http.server import SimpleHTTPRequestHandler, HTTPServer
import urllib.parse
import html
import mimetypes
import re
import io
import ssl
import base64
from getpass import getpass

# Matches a single-range HTTP Range header value such as 'bytes=0-499' or
# 'bytes=500-'; group 1 is the first byte offset, group 2 the optional last.
BYTE_RANGE_RE = re.compile(r'bytes=(\d+)-(\d+)?$')
# File name handed to openssl (as both -keyout and -out) and to runssl() by
# the __main__ block; send_head() checks request paths against this name.
KEYFILE = 'yourkeyfile.key'

# Base64-encoded b'admin:<password>'; stays None until the __main__ block
# sets it. send_head() compares it with the request's Authorization header.
BASIC_AUTH = None


def copy_byte_range(infile, outfile, start=None, stop=None, bufsize=16*1024):
    '''Like shutil.copyfileobj, but only copy a range of the streams.

    Both start and stop are inclusive byte offsets into infile.

    infile  -- seekable binary file object to read from
    outfile -- binary file object to write to
    start   -- offset to seek to before copying; None keeps the current
               position
    stop    -- offset of the last byte to copy; None copies to end of file
    bufsize -- maximum number of bytes read per iteration
    '''
    if start is not None:
        infile.seek(start)
    while True:
        to_read = bufsize
        # Compare against None explicitly: stop == 0 is a valid offset
        # (e.g. 'bytes=0-0') and must not be mistaken for "no stop".
        if stop is not None:
            to_read = min(bufsize, stop + 1 - infile.tell())
            # A non-positive count means the range is exhausted (or stop
            # lies before the current position); read(-1) would otherwise
            # slurp the rest of the file.
            if to_read <= 0:
                break
        buf = infile.read(to_read)
        if not buf:
            break
        outfile.write(buf)


def parse_byte_range(byte_range):
    '''Returns the two numbers in 'bytes=123-456' or throws ValueError.

    The last number or both numbers may be None: an open-ended range such
    as 'bytes=123-' yields (123, None) and a blank value yields
    (None, None).  Only a single 'first-last' range is understood.
    '''
    value = byte_range.strip()
    if value == '':
        return None, None

    m = BYTE_RANGE_RE.match(value)
    if m is None:
        raise ValueError('Invalid byte range %s' % byte_range)

    first, last = (None if x is None else int(x) for x in m.groups())
    # Test against None rather than truthiness so that a last offset of 0
    # (e.g. 'bytes=5-0') is still validated against first.
    if last is not None and last < first:
        raise ValueError('Invalid byte range %s' % byte_range)
    return first, last


class RangeRequestHandler(SimpleHTTPRequestHandler):

    """Simple HTTP request handler with GET/HEAD/POST commands.

    This serves files from the current directory and any of its
    subdirectories.  The MIME type for files is determined by
    calling the .guess_type() method.  GET and HEAD honour a single
    'Range: bytes=first-last' header, and POST receives a file uploaded
    by the client into the requested directory.

    Every request must carry HTTP basic-auth credentials that match the
    module-level BASIC_AUTH value.
    """

    server_version = "rangeHTTPWithUpload/1"

    # (first, last) byte range of the response being sent, or None for a
    # full-body response.  Reset on every request; read by copyfile().
    range = None

    def _check_auth(self):
        """Validate the request's basic-auth credentials.

        Returns True when the Authorization header matches BASIC_AUTH.
        Otherwise an error response is sent and False is returned, in
        which case the caller must not send anything else.
        """
        if BASIC_AUTH is None:
            # No credentials were configured; refuse to serve instead of
            # crashing on None.decode().
            self.send_error(500, "Basic auth credentials are not configured")
            return False
        if self.headers.get('Authorization') != 'Basic %s' % BASIC_AUTH.decode():
            self.send_response(401)
            self.send_header('WWW-Authenticate', 'Basic realm=\"Test\"')
            self.send_header('Content-type', 'text/html')
            self.send_header('Content-Length', '0')
            self.end_headers()
            return False
        return True

    def do_GET(self):
        """Serve a GET request."""
        f = self.send_head()
        if f:
            try:
                self.copyfile(f, self.wfile)
            finally:
                f.close()

    def do_HEAD(self):
        """Serve a HEAD request."""
        f = self.send_head()
        if f:
            f.close()

    def do_POST(self):
        """Serve a POST request: store the upload and send a result page."""
        # Authenticate before reading the body so that unauthenticated
        # clients cannot write files.
        if not self._check_auth():
            return
        r, info = self.deal_post_data()
        print(r, info, "by: ", self.client_address)
        # The Referer header is optional; fall back to the posted path.
        back = self.headers.get('referer') or self.path
        f = io.BytesIO()
        f.write(b'<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
        f.write(b"<html>\n<title>Upload Result Page</title>\n")
        f.write(b"<body>\n<h2>Upload Result Page</h2>\n")
        f.write(b"<hr>\n")
        if r:
            f.write(b"<strong>Success:</strong>")
        else:
            f.write(b"<strong>Failed:</strong>")
        # Both the message (it embeds the uploaded file name) and the back
        # link are client-controlled, so escape them before embedding.
        f.write(html.escape(info.decode('utf-8', 'replace')).encode())
        f.write(b"<br><a href=\"%s\">back</a>" % html.escape(back).encode())
        f.write(b"\n</body>\n</html>\n")
        length = f.tell()
        f.seek(0)
        # Send headers describing this page.  Reusing send_head() here
        # would announce the type and length of the file or directory at
        # self.path instead.
        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.send_header("Content-Length", str(length))
        self.end_headers()
        self.range = None
        try:
            self.copyfile(f, self.wfile)
        finally:
            f.close()

    def deal_post_data(self):
        """Store the file from a multipart/form-data upload.

        Reads the request body from self.rfile and writes the content of
        the form field named "file" into the directory addressed by
        self.path, using the base name of the client-supplied file name.

        Returns a (success, message) tuple where success is a bool and
        message is a bytes description meant for the result page.
        """
        boundary = self.headers.get_boundary()
        if boundary is None:
            return (False, b"Content NOT begin with boundary")
        boundary = boundary.encode()
        try:
            remainbytes = int(self.headers['content-length'])
        except (TypeError, ValueError):
            return (False, b"Missing or invalid Content-Length header")
        line = self.rfile.readline()
        remainbytes -= len(line)
        if boundary not in line:
            return (False, b"Content NOT begin with boundary")
        line = self.rfile.readline()
        remainbytes -= len(line)
        fn = re.findall(r'Content-Disposition.*name="file"; filename="(.*)"',
                        line.decode("utf-8", "replace"))
        if not fn:
            return (False, b"Can't find out file name...")
        path = self.translate_path(self.path)
        # basename() discards any directory part sent by the client.
        fn = os.path.join(path, os.path.basename(fn[0]))
        if fn.endswith(KEYFILE):
            return (False, b"You can't overwrite the ssl keyfile")
        # Skip the part's Content-Type line and the blank separator line.
        line = self.rfile.readline()
        remainbytes -= len(line)
        line = self.rfile.readline()
        remainbytes -= len(line)
        try:
            out = open(fn, 'wb')
        except IOError:
            return (False, b"Can't create file to write, do you have permission to write?")

        with out:
            # Writing lags one line behind reading: the line break in front
            # of the closing boundary belongs to the multipart framing and
            # must be stripped from the stored file.
            preline = self.rfile.readline()
            remainbytes -= len(preline)
            while remainbytes > 0:
                line = self.rfile.readline()
                if not line:
                    # Client closed the connection early; without this
                    # check remainbytes would never reach zero.
                    break
                remainbytes -= len(line)
                if boundary in line:
                    preline = preline[0:-1]
                    if preline.endswith(b'\r'):
                        preline = preline[0:-1]
                    out.write(preline)
                    return (True, b"File '%s' upload success!" % fn.encode())
                out.write(preline)
                preline = line
        return (False, b"Unexpect Ends of data.")

    def send_head(self):
        """Common code for GET and HEAD commands.

        This checks authentication, parses an optional Range header and
        sends the response code and MIME headers.

        Return value is either a file object (which has to be copied
        to the outputfile by the caller unless the command was HEAD,
        and must be closed by the caller under all circumstances), or
        None, in which case the caller has nothing further to do.
        """
        if not self._check_auth():
            return None

        self.range = None
        first = last = None
        if 'Range' in self.headers:
            try:
                first, last = parse_byte_range(self.headers['Range'])
            except ValueError:
                self.send_error(400, 'Invalid byte range')
                return None
            # A blank Range header parses to (None, None); treat it like
            # an absent header instead of a partial request.
            if first is not None:
                self.range = (first, last)
        path = self.translate_path(self.path)
        if path.endswith(KEYFILE):
            self.send_error(403, "You can't download the ssl keyfile")
            # Stop here: falling through would send the key file anyway.
            return None

        if os.path.isdir(path):
            if not self.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(301)
                self.send_header("Location", self.path + "/")
                self.send_header("Content-Length", "0")
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                if os.path.exists(index):
                    path = index
                    break
            else:
                # Listings are always sent whole; clear the range so that
                # copyfile() does not truncate the generated page.
                self.range = None
                return self.list_directory(path)
        ctype = self.guess_type(path)
        try:
            # Always read in binary mode. Opening files in text mode may cause
            # newline translations, making the actual size of the content
            # transmitted *less* than the content-length!
            f = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None
        except ValueError:
            self.send_error(418, "I'm a teapot")
            return None

        try:
            fs = os.fstat(f.fileno())
            file_len = fs.st_size
            if self.range:
                if first >= file_len:
                    f.close()
                    self.send_error(416, 'Requested Range Not Satisfiable')
                    return None
                if last is None or last >= file_len:
                    last = file_len - 1
                # Remember the clamped range for copyfile().
                self.range = (first, last)
                response_length = last - first + 1
                self.send_response(206)
                self.send_header('Accept-Ranges', 'bytes')
                self.send_header('Content-Range',
                                 'bytes %s-%s/%s' % (first, last, file_len))
            else:
                self.send_response(200)
                response_length = file_len
            self.send_header("Content-type", ctype)
            self.send_header('Content-Length', str(response_length))
            self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
            self.end_headers()
            return f
        except BaseException:
            # Do not leak the descriptor when sending the headers fails.
            f.close()
            raise

    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        The page also carries an upload form that posts to the same URL.

        Return value is either a file object, or None (indicating an
        error).  In either case, the headers are sent, making the
        interface the same as for send_head().
        """
        try:
            entries = os.listdir(path)
        except OSError:
            self.send_error(404, "No permission to list directory")
            return None
        entries.sort(key=lambda a: a.lower())
        f = io.BytesIO()
        displaypath = html.escape(urllib.parse.unquote(self.path)).encode()
        f.write(b'<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
        f.write(b"<html>\n<title>Directory listing for %s</title>\n" % displaypath)
        f.write(b"<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
        f.write(b"<hr>\n")
        f.write(b"<form ENCTYPE=\"multipart/form-data\" method=\"post\">")
        f.write(b"<input name=\"file\" type=\"file\"/>")
        f.write(b"<input type=\"submit\" value=\"upload\"/></form>\n")
        f.write(b"<hr>\n<ul>\n")
        for name in entries:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            f.write(b'<li><a href="%s">%s</a>\n'
                    % (urllib.parse.quote(linkname).encode(), html.escape(displayname).encode()))
        f.write(b"</ul>\n<hr>\n</body>\n</html>\n")
        length = f.tell()
        f.seek(0)
        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.send_header("Content-Length", str(length))
        self.end_headers()
        return f

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        The result is rooted at the current working directory.
        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored.  (XXX They should
        probably be diagnosed.)
        """
        # abandon query parameters
        path = path.split('?', 1)[0]
        path = path.split('#', 1)[0]
        path = posixpath.normpath(urllib.parse.unquote(path))
        words = [word for word in path.split('/') if word]
        path = os.getcwd()
        for word in words:
            # Keep only the last plain component of each word so that
            # drive letters and local separators cannot escape the root.
            _drive, word = os.path.splitdrive(word)
            _head, word = os.path.split(word)
            if word in (os.curdir, os.pardir):
                continue
            path = os.path.join(path, word)
        return path

    def copyfile(self, source, outputfile):
        """Copy data between two file objects, honouring self.range.

        The SOURCE argument is a file object open for reading
        (or anything with a read() method) and the OUTPUTFILE
        argument is a file object open for writing (or
        anything with a write() method).

        Without a range the whole of SOURCE is copied; otherwise only
        the inclusive (first, last) byte range stored in self.range.
        """
        if not self.range:
            return copy_byte_range(source, outputfile)

        # SimpleHTTPRequestHandler uses shutil.copyfileobj, which doesn't let
        # you stop the copying before the end of the file.
        start, stop = self.range  # set in send_head()
        return copy_byte_range(source, outputfile, start, stop)

    def guess_type(self, path):
        """Guess the type of a file.

        Argument is a PATH (a filename).
        Return value is a string of the form type/subtype,
        usable for a MIME Content-type header.

        The file's extension is looked up in self.extensions_map, first
        as written and then lower-cased, with application/octet-stream
        as the default.
        """
        _base, ext = posixpath.splitext(path)
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        ext = ext.lower()
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        return self.extensions_map['']

    if not mimetypes.inited:
        mimetypes.init()  # try to read system mime.types
    extensions_map = mimetypes.types_map.copy()
    extensions_map.update({
        '': 'application/octet-stream',  # Default
        '.py': 'text/plain',
        '.c': 'text/plain',
        '.h': 'text/plain',
        })


def test(HandlerClass=RangeRequestHandler):
    """Run ``http.server.test`` with *HandlerClass* as the request handler."""
    http.server.test(HandlerClass=HandlerClass)


def runssl(keyfile, host='0.0.0.0', port=4443):
    """Serve RangeRequestHandler over TLS until interrupted.

    keyfile -- path to a PEM file holding both the certificate and its
               private key
    host    -- address to bind to (defaults to all IPv4 interfaces)
    port    -- TCP port to listen on

    Prints the URL to visit and the SHA-256 digest of the certificate,
    then blocks in serve_forever().  Requires pyOpenSSL for the digest.
    """
    from OpenSSL.crypto import load_certificate, FILETYPE_PEM

    httpd = HTTPServer((host, port), RangeRequestHandler)
    try:
        # ssl.wrap_socket() is deprecated and was removed in Python 3.12;
        # an SSLContext is the supported way to wrap the listening socket.
        context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        context.load_cert_chain(certfile=keyfile)
        httpd.socket = context.wrap_socket(httpd.socket, server_side=True)
        print('visit https://%s:%d/' % (host, port))

        with open(keyfile, "rb") as cert_file:
            cert_file_string = cert_file.read()
        cert = load_certificate(FILETYPE_PEM, cert_file_string)

        sha256_fingerprint = cert.digest("sha256")
        print('key fingerprint', sha256_fingerprint)
        httpd.serve_forever()
    finally:
        # Release the listening socket even when interrupted.
        httpd.server_close()


if __name__ == '__main__':
    # test()

    try:
        # Create a throw-away self-signed certificate; key and certificate
        # are written to the same PEM file.
        status = os.system('openssl req -new -x509 -keyout {0}  -out {0}  -days 365 -nodes -subj /CN=me,OU=no,O=acme,C=UK/'
                           .format(KEYFILE))
        if status != 0:
            # Without a certificate the TLS server cannot start; fail here
            # rather than after prompting for a password.
            raise SystemExit('openssl could not create %s (exit status %s)'
                             % (KEYFILE, status))
        password = getpass('admin password?')
        # The handler compares this value with the Authorization header.
        BASIC_AUTH = base64.b64encode(b'admin:%s' % password.encode())
        runssl(KEYFILE)
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the server.
        pass
    finally:
        # Always remove the generated private key, not only on Ctrl-C.
        if os.path.exists(KEYFILE):
            os.unlink(KEYFILE)
    print('done running')