Re: link indirection

"Daniel W. Connolly" <connolly@hal.com>
Errors-To: listmaster@www0.cern.ch
Date: Wed, 4 May 1994 17:52:54 +0200
Errors-To: listmaster@www0.cern.ch
Message-id: <9405041540.AA24521@ulua.hal.com>
Errors-To: listmaster@www0.cern.ch
Reply-To: connolly@hal.com
Originator: www-talk@info.cern.ch
Sender: www-talk@www0.cern.ch
Precedence: bulk
From: "Daniel W. Connolly" <connolly@hal.com>
To: Multiple recipients of list <www-talk@www0.cern.ch>
Subject: Re: link indirection 
X-Listprocessor-Version: 6.0c -- ListProcessor by Anastasios Kotsikonas
Content-Type: text/plain; charset="us-ascii"
Content-Type: text/plain; charset="us-ascii"
Mime-Version: 1.0
Mime-Version: 1.0
In message <199405041523.AA03204@RA.DEPT.CS.YALE.EDU>, Stan Letovsky writes:
>into my code.  I looked at Mosaic-2.4, and libwww2/HTTP.c seems to be
>the relevant piece, but it depends on a lot of other code in this
>module, and I would rather not have to include all of it if I
>don't have to.

You noticed this too, huh...

> Is there a bare-bones description of the client-side
>of the httpd protocol anywhere?

Lately, python is my favorite quick-and-dirty, but
precise programming language.

The cool thing about python is that it's pretty easy
to grok, right out of the box. See for yourself:

(I didn't write this...Guido did)

From http://www.cwi.nl/~guido/python-src/Lib/httplib.py


# HTTP client class
#
# See the following document for a tentative protocol description:
#     Hypertext Transfer Protocol (HTTP)        Tim Berners-Lee, CERN
#     Internet Draft                                       5 Nov 1993
#     draft-ietf-iiir-http-00.txt                  Expires 5 May 1994
#
# Example:
#
# >>> from httplib import HTTP
# >>> h = HTTP('www.cwi.nl')
# >>> h.putrequest('GET', '/index.html')
# >>> h.putheader('Accept', 'text/html')
# >>> h.putheader('Accept', 'text/plain')
# >>> h.endheaders()
# >>> errcode, errmsg, headers = h.getreply()
# >>> if errcode == 200:
# ...     f = h.getfile()
# ...     print f.read() ## Print the raw HTML
# ...
# <TITLE>Home Page of CWI, Amsterdam</TITLE>
# [...many more lines...]
# >>>
#
# Note that an HTTP object is used for a single request -- to issue a
# second request to the same server, you create a new HTTP object.
# (This is in accordance with the protocol, which uses a new TCP
# connection for each request.)


import os
import socket
import string
import regex
import regsub
import rfc822

# Protocol version string: sent at the end of every request line and
# expected at the start of the server's reply line.
HTTP_VERSION = 'HTTP/1.0'
# Port that HTTP.connect() falls back to when no port is supplied.
HTTP_PORT = 80

# Pattern for the first line of the server's reply: the literal
# HTTP_VERSION (gsub backslash-escapes its dots so that they only match
# a real dot), some spaces/tabs, a three-digit status code (group 1)
# and the remainder of the line (group 2).  The \( \) grouping syntax
# is that of the `regex' module used to compile it.
replypat = regsub.gsub('\\.', '\\\\.', HTTP_VERSION) + \
          '[ \t]+\([0-9][0-9][0-9]\)\(.*\)'
replyprog = regex.compile(replypat)

class HTTP:

        def __init__(self, *args):
                self.debuglevel = 0
                if args: apply(self.connect, args)

        def set_debuglevel(self, debuglevel):
                self.debuglevel = debuglevel

        def connect(self, host, *args):
                if args:
                        if args[1:]: raise TypeError, 'too many args'
                        port = args[0]
                else:
                        i = string.find(host, ':')
                        port = None
                        if i >= 0:
                                host, port = host[:i], host[i+1:]
                                try: port = string.atoi(port)
                                except string.atoi_error: pass
                if not port: port = HTTP_PORT
                self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                if self.debuglevel > 0: print 'connect:', (host, port)
                self.sock.connect(host, port)

        def send(self, str):
                if self.debuglevel > 0: print 'send:', `str`
                self.sock.send(str)

        def putrequest(self, request, selector):
                str = '%s %s %s\r\n' % (request, selector, HTTP_VERSION)
                self.send(str)

        def putheader(self, header, *args):
                str = '%s: %s\r\n' % (header, string.joinfields(args,'\r\n\t'))
                self.send(str)

        def endheaders(self):
                self.send('\r\n')

        def endrequest(self):
                if self.debuglevel > 0: print 'shutdown: 1'
                self.sock.shutdown(1)

        def getreply(self):
                self.endrequest()
                self.file = self.sock.makefile('r')
                line = self.file.readline()
                if self.debuglevel > 0: print 'reply:', `line`
                if replyprog.match(line) < 0:
                        self.headers = None
                        return -1, line, self.headers
                errcode, errmsg = replyprog.group(1, 2)
                errcode = string.atoi(errcode)
                errmsg = string.strip(errmsg)
                self.headers = rfc822.Message(self.file)
                return errcode, errmsg, self.headers

        def getfile(self):
                return self.file


def test():
        import sys
        import getopt
        opts, args = getopt.getopt(sys.argv[1:], 'd')
        dl = 0
        for o, a in opts:
                if o == '-d': dl = dl + 1
        host = 'www.cwi.nl:80'
        selector = '/index.html'
        if args[0:]: host = args[0]
        if args[1:]: selector = args[1]
        h = HTTP()
        h.set_debuglevel(dl)
        h.connect(host)
        h.putrequest('GET', selector)
        errcode, errmsg, headers = h.getreply()
        print 'errcode =', errcode
        print 'headers =', headers
        print 'errmsg  =', errmsg
        if headers:
                for header in headers.headers: print string.strip(header)
        print h.getfile().read()

# Run the self-test only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == '__main__':
        test()