Re: CR not stripped properly on FTP transfers

Bill Janssen <janssen@parc.xerox.com>
Message-id: <Eg6GyF0B0KGW9gefwF@holmes.parc.xerox.com>
Date: 	Fri, 11 Jun 1993 18:21:21 PDT
Sender: Bill Janssen <janssen@parc.xerox.com>
From: Bill Janssen <janssen@parc.xerox.com>
To: timbl@www3.cern.ch, Bill Janssen <janssen@parc.xerox.com>
Subject: Re: CR not stripped properly on FTP transfers
Cc: www-talk@nxoc01.cern.ch
In-reply-to: <0g6Fu48B0KGW5geexr@holmes.parc.xerox.com>
References: <0g6Fu48B0KGW5geexr@holmes.parc.xerox.com>
Yes, changing HTFTP to use image mode for transfers seems to work well. 
Now tar file transfers work OK with -source.

But note that this quick (and dirty) fix has its own problems.  In
particular, if I fetch a text file from a Mac FTP server (or any ftp
server that doesn't use LF in its new-line string), the file will
contain CR where there should be LF.  It's a shame we don't have two
different URL schemes for FTP:  one (say binaryftp:foo) would be for
binary documents, and the other (say, textftp:foo) would be for text
documents.

Here's my version of HTFTP.c/HTFTPLoad() [note that it also has patches
for our Internet gateway scheme, please ignore them]:

/*	Retrieve File from Server
**	-------------------------
**
**	Asks the FTP server for the document named by `name' and feeds the
**	data connection to the parser.  The file is requested in image
**	(binary) mode; if the RETR is refused the name is tried as a
**	directory (CWD followed by NLST) in ASCII mode instead.  After a
**	successful image-mode transfer the control connection is put back
**	into ASCII mode.
**
** On entry,
**	name		WWW address of a file: document, including hostname
**	anchor		Passed on to the parser / directory reader
**	format_out	Passed on to the parser / directory reader
**	sink		Passed on to the parser / directory reader, and to
**			HTLoadError() if the final reply is bad
** On exit,
**	returns		HT_LOADED if the file was transferred,
**			the result of read_directory() for a directory,
**			the result of HTLoadError() on a bad final reply,
**			<0 for any other failure.
*/
PUBLIC int HTFTPLoad
ARGS4 (
  CONST char *,			name,
  HTParentAnchor *,		anchor,
  HTFormat,			format_out,
  HTStream *,			sink
)
{
    BOOL isDirectory = NO;
    int status;
    int retry;			/* How many times tried? */
    HTFormat format;
    int transferType;		/* Mode the control connection was left in */
#define	ASCIITransferType	1
#define IMAGETransferType	2
    
    for (retry=0; retry<2; retry++) {	/* For timed out/broken connections */
    
	status = get_connection(name);
	if (status<0) return status;

#ifdef LISTEN
	status = get_listen_socket();
	if (status<0) return status;
    
#ifdef REPEAT_PORT
/*	Inform the server of the port number we will listen on
*/
	{
	    status = response(port_command);
	    if (status !=2) {		/* Could have timed out */
		if (status<0) continue;		/* try again - net error*/
		return -status;			/* bad reply */
	    }
	    if (TRACE) fprintf(stderr, "FTP: Port defined.\n");
	}
#endif
#else	/* Use PASV */
/*	Tell the server to be passive
*/
	{
	    char *p;
	    char pasv_command[8];		/* "PASV" + CR + LF + NUL */
	    int reply, h0, h1, h2, h3, p0, p1;	/* Parts of reply */

	    /* Build the command line first, as every other call to
	    ** response() in this function does.
	    */
	    sprintf(pasv_command, "PASV%c%c", CR, LF);
	    status = response(pasv_command);
	    if (status !=2) {
		if (status<0) continue;		/* retry or Bad return */
		return -status;			/* bad reply */
	    }
	    for(p=response_text; *p; p++)
		if ((*p<'0')||(*p>'9')) *p = ' ';	/* Keep only digits */
	    status = sscanf(response_text, "%d%d%d%d%d%d%d",
		    &reply, &h0, &h1, &h2, &h3, &p0, &p1);
	    /* All seven numbers are required: p0 and p1, the last two,
	    ** are the ones the port is computed from.
	    */
	    if (status<7) {
		if (TRACE) fprintf(stderr, "FTP: PASV reply has no inet address!\n");
		return -99;
	    }
	    passive_port = (p0<<8) + p1;
	    if (TRACE) fprintf(stderr, "FTP: Server is listening on port %d\n",
		    passive_port);
	}

/*	Open connection for data:
*/
	{
	    struct sockaddr_in soc_address;
	    int status = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
	    if (status<0) {
		(void) HTInetStatus("socket for data socket");
		return status;
	    }
	    data_soc = status;
	    
#ifdef IGATEWAY
	    Cannot use PASV with IGATEWAY with this code as written
#endif
	    soc_address.sin_addr.s_addr = control->addr;
	    soc_address.sin_port = htons(passive_port);
	    soc_address.sin_family = AF_INET;	    /* Family, host order  */
	    if (TRACE) fprintf(stderr,  
		"FTP: Data remote address is port %d, inet %d.%d.%d.%d\n",
			(unsigned int)ntohs(soc_address.sin_port),
			(int)*((unsigned char *)(&soc_address.sin_addr)+0),
			(int)*((unsigned char *)(&soc_address.sin_addr)+1),
			(int)*((unsigned char *)(&soc_address.sin_addr)+2),
			(int)*((unsigned char *)(&soc_address.sin_addr)+3));
    
	    status = connect(data_soc, (struct sockaddr*)&soc_address,
		    sizeof(soc_address));
	    if (status<0){
		(void) HTInetStatus("connect for data");
		NETCLOSE(data_soc);
		return status;			/* Bad return */
	    }
	    
	    if (TRACE) fprintf(stderr, "FTP data connected, socket %d\n", data_soc);
	}
#endif /* use PASV */
	status = 0;
        break;	/* No more retries */

    } /* for retries */
    if (status<0) return status;	/* Failed with this code */
    
/*	Ask for the file:
*/    
    {
        char *filename = HTParse(name, "", PARSE_PATH + PARSE_PUNCTUATION);
	char command[LINE_LENGTH+1];
	int length;
	
	if (!*filename) StrAllocCopy(filename, "/");

	/* The path comes straight from the address and is copied into the
	** fixed-size command buffer below.  The longest wrapping is
	** "RETR " + CR + LF (7 characters), so refuse anything that would
	** not fit rather than overrun the buffer.
	*/
	for (length = 0; filename[length] && length <= LINE_LENGTH-7; length++)
	    ;
	if (length > LINE_LENGTH-7) {
	    if (TRACE) fprintf(stderr, "FTP: File name too long for a command\n");
	    free(filename);
	    return -99;
	}

	sprintf(command, "TYPE I%c%c", CR, LF);
	status = response(command);
	/* A completed command is reported as 2 throughout this function
	** (PASV, CWD, the final reply); 1 only marks a transfer being
	** started, which TYPE never does.
	*/
	if (status == 2)
	  transferType = IMAGETransferType;
	else
	  transferType = ASCIITransferType;
	sprintf(command, "RETR %s%c%c", filename, CR, LF);
	format = HTFileFormat(filename);
	status = response(command);
	if (status != 1) {  /* Failed : try to CWD to it */
	  if (transferType == IMAGETransferType)
	    /* change back to ASCII for the name list */
	    {
	      sprintf(command, "TYPE A%c%c", CR, LF);
	      status = response(command);
	      transferType = ASCIITransferType;
	    }
	  sprintf(command, "CWD %s%c%c", filename, CR, LF);
	  status = response(command);
	  if (status == 2) {  /* Succeeded : let's NAME LIST it */
	    isDirectory = YES;
	    sprintf(command, "NLST%c%c", CR, LF);
	    status = response (command);
	  }
	}
	free(filename);
	/* NOTE(review): in the PASV build data_soc is already connected at
	** this point and is not closed on the return below -- confirm
	** whether it is cleaned up elsewhere.
	*/
	if (status != 1)			/* Action not started */
	    return (status<0) ? status : -status;	/* keep failures negative */
    }

#ifdef LISTEN
/*	Wait for the connection
*/
    {
	struct sockaddr_in soc_address;
        int	soc_addrlen=sizeof(soc_address);
	status = accept(master_socket,
			(struct sockaddr *)&soc_address,
			&soc_addrlen);
	if (status<0)
	    return HTInetStatus("accept");
	CTRACE(tfp, "TCP: Accepted new socket %d\n", status);
	data_soc = status;
    }
#else
/* @@ */
#endif
    if (isDirectory) {
	return read_directory (anchor, name, format_out, sink);
      /* returns HT_LOADED or error */
    } else {
	HTParseSocket(format, format_out,
		anchor, data_soc, sink);

	HTInitInput(control->socket);
	/* Reset buffering to control connection DD 921208 */
    
	status = NETCLOSE(data_soc);
	if (TRACE) fprintf(stderr, "FTP: Closing data socket %d\n", data_soc);
	if (status<0) (void) HTInetStatus("close");	/* Comment only */
	data_soc = -1;	/* invalidate it */
	
	status = response(NIL);		/* Pick up final reply */
	if (status!=2) return HTLoadError(sink, 500, response_text);

	if (transferType == IMAGETransferType)
	  /* change back to ASCII; the reply is deliberately not checked */
	  {
	    char command[10];		/* "TYPE A" + CR + LF + NUL fits */

	    sprintf(command, "TYPE A%c%c", CR, LF);
	    status = response(command);
	  }
		
	return HT_LOADED;
    }       
} /* HTFTPLoad */
/*	Retrieve File from Server
**	-------------------------
**
**	Asks the FTP server for the document named by `name' and feeds the
**	data connection to the parser.  The file is requested in image
**	(binary) mode; if the RETR is refused the name is tried as a
**	directory (CWD followed by NLST) in ASCII mode instead.  After a
**	successful image-mode transfer the control connection is put back
**	into ASCII mode.
**
** On entry,
**	name		WWW address of a file: document, including hostname
**	anchor		Passed on to the parser / directory reader
**	format_out	Passed on to the parser / directory reader
**	sink		Passed on to the parser / directory reader, and to
**			HTLoadError() if the final reply is bad
** On exit,
**	returns		HT_LOADED if the file was transferred,
**			the result of read_directory() for a directory,
**			the result of HTLoadError() on a bad final reply,
**			<0 for any other failure.
*/
PUBLIC int HTFTPLoad
ARGS4 (
  CONST char *,			name,
  HTParentAnchor *,		anchor,
  HTFormat,			format_out,
  HTStream *,			sink
)
{
    BOOL isDirectory = NO;
    int status;
    int retry;			/* How many times tried? */
    HTFormat format;
    int transferType;		/* Mode the control connection was left in */
#define	ASCIITransferType	1
#define IMAGETransferType	2
    
    for (retry=0; retry<2; retry++) {	/* For timed out/broken connections */
    
	status = get_connection(name);
	if (status<0) return status;

#ifdef LISTEN
	status = get_listen_socket();
	if (status<0) return status;
    
#ifdef REPEAT_PORT
/*	Inform the server of the port number we will listen on
*/
	{
	    status = response(port_command);
	    if (status !=2) {		/* Could have timed out */
		if (status<0) continue;		/* try again - net error*/
		return -status;			/* bad reply */
	    }
	    if (TRACE) fprintf(stderr, "FTP: Port defined.\n");
	}
#endif
#else	/* Use PASV */
/*	Tell the server to be passive
*/
	{
	    char *p;
	    char pasv_command[8];		/* "PASV" + CR + LF + NUL */
	    int reply, h0, h1, h2, h3, p0, p1;	/* Parts of reply */

	    /* Build the command line first, as every other call to
	    ** response() in this function does.
	    */
	    sprintf(pasv_command, "PASV%c%c", CR, LF);
	    status = response(pasv_command);
	    if (status !=2) {
		if (status<0) continue;		/* retry or Bad return */
		return -status;			/* bad reply */
	    }
	    for(p=response_text; *p; p++)
		if ((*p<'0')||(*p>'9')) *p = ' ';	/* Keep only digits */
	    status = sscanf(response_text, "%d%d%d%d%d%d%d",
		    &reply, &h0, &h1, &h2, &h3, &p0, &p1);
	    /* All seven numbers are required: p0 and p1, the last two,
	    ** are the ones the port is computed from.
	    */
	    if (status<7) {
		if (TRACE) fprintf(stderr, "FTP: PASV reply has no inet address!\n");
		return -99;
	    }
	    passive_port = (p0<<8) + p1;
	    if (TRACE) fprintf(stderr, "FTP: Server is listening on port %d\n",
		    passive_port);
	}

/*	Open connection for data:
*/
	{
	    struct sockaddr_in soc_address;
	    int status = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
	    if (status<0) {
		(void) HTInetStatus("socket for data socket");
		return status;
	    }
	    data_soc = status;
	    
#ifdef IGATEWAY
	    Cannot use PASV with IGATEWAY with this code as written
#endif
	    soc_address.sin_addr.s_addr = control->addr;
	    soc_address.sin_port = htons(passive_port);
	    soc_address.sin_family = AF_INET;	    /* Family, host order  */
	    if (TRACE) fprintf(stderr,  
		"FTP: Data remote address is port %d, inet %d.%d.%d.%d\n",
			(unsigned int)ntohs(soc_address.sin_port),
			(int)*((unsigned char *)(&soc_address.sin_addr)+0),
			(int)*((unsigned char *)(&soc_address.sin_addr)+1),
			(int)*((unsigned char *)(&soc_address.sin_addr)+2),
			(int)*((unsigned char *)(&soc_address.sin_addr)+3));
    
	    status = connect(data_soc, (struct sockaddr*)&soc_address,
		    sizeof(soc_address));
	    if (status<0){
		(void) HTInetStatus("connect for data");
		NETCLOSE(data_soc);
		return status;			/* Bad return */
	    }
	    
	    if (TRACE) fprintf(stderr, "FTP data connected, socket %d\n", data_soc);
	}
#endif /* use PASV */
	status = 0;
        break;	/* No more retries */

    } /* for retries */
    if (status<0) return status;	/* Failed with this code */
    
/*	Ask for the file:
*/    
    {
        char *filename = HTParse(name, "", PARSE_PATH + PARSE_PUNCTUATION);
	char command[LINE_LENGTH+1];
	int length;
	
	if (!*filename) StrAllocCopy(filename, "/");

	/* The path comes straight from the address and is copied into the
	** fixed-size command buffer below.  The longest wrapping is
	** "RETR " + CR + LF (7 characters), so refuse anything that would
	** not fit rather than overrun the buffer.
	*/
	for (length = 0; filename[length] && length <= LINE_LENGTH-7; length++)
	    ;
	if (length > LINE_LENGTH-7) {
	    if (TRACE) fprintf(stderr, "FTP: File name too long for a command\n");
	    free(filename);
	    return -99;
	}

	sprintf(command, "TYPE I%c%c", CR, LF);
	status = response(command);
	/* A completed command is reported as 2 throughout this function
	** (PASV, CWD, the final reply); 1 only marks a transfer being
	** started, which TYPE never does.
	*/
	if (status == 2)
	  transferType = IMAGETransferType;
	else
	  transferType = ASCIITransferType;
	sprintf(command, "RETR %s%c%c", filename, CR, LF);
	format = HTFileFormat(filename);
	status = response(command);
	if (status != 1) {  /* Failed : try to CWD to it */
	  if (transferType == IMAGETransferType)
	    /* change back to ASCII for the name list */
	    {
	      sprintf(command, "TYPE A%c%c", CR, LF);
	      status = response(command);
	      transferType = ASCIITransferType;
	    }
	  sprintf(command, "CWD %s%c%c", filename, CR, LF);
	  status = response(command);
	  if (status == 2) {  /* Succeeded : let's NAME LIST it */
	    isDirectory = YES;
	    sprintf(command, "NLST%c%c", CR, LF);
	    status = response (command);
	  }
	}
	free(filename);
	/* NOTE(review): in the PASV build data_soc is already connected at
	** this point and is not closed on the return below -- confirm
	** whether it is cleaned up elsewhere.
	*/
	if (status != 1)			/* Action not started */
	    return (status<0) ? status : -status;	/* keep failures negative */
    }

#ifdef LISTEN
/*	Wait for the connection
*/
    {
	struct sockaddr_in soc_address;
        int	soc_addrlen=sizeof(soc_address);
	status = accept(master_socket,
			(struct sockaddr *)&soc_address,
			&soc_addrlen);
	if (status<0)
	    return HTInetStatus("accept");
	CTRACE(tfp, "TCP: Accepted new socket %d\n", status);
	data_soc = status;
    }
#else
/* @@ */
#endif
    if (isDirectory) {
	return read_directory (anchor, name, format_out, sink);
      /* returns HT_LOADED or error */
    } else {
	HTParseSocket(format, format_out,
		anchor, data_soc, sink);

	HTInitInput(control->socket);
	/* Reset buffering to control connection DD 921208 */
    
	status = NETCLOSE(data_soc);
	if (TRACE) fprintf(stderr, "FTP: Closing data socket %d\n", data_soc);
	if (status<0) (void) HTInetStatus("close");	/* Comment only */
	data_soc = -1;	/* invalidate it */
	
	status = response(NIL);		/* Pick up final reply */
	if (status!=2) return HTLoadError(sink, 500, response_text);

	if (transferType == IMAGETransferType)
	  /* change back to ASCII; the reply is deliberately not checked */
	  {
	    char command[10];		/* "TYPE A" + CR + LF + NUL fits */

	    sprintf(command, "TYPE A%c%c", CR, LF);
	    status = response(command);
	  }
		
	return HT_LOADED;
    }       
} /* HTFTPLoad */