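/*
 * Minimal HTTP GET client.
 *
 * The server's status code is not parsed.
 * Only the resource data (response body) sent by the server is returned.
 * The largest supported body is MAX_RCVBUF_LEN (800*1024) bytes.
 */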
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
/* TCP port used when the URL does not name one. */
#define DEFAULT_PORT 80
/* Response header names / lines looked for when deciding how to read the body. */
#define CONTENT_LEN_STR "Content-Length"
#define CHUNKED_STR "Transfer-Encoding: chunked"
/* Capacity of each URL component buffer in struct url_parse_t. */
#define MAX_URL_LEN 2048
/* Size of the scratch buffer holding the request and the response headers. */
#define MAX_BUF_LEN 2048
/* Room for a dotted-quad IPv4 address plus the terminating NUL. */
#define MAX_IP_LEN 16
/* Size of the buffer that receives the response body. */
#define MAX_RCVBUF_LEN (800*1024)
/* Number of bytes fetched at once when looking for a chunk-size line. */
#define CHUNK_SIZE_LEN 6
/* Line terminator that follows every chunk-size line and every chunk payload. */
#define CHUNK_END_STR "\r\n"
#define CHUNK_END_STR_LEN 2
/*
 * printf-style template of the request; the two %s are, in order, the
 * request target and the Host header value. "Connection: close" makes the
 * server close the socket once the response has been sent.
 */
#define REQUEST_FORMART \
    "GET %s HTTP/1.1\r\n" \
    "Host:%s\r\n" \
    "Connection: close\r\n" \
    "User-Agent: Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.34 (KHTML, like Gecko) rekonq/1.1 Safari/534.34\r\n" \
    "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n" \
    "Accept-Encoding: \r\n" \
    "Accept-Charset:utf-8,*;q=0.5\r\n" \
    "Accept-Language: zh-CN, en-US; q=0.8, en; q=0.6\r\n" \
    "\r\n"
struct url_parse_t { |
char host[MAX_URL_LEN]; |
unsigned short port; |
char resource[MAX_URL_LEN]; |
}; |
static int parse_url( char *url, struct url_parse_t *url_parse) |
{ |
char *temp; |
char *parse_p = url; |
char host[1024] = {0}; |
char resource[1024] = {0}; |
int port = 80; |
if (temp = strstr (parse_p, "http://" )) { |
parse_p = url + 7; |
} |
if (temp = strstr (parse_p, "/" )) { |
strncpy (host, parse_p, temp - parse_p); |
strcpy (resource, temp); |
} else { |
strcpy (host, parse_p); |
resource[0] = '/' ; |
} |
if (temp = strstr (host, ":" )) { |
port = atoi (temp + 1); |
if (port <= 0) { |
return -1; |
} |
} |
strcpy (url_parse->host, host); |
strcpy (url_parse->resource, resource); |
url_parse->port = port; |
return 0; |
} |
/*
 * Split a proxy specification "ip:port" into its two parts.
 *
 * proxy  NUL-terminated text such as "110.4.12.170:80"; it is not modified.
 * ip     receives the text before the ':' as a NUL-terminated string. The
 *        buffer must hold at least 16 bytes (15 characters, the longest
 *        dotted-quad IPv4 address, plus the terminator).
 * port   receives the port number in host byte order.
 *
 * Returns 0 on success. Returns -1, leaving ip and port untouched, when an
 * argument is NULL, the ':' is missing, the address part is empty or longer
 * than 15 characters, or the port is not a decimal number in 1..65535.
 */
static int parse_proxy(char *proxy, char *ip, unsigned short *port)
{
    enum { IPV4_TEXT_MAX = 15 };
    const char *colon;
    char *end;
    size_t ip_len;
    long value;

    if (proxy == NULL || ip == NULL || port == NULL) {
        return -1;
    }

    colon = strchr(proxy, ':');
    if (colon == NULL) {
        return -1;
    }

    ip_len = (size_t)(colon - proxy);
    if (ip_len == 0 || ip_len > IPV4_TEXT_MAX) {
        return -1;
    }

    errno = 0;
    value = strtol(colon + 1, &end, 10);
    if (end == colon + 1 || *end != '\0' || errno == ERANGE ||
        value < 1 || value > 65535) {
        return -1;
    }

    memcpy(ip, proxy, ip_len);
    ip[ip_len] = '\0';
    *port = (unsigned short)value;
    return 0;
}
/*
 * Read up to content_len bytes from fd into buf, looping over short reads.
 *
 * fd           descriptor to read from.
 * buf          destination with room for content_len bytes.
 * content_len  number of bytes wanted; a negative value is rejected.
 * proxy_flag   non-zero when end-of-file before content_len bytes is an
 *              acceptable way for the data to end; zero when all
 *              content_len bytes are required.
 *
 * Returns the number of bytes stored in buf. That is content_len unless
 * proxy_flag is non-zero and the peer closed early, in which case it is the
 * count received up to that point. Returns -1 on a read error, on a negative
 * content_len, or on end-of-file when proxy_flag is zero.
 */
static int read_full(int fd, char *buf, int content_len, int proxy_flag)
{
    int total = 0;

    if (content_len < 0) {
        return -1;
    }

    while (total < content_len) {
        ssize_t got = read(fd, buf + total, (size_t)(content_len - total));

        if (got < 0) {
            if (errno == EINTR) {
                continue; /* interrupted before any data arrived: retry */
            }
            return -1;
        }
        if (got == 0) {
            /* End-of-file: read() would keep returning 0, so stop here. */
            if (!proxy_flag) {
                return -1;
            }
            break;
        }
        total += (int)got;
    }
    return total;
}
//recved_bytes保存已接收的属于本块的数据 |
//用于确定chunk-size |
// |
//return value: |
//0 for chunk-size == 0 传输结束 |
//大于0 正常结束 本块接收完毕 可以开始接收下一块 |
//-1 接收异常 |
static int read_chunk( int fd, char *buf, const char *recved_bytes, int recved_size) |
{ |
char *temp; |
char temp_buf[MAX_BUF_LEN]; |
const char *recved_p; |
int chunk_len; |
int rcvsize; |
int bufoffset = 0; |
//printf("in read chunk\n"); |
if (recved_bytes) { |
//如果没有\r\n说明chunk-size没有接收完 需要继续接收 |
if (!(temp = strstr (recved_bytes, "\r\n" ))) { |
memcpy (temp_buf, recved_bytes, recved_size); |
if ((rcvsize = read_full(fd, temp_buf + recved_size, CHUNK_SIZE_LEN, 0)) < 0) { |
return -1; |
} |
recved_size += CHUNK_SIZE_LEN; |
recved_p = temp_buf; |
} else { |
recved_p = recved_bytes; |
} |
} else { |
if ((rcvsize = read_full(fd, temp_buf, CHUNK_SIZE_LEN/2, 0)) < 0) { |
return -1; |
} |
if (! strncmp (temp_buf, "0\r\n" , CHUNK_SIZE_LEN/2)) { |
return 0; |
} else { |
if (rcvsize = read_full(fd, temp_buf + CHUNK_SIZE_LEN/2, CHUNK_SIZE_LEN - CHUNK_SIZE_LEN/2, 0) < 0) { |
return -1; |
} |
} |
recved_p = temp_buf; |
recved_size = CHUNK_SIZE_LEN; |
} |
sscanf (recved_p, "%x" , &chunk_len); |
if (chunk_len < 1000) { |
printf ( "%s$$$$$" , recved_p); |
} |
if (!chunk_len) { |
return chunk_len; |
} |
if (!(temp = strstr (recved_p, CHUNK_END_STR))) { |
return -1; |
} |
if (temp + CHUNK_END_STR_LEN == recved_p + recved_size) { |
recved_size = 0; |
bufoffset = 0; |
} else if (temp + CHUNK_END_STR_LEN > recved_p + recved_size) { |
return -1; |
} else { |
bufoffset = recved_size - (temp + CHUNK_END_STR_LEN - recved_p); |
memcpy (buf, temp + CHUNK_END_STR_LEN, bufoffset); |
} |
if ((rcvsize = read_full(fd, buf + bufoffset, chunk_len - bufoffset, 0)) < 0) { |
return -1; |
} |
if (*(buf + bufoffset + rcvsize - 1) != '\n' && *(buf + bufoffset + rcvsize - 2) != '\r' ) { |
//clear the end "\r\n" |
if (read_full(fd, temp_buf, CHUNK_END_STR_LEN, 0) < 0) { |
return -1; |
} |
} |
*(buf + chunk_len) = 0; |
return chunk_len; |
} |
static int recv_page( int fd, char *buf, int proxy_flag, struct url_parse_t *parsed_url) |
{ |
char rcvbuf[MAX_BUF_LEN]; |
char resourcebuf[MAX_URL_LEN]; |
char *temp; |
int content_len = MAX_RCVBUF_LEN - 1; |
int bufoffset = 0; |
int templen; |
int rcvsize; |
int chunk_return; |
if (proxy_flag) { |
sprintf (resourcebuf, "http://%s/%s" , parsed_url->host, parsed_url->resource); |
} else { |
sprintf (resourcebuf, "%s" , parsed_url->resource); |
} |
//sprintf(rcvbuf, REQUEST_FORMART, resourcebuf, parsed_url->host, parsed_url->port); |
sprintf (rcvbuf, REQUEST_FORMART, resourcebuf, parsed_url->host); |
if (write(fd, rcvbuf, strlen (rcvbuf)) < 0) { |
return -1; |
} |
rcvsize = read(fd, rcvbuf, sizeof (rcvbuf)); |
printf ( "rcvsize:%d, response:%s\n" , rcvsize, rcvbuf); |
/* |
* if "chunked" |
* loop: |
* send bytes num --n first |
* then n bytes data |
* do loop |
* stop when bytes num == 0 end with "\r\n" |
* then end all data end with "\r\n" |
*/ |
if (!proxy_flag) { |
if ((temp = strstr (rcvbuf, CONTENT_LEN_STR))) { |
sscanf (temp, "Content-Length:%d" , &content_len); |
} else if (temp = strstr (rcvbuf, CHUNKED_STR)){ |
content_len = 0; |
} else { |
return -1; |
} |
} |
temp = strstr (rcvbuf, "\r\n\r\n" ); |
//in case recv done. |
if (content_len && content_len <= rcvsize - (temp + 4 - rcvbuf)) { |
memcpy (buf, temp + 4, content_len); |
buf[content_len] = 0; |
return 0; |
} |
//content_len == 0 表示chunked模式 |
if (!proxy_flag && !content_len) { |
if (temp) { |
temp += 4; |
bufoffset = 0; |
} |
while ((chunk_return = read_chunk(fd, buf + bufoffset, temp, |
rcvsize - (temp - rcvbuf))) >= 0) { |
if (chunk_return == 0) { |
break ; |
} else { |
temp = NULL; |
bufoffset += chunk_return; |
} |
} |
} else { |
bufoffset = rcvsize - (temp + 4 - rcvbuf); |
memcpy (buf, temp + 4, bufoffset); |
if ((rcvsize = read_full(fd, buf + bufoffset, content_len - bufoffset, proxy_flag)) < 0) { |
return -1; |
} |
bufoffset += rcvsize; |
} |
buf[bufoffset] = 0; |
return 0; |
} |
char *geturl( char *url, char *proxy) |
{ |
static char rcvbuf[MAX_RCVBUF_LEN]; |
int fd, content_len = 0; |
char *temp_p; |
char ipbuf[MAX_IP_LEN]; |
struct sockaddr_in saddr; |
int addrlen = sizeof ( struct sockaddr_in); |
unsigned short port; |
int proxy_flag = 0; |
struct hostent *hostinfo_p; |
struct url_parse_t parsed_url; |
saddr.sin_family = AF_INET; |
bzero(rcvbuf, MAX_RCVBUF_LEN); |
if (proxy) { |
if (parse_proxy(proxy, ipbuf, &port) < 0) { |
fprintf (stderr, "parse proxy error\n" ); |
} else { |
saddr.sin_addr.s_addr = inet_addr(ipbuf); |
saddr.sin_port = htons(port); |
proxy_flag = 1; |
} |
} |
if (parse_url(url, &parsed_url) < 0) { |
fprintf (stderr, "parse url error" ); |
return NULL; |
} |
if (!proxy_flag) { |
hostinfo_p = gethostbyname(parsed_url.host); |
saddr.sin_addr = *( struct in_addr *)(hostinfo_p->h_addr); |
saddr.sin_port = htons(parsed_url.port); |
} |
if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) { |
perror ( "socket error" ); |
return NULL; |
} |
if (connect(fd, ( struct sockaddr *)&saddr, addrlen) < 0) { |
perror ( "connect error" ); |
return NULL; |
} |
if (recv_page(fd, rcvbuf, proxy_flag, &parsed_url) < 0) { |
return NULL; |
} |
return rcvbuf; |
} |
/*
 * Command-line driver: fetch one URL and print the response body.
 *
 * Usage: prog host [proxy]
 *   host   URL to fetch.
 *   proxy  optional "ip:port" of the HTTP proxy to use, or "-" to connect to
 *          the host directly. When omitted, the built-in proxy address is
 *          used.
 *
 * Prints the body, or "temp NULL" when the fetch failed. Exits with status 1
 * on a usage error, otherwise 0.
 */
int main(int argc, char **argv)
{
    char default_proxy[] = "110.4.12.170:80";
    char *proxy = default_proxy;
    char *temp;

    if (argc != 2 && argc != 3) {
        fprintf(stderr, "%s host [proxy_ip:port|-]\n", argv[0]);
        exit(1);
    }
    if (argc == 3) {
        proxy = strcmp(argv[2], "-") == 0 ? NULL : argv[2];
    }

    temp = geturl(argv[1], proxy);
    if (temp) {
        printf("%s\n", temp);
    } else {
        printf("temp NULL\n");
    }
    return 0;
}