/* [c] code library */
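/*
* The server's response status code is not parsed.
* Only the resource data sent by the server is returned.
* The maximum supported data size is MAX_RCVBUF_LEN (800*1024).
*
*/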
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <strings.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <errno.h>
#include <netdb.h>
/* Default HTTP port. */
#define DEFAULT_PORT 80
/* Name of the response header that carries the body length. */
#define CONTENT_LEN_STR "Content-Length"
/* Response header line that announces a chunked body. */
#define CHUNKED_STR "Transfer-Encoding: chunked"
/* Size of the host and resource buffers in struct url_parse_t. */
#define MAX_URL_LEN 2048
/* Size of the general-purpose scratch buffers (request / response header). */
#define MAX_BUF_LEN 2048
/* Buffer size for a dotted-quad IPv4 string: 15 characters plus the NUL. */
#define MAX_IP_LEN 16
/* Size of the static page buffer in geturl(); upper bound on the body size. */
#define MAX_RCVBUF_LEN (800*1024)
/* Nominal length, in bytes, of a chunk-size line (hex digits plus CRLF). */
#define CHUNK_SIZE_LEN 6
/* Line terminator used by the chunked encoding, and its length in bytes. */
#define CHUNK_END_STR "\r\n"
#define CHUNK_END_STR_LEN 2
/*
 * printf-style template of the GET request. It takes two %s arguments, in
 * order: the request target and the value of the Host header.
 * "Connection: close" asks the server to close the connection after the reply.
 * NOTE(review): the macro name is misspelled ("FORMART"); it is left as is
 * because other code refers to it by this name.
 * NOTE(review): "text/ihtml" in the Accept header looks like a typo for
 * "text/html" - confirm before changing the request that is sent.
 */
#define REQUEST_FORMART "GET %s HTTP/1.1\r\nHost:%s\r\nConnection: close\r\nUser-Agent: Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.34 (KHTML, like Gecko) rekonq/1.1 Safari/534.34\r\nAccept: text/ihtml,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\nAccept-Encoding: \r\nAccept-Charset:utf-8,*;q=0.5\r\nAccept-Language: zh-CN, en-US; q=0.8, en; q=0.6\r\n\r\n"
/*
 * Result of splitting a URL, filled in by parse_url().
 */
struct url_parse_t {
/* Authority part of the URL: everything between the optional "http://"
 * prefix and the first '/'. A ":port" suffix, if present, is kept here. */
char host[MAX_URL_LEN];
/* Port to connect to: 80 unless the URL carries a ":port" suffix. */
unsigned short port;
/* Request path starting at the first '/', or "/" when the URL has none. */
char resource[MAX_URL_LEN];
};
/*
 * Split a URL of the form [http://]host[:port][/path] into its parts.
 *
 * url        NUL-terminated URL; it is only read.
 * url_parse  receives the result and is written only on success:
 *            host     - the authority part, INCLUDING any ":port" suffix
 *            port     - the explicit port, or 80 when none is given
 *            resource - the path from the first '/', or "/" when absent
 *
 * Returns 0 on success, -1 when an argument is NULL, the host name is empty,
 * a component does not fit its destination buffer, or the port is not a
 * decimal number in 1..65535.
 */
static int parse_url(char *url, struct url_parse_t *url_parse)
{
    static const char scheme[] = "http://";
    const size_t scheme_len = sizeof scheme - 1;
    const char *host_start;
    const char *path_start;
    const char *colon;
    size_t host_len;
    size_t name_len;
    long port = 80;

    if (!url || !url_parse) {
        return -1;
    }

    /* The scheme only counts as a prefix; "http://" later in the URL
     * (for example inside a query string) must not be skipped. */
    host_start = url;
    if (strncasecmp(url, scheme, scheme_len) == 0) {
        host_start += scheme_len;
    }

    path_start = strchr(host_start, '/');
    host_len = path_start ? (size_t)(path_start - host_start)
                          : strlen(host_start);

    /* Reject anything that would not fit, terminator included. */
    if (host_len >= sizeof url_parse->host) {
        return -1;
    }
    if (path_start && strlen(path_start) >= sizeof url_parse->resource) {
        return -1;
    }

    colon = memchr(host_start, ':', host_len);
    name_len = colon ? (size_t)(colon - host_start) : host_len;
    if (name_len == 0) {
        return -1;
    }

    if (colon) {
        char *end;

        errno = 0;
        port = strtol(colon + 1, &end, 10);
        /* The digits must be present and must run up to the end of the
         * authority part, so "host:80abc" and "host:" are rejected. */
        if (end == colon + 1 || end != host_start + host_len ||
            errno == ERANGE || port < 1 || port > 65535) {
            return -1;
        }
    }

    memcpy(url_parse->host, host_start, host_len);
    url_parse->host[host_len] = '\0';
    strcpy(url_parse->resource, path_start ? path_start : "/");
    url_parse->port = (unsigned short)port;
    return 0;
}
/*
 * Split a proxy specification "a.b.c.d:port" into address text and port.
 *
 * proxy  NUL-terminated "ip:port" string.
 * ip     receives the text before the first ':' as a NUL-terminated string.
 *        At most 15 characters are accepted (the longest dotted-quad IPv4
 *        address), so the destination must hold at least 16 bytes.
 * port   receives the port number.
 *
 * Returns 0 on success. Returns -1, leaving ip and port untouched, when an
 * argument is NULL, the ':' is missing, the address part is empty or too
 * long, or the port is not a decimal number in 1..65535.
 */
static int parse_proxy(char *proxy, char *ip, unsigned short *port)
{
    enum { IP_TEXT_MAX = 15 };
    const char *colon;
    char *end;
    size_t ip_len;
    unsigned long value;

    if (!proxy || !ip || !port) {
        return -1;
    }

    colon = strchr(proxy, ':');
    if (!colon) {
        return -1;
    }

    ip_len = (size_t)(colon - proxy);
    if (ip_len == 0 || ip_len > IP_TEXT_MAX) {
        return -1;
    }

    errno = 0;
    value = strtoul(colon + 1, &end, 10);
    if (end == colon + 1 || errno == ERANGE || value < 1 || value > 65535) {
        return -1;
    }

    memcpy(ip, proxy, ip_len);
    ip[ip_len] = '\0';
    *port = (unsigned short)value;
    return 0;
}
/*
 * Read up to content_len bytes from fd into buf, looping over short reads.
 *
 * fd           descriptor to read from.
 * buf          destination; must hold at least content_len bytes.
 * content_len  number of bytes wanted; a negative value is an error.
 * proxy_flag   non-zero: end-of-file is a normal way for the data to end and
 *              simply stops the read. Zero: all content_len bytes are
 *              required and an early end-of-file is an error.
 *
 * Returns the number of bytes actually stored in buf (equal to content_len
 * unless end-of-file stopped the read in proxy mode), or -1 on error.
 */
static int read_full(int fd, char *buf, int content_len, int proxy_flag)
{
    int total = 0;

    if (content_len < 0) {
        return -1;
    }

    while (total < content_len) {
        ssize_t n = read(fd, buf + total, (size_t)(content_len - total));

        if (n < 0) {
            if (errno == EINTR) {
                continue;   /* interrupted before any data: just retry */
            }
            return -1;
        }
        if (n == 0) {
            /* End of file. Without this check a closed connection would
             * keep the loop spinning forever. */
            if (!proxy_flag) {
                return -1;
            }
            break;
        }
        total += (int)n;
    }
    return total;
}
/*
 * Fetch the next byte of the chunk stream: first from the bytes the caller
 * has already received, then from the socket.
 *
 * used  in/out: how many bytes of recved_bytes have been consumed so far.
 *
 * Returns 0 on success, -1 when the socket read fails.
 */
static int chunk_next_byte(int fd, const char *recved_bytes, int recved_size,
                           int *used, char *out)
{
    if (*used < recved_size) {
        *out = recved_bytes[*used];
        (*used)++;
        return 0;
    }
    return read_full(fd, out, 1, 0) == 1 ? 0 : -1;
}

/*
 * Receive one chunk of a "Transfer-Encoding: chunked" body.
 *
 * fd            socket to read from.
 * buf           destination for the chunk data. It must have room for the
 *               chunk plus one terminating NUL; this function cannot see the
 *               capacity, so the caller is responsible for it.
 * recved_bytes  bytes that were already read from the socket and belong to
 *               the start of this chunk (its size line, possibly followed by
 *               data), or NULL when nothing has been read yet.
 * recved_size   number of valid bytes in recved_bytes.
 *
 * The size line is assembled byte by byte until CRLF, so it may be of any
 * length up to the scratch buffer and may carry chunk extensions. The CRLF
 * that follows the chunk data is always consumed and verified.
 *
 * Return value:
 *   0   the chunk size is 0: the transfer is finished
 *   >0  size of the chunk now stored in buf; the next chunk can be read
 *   -1  receive or protocol error. This includes recved_bytes holding more
 *       than this one chunk, because the surplus could not be handed back
 *       to the caller and would otherwise be lost silently.
 */
static int read_chunk(int fd, char *buf, const char *recved_bytes, int recved_size)
{
    enum { CHUNK_LEN_MAX = 0x7ffffffe };    /* keeps the int return value valid */
    char size_line[MAX_BUF_LEN];
    char terminator[CHUNK_END_STR_LEN];
    char *size_end;
    unsigned long chunk_len;
    int line_len = 0;
    int used = 0;
    int buffered;
    int i;

    if (!buf) {
        return -1;
    }
    if (!recved_bytes || recved_size < 0) {
        recved_size = 0;
    }

    /* Collect the size line, terminator included. */
    for (;;) {
        if (line_len >= (int)sizeof size_line - 1) {
            return -1;
        }
        if (chunk_next_byte(fd, recved_bytes, recved_size, &used,
                            &size_line[line_len]) < 0) {
            return -1;
        }
        line_len++;
        if (line_len >= CHUNK_END_STR_LEN &&
            memcmp(size_line + line_len - CHUNK_END_STR_LEN, CHUNK_END_STR,
                   CHUNK_END_STR_LEN) == 0) {
            break;
        }
    }
    size_line[line_len - CHUNK_END_STR_LEN] = '\0';

    /* The size is hexadecimal; anything after the digits (a chunk
     * extension) is ignored. */
    errno = 0;
    chunk_len = strtoul(size_line, &size_end, 16);
    if (size_end == size_line || errno == ERANGE || chunk_len > CHUNK_LEN_MAX) {
        return -1;
    }
    if (chunk_len == 0) {
        return 0;
    }

    /* Chunk data: use what the caller already holds, then the socket. */
    buffered = recved_size - used;
    if (buffered > (int)chunk_len) {
        buffered = (int)chunk_len;
    }
    if (buffered > 0) {
        memcpy(buf, recved_bytes + used, (size_t)buffered);
        used += buffered;
    }
    if (read_full(fd, buf + buffered, (int)chunk_len - buffered, 0) < 0) {
        return -1;
    }

    /* Every chunk is followed by CRLF that is not part of its data. */
    for (i = 0; i < CHUNK_END_STR_LEN; i++) {
        if (chunk_next_byte(fd, recved_bytes, recved_size, &used,
                            &terminator[i]) < 0) {
            return -1;
        }
    }
    if (memcmp(terminator, CHUNK_END_STR, CHUNK_END_STR_LEN) != 0) {
        return -1;
    }

    if (used < recved_size) {
        return -1;
    }

    buf[chunk_len] = '\0';
    return (int)chunk_len;
}
/*
 * Write all len bytes of data to fd, looping over short writes.
 * Returns 0 on success, -1 on error.
 */
static int send_all(int fd, const char *data, size_t len)
{
    while (len > 0) {
        ssize_t n = write(fd, data, len);

        if (n < 0 && errno == EINTR) {
            continue;
        }
        if (n <= 0) {
            return -1;
        }
        data += n;
        len -= (size_t)n;
    }
    return 0;
}

/*
 * Read from fd one byte at a time until the data ends with terminator.
 * Reading single bytes guarantees that nothing after the terminator is
 * taken off the socket. dst is NUL-terminated on success.
 *
 * Returns the number of bytes stored (terminator included), or -1 on read
 * error, end-of-file, or when capacity is exhausted first.
 */
static int recv_until(int fd, char *dst, int capacity, const char *terminator)
{
    int term_len = (int)strlen(terminator);
    int len = 0;

    while (len < capacity - 1) {
        ssize_t n = read(fd, dst + len, 1);

        if (n < 0 && errno == EINTR) {
            continue;
        }
        if (n <= 0) {
            return -1;
        }
        len++;
        if (len >= term_len &&
            memcmp(dst + len - term_len, terminator, (size_t)term_len) == 0) {
            dst[len] = '\0';
            return len;
        }
    }
    return -1;
}

/*
 * Send a GET request for parsed_url on fd and store the response body in buf
 * as a NUL-terminated string.
 *
 * fd          connected socket.
 * buf         destination; it is treated as MAX_RCVBUF_LEN bytes large.
 * proxy_flag  non-zero when fd is connected to a proxy. The request target
 *             is then the absolute URL, the response headers are not
 *             inspected, and the body is everything up to end-of-file
 *             (at most MAX_RCVBUF_LEN - 1 bytes).
 *             Zero: the body length comes from Content-Length, or the body
 *             is decoded from the chunked transfer encoding.
 * parsed_url  host and resource to request.
 *
 * The response status line is not interpreted.
 *
 * Returns 0 on success. Returns -1 when the request does not fit its buffer,
 * on I/O errors, when the response header is larger than MAX_BUF_LEN or
 * names neither Content-Length nor chunked encoding (direct mode), or when
 * the body would not fit in buf.
 */
static int recv_page(int fd, char *buf, int proxy_flag, struct url_parse_t *parsed_url)
{
    static const char line_end[] = "\r\n";
    static const char header_end[] = "\r\n\r\n";
    char target[2 * MAX_URL_LEN + 16];
    char request[MAX_BUF_LEN + 3 * MAX_URL_LEN];
    char header[MAX_BUF_LEN];
    char size_line[128];
    const char *field;
    char *end;
    const int capacity = MAX_RCVBUF_LEN - 1;   /* one byte is kept for the NUL */
    int content_len = capacity;
    int chunked = 0;
    int body_len = 0;
    int n;

    if (proxy_flag) {
        /* The resource normally starts with '/', so a separator is added
         * only when it does not; this avoids "http://host//path". */
        const char *sep = (parsed_url->resource[0] == '/') ? "" : "/";

        n = snprintf(target, sizeof target, "http://%s%s%s",
                     parsed_url->host, sep, parsed_url->resource);
    } else {
        n = snprintf(target, sizeof target, "%s", parsed_url->resource);
    }
    if (n < 0 || (size_t)n >= sizeof target) {
        return -1;
    }

    n = snprintf(request, sizeof request, REQUEST_FORMART, target, parsed_url->host);
    if (n < 0 || (size_t)n >= sizeof request) {
        return -1;
    }
    if (send_all(fd, request, (size_t)n) < 0) {
        return -1;
    }

    /* Take exactly the header off the socket, so the body (or the first
     * chunk-size line) is the next thing to be read. */
    if (recv_until(fd, header, (int)sizeof header, header_end) < 0) {
        return -1;
    }

    if (!proxy_flag) {
        if ((field = strstr(header, CONTENT_LEN_STR)) != NULL) {
            long value;

            field += strlen(CONTENT_LEN_STR);
            if (*field != ':') {
                return -1;
            }
            errno = 0;
            value = strtol(field + 1, &end, 10);
            if (end == field + 1 || errno == ERANGE ||
                value < 0 || value > capacity) {
                return -1;
            }
            content_len = (int)value;
        } else if (strstr(header, CHUNKED_STR) != NULL) {
            chunked = 1;
        } else {
            return -1;
        }
    }

    if (chunked) {
        for (;;) {
            unsigned long chunk_len;

            n = recv_until(fd, size_line, (int)sizeof size_line, line_end);
            if (n == (int)sizeof line_end - 1) {
                /* Tolerate one empty line: the CRLF that terminates the
                 * previous chunk may still be unread. */
                n = recv_until(fd, size_line, (int)sizeof size_line, line_end);
            }
            if (n < 0) {
                return -1;
            }

            errno = 0;
            chunk_len = strtoul(size_line, &end, 16);
            if (end == size_line || errno == ERANGE) {
                return -1;
            }
            if (chunk_len == 0) {
                break;      /* last-chunk marker */
            }
            /* Check the size before read_chunk writes the data: read_chunk
             * itself has no way to know how much room is left in buf. */
            if (chunk_len > (unsigned long)(capacity - body_len)) {
                return -1;
            }

            /* The size line is passed on as the already-received start of
             * the chunk; read_chunk reads the data that follows it. */
            n = read_chunk(fd, buf + body_len, size_line, n);
            if (n <= 0) {
                return -1;
            }
            body_len += n;
        }
    } else {
        n = read_full(fd, buf, content_len, proxy_flag);
        if (n < 0) {
            return -1;
        }
        body_len = n;
    }

    buf[body_len] = '\0';
    return 0;
}
/*
 * Fetch the resource at url over HTTP and return its body.
 *
 * url    URL of the form [http://]host[:port][/path].
 * proxy  "ip:port" of an HTTP proxy to connect through, or NULL for a direct
 *        connection. If the proxy string cannot be parsed, an error is
 *        printed and a direct connection is used instead.
 *
 * Returns a pointer to a static buffer holding the NUL-terminated body, or
 * NULL on failure. The buffer is overwritten by the next call, so the
 * function is not reentrant and the result must not be freed.
 */
char *geturl(char *url, char *proxy)
{
    static char rcvbuf[MAX_RCVBUF_LEN];
    char ipbuf[MAX_IP_LEN];
    struct sockaddr_in saddr;
    struct hostent *hostinfo_p;
    struct url_parse_t parsed_url;
    char lookup_host[sizeof parsed_url.host];
    char *port_sep;
    unsigned short port;
    int proxy_flag = 0;
    int status;
    int fd;

    if (!url) {
        return NULL;
    }

    memset(&saddr, 0, sizeof saddr);
    saddr.sin_family = AF_INET;
    memset(rcvbuf, 0, sizeof rcvbuf);

    if (proxy) {
        if (parse_proxy(proxy, ipbuf, &port) < 0 ||
            (saddr.sin_addr.s_addr = inet_addr(ipbuf)) == INADDR_NONE) {
            fprintf(stderr, "parse proxy error\n");
        } else {
            saddr.sin_port = htons(port);
            proxy_flag = 1;
        }
    }

    if (parse_url(url, &parsed_url) < 0) {
        fprintf(stderr, "parse url error");
        return NULL;
    }

    if (!proxy_flag) {
        /* parsed_url.host still carries any ":port" suffix; the resolver
         * needs the bare host name. */
        snprintf(lookup_host, sizeof lookup_host, "%s", parsed_url.host);
        port_sep = strchr(lookup_host, ':');
        if (port_sep) {
            *port_sep = '\0';
        }

        hostinfo_p = gethostbyname(lookup_host);
        if (!hostinfo_p || hostinfo_p->h_addrtype != AF_INET ||
            hostinfo_p->h_length != (int)sizeof saddr.sin_addr ||
            !hostinfo_p->h_addr_list[0]) {
            fprintf(stderr, "resolve host error\n");
            return NULL;
        }
        memcpy(&saddr.sin_addr, hostinfo_p->h_addr_list[0], sizeof saddr.sin_addr);
        saddr.sin_port = htons(parsed_url.port);
    }

    if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
        perror("socket error");
        return NULL;
    }
    if (connect(fd, (struct sockaddr *)&saddr, sizeof saddr) < 0) {
        perror("connect error");
        close(fd);
        return NULL;
    }

    status = recv_page(fd, rcvbuf, proxy_flag, &parsed_url);
    close(fd);      /* the socket is done with on success and failure alike */

    return status < 0 ? NULL : rcvbuf;
}
/*
 * Command-line entry point: fetch the URL given as the only argument and
 * print the body to stdout.
 *
 * NOTE(review): the proxy address is hard-coded; passing NULL as the second
 * argument of geturl() requests a direct connection instead.
 */
int main(int argc, char **argv)
{
    char *page;

    if (argc != 2) {
        fprintf(stderr, "%s host\n", argv[0]);
        exit(1);
    }

    page = geturl(argv[1], "110.4.12.170:80");
    if (page == NULL) {
        printf("temp NULL\n");
        return 0;
    }

    printf("%s\n", page);
    return 0;
}