用户注册



邮箱:

密码:

用户登录


邮箱:

密码:
记住登录一个月 | 忘记密码?

发表随想


还能输入:200字
云代码 - c代码库

geturl

2013-07-01 作者: kelly | 举报

[c]代码库

/*
 * 没有解析服务器返回码
 * 仅返回服务器发送的资源数据 
 * 数据大小最大支持 MAX_RCVBUF_LEN (800*1024)
 *
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <strings.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <errno.h>
#include <netdb.h>

/* Port used when the URL does not name one. */
#define DEFAULT_PORT 80
/* Header names/values searched for in the response head. */
#define CONTENT_LEN_STR "Content-Length"
#define CHUNKED_STR "Transfer-Encoding: chunked"
/* Buffer capacities, in bytes. */
#define MAX_URL_LEN 2048
#define MAX_BUF_LEN 2048
#define MAX_IP_LEN 16
/* Capacity of the page buffer, including the terminating NUL. */
#define MAX_RCVBUF_LEN (800*1024)
/* Nominal length of a chunk-size line (hex digits plus CRLF). */
#define CHUNK_SIZE_LEN 6
/* Line terminator used by the chunked transfer coding. */
#define CHUNK_END_STR "\r\n"
#define CHUNK_END_STR_LEN 2

/*
 * printf-style template of the GET request.
 * Arguments: request target (%s), then Host header value (%s).
 * "Connection: close" makes the server close the socket after the response;
 * the empty Accept-Encoding asks for an unencoded body.
 */
#define REQUEST_FORMART "GET %s HTTP/1.1\r\nHost:%s\r\nConnection: close\r\nUser-Agent: Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.34 (KHTML, like Gecko) rekonq/1.1 Safari/534.34\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\nAccept-Encoding: \r\nAccept-Charset:utf-8,*;q=0.5\r\nAccept-Language: zh-CN, en-US; q=0.8, en; q=0.6\r\n\r\n"

/* Components of a URL as filled in by parse_url(). */
struct url_parse_t {
	/*
	 * Text between the optional "http://" prefix and the first '/'.
	 * parse_url() copies it unmodified, so a ":port" suffix, if the URL
	 * had one, is still part of this string.
	 */
	char host[MAX_URL_LEN];
	/* Port number; parse_url() uses 80 when the host part has no ':'. */
	unsigned short port;
	/* Path starting at the first '/', or "/" when the URL has none. */
	char resource[MAX_URL_LEN];
};

/*
 * Split `url` into host, port and resource path.
 *
 * Accepted form: [http://]host[:port][/path...]
 *
 * url_parse->host receives the text between the optional "http://" prefix
 * and the first '/', including any ":port" suffix; url_parse->resource
 * receives everything from the first '/' on, or "/" when there is none;
 * url_parse->port receives the explicit port, or 80 when none is given.
 *
 * Returns 0 on success. Returns -1, leaving *url_parse untouched, when an
 * argument is NULL, the host is empty, a component does not fit its
 * destination field, or the port is not a decimal number in 1..65535.
 */
static int parse_url(char *url, struct url_parse_t *url_parse)
{
	static const char scheme[] = "http://";
	const char *authority;
	const char *path;
	const char *colon;
	size_t host_len;
	size_t path_len;
	long port = 80;	/* default HTTP port */

	if (!url || !url_parse) {
		return -1;
	}

	/* Only a leading scheme is skipped; "http://" inside a query is data. */
	authority = url;
	if (strncmp(url, scheme, sizeof(scheme) - 1) == 0) {
		authority += sizeof(scheme) - 1;
	}

	path = strchr(authority, '/');
	host_len = path ? (size_t)(path - authority) : strlen(authority);
	path_len = path ? strlen(path) : 1;
	if (host_len == 0 || host_len >= sizeof(url_parse->host) ||
	    path_len >= sizeof(url_parse->resource)) {
		return -1;
	}

	colon = memchr(authority, ':', host_len);
	if (colon) {
		char *end;

		errno = 0;
		port = strtol(colon + 1, &end, 10);
		/* The digits must run exactly up to the end of the host part. */
		if (end == colon + 1 || end != authority + host_len ||
		    errno == ERANGE || port <= 0 || port > 65535) {
			return -1;
		}
	}

	memcpy(url_parse->host, authority, host_len);
	url_parse->host[host_len] = '\0';
	if (path) {
		memcpy(url_parse->resource, path, path_len + 1);
	} else {
		memcpy(url_parse->resource, "/", 2);
	}
	url_parse->port = (unsigned short)port;
	return 0;
}

/*
 * Split a proxy specification of the form "ip:port".
 *
 * proxy: NUL-terminated input string.
 * ip:    receives the text before the first ':' as a NUL-terminated string.
 *        The buffer must hold at least 16 bytes (15 characters plus NUL,
 *        enough for a dotted-quad IPv4 address); longer text is rejected
 *        rather than written past the end.
 * port:  receives the decimal number following the ':'.
 *
 * Returns 0 on success. Returns -1, leaving *ip and *port untouched, when an
 * argument is NULL, there is no ':', the address part is empty or too long,
 * or the port does not start with a digit or exceeds 65535.
 */
static int parse_proxy(char *proxy, char *ip, unsigned short *port)
{
	enum { IP_TEXT_MAX = 15 };
	const char *colon;
	char *end;
	unsigned long value;
	size_t ip_len;

	if (!proxy || !ip || !port) {
		return -1;
	}

	colon = strchr(proxy, ':');
	if (!colon) {
		return -1;
	}
	ip_len = (size_t)(colon - proxy);
	if (ip_len == 0 || ip_len > (size_t)IP_TEXT_MAX) {
		return -1;
	}

	/* strtoul would silently accept a sign or leading blanks. */
	if (colon[1] < '0' || colon[1] > '9') {
		return -1;
	}
	errno = 0;
	value = strtoul(colon + 1, &end, 10);
	if (errno == ERANGE || value > 65535UL) {
		return -1;
	}

	memcpy(ip, proxy, ip_len);
	ip[ip_len] = '\0';
	*port = (unsigned short)value;
	return 0;
}

/*
 * Read up to `content_len` bytes from `fd` into `buf`, retrying short reads.
 *
 * proxy_flag selects what end-of-file means:
 *   non-zero: EOF ends the transfer normally; the bytes read so far are
 *             returned (possibly fewer than content_len);
 *   zero:     EOF before content_len bytes have arrived is an error.
 *
 * Returns the number of bytes stored in buf, or -1 on a read error, on a
 * negative content_len, or on premature EOF when proxy_flag is zero.
 */
static int read_full(int fd, char *buf, int content_len, int proxy_flag)
{
	int received = 0;

	if (content_len < 0) {
		return -1;
	}

	while (received < content_len) {
		ssize_t got = read(fd, buf + received, (size_t)(content_len - received));

		if (got < 0) {
			if (errno == EINTR) {
				continue;	/* interrupted before any data: retry */
			}
			return -1;
		}
		if (got == 0) {
			/* EOF: retrying would spin forever, so stop here. */
			if (proxy_flag) {
				break;
			}
			return -1;
		}
		received += (int)got;
	}
	return received;
}

/*
 * Receive one chunk of a "Transfer-Encoding: chunked" body.
 *
 * fd:           socket to read from.
 * buf:          receives the chunk payload followed by a NUL byte.
 *               NOTE(review): the capacity of buf is not passed in, so this
 *               function cannot stop a chunk larger than the space left;
 *               the caller has to bound the total.
 * recved_bytes: bytes already received that belong to the start of this
 *               chunk (its chunk-size line and possibly payload), or NULL.
 * recved_size:  number of valid bytes at recved_bytes.
 *
 * Return value:
 *   0   the chunk size is 0: the transfer is finished;
 *   > 0 payload length of a chunk that was received completely, including
 *       its terminating CRLF; the next chunk may be read;
 *   -1  receive or framing error. This includes the case where recved_bytes
 *       extends past the end of this chunk: the surplus belongs to the next
 *       chunk and this interface has no way to hand it back.
 */
static int read_chunk(int fd, char *buf, const char *recved_bytes, int recved_size)
{
	char size_line[MAX_BUF_LEN];
	unsigned int parsed_size = 0;
	int digits_used = 0;
	int line_len = 0;
	int consumed = 0;	/* bytes of recved_bytes used so far */
	int chunk_len;
	int from_prefetch;
	int i;

	if (!buf) {
		return -1;
	}
	if (!recved_bytes || recved_size < 0) {
		recved_size = 0;
	}

	/*
	 * Collect the chunk-size line up to and including its CRLF, first from
	 * the prefetched bytes and then one byte at a time from the socket so
	 * that no payload is read by accident. read_full() is called with a
	 * non-zero last argument so it stops at EOF; the count is checked here.
	 */
	for (;;) {
		char c = 0;

		if (consumed < recved_size) {
			c = recved_bytes[consumed++];
		} else if (read_full(fd, &c, 1, 1) < 1) {
			return -1;
		}
		if (line_len >= (int)sizeof(size_line) - 1) {
			return -1;
		}
		size_line[line_len++] = c;
		if (line_len >= CHUNK_END_STR_LEN &&
		    memcmp(size_line + line_len - CHUNK_END_STR_LEN,
			   CHUNK_END_STR, CHUNK_END_STR_LEN) == 0) {
			break;
		}
	}
	size_line[line_len] = '\0';

	/*
	 * At most 7 hex digits are accepted so the value always fits an int;
	 * the digits must be followed by CR, a chunk extension or blanks.
	 */
	if (sscanf(size_line, "%7x%n", &parsed_size, &digits_used) != 1) {
		return -1;
	}
	if (parsed_size > 0x0FFFFFFFu ||
	    (size_line[digits_used] != '\r' && size_line[digits_used] != ';' &&
	     size_line[digits_used] != ' ' && size_line[digits_used] != '\t')) {
		return -1;
	}
	chunk_len = (int)parsed_size;
	if (chunk_len == 0) {
		return 0;
	}

	/* Payload: prefetched part first, the remainder from the socket. */
	from_prefetch = recved_size - consumed;
	if (from_prefetch > chunk_len) {
		from_prefetch = chunk_len;
	}
	if (from_prefetch > 0) {
		memcpy(buf, recved_bytes + consumed, (size_t)from_prefetch);
		consumed += from_prefetch;
	}
	if (from_prefetch < chunk_len) {
		int wanted = chunk_len - from_prefetch;

		if (read_full(fd, buf + from_prefetch, wanted, 1) != wanted) {
			return -1;
		}
	}
	buf[chunk_len] = '\0';

	/* Every chunk payload is followed by CRLF; consume and verify it. */
	for (i = 0; i < CHUNK_END_STR_LEN; i++) {
		char c = 0;

		if (consumed < recved_size) {
			c = recved_bytes[consumed++];
		} else if (read_full(fd, &c, 1, 1) < 1) {
			return -1;
		}
		if (c != CHUNK_END_STR[i]) {
			return -1;
		}
	}

	/* Leftover prefetched bytes would be lost: report instead of desyncing. */
	if (consumed < recved_size) {
		return -1;
	}
	return chunk_len;
}

/*
 * Find header `name` in a NUL-terminated response head.
 * The name is matched case-insensitively at the start of each line after the
 * status line. Returns a pointer to the header value with leading blanks
 * skipped, or NULL when the header is absent.
 */
static const char *find_header(const char *head, const char *name)
{
	size_t name_len = strlen(name);
	const char *line = head;

	while ((line = strstr(line, "\r\n")) != NULL) {
		const char *value;

		line += 2;
		if (strncasecmp(line, name, name_len) != 0 || line[name_len] != ':') {
			continue;
		}
		value = line + name_len + 1;
		while (*value == ' ' || *value == '\t') {
			value++;
		}
		return value;
	}
	return NULL;
}

/*
 * Send a GET request on the connected socket `fd` and store the response
 * body in `buf` as a NUL-terminated string.
 *
 * buf:        destination; must hold MAX_RCVBUF_LEN bytes.
 * proxy_flag: non-zero when fd is connected to a proxy, in which case the
 *             request target is the absolute URL instead of the path.
 * parsed_url: host and resource to request.
 *
 * The response head is read one byte at a time up to the blank line, so no
 * body bytes are consumed with it. The status line is not interpreted. The
 * body is then read according to the head:
 *   - "Transfer-Encoding: chunked": chunk by chunk via read_chunk();
 *   - "Content-Length: n":          exactly n bytes;
 *   - neither:                      until the peer closes the connection
 *                                   (the request sends "Connection: close").
 *
 * Returns 0 on success, -1 on any error (request too long, I/O failure,
 * malformed head, body larger than the buffer, truncated body).
 */
static int recv_page(int fd, char *buf, int proxy_flag, struct url_parse_t *parsed_url)
{
	char target[2 * MAX_URL_LEN + 8];
	char request[4 * MAX_URL_LEN];
	char head[4 * MAX_BUF_LEN];
	const char *value;
	size_t request_len;
	size_t sent = 0;
	int head_len = 0;
	int total = 0;
	int n;

	if (!buf || !parsed_url) {
		return -1;
	}

	/* resource always starts with '/', so no separator is added. */
	if (proxy_flag) {
		n = snprintf(target, sizeof(target), "http://%s%s",
			     parsed_url->host, parsed_url->resource);
	} else {
		n = snprintf(target, sizeof(target), "%s", parsed_url->resource);
	}
	if (n < 0 || (size_t)n >= sizeof(target)) {
		return -1;
	}

	n = snprintf(request, sizeof(request), REQUEST_FORMART, target, parsed_url->host);
	if (n < 0 || (size_t)n >= sizeof(request)) {
		return -1;
	}
	request_len = (size_t)n;

	/* write() may accept only part of the request. */
	while (sent < request_len) {
		ssize_t written = write(fd, request + sent, request_len - sent);

		if (written < 0 && errno == EINTR) {
			continue;
		}
		if (written <= 0) {
			return -1;
		}
		sent += (size_t)written;
	}

	/* Read the response head up to and including the blank line. */
	for (;;) {
		char c;
		ssize_t got = read(fd, &c, 1);

		if (got < 0 && errno == EINTR) {
			continue;
		}
		if (got <= 0 || head_len >= (int)sizeof(head) - 1) {
			return -1;
		}
		head[head_len++] = c;
		if (head_len >= 4 && memcmp(head + head_len - 4, "\r\n\r\n", 4) == 0) {
			break;
		}
	}
	head[head_len] = '\0';

	value = find_header(head, "Transfer-Encoding");
	if (value && strncasecmp(value, "chunked", 7) == 0) {
		for (;;) {
			int chunk_len = read_chunk(fd, buf + total, NULL, 0);

			if (chunk_len < 0) {
				return -1;
			}
			if (chunk_len == 0) {
				break;
			}
			/*
			 * NOTE(review): read_chunk() is not told how much room
			 * is left, so an oversized chunk is only detected here,
			 * after it has been stored.
			 */
			if (chunk_len > MAX_RCVBUF_LEN - 1 - total) {
				return -1;
			}
			total += chunk_len;
		}
	} else if ((value = find_header(head, CONTENT_LEN_STR)) != NULL) {
		char *end;
		long length;

		errno = 0;
		length = strtol(value, &end, 10);
		if (end == value || errno == ERANGE ||
		    length < 0 || length > MAX_RCVBUF_LEN - 1) {
			return -1;
		}
		/* Stop at EOF instead of retrying; a short body is an error. */
		total = read_full(fd, buf, (int)length, 1);
		if (total != (int)length) {
			return -1;
		}
	} else {
		/* No framing information: the body ends when the peer closes. */
		total = read_full(fd, buf, MAX_RCVBUF_LEN - 1, 1);
		if (total < 0) {
			return -1;
		}
	}

	buf[total] = '\0';
	return 0;
}

/*
 * Fetch `url` over HTTP and return the response body.
 *
 * url:   [http://]host[:port][/path]
 * proxy: optional "ip:port" of an HTTP proxy, or NULL for a direct
 *        connection. If the proxy string cannot be parsed a message is
 *        printed and a direct connection is used instead.
 *
 * Returns a pointer to a static buffer holding the NUL-terminated body, or
 * NULL on failure. The buffer is overwritten by the next call, so the
 * function is not reentrant and the result must not be freed.
 */
char *geturl(char *url, char *proxy)
{
	static char rcvbuf[MAX_RCVBUF_LEN];
	char ipbuf[MAX_IP_LEN];
	struct sockaddr_in saddr;
	struct url_parse_t parsed_url;
	struct hostent *hostinfo_p;
	unsigned short port;
	char *colon;
	char *page = NULL;
	int proxy_flag = 0;
	int fd;

	memset(&saddr, 0, sizeof(saddr));
	saddr.sin_family = AF_INET;
	memset(rcvbuf, 0, sizeof(rcvbuf));

	if (proxy) {
		if (parse_proxy(proxy, ipbuf, &port) < 0 ||
		    inet_pton(AF_INET, ipbuf, &saddr.sin_addr) != 1) {
			fprintf(stderr, "parse proxy error\n");
		} else {
			saddr.sin_port = htons(port);
			proxy_flag = 1;
		}
	}

	if (!url || parse_url(url, &parsed_url) < 0) {
		fprintf(stderr, "parse url error\n");
		return NULL;
	}

	if (!proxy_flag) {
		/*
		 * parsed_url.host may carry a ":port" suffix, which is needed
		 * later for the request but must be hidden from the resolver.
		 */
		colon = strchr(parsed_url.host, ':');
		if (colon) {
			*colon = '\0';
		}
		hostinfo_p = gethostbyname(parsed_url.host);
		if (colon) {
			*colon = ':';
		}
		if (!hostinfo_p || hostinfo_p->h_addrtype != AF_INET ||
		    !hostinfo_p->h_addr_list[0]) {
			fprintf(stderr, "resolve host error\n");
			return NULL;
		}
		memcpy(&saddr.sin_addr, hostinfo_p->h_addr_list[0], sizeof(saddr.sin_addr));
		saddr.sin_port = htons(parsed_url.port);
	}

	if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
		perror("socket error");
		return NULL;
	}

	if (connect(fd, (struct sockaddr *)&saddr, sizeof(saddr)) < 0) {
		perror("connect error");
		close(fd);
		return NULL;
	}

	if (recv_page(fd, rcvbuf, proxy_flag, &parsed_url) == 0) {
		page = rcvbuf;
	}

	close(fd);
	return page;
}

/*
 * Command-line entry point: fetch a URL and print its body on stdout.
 *
 * Usage: prog url [proxy]
 *   url    address to fetch;
 *   proxy  optional "ip:port" of an HTTP proxy, or "-" for a direct
 *          connection. When omitted, the built-in proxy below is used.
 *
 * Exit status: 0 when the page was fetched, 1 on a usage error or when the
 * fetch failed.
 */
int main(int argc, char **argv)
{
	char *proxy = "110.4.12.170:80";
	char *page;

	if (argc != 2 && argc != 3) {
		fprintf(stderr, "usage: %s url [proxy_ip:port|-]\n", argv[0]);
		exit(1);
	}
	if (argc == 3) {
		proxy = strcmp(argv[2], "-") == 0 ? NULL : argv[2];
	}

	page = geturl(argv[1], proxy);
	if (!page) {
		printf("temp NULL\n");
		return 1;
	}
	printf("%s\n", page);
	return 0;
}


网友评论    (发表评论)


发表评论:

评论须知:

  • 1、评论每次加2分,每天上限为30;
  • 2、请文明用语,共同创建干净的技术交流环境;
  • 3、若被发现提交非法信息,评论将会被删除,并且给予扣分处理,严重者给予封号处理;
  • 4、请勿发布广告信息或其他无关评论,否则将会删除评论并扣分,严重者给予封号处理。


扫码下载

加载中,请稍后...

输入口令后可复制整站源码

加载中,请稍后...