用户注册



邮箱:

密码:

用户登录


邮箱:

密码:
记住登录一个月忘记密码?

发表随想


还能输入:200字
云代码 - c代码库

geturl

2013-07-01 作者: kelly举报

[c]代码库

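/*
 * The server's status code is not parsed.
 * Only the resource data sent by the server is returned.
 * The data size is limited to at most MAX_RCVBUF_LEN (800*1024) bytes.
 *
 */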
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <strings.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <errno.h>
#include <netdb.h>
 
/* Standard HTTP port. Not referenced by the code visible in this file;
 * parse_url() uses a literal 80 instead. */
#define DEFAULT_PORT 80
/* Header name recv_page() searches for to learn the body length. */
#define CONTENT_LEN_STR "Content-Length"
/* Header line recv_page() searches for to detect a chunked response. */
#define CHUNKED_STR "Transfer-Encoding: chunked"
/* Size of the host/resource fields of struct url_parse_t and of the
 * resource buffer in recv_page(). */
#define MAX_URL_LEN 2048
/* Size of the scratch buffers used for the request, the first read of the
 * response, and the chunk-size line. */
#define MAX_BUF_LEN 2048
/* Size of the buffer that receives the proxy address text in geturl(). */
#define MAX_IP_LEN 16
/* Size of the static result buffer in geturl(); one byte is kept for the
 * terminating NUL. */
#define MAX_RCVBUF_LEN (800*1024)
/* Number of bytes read_chunk() reads from the socket to obtain a chunk-size line. */
#define CHUNK_SIZE_LEN 6
/* Line terminator that follows a chunk-size line and chunk data, and its length. */
#define CHUNK_END_STR "\r\n"
#define CHUNK_END_STR_LEN 2
 
#define REQUEST_FORMART "GET %s HTTP/1.1\r\nHost:%s\r\nConnection: close\r\nUser-Agent: Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.34 (KHTML, like Gecko) rekonq/1.1 Safari/534.34\r\nAccept: text/ihtml,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\nAccept-Encoding: \r\nAccept-Charset:utf-8,*;q=0.5\r\nAccept-Language: zh-CN, en-US; q=0.8, en; q=0.6\r\n\r\n"
 
/* Pieces of a URL as filled in by parse_url(). */
struct url_parse_t {
    /* Host part of the URL, NUL-terminated.
     * NOTE(review): parse_url() does not strip a ":port" suffix from it. */
    char host[MAX_URL_LEN];
    /* Port in host byte order (80 unless the URL names one);
     * geturl() converts it with htons(). */
    unsigned short port;
    /* Path to request, NUL-terminated; starts with '/'. */
    char resource[MAX_URL_LEN];
};
 
static int parse_url(char *url, struct url_parse_t *url_parse)
{
    char *temp;
    char *parse_p = url;
    char host[1024] = {0};
    char resource[1024] = {0};
    int port = 80;
 
    if (temp = strstr(parse_p, "http://")) {
        parse_p = url + 7;
    }
 
    if (temp = strstr(parse_p, "/")) {
        strncpy(host, parse_p, temp - parse_p);
        strcpy(resource, temp);
    } else {
        strcpy(host, parse_p);
        resource[0] = '/';
    }
 
    if (temp = strstr(host, ":")) {
        port = atoi(temp + 1);
        if (port <= 0) {
            return -1;
        }
    }
 
    strcpy(url_parse->host, host);
    strcpy(url_parse->resource, resource);
    url_parse->port = port;
    return 0;
}
 
/*
 * Split a proxy specification of the form "address:port".
 *
 * proxy - NUL-terminated text, e.g. "110.4.12.170:80"; not modified.
 * ip    - receives the address part as a NUL-terminated string. The buffer
 *         must hold at least 16 bytes (a dotted-quad IPv4 address plus NUL);
 *         longer address parts are rejected instead of overflowing it.
 * port  - receives the port number (1..65535).
 *
 * Returns 0 on success. Returns -1 and leaves ip/port untouched when an
 * argument is NULL, the ':' is missing, the address part is empty or too
 * long, or the port is not a plain decimal number in range.
 */
static int parse_proxy(char *proxy, char *ip, unsigned short *port)
{
    enum { IP_TEXT_MAX = 15 }; /* strlen("255.255.255.255") */
    const char *colon;
    char *end;
    size_t ip_len;
    unsigned long value;

    if (proxy == NULL || ip == NULL || port == NULL) {
        return -1;
    }

    colon = strchr(proxy, ':');
    if (colon == NULL) {
        return -1;
    }

    ip_len = (size_t)(colon - proxy);
    if (ip_len == 0 || ip_len > IP_TEXT_MAX) {
        return -1;
    }

    /* Require a digit first: strtoul() alone would accept sign/blanks. */
    if (colon[1] < '0' || colon[1] > '9') {
        return -1;
    }
    errno = 0;
    value = strtoul(colon + 1, &end, 10);
    if (errno != 0 || *end != '\0' || value == 0 || value > 65535UL) {
        return -1;
    }

    memcpy(ip, proxy, ip_len);
    ip[ip_len] = '\0';
    *port = (unsigned short)value;
    return 0;
}
 
/*
 * Read up to content_len bytes from fd into buf, looping over short reads.
 *
 * fd          - descriptor to read from.
 * buf         - destination, at least content_len bytes.
 * content_len - number of bytes wanted; must not be negative.
 * proxy_flag  - non-zero: the length is only an upper bound and end-of-file
 *               simply ends the transfer. Zero: exactly content_len bytes
 *               are required and an early end-of-file is an error.
 *
 * Returns the number of bytes actually stored (equal to content_len unless
 * proxy_flag is set and the peer closed early), or -1 on a read error, a
 * negative length, or a premature end-of-file with proxy_flag == 0.
 */
static int read_full(int fd, char *buf, int content_len, int proxy_flag)
{
    int received = 0;

    if (content_len < 0) {
        return -1;
    }

    while (received < content_len) {
        ssize_t n = read(fd, buf + received, (size_t)(content_len - received));

        if (n < 0) {
            if (errno == EINTR) {
                continue; /* interrupted before any data arrived: retry */
            }
            return -1;
        }
        if (n == 0) {
            /* End-of-file: without this exit the loop would never terminate. */
            if (proxy_flag) {
                break;
            }
            return -1;
        }
        received += (int)n;
    }
    return received;
}
 
/* Result codes of chunk_read_size() besides a non-negative size. */
enum { CHUNK_PARSE_ERROR = -1, CHUNK_PARSE_EOF = -2 };

/* Byte source for the chunk decoder: bytes that were already pulled off the
 * socket are served first, then the descriptor itself. */
struct chunk_src {
    const char *pending; /* unread part of the caller-supplied bytes */
    int pending_len;     /* number of bytes left at pending */
    int fd;              /* descriptor to continue reading from */
};

/* Fetch the next byte of the stream: 1 on success, 0 at end-of-file, -1 on error. */
static int chunk_next_byte(struct chunk_src *src, char *out)
{
    if (src->pending_len > 0) {
        *out = *src->pending++;
        src->pending_len--;
        return 1;
    }
    for (;;) {
        ssize_t n = read(src->fd, out, 1);

        if (n == 1) {
            return 1;
        }
        if (n == 0) {
            return 0;
        }
        if (errno != EINTR) {
            return -1;
        }
    }
}

/* Store exactly len stream bytes at dst: 0 on success, -1 on error or early end-of-file. */
static int chunk_read_data(struct chunk_src *src, char *dst, int len)
{
    int done = src->pending_len < len ? src->pending_len : len;

    if (done > 0) {
        memcpy(dst, src->pending, (size_t)done);
        src->pending += done;
        src->pending_len -= done;
    } else {
        done = 0;
    }

    while (done < len) {
        ssize_t n = read(src->fd, dst + done, (size_t)(len - done));

        if (n < 0) {
            if (errno == EINTR) {
                continue;
            }
            return -1;
        }
        if (n == 0) {
            return -1;
        }
        done += (int)n;
    }
    return 0;
}

/* Consume one chunk-size line and return the size; CHUNK_PARSE_EOF when the
 * stream ends before any size digit, CHUNK_PARSE_ERROR on malformed input. */
static long chunk_read_size(struct chunk_src *src)
{
    long size = 0;
    int digits = 0;
    int significant = 0;
    int rc;
    char c;

    /* Tolerate stray line ends/blanks in front of the size. */
    do {
        rc = chunk_next_byte(src, &c);
        if (rc < 0) {
            return CHUNK_PARSE_ERROR;
        }
        if (rc == 0) {
            return CHUNK_PARSE_EOF;
        }
    } while (c == '\r' || c == '\n' || c == ' ' || c == '\t');

    for (;;) {
        int v;

        if (c >= '0' && c <= '9') {
            v = c - '0';
        } else if (c >= 'a' && c <= 'f') {
            v = c - 'a' + 10;
        } else if (c >= 'A' && c <= 'F') {
            v = c - 'A' + 10;
        } else {
            break;
        }
        /* At most 7 significant hex digits, so the value always fits an int. */
        if ((size != 0 || v != 0) && ++significant > 7) {
            return CHUNK_PARSE_ERROR;
        }
        size = size * 16 + v;
        digits++;
        if (chunk_next_byte(src, &c) <= 0) {
            return CHUNK_PARSE_ERROR;
        }
    }
    if (digits == 0) {
        return CHUNK_PARSE_ERROR;
    }

    /* Skip chunk extensions and the CR; the line ends at LF. */
    while (c != '\n') {
        if (chunk_next_byte(src, &c) <= 0) {
            return CHUNK_PARSE_ERROR;
        }
    }
    return size;
}

/*
 * Decode chunked-transfer data ("<hex size>CRLF<data>CRLF") into buf.
 *
 * fd           - socket positioned inside the chunked body.
 * buf          - destination; the decoded data is NUL-terminated.
 *                NOTE: no capacity is passed in, so the caller must guarantee
 *                that buf can hold the decoded data plus one byte.
 * recved_bytes - bytes of the body that were already read from fd (they start
 *                at a chunk-size line), or NULL when there are none.
 * recved_size  - number of bytes at recved_bytes.
 *
 * One chunk is decoded per call. If recved_bytes still holds data after that
 * chunk, the following chunks are decoded as well until those bytes are used
 * up, because the caller has no way to hand them in again.
 *
 * Return value:
 *   > 0  number of data bytes stored in buf; call again for the next chunk
 *   0    transfer finished: the zero-size chunk was read, or the peer closed
 *        the connection exactly at a chunk boundary
 *   -1   read error or malformed chunk framing
 */
static int read_chunk(int fd, char *buf, const char *recved_bytes, int recved_size)
{
    struct chunk_src src;
    int total = 0;

    src.fd = fd;
    src.pending = recved_bytes;
    src.pending_len = (recved_bytes != NULL && recved_size > 0) ? recved_size : 0;

    do {
        char crlf[2];
        long chunk_len = chunk_read_size(&src);

        if (chunk_len == CHUNK_PARSE_EOF) {
            chunk_len = 0; /* connection closed between chunks: treat as the end */
        }
        if (chunk_len < 0) {
            return -1;
        }
        if (chunk_len == 0) {
            break;
        }

        if (chunk_read_data(&src, buf + total, (int)chunk_len) < 0) {
            return -1;
        }
        total += (int)chunk_len;

        /* Every chunk's data is followed by CRLF that is not part of the data. */
        if (chunk_read_data(&src, crlf, 2) < 0 || crlf[0] != '\r' || crlf[1] != '\n') {
            return -1;
        }
    } while (src.pending_len > 0);

    buf[total] = '\0';
    return total;
}
 
static int recv_page(int fd, char *buf, int proxy_flag, struct url_parse_t *parsed_url)
{
    char rcvbuf[MAX_BUF_LEN];
    char resourcebuf[MAX_URL_LEN];
    char *temp;
    int content_len = MAX_RCVBUF_LEN - 1;
    int bufoffset = 0;
    int templen;
    int rcvsize;
    int chunk_return;
 
    if (proxy_flag) {
        sprintf(resourcebuf, "http://%s/%s", parsed_url->host, parsed_url->resource);
    } else {
        sprintf(resourcebuf, "%s", parsed_url->resource);
    }
 
    //sprintf(rcvbuf, REQUEST_FORMART, resourcebuf, parsed_url->host, parsed_url->port);
    sprintf(rcvbuf, REQUEST_FORMART, resourcebuf, parsed_url->host);
 
    if (write(fd, rcvbuf, strlen(rcvbuf)) < 0) {
        return -1;
    }
 
    rcvsize = read(fd, rcvbuf, sizeof(rcvbuf));
    printf("rcvsize:%d, response:%s\n", rcvsize, rcvbuf);
 
    /* 
     *   if "chunked"
     *   loop:
     *   send bytes num --n first
     *   then n bytes data
     *   do loop
     *   stop when bytes num == 0 end with "\r\n"
     *   then end all data end with "\r\n"
     */
    if (!proxy_flag) {
        if ((temp = strstr(rcvbuf, CONTENT_LEN_STR))) {
            sscanf(temp, "Content-Length:%d", &content_len);
        } else if (temp = strstr(rcvbuf, CHUNKED_STR)){
            content_len = 0;
        } else {
            return -1;
        }
    }
 
    temp = strstr(rcvbuf, "\r\n\r\n");
    //in case recv done.
    if (content_len && content_len <= rcvsize - (temp + 4 - rcvbuf)) {
        memcpy(buf, temp + 4, content_len);
        buf[content_len] = 0;
        return 0;
    }
 
    //content_len == 0 表示chunked模式
    if (!proxy_flag && !content_len) {
        if (temp) {
            temp += 4;
            bufoffset = 0;
        }
        while((chunk_return = read_chunk(fd, buf + bufoffset, temp,
                        rcvsize - (temp - rcvbuf))) >= 0) {
            if (chunk_return == 0) {
                break;
            } else {
                temp = NULL;
                bufoffset += chunk_return;
            }
        }
    } else {
        bufoffset = rcvsize - (temp + 4 - rcvbuf);
        memcpy(buf, temp + 4, bufoffset);
        if ((rcvsize = read_full(fd, buf + bufoffset, content_len - bufoffset, proxy_flag)) < 0) {
            return -1;
        }
        bufoffset += rcvsize;
    }
 
    buf[bufoffset] = 0;
    return 0;
}
 
/*
 * Fetch the resource named by url with an HTTP GET request.
 *
 * url   - "[http://]host[:port][/path]".
 * proxy - "ip:port" of an HTTP proxy to connect through, or NULL for a
 *         direct connection. When the proxy text cannot be parsed a message
 *         is printed and a direct connection is used instead.
 *
 * Returns a pointer to the NUL-terminated response body, or NULL on failure.
 * The result lives in a static buffer of MAX_RCVBUF_LEN bytes: it is
 * overwritten by the next call and must not be freed.
 */
char *geturl(char *url, char *proxy)
{
    static char rcvbuf[MAX_RCVBUF_LEN];
    char ipbuf[MAX_IP_LEN];
    struct sockaddr_in saddr;
    struct url_parse_t parsed_url;
    unsigned short port;
    int proxy_flag = 0;
    int fd;
    int rc;

    if (url == NULL) {
        return NULL;
    }

    memset(rcvbuf, 0, sizeof(rcvbuf));
    memset(&saddr, 0, sizeof(saddr));
    saddr.sin_family = AF_INET;

    if (proxy) {
        in_addr_t proxy_addr;

        if (parse_proxy(proxy, ipbuf, &port) < 0 ||
            (proxy_addr = inet_addr(ipbuf)) == INADDR_NONE) {
            fprintf(stderr, "parse proxy error\n");
        } else {
            saddr.sin_addr.s_addr = proxy_addr;
            saddr.sin_port = htons(port);
            proxy_flag = 1;
        }
    }

    if (parse_url(url, &parsed_url) < 0) {
        fprintf(stderr, "parse url error");
        return NULL;
    }

    if (!proxy_flag) {
        struct hostent *hostinfo_p = gethostbyname(parsed_url.host);

        /* The lookup fails for unknown hosts; only IPv4 results fit saddr. */
        if (hostinfo_p == NULL || hostinfo_p->h_addrtype != AF_INET ||
            hostinfo_p->h_length != (int)sizeof(saddr.sin_addr) ||
            hostinfo_p->h_addr_list[0] == NULL) {
            fprintf(stderr, "resolve host error\n");
            return NULL;
        }
        memcpy(&saddr.sin_addr, hostinfo_p->h_addr_list[0], sizeof(saddr.sin_addr));
        saddr.sin_port = htons(parsed_url.port);
    }

    if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
        perror("socket error");
        return NULL;
    }

    if (connect(fd, (struct sockaddr *)&saddr, sizeof(saddr)) < 0) {
        perror("connect error"); /* report before close() can change errno */
        close(fd);
        return NULL;
    }

    rc = recv_page(fd, rcvbuf, proxy_flag, &parsed_url);
    close(fd);

    return rc < 0 ? NULL : rcvbuf;
}
 
/*
 * Command line front end: fetch one URL and print the body on stdout.
 *
 * usage: prog url [proxy_ip:port | -]
 *
 * Without a second argument the built-in proxy is used, as before.
 * "-" requests a direct connection.
 *
 * Exit status: 0 when the page was fetched, 1 on a usage error or when the
 * fetch failed.
 */
int main(int argc, char **argv)
{
    /* Proxy used when none is named on the command line. */
    static char default_proxy[] = "110.4.12.170:80";
    char *proxy = default_proxy;
    char *page;

    if (argc < 2 || argc > 3) {
        fprintf(stderr, "%s host [proxy_ip:port | -]\n", argv[0]);
        exit(1);
    }
    if (argc == 3) {
        proxy = strcmp(argv[2], "-") == 0 ? NULL : argv[2];
    }

    page = geturl(argv[1], proxy);
    if (page == NULL) {
        printf("temp NULL\n");
        return 1;
    }

    printf("%s\n", page);
    return 0;
}


网友评论    (发表评论)


发表评论:

评论须知:

  • 1、评论每次加2分,每天上限为30;
  • 2、请文明用语,共同创建干净的技术交流环境;
  • 3、若被发现提交非法信息,评论将会被删除,并且给予扣分处理,严重者给予封号处理;
  • 4、请勿发布广告信息或其他无关评论,否则将会删除评论并扣分,严重者给予封号处理。


扫码下载

加载中,请稍后...

输入口令后可复制整站源码

加载中,请稍后...