使用socket 的api 来实现下载http 网络资源,其实整个流程也十分简单
1、解析url 得到域名 和 资源位置
2、根据域名得到服务器ip地址,然后通过ip 和端口建立socket连接
3、发送http 请求
4、解析http 响应
5、根据返回的响应内容,得到资源的大小,接收资源数据,保存到文件即可
需要引用到的头文件和一些常量的定义
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
/* Size in bytes of the buffer that holds the formatted HTTP request. */
#define HTTP_REQ_LENGTH 512

/*
 * HTTP request template. The printf-style arguments are, in order:
 * the resource path (%s), the host name (%s) and the first byte offset
 * of the requested range (%d).
 */
static const char http_header[] =
    "GET %s HTTP/1.1\r\n"
    "Host: %s\r\n"
    "Range: bytes=%d-\r\n"
    "Connection: Close\r\n"
    "Accept: */*\r\n"
    "\r\n";

/* Buffer the formatted request is written into before it is sent. */
static char http_req_content[HTTP_REQ_LENGTH] = {0};
根据域名创建socket 连接的实现函数
/**
 * @brief create_request_socket Resolve a host name and open a TCP connection to it on port 80.
 *
 * The name is resolved with gethostbyname() and only the first returned
 * address is tried; it must be an IPv4 address.
 *
 * @param host Host name to connect to; must not be NULL or empty.
 * @return Connected socket descriptor on success (the caller must close() it), -1 on failure.
 */
static int create_request_socket(const char* host)
{
    int sockfd;
    struct hostent *server;
    struct sockaddr_in serv_addr;

    if (host == NULL || host[0] == '\0')
    {
        printf("[http_demo] create_request_socket gethostbyname fail.\n");
        return -1;
    }

    /* Resolve first so that no descriptor has to be released when the lookup fails. */
    server = gethostbyname(host);
    if (server == NULL || server->h_addr_list == NULL || server->h_addr_list[0] == NULL)
    {
        printf("[http_demo] create_request_socket gethostbyname fail.\n");
        return -1;
    }

    /*
     * The address is copied into a sockaddr_in, so it must really be an
     * IPv4 address of exactly that size; copying h_length bytes blindly
     * could write past sin_addr.
     */
    if (server->h_addrtype != AF_INET ||
        server->h_length != (int)sizeof(serv_addr.sin_addr))
    {
        printf("[http_demo] create_request_socket gethostbyname fail.\n");
        return -1;
    }

    sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd < 0)
    {
        printf("[http_demo] create_request_socket create socket fail.\n");
        return -1;
    }

    memset(&serv_addr, 0, sizeof(serv_addr));
    serv_addr.sin_family = AF_INET;
    serv_addr.sin_port = htons(80);
    memcpy(&serv_addr.sin_addr, server->h_addr_list[0], sizeof(serv_addr.sin_addr));

    if (connect(sockfd, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) < 0)
    {
        printf("[http_demo] create_request_socket connect fail.\n");
        close(sockfd);
        return -1;
    }
    return sockfd;
}
解析响应头部信息,获取资源大小的函数
/**
 * @brief http_find_header_value Locate the value of a header field in a response header block.
 *
 * Field names are compared case-insensitively and only at the start of a
 * line, so a name that merely appears inside another field does not match.
 *
 * @param headers NUL-terminated header block whose lines end in "\r\n".
 * @param name    Field name without the trailing colon.
 * @return Pointer to the first non-blank character after the colon, or NULL if the field is absent.
 */
static const char *http_find_header_value(const char *headers, const char *name)
{
    size_t name_len = strlen(name);
    const char *line = headers;

    while (line != NULL && *line != '\0')
    {
        if (strncasecmp(line, name, name_len) == 0 && line[name_len] == ':')
        {
            const char *value = line + name_len + 1;
            while (*value == ' ' || *value == '\t')
            {
                value++;
            }
            return value;
        }
        line = strstr(line, "\r\n");
        if (line != NULL)
        {
            line += 2;
        }
    }
    return NULL;
}

/**
 * @brief get_http_content_length Read the HTTP response header from a socket and return Content-Length.
 *
 * The header is read one byte at a time so that nothing past the blank
 * line ending the header is consumed; the body stays in the socket for
 * the caller.
 *
 * @param sock_fd Connected socket on which the request has already been sent.
 * @return The Content-Length value; 0 if the field is absent; -1 if the
 *         header is incomplete or longer than 1023 bytes, the status code
 *         is neither 200 nor 206, or the Content-Length value is not a
 *         non-negative number that fits in an int.
 */
static int get_http_content_length(int sock_fd)
{
    char res_header[1024] = {0};
    size_t recv_len = 0;
    int header_done = 0;
    int status_code = 0;
    int content_length = 0;
    const char *value;

    /* Leave the last byte untouched so res_header is always NUL-terminated. */
    while (recv_len < sizeof(res_header) - 1)
    {
        if (recv(sock_fd, res_header + recv_len, 1, 0) < 1)
        {
            break; /* error or connection closed */
        }
        recv_len++;
        /* The header ends at the first empty line, i.e. "\r\n\r\n". */
        if (recv_len >= 4 && memcmp(res_header + recv_len - 4, "\r\n\r\n", 4) == 0)
        {
            header_done = 1;
            break;
        }
    }
    if (!header_done)
    {
        /* Parsing a partial header would leave header bytes mixed into the body. */
        printf("[http_demo] get_content_length incomplete response header.\n");
        return -1;
    }

    /* Status line: "HTTP/<version> <status-code> <reason>". */
    if (sscanf(res_header, "HTTP/%*s %d", &status_code) != 1)
    {
        status_code = 0;
    }
    if (status_code != 200 && status_code != 206)
    {
        printf("[http_demo] get_content_length status_code = %d\n", status_code);
        return -1;
    }

    value = http_find_header_value(res_header, "Content-Length");
    if (value == NULL)
    {
        return 0;
    }
    if (*value < '0' || *value > '9')
    {
        printf("[http_demo] get_content_length invalid Content-Length.\n");
        return -1;
    }
    for (; *value >= '0' && *value <= '9'; value++)
    {
        int digit = *value - '0';
        if (content_length > (INT_MAX - digit) / 10)
        {
            /* Value does not fit in the int this function returns. */
            printf("[http_demo] get_content_length invalid Content-Length.\n");
            return -1;
        }
        content_length = content_length * 10 + digit;
    }
    return content_length;
}
解析出域名和资源位置参数的函数
/**
 * @brief http_parser_url Split an http:// or https:// URL into its host and path parts.
 *
 * The host is everything between "://" and the next '/'; a ":port" suffix
 * is not split off and stays part of the host string. The path is the
 * rest of the URL starting at that '/', or "/" when the URL has no path.
 *
 * @param url  NUL-terminated URL to parse.
 * @param host Receives a newly malloc'ed host string; the caller must free() it.
 * @param path Receives a newly malloc'ed path string; the caller must free() it.
 * @return 0 on success; -1 on a NULL argument, an unsupported or malformed
 *         URL, an empty host, or allocation failure. *host and *path are
 *         left untouched on failure.
 */
static int http_parser_url(const char* url,char **host,char **path)
{
    const char *cursor;
    const char *host_begin;
    const char *host_end;
    const char *path_src;
    size_t host_len;
    size_t path_len;
    char *host_temp;
    char *path_temp;

    if (url == NULL || host == NULL || path == NULL)
    {
        printf("[http_demo] url or host or path is null.\n");
        return -1;
    }

    /* Scheme must be "http" or "https", followed by "://". */
    if (strncmp(url, "http", 4) != 0)
    {
        printf("[http_demo] illegal url = %s.\n", url);
        return -1;
    }
    cursor = url + 4;
    if (*cursor == 's')
    {
        cursor++;
    }
    if (strncmp(cursor, "://", 3) != 0)
    {
        printf("[http_demo] illegal url = %s.\n", url);
        return -1;
    }

    host_begin = cursor + 3;
    host_end = strchr(host_begin, '/');
    if (host_end == NULL)
    {
        /* No path component: the host runs to the end of the string. */
        host_end = host_begin + strlen(host_begin);
    }
    host_len = (size_t)(host_end - host_begin);
    if (host_len == 0)
    {
        printf("[http_demo] illegal url = %s.\n", url);
        return -1;
    }

    /* A URL without a path refers to the root resource. */
    path_src = (*host_end == '/') ? host_end : "/";
    path_len = strlen(path_src);

    host_temp = malloc(host_len + 1); /* +1 for the terminating '\0' */
    if (host_temp == NULL)
    {
        printf("[http_demo] malloc host fail.\n");
        return -1;
    }
    path_temp = malloc(path_len + 1); /* +1 for the terminating '\0' */
    if (path_temp == NULL)
    {
        printf("[http_demo] malloc path fail.\n");
        free(host_temp);
        return -1;
    }

    memcpy(host_temp, host_begin, host_len);
    host_temp[host_len] = '\0';
    memcpy(path_temp, path_src, path_len + 1);

    *host = host_temp;
    *path = path_temp;
    return 0;
}
下载网络资源并保存到本地文件的函数
/**
 * @brief http_download_file Download a network resource over HTTP and save it to a local file.
 *
 * The URL is split with http_parser_url(), a connection is opened with
 * create_request_socket(), a GET request built from http_header is sent,
 * and exactly Content-Length bytes of the response body are written to
 * the file.
 *
 * NOTE(review): https:// URLs are accepted by http_parser_url(), but no
 * TLS is performed here; the request is sent as plain HTTP.
 *
 * @param url  Location of the network resource.
 * @param file Path of the local file to write (created or truncated).
 * @return 0 on success, -1 on any failure. On failure a partially
 *         written file may be left behind.
 */
static int http_download_file(const char* url,const char* file)
{
    int ret = -1;
    int req_fd = -1;
    FILE *fp = NULL;
    char *host = NULL;
    char *path = NULL;
    int req_len;
    int sent = 0;
    int content_len;
    int total_size = 0;
    char buff[256];

    if (file == NULL)
    {
        printf("[http_demo] file is null.\n");
        return -1;
    }
    if (http_parser_url(url, &host, &path))
    {
        printf("[http_demo] http_parser_url fail.\n");
        return -1;
    }

    req_fd = create_request_socket(host);
    if (req_fd < 0)
    {
        printf("[http_demo] read create_request_socket fail fd = %d.\n", req_fd);
        goto download_file_end;
    }

    /*
     * snprintf returns the length the full request would have had; if that
     * does not fit, the buffer holds a truncated request and sending
     * req_len bytes would read past the end of it.
     */
    req_len = snprintf(http_req_content, HTTP_REQ_LENGTH, http_header, path, host, 0);
    if (req_len < 0 || req_len >= HTTP_REQ_LENGTH)
    {
        printf("[http_demo] http request too long.\n");
        goto download_file_end;
    }

    /* send() may accept fewer bytes than asked for, so loop until all are sent. */
    while (sent < req_len)
    {
        ssize_t n = send(req_fd, http_req_content + sent, (size_t)(req_len - sent), 0);
        if (n < 1)
        {
            printf("[http_demo] send request fail.\n");
            goto download_file_end;
        }
        sent += (int)n;
    }

    content_len = get_http_content_length(req_fd);
    if (content_len < 1)
    {
        printf("[http_demo] read get_http_content_length fail content_len = %d.\n", content_len);
        goto download_file_end;
    }

    fp = fopen(file, "wb");
    if (fp == NULL)
    {
        printf("[http_demo] fopen fail.\n");
        goto download_file_end;
    }

    while (total_size < content_len)
    {
        /* Never ask for more than what is left of the body. */
        size_t want = (size_t)(content_len - total_size);
        ssize_t got;

        if (want > sizeof(buff))
        {
            want = sizeof(buff);
        }
        got = recv(req_fd, buff, want, 0);
        if (got < 1)
        {
            printf("[http_demo] recv buff fail\n");
            goto download_file_end;
        }
        if (fwrite(buff, 1, (size_t)got, fp) != (size_t)got)
        {
            printf("[http_demo] fwrite fail.\n");
            goto download_file_end;
        }
        total_size += (int)got;
    }
    ret = 0;

download_file_end:
    if (req_fd > -1)
    {
        close(req_fd);
    }
    /* fclose flushes buffered data; a failure here means the file is incomplete. */
    if (fp != NULL && fclose(fp) != 0)
    {
        printf("[http_demo] fclose fail.\n");
        ret = -1;
    }
    free(host);
    free(path);
    return ret;
}
如果不想从头开始下载,想要从某个位置开始下载,只要修改http 头的请求参数即可。
"Range: bytes=%d-\r\n"
假设这个网络资源的大小是128字节,如果只想下载后面的64个字节内容,参数可以设置成 "Range: bytes=64-127\r\n"
版权声明
本文仅代表作者观点,不代表百度立场。
本文系作者授权百度百家发表,未经许可,不得转载。


