socket实现下载http资源

小编 2026-06-24 阅读:1841 评论:0

使用socket 的api 来实现下载http 网络资源，其实整个流程也十分简单 1、解析url 得到域名和资源位置 2、根据域名得到服务器ip地址，然后通过ip 和端口建立socket连接 3、...

使用socket 的api 来实现下载http 网络资源，其实整个流程也十分简单

1、解析url 得到域名和资源位置

2、根据域名得到服务器ip地址，然后通过ip 和端口建立socket连接

3、发送http 请求

4、解析http 响应

5、根据返回的响应内容，得到资源的大小，接收资源数据，保存到文件即可

需要引用到的头文件和一些常量的定义

#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <arpa/inet.h>
#include <netdb.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

/* Size in bytes of the buffer that holds the formatted HTTP request. */
#define HTTP_REQ_LENGTH         512

/*
 * printf-style template of the HTTP request.
 * Arguments, in order: resource path (%s), host name (%s) and the offset of
 * the first byte to fetch (%d, used in the Range header).
 * The template is never modified, hence const.
 */
static const char http_header[] =
    "GET %s HTTP/1.1\r\n"
    "Host: %s\r\n"
    "Range: bytes=%d-\r\n"
    "Connection: Close\r\n"
    "Accept: */*\r\n"
    "\r\n";

/* File-scope scratch buffer the request is formatted into before sending. */
static char http_req_content[HTTP_REQ_LENGTH] = {0};

根据域名创建socket 连接的实现函数

/**
 * @brief create_request_socket  Resolve host and open a TCP connection to port 80.
 *
 * Every IPv4 address returned by the lookup is tried in turn until one accepts
 * the connection.
 *
 * @param host  host name or dotted IPv4 address; must be a non-empty string
 * @return connected socket descriptor (the caller must close() it), or -1 on failure
 */
static int create_request_socket(const char* host)
{
    struct hostent *server;
    char **addr;

    if (host == NULL || host[0] == '\0')
    {
        printf("[http_demo] create_request_socket host is null or empty.\n");
        return -1;
    }

    /* Resolve first so that no descriptor has to be released when the lookup fails. */
    server = gethostbyname(host);
    if (server == NULL
        || server->h_addrtype != AF_INET
        || server->h_length != (int)sizeof(struct in_addr)
        || server->h_addr_list == NULL
        || server->h_addr_list[0] == NULL)
    {
        printf("[http_demo] create_request_socket gethostbyname fail.\n");
        return -1;
    }

    for (addr = server->h_addr_list; *addr != NULL; addr++)
    {
        struct sockaddr_in serv_addr;
        int sockfd = socket(AF_INET, SOCK_STREAM, 0);

        if (sockfd < 0)
        {
            printf("[http_demo] create_request_socket create socket fail.\n");
            return -1;
        }

        memset(&serv_addr, 0, sizeof(serv_addr));
        serv_addr.sin_family = AF_INET;
        serv_addr.sin_port = htons(80);
        /* The address size was validated above, so this copy cannot overrun sin_addr. */
        memcpy(&serv_addr.sin_addr, *addr, sizeof(serv_addr.sin_addr));

        if (connect(sockfd, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) == 0)
        {
            return sockfd;
        }

        /* A socket whose connect() failed is not reused; open a fresh one for the next address. */
        close(sockfd);
    }

    printf("[http_demo] create_request_socket connect fail.\n");
    return -1;
}

解析响应头部信息，获取资源大小的函数

/*
 * Return a pointer to the text following "<name>:" for the header field `name`,
 * matched case-insensitively at the start of a header line, or NULL when the
 * field is absent. The status line (first line) is never considered.
 */
static const char *http_find_header_value(const char *headers, const char *name)
{
    size_t name_len = strlen(name);
    const char *line = headers;

    while ((line = strstr(line, "\r\n")) != NULL)
    {
        size_t i = 0;

        line += 2;  /* first byte of the next header line */
        while (i < name_len && line[i] != '\0'
               && tolower((unsigned char)line[i]) == tolower((unsigned char)name[i]))
        {
            i++;
        }
        if (i == name_len && line[i] == ':')
        {
            return line + i + 1;
        }
    }
    return NULL;
}

/**
 * @brief get_http_content_length  Read the response header from sock_fd and return the body size.
 *
 * The header is read one byte at a time so that nothing after the blank line
 * that ends it is consumed; the caller can read the body from the same socket
 * right afterwards.
 *
 * @param sock_fd  connected socket on which the request has already been sent
 * @return the Content-Length value of a 200/206 response; 0 when the response
 *         carries no Content-Length field; -1 when the header cannot be read
 *         completely, is larger than the internal buffer, has another status
 *         code, or has a malformed/too large Content-Length
 */
static int get_http_content_length(int sock_fd)
{
    enum { HEADER_CAPACITY = 4096 };
    char res_header[HEADER_CAPACITY] = {0};
    int recv_len = 0;
    int flag = 0;  /* how many bytes of the "\r\n\r\n" terminator have been matched */

    while (flag < 4)
    {
        char ch;

        if (recv_len >= HEADER_CAPACITY - 1)  /* keep room for the final '\0' */
        {
            printf("[http_demo] get_content_length response header too large.\n");
            return -1;
        }
        if (recv(sock_fd, &ch, 1, 0) < 1)
        {
            /* Error or connection closed before the header ended. */
            printf("[http_demo] get_content_length recv header fail.\n");
            return -1;
        }
        res_header[recv_len++] = ch;

        if ((ch == '\r' && (flag == 0 || flag == 2)) || (ch == '\n' && (flag == 1 || flag == 3)))
        {
            flag++;
        }
        else
        {
            /* A stray '\r' may itself be the start of the terminator. */
            flag = (ch == '\r') ? 1 : 0;
        }
    }

    /* Status line: "HTTP/<version> <code> <reason>". */
    int status_code = 0;
    if (strncmp(res_header, "HTTP/", 5) == 0)
    {
        sscanf(res_header, "%*s %d", &status_code);
    }
    if (status_code != 200 && status_code != 206)
    {
        printf("[http_demo] get_content_length status_code = %d\n", status_code);
        return -1;
    }

    const char *value = http_find_header_value(res_header, "Content-Length");
    if (value == NULL)
    {
        return 0;
    }
    while (*value == ' ' || *value == '\t')
    {
        value++;
    }
    if (!isdigit((unsigned char)*value))
    {
        printf("[http_demo] get_content_length malformed Content-Length.\n");
        return -1;
    }

    int content_length = 0;
    while (isdigit((unsigned char)*value))
    {
        int digit = *value - '0';

        if (content_length > (INT_MAX - digit) / 10)  /* would overflow int */
        {
            printf("[http_demo] get_content_length Content-Length too large.\n");
            return -1;
        }
        content_length = content_length * 10 + digit;
        value++;
    }
    return content_length;
}

解析出域名和资源位置参数的函数

/**
 * @brief http_parser_url  Split a URL into its host and path parts.
 *
 * Accepts URLs of the form "http://host/path" or "https://host/path". When the
 * URL has no path ("http://host") the path defaults to "/". Everything between
 * "//" and the first '/' is returned as the host, unchanged.
 *
 * @param url   URL to parse
 * @param host  receives a newly allocated, NUL-terminated host string
 * @param path  receives a newly allocated, NUL-terminated path string (starts with '/')
 * @return 0 on success, -1 on failure. On success the caller owns *host and
 *         *path and must free() both; on failure neither is modified.
 */
static int http_parser_url(const char* url,char **host,char **path)
{
    const char *host_begin;
    const char *host_end;
    const char *path_src;
    size_t host_len;
    size_t path_len;
    char *host_temp;
    char *path_temp;

    if(url == NULL || host == NULL || path == NULL)
    {
        printf("[http_demo] url or host or path is null.\n");
        return -1;
    }

    if(strncmp(url, "http://", 7) == 0)
    {
        host_begin = url + 7;
    }
    else if(strncmp(url, "https://", 8) == 0)
    {
        host_begin = url + 8;
    }
    else
    {
        printf("[http_demo] illegal url = %s.\n",url);
        return -1;
    }

    /* The host runs up to the first '/', or to the end of the string when no path is given. */
    host_end = strchr(host_begin, '/');
    if(host_end == NULL)
    {
        host_end = host_begin + strlen(host_begin);
        path_src = "/";
    }
    else
    {
        path_src = host_end;
    }

    host_len = (size_t)(host_end - host_begin);
    if(host_len == 0)
    {
        printf("[http_demo] illegal url = %s.\n",url);
        return -1;
    }
    path_len = strlen(path_src);

    host_temp = malloc(host_len + 1);  /* +1 for the terminating '\0' */
    if(host_temp == NULL)
    {
        printf("[http_demo] malloc host fail.\n");
        return -1;
    }
    path_temp = malloc(path_len + 1);  /* +1 for the terminating '\0' */
    if(path_temp == NULL)
    {
        printf("[http_demo] malloc path fail.\n");
        free(host_temp);
        return -1;
    }

    memcpy(host_temp, host_begin, host_len);
    host_temp[host_len] = '\0';
    memcpy(path_temp, path_src, path_len + 1);  /* copies the '\0' as well */

    *host = host_temp;
    *path = path_temp;
    return 0;
}

下载网络资源并保存到本地文件的函数

/**
 * @brief http_download_file  Download a resource over plain HTTP and save it to a local file.
 *
 * The request is sent over the socket returned by create_request_socket();
 * no TLS is performed here, even when the URL uses the https scheme.
 * The response must carry a positive Content-Length, otherwise the download
 * is reported as failed.
 *
 * @param url   location of the resource
 * @param file  path of the local file to write (created or truncated)
 * @return 0 on success, -1 on failure
 */
static int http_download_file(const char* url,const char* file)
{
    int ret = -1;
    int req_fd = -1;
    FILE *fp = NULL;
    char *host = NULL;
    char *path = NULL;
    char buff[256];
    int req_len;
    int sent = 0;
    int content_len;
    int total_size = 0;

    if(file == NULL)
    {
        printf("[http_demo] file is null.\n");
        return -1;
    }
    if(http_parser_url(url,&host,&path))
    {
        printf("[http_demo] http_parser_url fail.\n");
        return -1;
    }
    req_fd = create_request_socket(host);
    if(req_fd<0)
    {
        printf("[http_demo] read create_request_socket fail fd = %d.\n",req_fd);
        goto download_file_end;
    }

    /* snprintf reports the untruncated length; sending that many bytes from a
       truncated buffer would read past its end. */
    req_len = snprintf(http_req_content,HTTP_REQ_LENGTH, http_header, path,  host, 0);
    if(req_len < 0 || req_len >= HTTP_REQ_LENGTH)
    {
        printf("[http_demo] http request too long.\n");
        goto download_file_end;
    }

    /* send() may accept only part of the buffer; loop until all of it is sent. */
    while(sent < req_len)
    {
        ssize_t n = send(req_fd, http_req_content + sent, (size_t)(req_len - sent), 0);
        if(n < 1)
        {
            printf("[http_demo] send request fail.\n");
            goto download_file_end;
        }
        sent += (int)n;
    }

    content_len = get_http_content_length(req_fd);
    if(content_len <1)
    {
        printf("[http_demo] read get_http_content_length fail content_len = %d.\n",content_len);
        goto download_file_end;
    }
    fp = fopen(file,"wb");
    if(fp == NULL)
    {
        printf("[http_demo] fopen fail.\n");
        goto download_file_end;
    }

    while(total_size<content_len)
    {
        /* Never ask for more than the bytes still missing from the body. */
        int want = content_len - total_size;
        ssize_t recv_size;

        if(want > (int)sizeof(buff))
        {
            want = (int)sizeof(buff);
        }
        recv_size = recv(req_fd, buff, (size_t)want, 0);
        if(recv_size < 1)
        {
            printf("[http_demo] recv buff fail\n");
            goto download_file_end;
        }
        if(fwrite(buff,1,(size_t)recv_size,fp) != (size_t)recv_size)
        {
            printf("[http_demo] fwrite fail.\n");
            goto download_file_end;
        }
        total_size += (int)recv_size;
    }

    /* fclose() flushes buffered data; a failure here means the file is incomplete. */
    if(fclose(fp) != 0)
    {
        fp = NULL;
        printf("[http_demo] fclose fail.\n");
        goto download_file_end;
    }
    fp = NULL;
    ret = 0;

download_file_end:
    if(req_fd>-1)
    {
        close(req_fd);
    }
    if(fp != NULL)
    {
        fclose(fp);
    }
    free(host);
    free(path);
    return ret;
}

如果不想从头开始下载，想要从某个位置开始下载，只要修改http 头的请求参数即可。

"Range: bytes=%d-\r\n"

假设这个网络资源的大小是128字节，如果只想下载后面的64个字节内容，参数可以设置成 "Range: bytes=64-127\r\n"

版权声明

本文仅代表作者观点，不代表百度立场。
本文系作者授权百度百家发表，未经许可，不得转载。

上一篇：Unix权限这点事 下一篇：重新打包用户量过亿的开源截图软件——加入图片自动上传到图床的功能

socket实现下载http资源

版权声明

热门文章

机房智能化温湿度解决方式之POE供电以太网温湿度传感器

Sequential Monte Carlo Methods (SMC) 序列蒙特卡洛/粒子滤波/Bootstrap Filtering

HTTP状态保持的原理

Hive 系统函数及示例

CSRF的原理和防范措施

最近发表

标签列表

socket实现下载http资源

版权声明

相关阅读

JAVA基础3（异常）

快速排序

JSP第二篇【内置对象的介绍、4种属性范围、应用场景】

sudo apt-get upgrade相关错误

2018最新小象学院Python数据分析视频教程升级版第2期

惠普HP Pavilion Notebook进入BIOS页面开启Intel VT-x

热门文章

机房智能化温湿度解决方式之POE供电以太网温湿度传感器

Sequential Monte Carlo Methods (SMC) 序列蒙特卡洛/粒子滤波/Bootstrap Filtering

HTTP状态保持的原理

Hive 系统函数及示例

CSRF的原理和防范措施

最近发表

标签列表