第一阶段冲刺(四)

日期:2021.05.06

作者:杨传伟

完成任务:学习爬取后台json数据,re正则匹配字符串,xpath解析,requests请求json数据。爬取爱奇艺电影片库5000+条电影数据(电影名字、播放链接、评分、播放权限)并存到数据库。

爬虫源码:

复制代码
  1 import time
  2 import traceback
  3 import requests
  4 from lxml import etree
  5 import re
  6 from bs4 import BeautifulSoup
  7 from lxml.html.diff import end_tag
  8 import json
  9 import pymysql
 10 #连接数据库  获取游标
 11 def get_conn():
 12     """
 13     :return: 连接,游标
 14     """
 15     # 创建连接
 16     conn = pymysql.connect(host="82.157.112.34",
 17                     user="root",
 18                     password="root",
 19                     db="MovieRankings",
 20                     charset="utf8")
 21     # 创建游标
 22     cursor = conn.cursor()  # 执行完毕返回的结果集默认以元组显示
 23     if ((conn != None) & (cursor != None)):
 24         print("数据库连接成功!游标创建成功!")
 25     else:
 26         print("数据库连接失败!")
 27     return conn, cursor
 28 #关闭数据库连接和游标
 29 def close_conn(conn, cursor):
 30     if cursor:
 31         cursor.close()
 32     if conn:
 33         conn.close()
 34     return 1
 35 def get_iqy():
 36     #   获取数据库总数据条数
 37     conn, cursor = get_conn()
 38     sql = "select count(*) from movieiqy"
 39     cursor.execute(sql)     #   执行sql语句
 40     conn.commit()       #   提交事务
 41     all_num = cursor.fetchall()[0][0]       #cursor 返回值的类型是一个元祖的嵌套形式 比如( ( ) ,)
 42     pagenum=int(all_num/48)+1               #这里是计算一个下面循环的起始值    每48个电影分一组
 43     # print(pagenum)
 44     print("movieiqy数据库有", all_num, "条数据!")
 45 
 46     url = "https://pcw-api.iqiyi.com/search/recommend/list?channel_id=1&data_type=1&mode=11&page_id=1&ret_num=48&session=ee4d98ebb4e8e44c8d4b14fa90615fb7"
 47     headers = {
 48         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
 49     }
 50     # response=requests.get(url=url,headers=headers)
 51     # response.encoding="utf-8"
 52     # page_text=response.text
 53     # print(page_text)
 54     """
 55     """
 56     #
 57     temp_list = []      #暂时存放单部电影的数据
 58     dataRes = []        #每次循环把单部电影数据放到这个list
 59     for i in range(1, 137):         #循环1-136 第137 json 是空的 也就是全部爬完
 60         url = "https://pcw-api.iqiyi.com/search/recommend/list?channel_id=1&data_type=1&mode=11&page_id=1&ret_num=48&session=ee4d98ebb4e8e44c8d4b14fa90615fb7"
 61         url_0 = "https://pcw-api.iqiyi.com/search/recommend/list?channel_id=1&data_type=1&mode=11&page_id="
 62         url_0 = url_0 + str(i) + "&ret_num=48&session=ad1d98bb953b7e5852ff097c088d66f2"
 63         print(url_0)        #输出拼接好的url
 64         response = requests.get(url=url_0, headers=headers)
 65         response.encoding = "utf-8"
 66         try:
 67             page_text = response.text
 68             #解析json对象
 69             json_obj = json.loads(page_text)
 70             #这里的异常捕获是因为     测试循环的次数有可能超过电影网站提供的电影数 为了防止后续爬到空的json对象报错
 71             json_list = json_obj['data']['list']
 72         except:
 73             print("捕获异常!")
 74             return dataRes          #json为空 程序结束
 75         for j in json_list:         #   开始循环遍历json串
 76             # print(json_list)
 77             name = j['name']        #找到电影名
 78             print(name)
 79             temp_list.append(name)
 80             #异常捕获,防止出现电影没有评分的现象
 81             try:
 82                 score = j['score']      #找到电影评分
 83                 print(score)
 84                 temp_list.append(score)
 85             except KeyError:
 86                 print( "评分---KeyError")
 87                 temp_list.append("iqy暂无评分")            #替换字符串
 88 
 89             link = j['playUrl']             #找到电影链接
 90             temp_list.append(link)
 91             # 解析播放状态
 92             """
 93             独播:https://www.iqiyipic.com/common/fix/site-v4/video-mark/only.png
 94             VIP:https://pic0.iqiyipic.com/common/20171106/ac/1b/vip_100000_v_601_0_21.png
 95             星钻:https://www.iqiyipic.com/common/fix/site-v4/video-mark/star-movie.png
 96             """
 97             state = []
 98             pay_text = j['payMarkUrl']         #因为播放状态只有在一个图片链接里有 所以需要使用re解析出类似vip和only(独播)的字样
 99             print(pay_text)
100             if (len(pay_text) == 0):            #如果没有这个图片链接 说明电影是免费播放
101                 state="免费"
102             else:
103                 find_state = re.compile("(.*?).png")
104                 state = re.findall(find_state, pay_text)        #正则匹配链接找到vip
105                 # print(state[0])
106 
107                 if(len(state)!=0):              #只有当链接不为空再执行
108                     # print(state)
109                     # 再次解析
110                     part_state=str(state[0])
111                     part_state=part_state.split('/')
112                     print(part_state[-1])
113                     state = part_state[-1][0:3]      #字符串分片
114                     # 这里只输出了三个字符,如果是独播,页面显示的是only,我们设置为”独播“
115                     if (state == "onl"):
116                         state = "独播"
117                     if (state == "sta"):
118                         state = "星钻"
119                     if(state == "vip"):
120                         state="VIP"
121             print(state)
122             # 添加播放状态
123             # print(state)
124             temp_list.append(state)
125             dataRes.append(temp_list)
126             # print(temp_list)
127             temp_list = []
128 
129         print('___________________________')
130     return dataRes
131 
def insert_iqy():
    """Crawl the iQIYI movie list and insert every movie into ``movieiqy``.

    Rows that violate a table constraint (e.g. a duplicate key) are skipped.
    All inserts are committed together at the end. Any other error is printed
    as a traceback instead of being raised, and the connection is always
    closed.

    :return: ``None``.
    """
    cursor = None
    conn = None
    try:
        movies = get_iqy()
        print(f"{time.asctime()}开始插入爱奇艺电影数据")
        conn, cursor = get_conn()
        # NOTE(review): id is always sent as 0 -- presumably the column is
        # AUTO_INCREMENT so MySQL assigns the real value; confirm the schema.
        sql = "insert into movieiqy (id,name,score,path,state) values(%s,%s,%s,%s,%s)"
        for count, item in enumerate(movies, start=1):
            print(item)
            if count % 48 == 0:
                # Visual separator after each page-sized batch of 48 rows.
                print('___________________________')
            try:
                cursor.execute(sql, [0, item[0], item[1], item[2], item[3]])
            except pymysql.err.IntegrityError:
                # Constraint violation (duplicate movie): skip this row.
                print("重复!跳过!")

        conn.commit()  # one commit for the whole batch of inserts
        print(f"{time.asctime()}插入爱奇艺电影数据完毕")
    except Exception:
        # Top-level boundary of the script: report the failure, do not crash.
        traceback.print_exc()
    finally:
        close_conn(conn, cursor)
159 
if __name__ == "__main__":
    # Script entry point: crawl the movie list and store it in the database.
    insert_iqy()
复制代码

截图示例:

 

 

 

 

 


 

5.6 李楠

今日实现了“我的”页面中“想看”“在看”“看过”三个列表内容的显示。原先想使用 Fragment 嵌套来实现,
但是没有成功,于是改为让 Fragment 监听 RadioGroup 的选中状态变化(OnCheckedChangeListener),在选项切换时加载并显示对应的数据。
注意这部分逻辑不能写在 Activity 中,而要写在 Fragment 中,并在 onCreateView 里返回视图:

Fragment部分

复制代码
 1 package com.example.cloudlibrary.Fragment;
 2 
 3 import androidx.annotation.NonNull;
 4 import androidx.annotation.Nullable;
 5 import androidx.fragment.app.Fragment;
 6 import androidx.fragment.app.FragmentActivity;
 7 import androidx.viewpager.widget.ViewPager;
 8 
 9 import android.os.Bundle;
10 import android.view.LayoutInflater;
11 import android.view.View;
12 import android.view.ViewGroup;
13 import android.widget.ListView;
14 import android.widget.RadioButton;
15 import android.widget.RadioGroup;
16 
17 import com.example.cloudlibrary.Adapter.MyPagerListAdapter;
18 import com.example.cloudlibrary.Data.ListData;
19 import com.example.cloudlibrary.R;
20 
21 import java.util.ArrayList;
22 import java.util.List;
23 
24 public class MyPageFragment extends Fragment implements RadioGroup.OnCheckedChangeListener{
25     private RadioGroup my_pager_group;
26     private RadioButton want_look;
27     private RadioButton now_look;
28     private RadioButton have_look;
29     private List<ListData> list_data=new ArrayList<>();
30     private ListView mypager_like_list;
31     private ListData listData;
32     private View view;
33     private MyPagerListAdapter myPagerListAdapter;
34 
35 
36     public static final int PAGE_ONE = 0;
37     public static final int PAGE_TWO = 1;
38     public static final int PAGE_THREE = 2;
39     public MyPageFragment(){
40     }
41     @Override
42     public View onCreateView(@NonNull LayoutInflater inflater, @Nullable ViewGroup container, @Nullable Bundle savedInstanceState) {
43         view = inflater.inflate(R.layout.activity_my_page, container, false);
44         my_pager_group = (RadioGroup) view.findViewById(R.id.my_pager_group);
45         want_look = (RadioButton) view.findViewById(R.id.want_look);
46         now_look = (RadioButton) view.findViewById(R.id.now_look);
47         have_look = (RadioButton) view.findViewById(R.id.have_look);
48         my_pager_group.setOnCheckedChangeListener(this);
49         RadioButton[] rbs = new RadioButton[3];
50         rbs[0] =want_look;
51         rbs[1] = now_look;
52         rbs[2] = have_look;
53         return view;
54     }
55 
56     @Override
57     public void onCheckedChanged(RadioGroup group, int checkedId) {
58         switch (checkedId) {
59             case R.id.want_look:
60                 list_data=new ArrayList<>();
61                 listData=new ListData("head1","吹响吧,上低音号!");
62                 list_data.add(listData);
63                 //list_list_Data= dataDao.queryData("",query_stuid.getText().toString());
64                 mypager_like_list=(ListView)view.findViewById(R.id.mypager_like_list);
65                 myPagerListAdapter=new MyPagerListAdapter(getContext(),list_data);
66                 mypager_like_list.setAdapter(myPagerListAdapter);
67                 break;
68             case R.id.now_look:
69                 list_data=new ArrayList<>();
70                 listData=new ListData("head2","AIR");
71                 list_data.add(listData);
72                 //list_list_Data= dataDao.queryData("",query_stuid.getText().toString());
73                 mypager_like_list=(ListView)view.findViewById(R.id.mypager_like_list);
74                 myPagerListAdapter=new MyPagerListAdapter(getContext(),list_data);
75                 mypager_like_list.setAdapter(myPagerListAdapter);
76                 break;
77             case R.id.have_look:
78                 list_data=new ArrayList<>();
79                 listData=new ListData("head3","百变小樱");
80                 list_data.add(listData);
81                 //list_list_Data= dataDao.queryData("",query_stuid.getText().toString());
82                 mypager_like_list=(ListView)view.findViewById(R.id.mypager_like_list);
83                 myPagerListAdapter=new MyPagerListAdapter(getContext(),list_data);
84                 mypager_like_list.setAdapter(myPagerListAdapter);
85                 break;
86         }
87     }
88 }
复制代码

listview adapter部分:

复制代码
 1 package com.example.cloudlibrary.Adapter;
 2 
 3 import android.content.Context;
 4 import android.view.LayoutInflater;
 5 import android.view.View;
 6 import android.view.ViewGroup;
 7 import android.widget.BaseAdapter;
 8 import android.widget.ImageView;
 9 import android.widget.ListAdapter;
10 import android.widget.TextView;
11 
12 import com.example.cloudlibrary.Data.ListData;
13 import com.example.cloudlibrary.R;
14 
15 import org.w3c.dom.Text;
16 
17 import java.util.ArrayList;
18 import java.util.List;
19 
20 public class MyPagerListAdapter extends BaseAdapter {
21     private List<ListData> list_data=new ArrayList<>();
22     private Context context;
23     public MyPagerListAdapter(Context context, List<ListData> list_data){
24         this.context=context;
25         this.list_data=list_data;
26     }
27     @Override
28     public int getCount() {
29         return list_data.size();
30     }
31 
32     @Override
33     public Object getItem(int position) {
34         return null;
35     }
36 
37     @Override
38     public long getItemId(int position) {
39         return 0;
40     }
41 
42     @Override
43     public View getView(int position, View convertView, ViewGroup parent) {
44         if(convertView==null)
45         {
46             convertView= LayoutInflater.from(context).inflate(R.layout.mypager_like_list,null);
47         }
48         ImageView picture_list=(ImageView)convertView.findViewById(R.id.picture_list);
49         TextView name_list=(TextView)convertView.findViewById(R.id.name_list);
50         ListData listData=list_data.get(position);
51         name_list.setText(listData.getName());
52         switch (listData.getImg()){
53             case "head1":
54                 picture_list.setImageResource(R.mipmap.head1);
55                 break;
56             case "head2":
57                 picture_list.setImageResource(R.mipmap.head2);
58                 break;
59             case "head3":
60                 picture_list.setImageResource(R.mipmap.head3);
61                 break;
62         }
63         return convertView;
64     }
65 }
复制代码

 

xml部分:

复制代码
  1 <?xml version="1.0" encoding="utf-8"?>
  2 <LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
  3     xmlns:app="http://schemas.android.com/apk/res-auto"
  4     xmlns:tools="http://schemas.android.com/tools"
  5     android:layout_width="match_parent"
  6     android:layout_height="match_parent"
  7     android:orientation="vertical">
  8 
  9     <LinearLayout
 10         android:layout_width="match_parent"
 11         android:layout_height="80dp"
 12         android:orientation="horizontal">
 13 
 14         <ImageView
 15             android:id="@+id/first_head_picture"
 16             android:layout_width="60dp"
 17             android:layout_height="60dp"
 18             android:layout_marginTop="10dp"
 19             android:layout_marginLeft="10dp"
 20             android:src="@mipmap/headpictrue"></ImageView>
 21 
 22         <View
 23             android:layout_width="2px"
 24             android:layout_height="45dp"
 25             android:background="@color/login_line_color"
 26             android:layout_marginTop="20dp"
 27             android:layout_marginLeft="20dp"/>
 28 
 29         <LinearLayout
 30             android:layout_width="wrap_content"
 31             android:layout_height="wrap_content"
 32             android:orientation="vertical">
 33 
 34             <TextView
 35                 android:id="@+id/my_name"
 36                 android:layout_width="wrap_content"
 37                 android:layout_height="wrap_content"
 38                 android:layout_marginTop="10dp"
 39                 android:textSize="20dp"
 40                 android:layout_marginLeft="30dp"
 41                 android:textColor="#EEAAFF"
 42                 android:text="风吹过半夏"></TextView>
 43 
 44 
 45             <TextView
 46                 android:id="@+id/my_phone"
 47                 android:layout_width="wrap_content"
 48                 android:layout_height="wrap_content"
 49                 android:layout_marginTop="10dp"
 50                 android:textSize="20dp"
 51                 android:layout_marginLeft="30dp"
 52                 android:textColor="#EEAAFF"
 53                 android:text="157****5171"></TextView>
 54 
 55         </LinearLayout>
 56 
 57     </LinearLayout>
 58 
 59     <LinearLayout
 60         android:layout_width="match_parent"
 61         android:layout_height="match_parent"
 62         android:orientation="vertical">
 63 
 64         <LinearLayout
 65             android:layout_width="match_parent"
 66             android:layout_height="wrap_content"
 67             android:orientation="horizontal">
 68 
 69             <TextView
 70                 android:layout_width="wrap_content"
 71                 android:layout_height="wrap_content"
 72                 android:text="我的收藏"
 73                 android:textSize="20dp"></TextView>
 74 
 75             <RadioGroup
 76                 android:id="@+id/my_pager_group"
 77                 android:layout_width="match_parent"
 78                 android:layout_height="30dp"
 79                 android:layout_alignParentBottom="true"
 80                 android:background="#ffffff"
 81                 android:orientation="horizontal">
 82 
 83                 <RadioButton android:id="@+id/want_look"
 84                     android:layout_width="wrap_content"
 85                     android:layout_height="wrap_content"
 86                     style="@style/tab_menu_item"
 87                     android:text="想看"></RadioButton>
 88 
 89                 <RadioButton android:id="@+id/now_look"
 90                     android:layout_width="wrap_content"
 91                     android:layout_height="wrap_content"
 92                     style="@style/tab_menu_item"
 93                     android:text="在看"></RadioButton>
 94 
 95                 <RadioButton android:id="@+id/have_look"
 96                     android:layout_width="wrap_content"
 97                     android:layout_height="wrap_content"
 98                     style="@style/tab_menu_item"
 99                     android:text="看过"></RadioButton>
100 
101             </RadioGroup>
102 
103         </LinearLayout>
104 
105         <View
106             android:id="@+id/div_tab_bar"
107             android:layout_width="match_parent"
108             android:layout_height="2px"
109             android:layout_above="@id/main_group"
110             android:background="#DFDBDB" />
111 
112         <ListView android:id="@+id/mypager_like_list"
113             android:layout_width="match_parent"
114             android:layout_height="wrap_content">
115         </ListView>
116 
117     </LinearLayout>
118 
119 
120 </LinearLayout>
复制代码

 

 

 

 

 


 

 

5.6 章英杰

任务进度:完成了根据电影分类进行多条件筛选功能。可根据类型、年份和地区进行多条件筛选。

产品页面:

电影分类部分主要代码:

复制代码
 1 <!--电影分类-->
 2     <div id="classfiy">
 3         <aside>
 4             <i>类型:</i>
 5             <div>
 6                 <span>全部</span>
 7                 <span>喜剧</span>
 8                 <span>动作</span>
 9                 <span>爱情</span>
10                 <span>惊悚</span>
11                 <span>犯罪</span>
12                 <span>悬疑</span>
13                 <span>战争</span>
14                 <span>科幻</span>
15                 <span>动画</span>
16                 <span>恐怖</span>
17                 <span>家庭</span>
18                 <span>传记</span>
19                 <span>冒险</span>
20                 <span>奇幻</span>
21                 <span>武侠</span>
22                 <span>历史</span>
23             </div>
24         </aside>
25         <aside>
26             <i>年份:</i>
27             <div>
28                 <span>全部</span>
29                 <span>2021</span>
30                 <span>2020</span>
31                 <span>2019</span>
32                 <span>2018</span>
33                 <span>2017</span>
34                 <span>2016</span>
35                 <span>2015</span>
36                 <span>2011-2014</span>
37                 <span>2006-2010</span>
38                 <span>2000-2005</span>
39                 <span>90年代</span>
40                 <span>80年代</span>
41                 <span>其他</span>
42             </div>
43         </aside>
44         <aside>
45             <i>地区:</i>
46             <div>
47                 <span>全部</span>
48                 <span>内地</span>
49                 <span>香港</span>
50                 <span>美国</span>
51                 <span>欧洲</span>
52                 <span>台湾</span>
53                 <span>日本</span>
54                 <span>韩国</span>
55                 <span>印度</span>
56                 <span>泰国</span>
57                 <span>英国</span>
58                 <span>法国</span>
59                 <span>德国</span>
60                 <span>加拿大</span>
61                 <span>西班牙</span>
62                 <span>意大利</span>
63                 <span>其他</span>
64             </div>
65         </aside>
66         <div class="last">已选择:
67             <div id="yi"></div>
68         </div>
69     </div>
70     <script>
71         var oDivLength = [];
72         var div = document.getElementsByTagName('div');
73         var divSpan = document.getElementsByTagName('span');
74         //判断有几个列表
75         for (var i = 0; i < div.length; i++) {
76             div[i].index = i;
77         }
78         for (var i = 0; i < divSpan.length; i++) {
79             divSpan[i].onclick = function() {
80                 oDivLength[this.parentElement.index] = this.innerText;
81                 var oChild = this.parentElement.children;
82                 for (var j = 0; j < oChild.length; j++) {
83                     oChild[j].className = '';
84                 }
85                 this.className = 'mystyle'; //已选中的当前列的当前元素添加样式
86                 document.getElementById('yi').innerHTML = '';
87                 for (var m = 0; m < oDivLength.length; m++) { //放到已选择里面
88                     if (oDivLength[m] == '' || oDivLength[m] !== undefined) {
89                         var para = document.createElement("span");
90                         var node = document.createTextNode(oDivLength[m]);
91                         para.appendChild(node);
92                         document.getElementById('yi').appendChild(para);
93                     }
94                 }
95             }
96         }
97     </script>
98     <!--电影分类模块结束-->
复制代码

 

 

任务看板

posted @ 2021-05-06 11:39  CherriesOvO  阅读(98)  评论(0编辑  收藏  举报