1 <?php
2 /**
* Expected format of the source sitemap XML that this script reads:
4 * <urlset>
5 * <url>
6 * <loc>http://www.51buy.com/0.html</loc>
7 * <priority>1.0</priority>
8 * <lastmod>Wed, 12 Jun 2013 21:37:52 +0800</lastmod>
9 * <changefreq>Always</changefreq>
10 * </url>
11 * <url>
12 * <loc>http://www.baidu.com/1.html</loc>
13 * <priority>1.0</priority>
14 * <lastmod>Tue, 11 Jun 2013 15:39:17 +0800</lastmod>
15 * <changefreq>Always</changefreq>
16 * </url>
17 * <url>
18 * <loc>http://www.jd.com/2.html</loc>
19 * <priority>1.0</priority>
20 * <lastmod>Tue, 11 Jun 2013 01:21:46 +0800</lastmod>
21 * <changefreq>Always</changefreq>
22 * </url>
23 * </urlset>
24 */
// Step 1: read the source sitemap and collect its entries in $array.
header("Content-type: text/html; charset=utf-8");

// Parse the remote sitemap into a DOM tree.
$xml = new DOMDocument();
// load() returns false when the document cannot be fetched or parsed. Stop in
// that case: carrying on would leave $array empty and the write step would
// replace the output file with an empty <urlset>.
if ($xml->load("http://www.baidu.com/sitemap.xml") === false) {
    exit('XML文件加载失败');
}

// Returns the text of the first <$tag> element under $node, or '' when there
// is none. Only <loc> is required by the sitemap protocol, so <priority>,
// <lastmod> and <changefreq> may legitimately be missing from an entry.
$firstChildText = function ($node, $tag) {
    $match = $node->getElementsByTagName($tag)->item(0);
    return $match === null ? '' : $match->nodeValue;
};

// Optional host rewrite (drop the preg_replace() call below if not needed).
// The lookbehind anchors the match at the start of a host name; a plain
// substring replace of "w.baidu.com" would also hit the tail of
// "www.baidu.com" and turn it into "wwwww.baidu.com/nihao".
$hostPattern = '~(?<![A-Za-z0-9.-])(?:w|book|iworld)\.baidu\.com~';
$hostReplacement = "www.baidu.com/nihao";

// One associative array per <url> element, in document order.
$array = array();
foreach ($xml->getElementsByTagName("url") as $entry) {
    $loc = $firstChildText($entry, "loc");
    if ($loc === '') {
        // An entry without a location carries nothing worth writing back.
        continue;
    }

    $array[] = array(
        'loc'        => preg_replace($hostPattern, $hostReplacement, $loc),
        'priority'   => $firstChildText($entry, "priority"),
        'lastmod'    => $firstChildText($entry, "lastmod"),
        'changefreq' => $firstChildText($entry, "changefreq"),
    );
}
51 //echo "<pre>";
52 //var_dump($array);
53 //print_r($array);
// Delete the XML file (disabled; uncomment the block below to use it)
/*
$file = "2222.html"; // set the path of the file to delete here
57 if (!unlink($file))
58 {
59 echo ("Error deleting $file");
60 }
61 else
62 {
63 echo ("Deleted $file");
64 }
65 */
66
/**
* Shape of the array that the XML writer below consumes: one entry per <url>.
* The values shown correspond to the sample input at the top of this file,
* without the optional host rewrite applied.
* Array
(
[0] => Array
(
[loc] => http://www.51buy.com/0.html
[priority] => 1.0
[lastmod] => Wed, 12 Jun 2013 21:37:52 +0800
[changefreq] => Always
)

[1] => Array
(
[loc] => http://www.baidu.com/1.html
[priority] => 1.0
[lastmod] => Tue, 11 Jun 2013 15:39:17 +0800
[changefreq] => Always
)

[2] => Array
(
[loc] => http://www.jd.com/2.html
[priority] => 1.0
[lastmod] => Tue, 11 Jun 2013 01:21:46 +0800
[changefreq] => Always
)
)
*/
// Step 2: serialise $array as a <urlset> document and save it to disk.
$dom = new DOMDocument('1.0', 'UTF-8');
$dom->formatOutput = true;
$rootelement = $dom->createElement("urlset");

// Child elements written for every entry, in this order.
$fields = array('loc', 'priority', 'lastmod', 'changefreq');

foreach ($array as $value) {
    $article = $dom->createElement("url");
    foreach ($fields as $field) {
        $node = $dom->createElement($field);
        // A missing or null field is written as an empty element.
        $text = isset($value[$field]) ? (string) $value[$field] : '';
        if ($text !== '') {
            // The value argument of createElement() is not escaped, so a URL
            // containing "&" would be cut off with an "unterminated entity
            // reference" warning. A text node is escaped when serialised.
            $node->appendChild($dom->createTextNode($text));
        }
        $article->appendChild($node);
    }
    $rootelement->appendChild($article);
}
$dom->appendChild($rootelement);

$filename = "./test.xml";
// save() returns the number of bytes written, or false on failure.
$bytes = $dom->save($filename);
if ($bytes === false) {
    echo 'XML文件保存失败';
} else {
    echo 'XML文件大小' . $bytes . '字节';
}