//
// main.swift
// C150805_libxml2r2
// http://git.oschina.net/yao_yu/Swift2015/tree/master/C150805_libxml2r2?dir=1&filepath=C150805_libxml2r2&oid=f80a7498226526b991e7913298c15cd38480aea5&sha=c073af33d0534a10098bb8fcc0706c2fd489dc3f
//
// Created by yao_yu on 15/8/5.
// Copyright © 2015年 yao_yu. All rights reserved.
//
import Foundation
/* ---------- Extensions ---------- */
extension NSString {
    /// Creates a string from the contents of the resource located at `urlString`.
    ///
    /// - Parameter urlString: Textual form of the URL to load.
    /// - Parameter encoding: Encoding used to decode the loaded bytes.
    ///
    /// The initializer fails (yields `nil`) when `urlString` cannot be turned
    /// into an `NSURL`, or when `init(contentsOfURL:encoding:)` throws.
    convenience init?(urlString:String, encoding:NSStringEncoding) {
        // A malformed URL string is an ordinary failure, not a reason to crash.
        guard let url = NSURL(string: urlString) else {
            return nil
        }
        do {
            try self.init(contentsOfURL: url, encoding: encoding)
        } catch {
            // Surface the failure through the failable initializer instead of
            // falling out of the initializer without delegating.
            return nil
        }
    }
}
extension String {
    /// Creates a Swift string from a NUL-terminated libxml2 `xmlChar` buffer.
    ///
    /// - Parameter char: Pointer to the C string to convert. The buffer is only
    ///   read; it is not freed here.
    ///
    /// A nil pointer produces the empty string (unchanged from the previous
    /// behaviour). The initializer fails (yields `nil`) when
    /// `String.fromCString` cannot convert the bytes, instead of crashing on a
    /// force unwrap.
    init?(XMLChar char: UnsafePointer<xmlChar>){
        self.init()
        if char == nil {
            return
        }
        guard let decoded = String.fromCString(UnsafePointer<CChar>(char)) else {
            return nil
        }
        self = decoded
    }
}
/* ---------- XML node ---------- */
/// Wrapper around a libxml2 node pointer together with the document pointer
/// it was created with. This class never frees either pointer.
class XMLNode {
    /// Document pointer handed to the initializer.
    var xmlDoc:xmlDocPtr = nil
    /// Node pointer handed to the initializer.
    var xmlNode:xmlNodePtr = nil

    /// Wraps `node`, remembering `document` alongside it.
    init(node:xmlNodePtr, document:xmlDocPtr) {
        xmlDoc = document
        xmlNode = node
    }

    /// Wraps the root element reported by `xmlDocGetRootElement(document)`.
    convenience init(document:xmlDocPtr) {
        let root = xmlDocGetRootElement(document)
        self.init(node: root, document: document)
    }

    /// Text content of the node, computed once on first access via
    /// `XMLNodeGetContent`. (`XMLNodeGetString(self.xmlDoc, xmlNode: self.xmlNode)`
    /// was a previously tried alternative.)
    lazy var rawContent:String? = {
        let content = XMLNodeGetContent(self.xmlNode)
        return content
    }()

    /// Wrapped child nodes as returned by `XMLNodeGetChildren`, computed once
    /// on first access.
    lazy var children:[XMLNode] = {
        let rawChildren = XMLNodeGetChildren(self.xmlNode)
        return self.xmlNodes2XMLNodes(rawChildren)
    }()

    /// Attribute name/value pairs as returned by `XMLNodeGetAttributes`,
    /// computed once on first access.
    lazy var attributes: [String: String] = {
        let pairs = XMLNodeGetAttributes(self.xmlNode)
        return pairs
    }()

    /// Looks up the attribute named `key`; `nil` when it is absent.
    subscript(key:String) -> String? {
        return attributes[key]
    }

    /// Wraps every raw node pointer in an `XMLNode` that shares this node's
    /// document pointer, preserving order.
    private func xmlNodes2XMLNodes(nodes:[xmlNodePtr]) -> [XMLNode] {
        // An explicit loop is kept on purpose: the closure-based form
        //   nodes.map{[unowned self] in XMLNode(node:$0, document:self.xmlDoc)}
        // made the compiler fail with "Command failed due to signal: Abort trap: 6".
        var wrapped = [XMLNode]()
        for rawNode in nodes {
            let wrapper = XMLNode(node: rawNode, document: xmlDoc)
            wrapped.append(wrapper)
        }
        return wrapped
    }
}
extension XMLNode {
    /// Runs `xpath` through `XMLFindXPath` using this node's document pointer
    /// and wraps every matching raw node in an `XMLNode`.
    ///
    /// - Parameter xpath: XPath expression to evaluate.
    /// - Returns: The wrapped matches, in the order `XMLFindXPath` returned them.
    func xPath(xpath: String) -> [XMLNode] {
        let matches = XMLFindXPath(xmlDoc, xPath: xpath)
        return xmlNodes2XMLNodes(matches)
    }
}
/* ---------- libxml2 reading helper functions ---------- */
/// Returns the string produced by `xmlNodeListGetString` for `xmlNode` within `doc`.
///
/// - Parameter doc: Document pointer passed through to libxml2.
/// - Parameter xmlNode: First node of the list whose text is requested.
/// - Returns: The converted text, or `nil` when libxml2 returns no buffer.
func XMLNodeGetString(doc:xmlDocPtr, xmlNode:xmlNodePtr) -> String? {
    // The third argument (1) is libxml2's `inLine` flag.
    let rawText = xmlNodeListGetString(doc, xmlNode, 1)
    guard rawText != nil else {
        return nil
    }
    let converted = String(XMLChar: rawText)
    // The buffer returned by libxml2 is released here once it has been copied.
    free(rawText)
    assert(converted != nil, "XMLNodeGetString: 值转换不成功")
    return converted
}
/// Returns the string produced by `xmlNodeGetContent` for `xmlNode`.
///
/// - Parameter xmlNode: Node whose content is requested.
/// - Returns: The converted text, or `nil` when libxml2 returns no buffer.
func XMLNodeGetContent(xmlNode:xmlNodePtr) -> String? {
    let rawText = xmlNodeGetContent(xmlNode)
    guard rawText != nil else {
        return nil
    }
    let converted = String(XMLChar: rawText)
    // The buffer returned by libxml2 is released here once it has been copied.
    free(rawText)
    assert(converted != nil, "XMLNodeGetContent: 值转换不成功")
    return converted
}
/// Collects the direct children of `xmlNode` for which `xmlNodeIsText`
/// reports 0 (i.e. the non-text children).
///
/// - Parameter xmlNode: Parent node; may be nil (for example the root of a
///   document that has no root element).
/// - Returns: The matching child pointers in sibling order; empty for a nil node.
func XMLNodeGetChildren(xmlNode: xmlNodePtr) -> [xmlNodePtr] {
    var children = [xmlNodePtr]()
    // Dereferencing a nil node would crash, so treat it as "no children".
    guard xmlNode != nil else {
        return children
    }
    // Walk the sibling chain starting at the first child.
    var current = xmlNode.memory.children
    while current != nil {
        if xmlNodeIsText(current) == 0 {
            children.append(current)
        }
        current = current.memory.next
    }
    return children
}
/// Builds a dictionary of attribute names to values for `xmlNode`.
///
/// - Parameter xmlNode: Node whose attributes are read; may be nil.
/// - Returns: One entry per attribute. An attribute whose value cannot be read
///   maps to the empty string. The result is empty for a nil node or for a
///   node that is not an element.
func XMLNodeGetAttributes(xmlNode: xmlNodePtr) -> [String: String] {
    var result = [String: String]()
    // Dereferencing a nil node would crash, so treat it as "no attributes".
    guard xmlNode != nil else {
        return result
    }
    // Only element nodes carry a `properties` list; other node kinds that can
    // be handed in as xmlNodePtr (e.g. attribute nodes from an XPath result)
    // use a shorter struct, so the field must not be read for them.
    guard xmlNode.memory.type.rawValue == XML_ELEMENT_NODE.rawValue else {
        return result
    }
    var attribute: xmlAttrPtr = xmlNode.memory.properties
    while attribute != nil {
        if let key = String(XMLChar: attribute.memory.name) {
            // NOTE(review): only the attribute's first child node is read here;
            // confirm whether values split over several child nodes matter.
            result[key] = XMLNodeGetContent(attribute.memory.children) ?? ""
        } else {
            print((">>>>>>>>>>>>>>>>>>>>>>>>错误:", String(XMLChar: attribute.memory.name)))
        }
        attribute = attribute.memory.next
    }
    return result
}
/// Looks up a single attribute of `xmlNode` by name.
///
/// - Parameter xmlNode: Node whose attributes are searched; may be nil.
/// - Parameter key: Attribute name to match exactly.
/// - Returns: The value of the first attribute named `key`, or `nil` when the
///   node is nil, is not an element, has no such attribute, or the value
///   cannot be read.
func XMLNodeGetAttribute(xmlNode: xmlNodePtr, key: String) -> String? {
    // Dereferencing a nil node would crash, so treat it as "not found".
    guard xmlNode != nil else {
        return nil
    }
    // Only element nodes carry a `properties` list; do not read the field on
    // other node kinds that may be passed in as xmlNodePtr.
    guard xmlNode.memory.type.rawValue == XML_ELEMENT_NODE.rawValue else {
        return nil
    }
    var attribute: xmlAttrPtr = xmlNode.memory.properties
    while attribute != nil {
        if key == String(XMLChar: attribute.memory.name) {
            return XMLNodeGetContent(attribute.memory.children)
        }
        attribute = attribute.memory.next
    }
    return nil
}
/// Evaluates an XPath expression against `xmlDoc` and returns the matching nodes.
///
/// - Parameter xmlDoc: Document the XPath context is created for.
/// - Parameter xPath: XPath expression to evaluate.
/// - Returns: The node pointers found in the resulting node set, in node-set
///   order. Empty when the context cannot be created, the evaluation yields no
///   object, or the result carries no nodes.
func XMLFindXPath(xmlDoc:xmlDocPtr, xPath: String) -> [xmlNodePtr] {
    let xPathContext = xmlXPathNewContext(xmlDoc)
    guard xPathContext != nil else {
        return []
    }
    // Released on every exit path below.
    defer { xmlXPathFreeContext(xPathContext) }
    xPathContext.memory.node = nil

    // Pass the Swift string directly: the compiler supplies a UTF-8 buffer
    // that stays valid for the duration of the call. The previous code built a
    // pointer from a temporary array (and force-unwrapped it), which does not
    // guarantee the bytes outlive the pointer conversion.
    let xPathObject = xmlXPathEvalExpression(xPath, xPathContext)
    guard xPathObject != nil else {
        return []
    }
    defer { xmlXPathFreeObject(xPathObject) }

    let nodeSet = xPathObject.memory.nodesetval
    guard nodeSet != nil else {
        return []
    }
    // `> 0` also rejects a negative count, which would trap the range below.
    let nodeCount = Int(nodeSet.memory.nodeNr)
    guard nodeCount > 0 && nodeSet.memory.nodeTab != nil else {
        return []
    }

    var resultNodes = [xmlNodePtr]()
    resultNodes.reserveCapacity(nodeCount)
    for index in 0 ..< nodeCount {
        resultNodes.append(nodeSet.memory.nodeTab[index])
    }
    return resultNodes
}
/// Parses an in-memory buffer into a libxml2 document.
///
/// - Parameter data: Bytes to parse; `nil` yields `nil`.
/// - Parameter encoding: Currently not used by the implementation; no encoding
///   name is handed to the parser.
/// - Parameter isXML: `true` parses with `xmlReadMemory` (recover mode);
///   `false` (the default) parses with `htmlReadMemory` (recover mode, warnings
///   and errors suppressed).
/// - Returns: The parsed document, or `nil` when there is no data, the buffer
///   is empty or too large for the parser's 32-bit size argument, or the
///   parser returns no document. The returned document is not freed here.
func XMLReadNSData(data:NSData?, encoding:NSStringEncoding = NSUTF8StringEncoding, isXML:Bool = false) -> xmlDocPtr? {
    guard let data = data else {
        return nil
    }
    // The parser takes a C int length: reject sizes that would trap in the
    // CInt conversion, and empty buffers that cannot produce a document.
    if data.length <= 0 || data.length > Int(Int32.max) {
        return nil
    }
    let cBuffer = UnsafePointer<CChar>(data.bytes)
    let cSize = CInt(data.length)

    let document: xmlDocPtr
    if isXML {
        let options = CInt(XML_PARSE_RECOVER.rawValue)
        document = xmlReadMemory(cBuffer, cSize, nil, nil, options)
    } else {
        let options = CInt(HTML_PARSE_RECOVER.rawValue | HTML_PARSE_NOWARNING.rawValue | HTML_PARSE_NOERROR.rawValue)
        document = htmlReadMemory(cBuffer, cSize, nil, nil, options)
    }

    // A failed parse comes back as a null pointer. Map it to `nil` so callers
    // using `if let` do not receive a non-nil optional wrapping a null document.
    guard document != nil else {
        return nil
    }
    return document
}
/// `NSStringEncoding` value corresponding to the CoreFoundation
/// `GB_18030_2000` string encoding.
let GB18030_2000_Encoding: NSStringEncoding = CFStringConvertEncodingToNSStringEncoding(
    CFStringEncoding(CFStringEncodings.GB_18030_2000.rawValue)
)
/* ---------- Test code ---------- */
/// Small stopwatch: remembers when it was created and formats the time
/// elapsed since then together with a caller-supplied label.
class CElapseTime {
    /// Moment the instance was created.
    var startTime:NSDate
    /// Label placed in front of the elapsed time.
    var prompt:String
    /// Not read or written anywhere in this class.
    var unsed:Bool = false

    /// Starts timing now and stores `prompt` as the label.
    init(prompt:String) {
        self.startTime = NSDate()
        self.prompt = prompt
    }

    /// The label followed by "耗时:" and the number of seconds elapsed since
    /// `startTime`, measured at the moment of the call.
    var newprompt:String {
        let elapsed = NSDate().timeIntervalSinceDate(startTime)
        return "\(prompt)耗时:\(elapsed)"
    }
}
/// Benchmark: reads a local sample file, parses it as HTML, touches the
/// attributes and content of every `//div` match, and prints the match count
/// plus the time spent reading and parsing.
func testParseSina() {
    let sURL = "http://www.baidu.com"
    print("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\(sURL)")

    var timer = CElapseTime(prompt: "读取网页")
    // The content is read from a local sample file rather than fetched from sURL.
    var sContent:NSString? = nil
    do {
        sContent = try NSString(contentsOfFile: "/Volumes/Data/Document/Test/sample.txt", encoding: NSUTF8StringEncoding)
    } catch {
        // Report the failure instead of silently continuing with no content.
        print("testParseSina: failed to read sample file: \(error)")
    }
    print(timer.newprompt)
    let sTimer1 = timer.newprompt

    timer = CElapseTime(prompt: "数据解析")
    if let doc = XMLReadNSData(sContent?.dataUsingEncoding(NSUTF8StringEncoding)){
        // The document is owned by this scope; release it once the nodes that
        // point into it are no longer used. xmlFreeDoc accepts a null pointer.
        defer { xmlFreeDoc(doc) }

        let rootNode = XMLNode(document: doc)
        let findNodes = rootNode.xPath("//div")
        for childNode in findNodes {
            autoreleasepool{
                // Force evaluation of the lazy properties; the values are discarded.
                let _ = (childNode.attributes, childNode.rawContent)
            }
        }
        print(findNodes.count)
    }
    print(sTimer1)
    print(timer.newprompt)
}
// Top-level statement: runs the parsing benchmark.
testParseSina()