原文:developer.android.com/training/ba…
下面是 StackOverflow.com 提供的 XML feed 示例:
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:creativeCommons="http://backend.userland.com/creativeCommonsRssModule" ...">
<title type="text">newest questions tagged android - Stack Overflow</title>
...
<entry>
...
</entry>
<entry>
<id>http://stackoverflow.com/q/9439999</id>
<re:rank scheme="http://stackoverflow.com">0</re:rank>
<title type="text">Where is my data file?</title>
<category scheme="http://stackoverflow.com/feeds/tag?tagnames=android&amp;sort=newest/tags" term="android"/>
<category scheme="http://stackoverflow.com/feeds/tag?tagnames=android&amp;sort=newest/tags" term="file"/>
<author>
<name>cliff2310</name>
<uri>http://stackoverflow.com/users/1128925</uri>
</author>
<link rel="alternate" href="http://stackoverflow.com/questions/9439999/where-is-my-data-file" />
<published>2012-02-25T00:30:54Z</published>
<updated>2012-02-25T00:30:54Z</updated>
<summary type="html">
<p>I have an Application that requires a data file...</p>
</summary>
</entry>
<entry>
...
</entry>
...
</feed>
创建XML解析器
import android.util.Xml;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
/**
 * Parses the stackoverflow.com XML (Atom) feed.
 * Given an InputStream of the feed, returns a List of {@link Entry} objects.
 */
public class StackOverflowXmlParser {
    // Namespaces are not used, so every require() call passes null here.
    private static final String NAMESPACE = null;

    /**
     * Parses the whole feed read from the given stream.
     * The stream is always closed before this method returns, even on failure.
     *
     * @param in stream containing the XML feed
     * @return the entries found in the feed, in document order
     * @throws XmlPullParserException if the document is not the expected XML
     * @throws IOException if reading from the stream fails
     */
    public List<Entry> parse(InputStream in) throws XmlPullParserException, IOException {
        try {
            XmlPullParser parser = Xml.newPullParser();
            parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, false);
            parser.setInput(in, null);
            // Advance to the first tag; readFeed() requires it to be <feed>.
            parser.nextTag();
            return readFeed(parser);
        } finally {
            in.close();
        }
    }

    /**
     * Walks the children of the "feed" element, collecting every "entry"
     * element and skipping everything else.
     *
     * @param parser parser positioned on the "feed" start tag
     * @return the list of parsed entries
     * @throws XmlPullParserException if the current tag is not "feed" or the XML is invalid
     * @throws IOException if reading from the underlying stream fails
     */
    private List<Entry> readFeed(XmlPullParser parser) throws XmlPullParserException, IOException {
        List<Entry> entries = new ArrayList<Entry>();
        parser.require(XmlPullParser.START_TAG, NAMESPACE, "feed");
        while (parser.next() != XmlPullParser.END_TAG) {
            if (parser.getEventType() != XmlPullParser.START_TAG) {
                continue;
            }
            String name = parser.getName();
            if (name.equals("entry")) {
                entries.add(readEntry(parser));
            } else {
                skip(parser);
            }
        }
        return entries;
    }

    /**
     * Represents a single entry (post) in the XML feed.
     * Holds the data members "title", "link" and "summary".
     */
    public static class Entry {
        public final String title;
        public final String link;
        public final String summary;

        private Entry(String title, String summary, String link) {
            this.title = title;
            this.summary = summary;
            this.link = link;
        }
    }

    /**
     * Parses the contents of one "entry" element.
     * The "title", "summary" and "link" children are handed to their
     * dedicated read methods; every other child is skipped.
     *
     * @param parser parser positioned on the "entry" start tag
     * @return the parsed entry; a field is null when its tag was absent
     * @throws XmlPullParserException if the current tag is not "entry" or the XML is invalid
     * @throws IOException if reading from the underlying stream fails
     */
    private Entry readEntry(XmlPullParser parser) throws XmlPullParserException, IOException {
        parser.require(XmlPullParser.START_TAG, NAMESPACE, "entry");
        String title = null;
        String summary = null;
        String link = null;
        while (parser.next() != XmlPullParser.END_TAG) {
            if (parser.getEventType() != XmlPullParser.START_TAG) {
                continue;
            }
            String name = parser.getName();
            if (name.equals("title")) {
                title = readTitle(parser);
            } else if (name.equals("summary")) {
                summary = readSummary(parser);
            } else if (name.equals("link")) {
                String href = readLink(parser);
                // readLink() returns "" for non-alternate links; do not let
                // such a link overwrite an alternate link found earlier.
                if (link == null || href.length() > 0) {
                    link = href;
                }
            } else {
                skip(parser);
            }
        }
        return new Entry(title, summary, link);
    }

    // Reads the text content of a "title" element.
    private String readTitle(XmlPullParser parser) throws IOException, XmlPullParserException {
        parser.require(XmlPullParser.START_TAG, NAMESPACE, "title");
        String title = readText(parser);
        parser.require(XmlPullParser.END_TAG, NAMESPACE, "title");
        return title;
    }

    /**
     * Reads a "link" element and returns its href when rel="alternate".
     * Returns an empty string for any other link (including one with no
     * rel or no href attribute). The element is always consumed, so the
     * parser ends on the "link" end tag whatever the rel value is.
     */
    private String readLink(XmlPullParser parser) throws IOException, XmlPullParserException {
        String link = "";
        parser.require(XmlPullParser.START_TAG, NAMESPACE, "link");
        String relType = parser.getAttributeValue(null, "rel");
        // Constant-first equals: the rel attribute may be missing (null).
        if ("alternate".equals(relType)) {
            String href = parser.getAttributeValue(null, "href");
            if (href != null) {
                link = href;
            }
        }
        // Attributes must be read before this call; skip() moves the parser
        // to the matching end tag for every kind of link.
        skip(parser);
        parser.require(XmlPullParser.END_TAG, NAMESPACE, "link");
        return link;
    }

    // Reads the text content of a "summary" element.
    private String readSummary(XmlPullParser parser) throws IOException, XmlPullParserException {
        parser.require(XmlPullParser.START_TAG, NAMESPACE, "summary");
        String summary = readText(parser);
        parser.require(XmlPullParser.END_TAG, NAMESPACE, "summary");
        return summary;
    }

    /**
     * Extracts the text value of the current element (used for title and summary).
     * Returns an empty string when the next event is not TEXT.
     */
    private String readText(XmlPullParser parser) throws IOException, XmlPullParserException {
        String result = "";
        if (parser.next() == XmlPullParser.TEXT) {
            result = parser.getText();
            parser.nextTag();
        }
        return result;
    }

    /**
     * Skips a tag the parser does not care about, including everything nested in it.
     * Consumes the START_TAG and all following events up to and including the
     * matching END_TAG. The nesting depth is tracked so that the method stops at
     * the correct END_TAG rather than at the first end tag it meets.
     *
     * @param parser parser positioned on a START_TAG
     * @throws IllegalStateException if the current event is not a START_TAG
     * @throws XmlPullParserException if the XML is invalid
     * @throws IOException if reading from the underlying stream fails
     */
    private void skip(XmlPullParser parser) throws XmlPullParserException, IOException {
        if (parser.getEventType() != XmlPullParser.START_TAG) {
            throw new IllegalStateException();
        }
        int depth = 1;
        while (depth != 0) {
            switch (parser.next()) {
                case XmlPullParser.END_TAG:
                    depth--;
                    break;
                case XmlPullParser.START_TAG:
                    depth++;
                    break;
            }
        }
    }
}
下载xml数据,并使用解析器解析
// AsyncTask implementation used to download the XML feed from stackoverflow.com.
private class DownloadXmlTask extends AsyncTask<String, Void, String> {

    /**
     * Loads the feed for the first URL and returns the HTML to display.
     * On failure the matching error string resource is returned instead.
     */
    @Override
    protected String doInBackground(String... urls) {
        String html;
        try {
            html = loadXmlFromNetwork(urls[0]);
        } catch (IOException e) {
            html = getResources().getString(R.string.connection_error);
        } catch (XmlPullParserException e) {
            html = getResources().getString(R.string.xml_error);
        }
        return html;
    }

    /** Shows the resulting HTML string in the layout's WebView. */
    @Override
    protected void onPostExecute(String result) {
        setContentView(R.layout.network_main);
        WebView feedView = (WebView) findViewById(R.id.webview);
        feedView.loadData(result, "text/html", null);
    }
}
/**
 * Downloads the feed from the given URL, hands it to StackOverflowXmlParser
 * and combines the parsed entries with HTML markup.
 *
 * @param urlString address of the XML feed
 * @return an HTML string with a heading, an "updated" timestamp and one
 *         linked title plus summary per entry
 * @throws XmlPullParserException if the feed cannot be parsed
 * @throws IOException if the download fails
 */
private String loadXmlFromNetwork(String urlString) throws XmlPullParserException, IOException {
    InputStream stream = null;
    StackOverflowXmlParser stackOverflowXmlParser = new StackOverflowXmlParser();
    List<StackOverflowXmlParser.Entry> entries = null;
    Calendar rightNow = Calendar.getInstance();
    DateFormat formatter = new SimpleDateFormat("MMM dd h:mmaa");

    StringBuilder htmlString = new StringBuilder();
    htmlString.append("<h3>")
            .append(getResources().getString(R.string.page_title))
            .append("</h3>");
    htmlString.append("<em>")
            .append(getResources().getString(R.string.updated))
            .append(" ")
            .append(formatter.format(rightNow.getTime()))
            .append("</em>");

    try {
        stream = downloadUrl(urlString);
        entries = stackOverflowXmlParser.parse(stream);
    } finally {
        // Make sure the InputStream is closed once the app is done with it.
        if (stream != null) {
            stream.close();
        }
    }

    // Walk the results and wrap them in HTML markup.
    for (StackOverflowXmlParser.Entry entry : entries) {
        // Title and link are plain text from the network: escape them so
        // feed content cannot inject markup or break out of the attribute.
        htmlString.append("<p><a href='")
                .append(escapeHtml(entry.link))
                .append("'>")
                .append(escapeHtml(entry.title))
                .append("</a></p>");
        // The summary is appended unescaped, as markup. Skip it when the
        // entry had none so the literal text "null" is not rendered.
        if (entry.summary != null) {
            htmlString.append(entry.summary);
        }
    }
    return htmlString.toString();
}

// Escapes the five HTML-significant characters; null becomes an empty string.
private static String escapeHtml(String text) {
    if (text == null) {
        return "";
    }
    StringBuilder escaped = new StringBuilder(text.length());
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        switch (c) {
            case '&':
                escaped.append("&amp;");
                break;
            case '<':
                escaped.append("&lt;");
                break;
            case '>':
                escaped.append("&gt;");
                break;
            case '"':
                escaped.append("&quot;");
                break;
            case '\'':
                escaped.append("&#39;");
                break;
            default:
                escaped.append(c);
        }
    }
    return escaped.toString();
}
/**
 * Given a URL string, opens an HTTP GET connection to it and returns the
 * connection's input stream. The caller is responsible for closing the stream.
 *
 * @param urlString address to connect to
 * @return the input stream of the connection
 * @throws IOException if the URL is malformed or the connection fails
 */
private InputStream downloadUrl(String urlString) throws IOException {
    final int readTimeoutMillis = 10000;
    final int connectTimeoutMillis = 15000;

    HttpURLConnection connection = (HttpURLConnection) new URL(urlString).openConnection();
    connection.setReadTimeout(readTimeoutMillis);
    connection.setConnectTimeout(connectTimeoutMillis);
    connection.setRequestMethod("GET");
    connection.setDoInput(true);

    // Start the query.
    connection.connect();
    return connection.getInputStream();
}