// Crawl HTML: fetch a web page over a raw socket and save the response to a file.
import java.io.*;
import java.net.*;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Fetches a page from www.hao123.com over a plain TCP socket using a hand-written
 * HTTP/1.1 GET request and saves the raw response to a local text file.
 *
 * <p>The response is stored exactly as received, line by line: status line, response
 * headers and body. No HTTP decoding (for example of chunked transfer encoding) is
 * performed.
 */
public class GetHtmlContent {
    /** Host that is resolved, connected to, and sent in the {@code Host} header. */
    private static final String HOST = "www.hao123.com";
    /** Plain-HTTP port. */
    private static final int PORT = 80;
    /** Resource requested from the host. */
    private static final String PATH = "/index.html";
    /** Maximum time to wait for the TCP connection to be established. */
    private static final int CONNECT_TIMEOUT_MS = 10000;
    /** Maximum time a single read may block before failing with a timeout. */
    private static final int READ_TIMEOUT_MS = 10000;

    /**
     * Downloads the page and stores it in {@code D:\hello\hao3.txt}, creating the
     * target directory first if it does not exist.
     *
     * @param args ignored
     * @throws IOException if the directory cannot be created, the host cannot be
     *     resolved or reached, or reading/writing fails
     */
    public static void main(String[] args) throws IOException {
        File file = new File("D:\\hello", "hao3.txt");
        File parent = file.getParentFile();
        // The output stream creates the file itself, but not missing parent directories.
        if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
            throw new IOException("Cannot create directory: " + parent);
        }
        writeToFile(file);
    }

    /**
     * Sends the GET request and copies the resolved address followed by every line
     * of the raw response into {@code file}, encoded as UTF-8.
     *
     * @param file destination file; created or truncated
     * @throws IOException on name-resolution, connection, timeout, read or write failure
     */
    private static void writeToFile(File file) throws IOException {
        // try-with-resources guarantees the file is flushed and both the file and the
        // socket are closed, even when the transfer fails half-way.
        try (BufferedWriter out = new BufferedWriter(
                     new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8));
             Socket socket = new Socket()) {
            InetAddress address = InetAddress.getByName(HOST);
            out.write("网站地址:" + address);
            out.newLine();

            socket.connect(new InetSocketAddress(address, PORT), CONNECT_TIMEOUT_MS);
            // Without a read timeout a stalled server would block this program forever.
            socket.setSoTimeout(READ_TIMEOUT_MS);

            // The request writer is not closed on its own: closing it would close the
            // socket before the response has been read.
            Writer request = new OutputStreamWriter(socket.getOutputStream(), StandardCharsets.UTF_8);
            request.write(buildRequest(HOST, PATH));
            request.flush();

            BufferedReader response = new BufferedReader(
                    new InputStreamReader(socket.getInputStream(), StandardCharsets.UTF_8));
            String line;
            while ((line = response.readLine()) != null) {
                out.write(line);
                out.newLine();
            }
        }
    }

    /**
     * Builds a minimal HTTP/1.1 GET request.
     *
     * <p>{@code Connection: close} asks the server to close the connection once the
     * response is complete, so that reading until end-of-stream terminates instead of
     * waiting on a kept-alive connection.
     *
     * <p>Package-private so the request text can be checked without network access.
     *
     * @param host value of the {@code Host} header
     * @param path request target, e.g. {@code /index.html}
     * @return the full request text, terminated by an empty line
     */
    static String buildRequest(String host, String path) {
        return "GET " + path + " HTTP/1.1\r\n"
                + "Host: " + host + "\r\n"
                + "Connection: close\r\n"
                + "\r\n";
    }
}