Arronlong / httpclientutil

该项目是基于HttpClient-4.4.1封装的一个工具类,提供了更为简单的GET、POST及上传下载等功能,同时支持插件式配置Http-Header(包含自定义header)、配置SSL和Proxy等。
https://blog.csdn.net/xiaoxian8023/category_9264781.html
Apache License 2.0
1.41k stars 719 forks source link

请教为何一直都是返回错误页面 #4

Closed kleen closed 8 years ago

Arronlong commented 8 years ago

1. 提交表单时没有传 __VIEWSTATE 这个参数的值。 2. 访问最终页面时要带上cookie,直接访问 http://www.szcredit.com.cn/web/gspt/newGSPTList.aspx 即可。

下面是我使用了最新版本的HttpClientUtil写的代码:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.Header;
import org.apache.http.cookie.Cookie;

import com.tgb.ccl.http.common.HttpConfig;
import com.tgb.ccl.http.common.HttpCookies;
import com.tgb.ccl.http.common.HttpHeader;
import com.tgb.ccl.http.exception.HttpProcessException;
import com.tgb.ccl.http.httpclient.HttpClientUtil;
/**
 * Demo that submits the search form on the szcredit.com.cn home page and prints
 * the first row of the result list.
 *
 * <p>Flow: load the home page to collect the hidden ASP.NET form fields, request
 * the captcha URL and read its answer from the {@code CheckCode} cookie, post the
 * form, then fetch the result page with the same cookie context and scrape it
 * with regular expressions.
 */
public class Test {

    /**
     * Runs the demo end to end and prints the scraped fields to standard output.
     *
     * @param args unused
     * @throws HttpProcessException if any of the HTTP requests fails
     * @throws IllegalStateException if an expected field cannot be found in a fetched page
     */
    public static void main(String[] args) throws HttpProcessException {

        // Home page; the search form is also posted back to this URL
        String newIndex = "http://www.szcredit.com.cn/web/newIndex.aspx";
        // Captcha URL (the random query string is presumably a cache-buster)
        String checkCodeUrl = "http://www.szcredit.com.cn/web/WebPages/Member/CheckCode.aspx?"+Math.random();
        // Page that displays the search result
        String url = "http://www.szcredit.com.cn/web/gspt/newGSPTList.aspx";

        // Cookie storage shared by every request below
        HttpCookies cookies = HttpCookies.custom();
        // Header settings. userAgent(...) takes the header VALUE only; passing
        // "User-Agent: Mozilla/5.0" would duplicate the header name inside the value.
        Header[] headers = HttpHeader.custom().userAgent("Mozilla/5.0").build();

        // Request settings (the latest version recommends wrapping them in HttpConfig)
        HttpConfig config = HttpConfig.custom().headers(headers).context(cookies.getContext()).encoding("UTF-8");

        // To monitor the traffic with Fiddler, build a client with
        // HCB.custom().proxy("localhost", 8888).build() and pass it to config.client(...).

        // Step 1: load the home page and collect the form parameters
        String newIndexHtml = HttpClientUtil.get(config.url(newIndex));

        Map<String, Object> map = new HashMap<String, Object>();
        map.put("__ASYNCPOST", "true");
        map.put("__EVENTARGUMENT", "");
        map.put("__EVENTTARGET", "");
        map.put("__EVENTVALIDATION", firstMatch("id=\"__EVENTVALIDATION\"\\s*value=\"([^\"]*)\"", newIndexHtml));
        // __VIEWSTATE must be sent; leaving it out was the cause of the reported error page
        map.put("__VIEWSTATE", firstMatch("id=\"__VIEWSTATE\"\\s*value=\"([^\"]*)\"", newIndexHtml));
        map.put("__VIEWSTATEGENERATOR", firstMatch("id=\"__VIEWSTATEGENERATOR\"\\s*value=\"([^\"]*)\"", newIndexHtml));
        map.put("hfuser", "");
        map.put("ibtnSearch.x", "77");
        map.put("ibtnSearch.y", "32");
        map.put("lbKeyword", "");
        map.put("smObj", "upGetCheckCode|ibtnSearch");
        map.put("txtKeyword", "440306115180944");

        // Step 2: request the captcha; its answer is read from the "CheckCode" cookie
        HttpClientUtil.get(config.url(checkCodeUrl));
        for (Cookie cookie : cookies.getCookieStore().getCookies()) {
            if ("CheckCode".equals(cookie.getName())) {
                map.put("txtCheckCode", cookie.getValue());
            }
        }

        // Step 3: post the form
        HttpClientUtil.post(config.url(newIndex).map(map));

        // Step 4: fetch the result page (the cookies travel with the shared context)
        String result = HttpClientUtil.get(config.url(url));

        System.out.println("企业名称:"+firstMatch("<td style=\"width:35%;\"><a href='[^']*' target=\"_blank\">([^<]*)</a>", result));
        System.out.println("注册号:"+firstMatch("<td style=\"width:15%;\">([^<]*)</td>", result));
        System.out.println("企业类型:"+firstMatch("<td style=\"width:20%;\">([^<]*)</td>", result));
        System.out.println("成立日期:"+firstMatch("<td style=\"width:15%; text-align:center;\">([^<]*)</td>", result));
        System.out.println("详细资料url:"+"http://www.szcredit.com.cn/web/gspt/"+firstMatch("<td style=\"width:35%;\"><a href='([^']*)' target=\"_blank\">[^<]*</a>", result));
    }

    /**
     * Returns the first value captured by {@code pattern} in {@code from}.
     *
     * <p>Replaces the bare {@code regex(...)[0]} lookups, which fail with an
     * uninformative {@link ArrayIndexOutOfBoundsException} when nothing matches.
     *
     * @param pattern regular expression containing at least one capturing group
     * @param from    text to search
     * @return the first captured value
     * @throws IllegalStateException if the pattern captures nothing in {@code from}
     */
    private static String firstMatch(String pattern, String from) {
        String[] captured = regex(pattern, from);
        if (captured.length == 0) {
            throw new IllegalStateException("No match found for pattern: " + pattern);
        }
        return captured[0];
    }

    /**
     * Extracts content with a regular expression.
     *
     * <p>For every match, the values of all capturing groups are appended in
     * order; group 0 (the whole match) is not included.
     *
     * @param regex     regular expression
     * @param from      text to search; {@code null} yields an empty array
     * @return the captured group values in match order, never {@code null}
     */
    public static String[] regex(String regex, String from){
        List<String> results = new ArrayList<String>();
        if (from == null) {
            // Nothing to search; avoid the NullPointerException from Pattern.matcher(null)
            return results.toArray(new String[0]);
        }
        Matcher matcher = Pattern.compile(regex).matcher(from);
        while (matcher.find()) {
            for (int group = 1; group <= matcher.groupCount(); group++) {
                results.add(matcher.group(group));
            }
        }
        return results.toArray(new String[0]);
    }
}