using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using Newtonsoft.Json;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
using System.Security.Cryptography.X509Certificates;
using System.Net.Security;
namespace _58GatherProgress
{
public partial class Form1 : Form
{
/// <summary>
/// Creates the form. All control setup is delegated to
/// <c>InitializeComponent</c>, which is declared in another part of this
/// partial class.
/// </summary>
public Form1()
{
    this.InitializeComponent();
}
/// <summary>
/// Click handler for <c>button1</c>: downloads the page whose address is typed
/// into <c>textBox1</c>, parses it with <see cref="GetHtml"/> and shows the
/// parsed items with <see cref="ShowData"/>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button1_Click(object sender, EventArgs e)
{
    // Surrounding whitespace is a common copy/paste artefact, not part of the address.
    string url = textBox1.Text.Trim();
    if (string.IsNullOrEmpty(url))
    {
        return;
    }

    string html;
    try
    {
        html = Get(url);
    }
    catch (Exception ex)
    {
        // A failed download must not reach the parser: the exception text is
        // not HTML, and parsing it used to report a bogus item count with an
        // empty grid. Tell the user what went wrong instead.
        MessageBox.Show(this, ex.Message, this.Text, MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    ShowData(GetHtml(html));
}
/// <summary>
/// Sends <paramref name="postData"/> to <paramref name="_url"/> with an HTTP
/// POST and returns the response body decoded as UTF-8.
/// </summary>
/// <param name="_url">Absolute HTTP/HTTPS address to post to.</param>
/// <param name="postData">
/// Request body. It is sent as-is with content type
/// <c>application/x-www-form-urlencoded</c>, so the caller is responsible for
/// form-encoding it. <c>null</c> is sent as an empty body.
/// </param>
/// <returns>The complete response body.</returns>
/// <remarks>
/// Exceptions raised by the request (for example <see cref="WebException"/>)
/// are not caught here and propagate to the caller.
/// </remarks>
private string GetRequest(string _url, string postData)
{
    // The Content-Type header below declares UTF-8, so the bytes must really
    // be UTF-8; ASCII encoding would replace every non-ASCII character by '?'.
    byte[] data = Encoding.UTF8.GetBytes(postData ?? string.Empty);

    HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create(_url);
    myRequest.Accept = "text/html, */*; q=0.01";
    myRequest.Method = "POST";
    myRequest.KeepAlive = true;
    myRequest.Timeout = 10000; // milliseconds
    myRequest.ContentType = "application/x-www-form-urlencoded; charset=UTF-8";
    myRequest.ContentLength = data.Length;
    myRequest.UserAgent = "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Mobile Safari/537.36";

    // 'using' guarantees the streams and the response are released even when
    // writing or reading throws; previously they leaked on every call.
    using (Stream requestStream = myRequest.GetRequestStream())
    {
        requestStream.Write(data, 0, data.Length);
    }

    using (HttpWebResponse myResponse = (HttpWebResponse)myRequest.GetResponse())
    using (Stream responseStream = myResponse.GetResponseStream())
    using (StreamReader reader = new StreamReader(responseStream, Encoding.UTF8))
    {
        return reader.ReadToEnd();
    }
}
/// <summary>
/// Certificate validation callback with the signature of
/// <see cref="RemoteCertificateValidationCallback"/>.
/// Accepts the server certificate only when the platform found no problem
/// with it.
/// </summary>
/// <param name="sender">Object that raised the validation (not used).</param>
/// <param name="certificate">Certificate presented by the server (not used).</param>
/// <param name="chain">Chain built for the certificate (not used).</param>
/// <param name="errors">Problems detected by the platform's own validation.</param>
/// <returns>
/// <c>true</c> when <paramref name="errors"/> is
/// <see cref="SslPolicyErrors.None"/>; otherwise <c>false</c>.
/// </returns>
public static bool CheckValidationResult(object sender, X509Certificate certificate, X509Chain chain, SslPolicyErrors errors)
{
    // Returning true unconditionally would accept forged, expired or
    // mismatched certificates and expose the request (including its cookies)
    // to man-in-the-middle interception.
    return errors == SslPolicyErrors.None;
}
/// <summary>
/// Downloads <paramref name="url"/> with an HTTP GET and returns the response
/// body decoded as UTF-8 and trimmed.
/// </summary>
/// <param name="url">Absolute HTTP or HTTPS address.</param>
/// <returns>
/// The trimmed response body. When the server answers with an error status,
/// the body of that error response is returned instead of throwing.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
/// <exception cref="WebException">
/// The request failed without any server response (timeout, DNS failure,
/// refused connection, ...).
/// </exception>
/// <remarks>
/// Side effects on process-wide settings: sets
/// <see cref="ServicePointManager.DefaultConnectionLimit"/> to 200 and, for
/// https addresses, installs <c>CheckValidationResult</c> as the global
/// certificate callback and sets the allowed TLS versions.
/// </remarks>
public string Get(string url)
{
    if (url == null)
    {
        throw new ArgumentNullException("url");
    }

    HttpWebRequest request = null;
    HttpWebResponse response = null;
    try
    {
        ServicePointManager.DefaultConnectionLimit = 200;
        if (url.StartsWith("https", StringComparison.OrdinalIgnoreCase))
        {
            // Whether a certificate is accepted is decided entirely by CheckValidationResult.
            ServicePointManager.ServerCertificateValidationCallback =
                new RemoteCertificateValidationCallback(CheckValidationResult);
            // SSL 3.0 is deliberately not enabled: the protocol is broken
            // (POODLE) and some runtimes reject the flag outright.
            ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls | SecurityProtocolType.Tls11 | SecurityProtocolType.Tls12;
        }

        request = (HttpWebRequest)WebRequest.Create(url);
        request.UserAgent = "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Mobile Safari/537.36";
        request.Method = "GET";
        // NOTE(review): hard-coded cookie including a session id; consider moving it to configuration.
        request.Headers.Add(HttpRequestHeader.Cookie, "f=n; ABTESTCOOKIEVALUE=1; HISTORY_CATE_IDS=70124%2C246%7C%E4%BA%8C%E6%89%8B%E8%AE%BE%E5%A4%87%7C1%7C246; cookieuid=e4bde6aa-74aa-408a-b2cc-121b786d09c7; device=m; id58=CocI02Pq6N9nzcH1CRH2Ag==; sessionid=55affd17-8ba3-4661-8e8d-19b1dc4d675f");

        try
        {
            response = (HttpWebResponse)request.GetResponse();
        }
        catch (WebException e)
        {
            // An HTTP error status still carries a body worth returning.
            // Failures that never reached the server have no response at all;
            // rethrow those unchanged instead of dereferencing null.
            response = e.Response as HttpWebResponse;
            if (response == null)
            {
                throw;
            }
        }

        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body, Encoding.UTF8))
        {
            return reader.ReadToEnd().Trim();
        }
    }
    finally
    {
        if (response != null)
        {
            response.Close();
        }
        if (request != null)
        {
            request.Abort();
        }
    }
}
/// <summary>
/// Displays the given items by assigning them as the data source of
/// <c>dataGridView1</c>.
/// </summary>
/// <param name="infolist">Items to display; passed to the grid unchanged.</param>
private void ShowData(List<InfoModel> infolist)
{
    this.dataGridView1.DataSource = infolist;
}
/// <summary>
/// Extracts the listing items from a downloaded page and writes the number of
/// extracted items to <c>textBox2</c>.
/// </summary>
/// <param name="infoCont">Raw HTML of the page.</param>
/// <returns>
/// One <c>InfoModel</c> per item that could be parsed (possibly none), or
/// <c>null</c> when <paramref name="infoCont"/> is null or empty.
/// </returns>
/// <remarks>
/// The page is cut at every opening tag of a list item with class
/// <c>item item_new item-new</c>; each piece is then cut at every anchor
/// opening. The fields are read from the second and third anchor segment with
/// <see cref="getPatternValue"/>.
/// </remarks>
public List<InfoModel> GetHtml(string infoCont)
{
    if (string.IsNullOrEmpty(infoCont))
    {
        return null;
    }

    // StringSplitOptions.None keeps chunk 0 as "everything before the first
    // item" even when that text is empty, so skipping chunk 0 below can never
    // drop a real item.
    string[] values = infoCont.Split(new string[] { "<li class=\"item item_new item-new\"" }, StringSplitOptions.None);

    List<InfoModel> list = new List<InfoModel>();
    for (int k = 1; k < values.Length; k++)
    {
        string[] units = values[k].Split(new string[] { "<a" }, StringSplitOptions.RemoveEmptyEntries);

        // units[1] and units[2] are read below; an item without them cannot
        // be parsed and is skipped instead of throwing IndexOutOfRangeException.
        if (units.Length < 3)
        {
            continue;
        }

        InfoModel model = new InfoModel();
        model.InfoID = getPatternValue(units[1], @"infoid='([0-9]*)");
        model.LinkMan = getPatternValue(units[1], @"[\u4e00-\u9fa5]+");
        model.InfoTitle = getPatternValue(units[2], @"<strong (.*)>[\s\S]*<\/strong>");
        model.Href = getPatternValue(units[2], @"href=([""'])?(?<href>[^'""]+)\1[^>]*>");
        model.Area = getPatternValue(units[2], "<span class=\"info-serve-txt\">[\\S]*?<\\/span>");
        model.Remark = units[2];
        list.Add(model);
    }

    // Report the items actually extracted; the raw chunk count also includes
    // the page preamble and was therefore always one too high.
    textBox2.Text = "共" + list.Count + "条";
    return list;
}
/// <summary>
/// Returns the first match of <paramref name="pattern"/> in
/// <paramref name="content"/> with a fixed set of markup removed.
/// </summary>
/// <param name="content">Text to search; null or empty yields an empty result.</param>
/// <param name="pattern">
/// Regular expression, applied case-insensitively in multiline mode.
/// The whole match is used, not an individual capture group.
/// </param>
/// <returns>
/// The matched text after removing span, b and strong tags, the literal
/// <c>infoid='</c> and CR/LF pairs, trimmed; an empty string when nothing
/// matches.
/// </returns>
public static string getPatternValue(string content, string pattern)
{
    if (string.IsNullOrEmpty(content))
    {
        return "";
    }

    // Only the first match is used. Looping over all matches used to index
    // capture groups by the match number and append the value to itself,
    // which produced empty or doubled results whenever the pattern matched
    // more than once.
    Match first = Regex.Match(content, pattern, RegexOptions.IgnoreCase | RegexOptions.Multiline);
    if (!first.Success)
    {
        return "";
    }

    string result = first.Value.Trim();

    // Removal order is significant: closing span tags go before the lazy
    // opening-tag patterns are applied.
    foreach (string markup in new[] { "</span>", "<span.*?>", "<b.*?>", "</b>", "<strong.*?>", "</strong>" })
    {
        result = Regex.Replace(result, markup, "");
    }

    return result.Replace("infoid='", "").Replace("\r\n", "").Trim();
}
/// <summary>
/// Collects the named group <paramref name="groupName"/> from every match of
/// <paramref name="pattern"/> in <paramref name="content"/>, strips a fixed
/// set of markup from each value and joins the values with commas.
/// </summary>
/// <param name="content">Text to search.</param>
/// <param name="pattern">
/// Regular expression, applied case-insensitively in multiline mode.
/// </param>
/// <param name="groupName">
/// Name of the capture group to read. Its text must occur in
/// <paramref name="pattern"/> at an index greater than zero, otherwise
/// nothing is searched.
/// </param>
/// <returns>
/// The cleaned group values separated by ",", or an empty string when the
/// group name is not found in the pattern or the pattern does not match.
/// </returns>
public static string GetRegValue(string content, string pattern, string groupName)
{
    if (pattern.IndexOf(groupName) <= 0)
    {
        return "";
    }

    // Applied to every captured value, in exactly this order.
    string[] markup = { "<a.*?>", "</a>", "<span.*?>", "</span>", " ", "<font.*?>", "</font>", "<strong.*?>", "</strong>" };

    Regex matcher = new Regex(pattern, RegexOptions.Multiline | RegexOptions.IgnoreCase);
    List<string> pieces = new List<string>();
    foreach (Match hit in matcher.Matches(content))
    {
        string piece = hit.Groups[groupName].Value.Trim();
        foreach (string junk in markup)
        {
            piece = Regex.Replace(piece, junk, "");
        }
        pieces.Add(piece);
    }

    return string.Join(",", pieces);
}
}
}