Skip to content

Commit

Permalink
restruct prepare start url
Browse files Browse the repository at this point in the history
  • Loading branch information
zlzforever committed May 24, 2016
1 parent 6a06f3a commit 3a3b025
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 73 deletions.
7 changes: 6 additions & 1 deletion src/Java2Dotnet.Spider.Common/EmailClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ public void SendMail(EmaillMessage m)
}
catch (Exception e)
{
Console.WriteLine(e);
}
}
}
Expand Down Expand Up @@ -190,7 +191,11 @@ private string Receive()
responseData = Encoding.UTF8.GetString(data, 0, bytes);
return responseData;
}
catch { return null; }
catch (Exception e)
{
Console.WriteLine(e);
return null;
}
}

private void Send(string s)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -287,13 +287,33 @@ private List<PrepareStartUrls> GetPrepareStartUrls(List<JObject> jobjects)
list.Add(jobject.ToObject<LinkSpiderPrepareStartUrls>());
break;
}
case Configuration.PrepareStartUrls.Types.Base:
{
var generalDbPrepareStartUrls = new BaseDbPrepareStartUrls();
foreach (var column in jobject.SelectTokens("$.Columns[*]"))
{
var c = new ConfigurableDbPrepareStartUrls.Column()
{
Name = column.SelectToken("$.Name").ToString()
};
foreach (var format in column.SelectTokens("$.Formatters[*]"))
{
var name = format.SelectToken("$.Name").ToString();
var formatterType = FormatterFactory.GetFormatterType(name);
c.Formatters.Add((Formatter)format.ToObject(formatterType));
}
generalDbPrepareStartUrls.Columns.Add(c);
}
list.Add(generalDbPrepareStartUrls);
break;
}
}
}

return list;
}

private void SetDbPrepareStartUrls(AbstractDbPrepareStartUrls generalDbPrepareStartUrls, JObject jobject)
private void SetDbPrepareStartUrls(ConfigurableDbPrepareStartUrls generalDbPrepareStartUrls, JObject jobject)
{
generalDbPrepareStartUrls.ConnectString = jobject.SelectToken("$.ConnectString")?.ToString();
generalDbPrepareStartUrls.Filters = jobject.SelectToken("$.Filters")?.ToObject<List<string>>();
Expand All @@ -304,7 +324,7 @@ private void SetDbPrepareStartUrls(AbstractDbPrepareStartUrls generalDbPrepareSt
generalDbPrepareStartUrls.Source = jobject.SelectToken("$.Source").ToObject<DataSource>();
foreach (var column in jobject.SelectTokens("$.Columns[*]"))
{
var c = new AbstractDbPrepareStartUrls.Column()
var c = new ConfigurableDbPrepareStartUrls.Column()
{
Name = column.SelectToken("$.Name").ToString()
};
Expand Down
177 changes: 107 additions & 70 deletions src/Java2Dotnet.Spider.Extension/Configuration/PrepareStartUrls.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
using System.Text.RegularExpressions;
using System.Data.SqlClient;
using System.Data.Common;
using static Java2Dotnet.Spider.Extension.Configuration.BaseDbPrepareStartUrls;
using Newtonsoft.Json;
#if !NET_CORE
using System.Web;
#else
Expand Down Expand Up @@ -50,6 +52,7 @@ public abstract class PrepareStartUrls
[Flags]
public enum Types
{
Base,
CommonDb,
GeneralDb,
DbList,
Expand All @@ -67,9 +70,9 @@ public enum Types

public Dictionary<string, object> Extras { get; set; }

public abstract Types Type { get; internal set; }

public abstract void Build(Site site, dynamic obj);

public abstract Types Type { get; internal set; }
}

public class CyclePrepareStartUrls : PrepareStartUrls
Expand Down Expand Up @@ -108,7 +111,7 @@ public override void Build(Site site, dynamic obj)
}
}

public abstract class AbstractDbPrepareStartUrls : PrepareStartUrls
public class BaseDbPrepareStartUrls : PrepareStartUrls
{
public class Column
{
Expand All @@ -117,94 +120,39 @@ public class Column
public List<Formatter> Formatters { get; set; } = new List<Formatter>();
}

public override Types Type { get; internal set; }
public override Types Type { get; internal set; } = Types.Base;

public DataSource Source { get; set; } = DataSource.MySql;

public string ConnectString { get; set; }

public string GroupBy { get; set; }

public string OrderBy { get; set; }

/// <summary>
/// 数据来源表名, 需要Schema/数据库名
/// </summary>
public string TableName { get; set; }

/// <summary>
/// 对表的筛选
/// 如: cdate='2016-03-01', isUsed=true
/// </summary>
public List<string> Filters { get; set; }
public string QueryString { get; set; }

/// <summary>
/// 用于拼接Url所需要的列
/// </summary>
public List<Column> Columns { get; set; } = new List<Column>();

public int Limit { get; set; }

/// <summary>
/// 拼接Url的方式, 会把Columns对应列的数据传入
/// https://s.taobao.com/search?q={0},s=0;
/// </summary>
public List<string> FormateStrings { get; set; }

/// <summary>
/// Assembles the SELECT statement used to load the rows that start URLs are built from.
/// The statement is composed from <c>TableName</c>, the optional <c>Filters</c>
/// (joined with AND), <c>GroupBy</c>, <c>OrderBy</c> and <c>Limit</c>.
/// </summary>
/// <returns>The SQL text for a MySql source.</returns>
/// <exception cref="SpiderExceptoin">Thrown when <c>Source</c> is anything other than MySql.</exception>
protected string GetSelectQueryString()
{
    // MySql is the only dialect this builder knows how to emit.
    if (Source != DataSource.MySql)
    {
        throw new SpiderExceptoin($"Unsport Source: {Source}");
    }

    var sql = new StringBuilder("SELECT * FROM ").Append(TableName);

    // All filters are combined into a single WHERE clause with AND.
    if (Filters?.Count > 0)
    {
        sql.Append(" WHERE ").Append(string.Join(" AND ", Filters));
    }

    // GroupBy / OrderBy are appended verbatim, surrounded by spaces.
    if (!string.IsNullOrEmpty(GroupBy))
    {
        sql.Append($" {GroupBy} ");
    }

    if (!string.IsNullOrEmpty(OrderBy))
    {
        sql.Append($" {OrderBy} ");
    }

    // A non-positive limit means "no LIMIT clause".
    if (Limit > 0)
    {
        sql.Append($" LIMIT {Limit} ");
    }

    return sql.ToString();
}

protected List<Dictionary<string, object>> PrepareDatas()
{
List<Dictionary<string, object>> list = new List<Dictionary<string, object>>();
using (var conn = DataSourceUtil.GetConnection(Source, ConnectString))
{
string sql = GetSelectQueryString();
string sql = QueryString;
conn.Open();
var command = conn.CreateCommand();
command.CommandText = sql;
command.CommandTimeout = 60000;
command.CommandType = CommandType.Text;

var reader = command.ExecuteReader();

while (reader.Read())
{
Dictionary<string, object> data = new Dictionary<string, object>();
Expand Down Expand Up @@ -248,9 +196,100 @@ protected List<string> PrepareArguments(Dictionary<string, object> data)
}
return arguments;
}

/// <summary>
/// Extension point for derived classes that generate <c>QueryString</c> from their own
/// settings. The implementation in this class does nothing, so <c>QueryString</c> keeps
/// whatever value was assigned to it and is executed as-is by <c>PrepareDatas</c>.
/// </summary>
protected virtual void BuildQueryString()
{
}

/// <summary>
/// Loads rows with <c>QueryString</c> and adds one start request to <paramref name="site"/>
/// for every (row, format string) combination.
/// </summary>
/// <param name="site">Site that receives the generated start requests.</param>
/// <param name="obj">Not used by this implementation.</param>
public override void Build(Site site, dynamic obj)
{
    // Let derived classes (re)generate QueryString before PrepareDatas executes it.
    // Without this call the BuildQueryString hook is never triggered from here and a
    // subclass that only overrides the hook would run with a stale or null query.
    // In this class the hook is a no-op, so a directly assigned QueryString is untouched.
    BuildQueryString();

    foreach (var data in PrepareDatas())
    {
        // The format arguments depend only on the row, so compute them once per row
        // instead of once per format string.
        object[] formatArgs = PrepareArguments(data).Cast<object>().ToArray();

        foreach (var formate in FormateStrings)
        {
            string tmpUrl = string.Format(formate, formatArgs);
            site.AddStartRequest(new Request(tmpUrl, 0, data)
            {
                Method = Method,
                Origin = Origin,
                PostBody = DbPrepareStartUrls.GetPostBody(PostBody, data),
                Referer = Referer
            });
        }
    }
}
}

/// <summary>
/// A <see cref="BaseDbPrepareStartUrls"/> whose query is not supplied directly but is
/// composed from a table name, filters, grouping, ordering and a row limit.
/// </summary>
public class ConfigurableDbPrepareStartUrls : BaseDbPrepareStartUrls
{
    /// <summary>
    /// Name of the source table; must include the schema/database name.
    /// </summary>
    public string TableName { get; set; }

    /// <summary>
    /// Filter conditions applied to the table,
    /// e.g. cdate='2016-03-01', isUsed=true
    /// </summary>
    public List<string> Filters { get; set; }

    /// <summary>
    /// Maximum number of rows to select; values of zero or less add no LIMIT clause.
    /// </summary>
    public int Limit { get; set; }

    /// <summary>
    /// Text appended verbatim after the WHERE clause when not empty.
    /// </summary>
    public string GroupBy { get; set; }

    /// <summary>
    /// Text appended verbatim after <see cref="GroupBy"/> when not empty.
    /// </summary>
    public string OrderBy { get; set; }

    /// <summary>
    /// Composes the SELECT statement from the configured parts and stores it in
    /// <c>QueryString</c>. Only a MySql source is supported.
    /// </summary>
    /// <exception cref="SpiderExceptoin">Thrown when <c>Source</c> is not MySql.</exception>
    protected override void BuildQueryString()
    {
        // Reject every dialect this builder cannot emit before doing any work.
        if (Source != DataSource.MySql)
        {
            throw new SpiderExceptoin($"Unsport Source: {Source}");
        }

        var sql = new StringBuilder("SELECT * FROM ").Append(TableName);

        // All filters end up in one WHERE clause, combined with AND.
        if (Filters?.Count > 0)
        {
            sql.Append(" WHERE ").Append(string.Join(" AND ", Filters));
        }

        if (!string.IsNullOrEmpty(GroupBy))
        {
            sql.Append($" {GroupBy} ");
        }

        if (!string.IsNullOrEmpty(OrderBy))
        {
            sql.Append($" {OrderBy} ");
        }

        if (Limit > 0)
        {
            sql.Append($" LIMIT {Limit} ");
        }

        QueryString = sql.ToString();
    }

    /// <summary>
    /// Deliberately empty: this override replaces the base implementation, so this
    /// class adds no start requests by itself.
    /// NOTE(review): derived classes are presumably expected to provide their own Build — confirm.
    /// </summary>
    public override void Build(Site site, dynamic obj)
    {
    }
}

public class DbCommonPrepareStartUrls : AbstractDbPrepareStartUrls
public class DbCommonPrepareStartUrls : ConfigurableDbPrepareStartUrls
{
public override Types Type { get; internal set; } = Types.CommonDb;

Expand Down Expand Up @@ -303,7 +342,7 @@ private string GetPostBody(string postBody, Dictionary<string, object> data, int
}
}

public class DbPrepareStartUrls : AbstractDbPrepareStartUrls
public class DbPrepareStartUrls : ConfigurableDbPrepareStartUrls
{
public override Types Type { get; internal set; } = Types.GeneralDb;

Expand Down Expand Up @@ -356,9 +395,9 @@ public static string GetPostBody(string postBody, Dictionary<string, object> dat
}
}

public class DbListPrepareStartUrls : AbstractDbPrepareStartUrls
public class DbListPrepareStartUrls : ConfigurableDbPrepareStartUrls
{
public override Types Type { get; internal set; } = Types.GeneralDb;
public override Types Type { get; internal set; } = Types.DbList;

public int Interval { get; set; }
public string ColumnSeparator { get; set; }
Expand Down Expand Up @@ -430,7 +469,7 @@ public class LinkSpiderPrepareStartUrls : PrepareStartUrls
/// <summary>
/// 用于拼接Url所需要的列
/// </summary>
public List<AbstractDbPrepareStartUrls.Column> Columns { get; set; } = new List<AbstractDbPrepareStartUrls.Column>();
public List<Column> Columns { get; set; } = new List<Column>();

/// <summary>
/// 拼接Url的方式, 会把Columns对应列的数据传入
Expand Down Expand Up @@ -480,6 +519,4 @@ public override void Build(Site site, dynamic obj)
}
}
}


}

0 comments on commit 3a3b025

Please sign in to comment.