Skip to content

Commit 9eef07c

Browse files
committed
fix data collector
1 parent 114bf97 commit 9eef07c

11 files changed

Lines changed: 8202 additions & 8131 deletions

File tree

spring-security-oauth/pom.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,12 @@
185185
<artifactId>guava</artifactId>
186186
<version>${guava.version}</version>
187187
</dependency>
188+
189+
<dependency>
190+
<groupId>org.togglz</groupId>
191+
<artifactId>togglz-spring</artifactId>
192+
<version>2.1.0.Final</version>
193+
</dependency>
188194

189195
<!-- logging -->
190196

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
package org.baeldung.config;
2+
3+
import java.io.IOException;
4+
5+
import org.baeldung.reddit.util.MyFeatures;
6+
import org.springframework.context.annotation.Configuration;
7+
import org.springframework.core.io.ClassPathResource;
8+
import org.togglz.core.Feature;
9+
import org.togglz.core.manager.TogglzConfig;
10+
import org.togglz.core.repository.StateRepository;
11+
import org.togglz.core.repository.file.FileBasedStateRepository;
12+
import org.togglz.core.user.UserProvider;
13+
14+
@Configuration
15+
public class FeatureToggleConfig implements TogglzConfig {
16+
17+
@Override
18+
public Class<? extends Feature> getFeatureClass() {
19+
return MyFeatures.class;
20+
}
21+
22+
@Override
23+
public StateRepository getStateRepository() {
24+
try {
25+
return new FileBasedStateRepository(new ClassPathResource("features.properties").getFile());
26+
} catch (final IOException e) {
27+
e.printStackTrace();
28+
return null;
29+
}
30+
}
31+
32+
@Override
33+
public UserProvider getUserProvider() {
34+
return null;
35+
}
36+
37+
}

spring-security-oauth/src/main/java/org/baeldung/config/ServletInitializer.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ public class ServletInitializer extends AbstractDispatcherServletInitializer {
1313
@Override
1414
protected WebApplicationContext createServletApplicationContext() {
1515
final AnnotationConfigWebApplicationContext context = new AnnotationConfigWebApplicationContext();
16-
context.register(PersistenceJPAConfig.class, WebConfig.class, SecurityConfig.class);
16+
context.register(PersistenceJPAConfig.class, WebConfig.class, SecurityConfig.class, FeatureToggleConfig.class);
1717
return context;
1818
}
1919

@@ -34,7 +34,6 @@ public void onStartup(ServletContext servletContext) throws ServletException {
3434
servletContext.addListener(new SessionListener());
3535
registerProxyFilter(servletContext, "oauth2ClientContextFilter");
3636
registerProxyFilter(servletContext, "springSecurityFilterChain");
37-
3837
}
3938

4039
private void registerProxyFilter(ServletContext servletContext, String name) {

spring-security-oauth/src/main/java/org/baeldung/reddit/classifier/RedditClassifier.java

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import java.io.BufferedReader;
44
import java.io.FileReader;
55
import java.io.IOException;
6+
import java.util.Calendar;
7+
import java.util.TimeZone;
68

79
import org.apache.mahout.classifier.sgd.L2;
810
import org.apache.mahout.classifier.sgd.OnlineLogisticRegression;
@@ -18,6 +20,7 @@ public class RedditClassifier {
1820

1921
public static int GOOD = 0;
2022
public static int BAD = 1;
23+
public static int MIN_SCORE = 5;
2124
private final OnlineLogisticRegression classifier;
2225
private final FeatureVectorEncoder titleEncoder;
2326
private final FeatureVectorEncoder domainEncoder;
@@ -44,7 +47,7 @@ public void trainClassifier(String fileName) throws IOException {
4447
}
4548

4649
while ((line != null) && (line != "")) {
47-
category = (line.startsWith("good")) ? GOOD : BAD;
50+
category = extractCategory(line);
4851
trainCount[category]++;
4952
features = convertLineToVector(line);
5053
classifier.train(category, features);
@@ -76,7 +79,7 @@ public double evaluateClassifier() throws IOException {
7679
Vector features;
7780
String line = reader.readLine();
7881
while ((line != null) && (line != "")) {
79-
category = (line.startsWith("good")) ? GOOD : BAD;
82+
category = extractCategory(line);
8083
evalCount[category]++;
8184
features = convertLineToVector(line);
8285
result = classify(features);
@@ -94,12 +97,21 @@ public double evaluateClassifier() throws IOException {
9497
}
9598

9699
// ==== private
100+
private int extractCategory(String line) {
101+
final int score = Integer.parseInt(line.substring(0, line.indexOf(';')));
102+
return (score < MIN_SCORE) ? BAD : GOOD;
103+
}
104+
97105
private Vector convertLineToVector(String line) {
98106
final Vector features = new RandomAccessSparseVector(4);
99107
final String[] items = line.split(";");
108+
final Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("GMT"));
109+
cal.setTimeInMillis(Long.parseLong(items[1]) * 1000);
110+
final int hour = cal.get(Calendar.HOUR_OF_DAY);
111+
100112
titleEncoder.addToVector(items[3], features);
101113
domainEncoder.addToVector(items[4], features);
102-
features.set(2, Integer.parseInt(items[1])); // hour of day
114+
features.set(2, hour); // hour of day
103115
features.set(3, Integer.parseInt(items[2])); // number of words in the title
104116
return features;
105117
}

spring-security-oauth/src/main/java/org/baeldung/reddit/classifier/RedditDataCollector.java

Lines changed: 16 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,7 @@
22

33
import java.io.FileWriter;
44
import java.io.IOException;
5-
import java.text.SimpleDateFormat;
65
import java.util.ArrayList;
7-
import java.util.Date;
86
import java.util.List;
97

108
import org.baeldung.reddit.util.UserAgentInterceptor;
@@ -20,20 +18,20 @@
2018
public class RedditDataCollector {
2119
public static final String TRAINING_FILE = "src/main/resources/train.csv";
2220
public static final String TEST_FILE = "src/main/resources/test.csv";
21+
public static final int LIMIT = 100;
22+
public static final Long YEAR = 31536000L;
2323
private final Logger logger = LoggerFactory.getLogger(getClass());
2424

25-
private String postAfter;
25+
private Long timestamp;
2626
private final RestTemplate restTemplate;
2727
private final String subreddit;
28-
private final int minScore;
2928

3029
/**
 * Creates a collector for the "java" subreddit whose REST client sends every request
 * through a {@link UserAgentInterceptor}.
 */
public RedditDataCollector() {
    final List<ClientHttpRequestInterceptor> interceptors = new ArrayList<ClientHttpRequestInterceptor>();
    interceptors.add(new UserAgentInterceptor());

    this.restTemplate = new RestTemplate();
    this.restTemplate.setInterceptors(interceptors);
    this.subreddit = "java";
}
3836

3937
public RedditDataCollector(String subreddit, int minScore) {
@@ -42,35 +40,30 @@ public RedditDataCollector(String subreddit, int minScore) {
4240
list.add(new UserAgentInterceptor());
4341
restTemplate.setInterceptors(list);
4442
this.subreddit = subreddit;
45-
this.minScore = minScore;
4643
}
4744

4845
public void collectData() {
49-
final int limit = 100;
5046
final int noOfRounds = 80;
47+
timestamp = System.currentTimeMillis() / 1000;
5148
try {
5249
final FileWriter writer = new FileWriter(TRAINING_FILE);
5350
for (int i = 0; i < noOfRounds; i++) {
54-
getPosts(limit, writer);
51+
getPosts(writer);
5552
}
5653
writer.close();
5754

5855
final FileWriter testWriter = new FileWriter(TEST_FILE);
59-
getPosts(limit, testWriter);
56+
getPosts(testWriter);
6057
testWriter.close();
6158
} catch (final Exception e) {
6259
logger.error("write to file error", e);
6360
}
6461
}
6562

66-
// ==== private
67-
68-
private void getPosts(int limit, FileWriter writer) {
69-
String fullUrl = "http://www.reddit.com/r/" + subreddit + "/new.json?limit=" + limit;
70-
if (postAfter != null) {
71-
fullUrl += "&count=" + limit + "&after=" + postAfter;
72-
}
63+
// ==== Private
7364

65+
private void getPosts(FileWriter writer) {
66+
final String fullUrl = "http://www.reddit.com/r/" + subreddit + "/search.json?sort=new&q=timestamp:" + (timestamp - YEAR) + ".." + timestamp + "&restrict_sr=on&syntax=cloudsearch&limit=" + LIMIT;
7467
try {
7568
final JsonNode node = restTemplate.getForObject(fullUrl, JsonNode.class);
7669
parseNode(node, writer);
@@ -82,22 +75,19 @@ private void getPosts(int limit, FileWriter writer) {
8275
}
8376

8477
private void parseNode(JsonNode node, FileWriter writer) throws IOException {
85-
postAfter = node.get("data").get("after").asText();
86-
System.out.println(postAfter);
8778
String line;
88-
String category;
8979
List<String> words;
90-
final SimpleDateFormat df = new SimpleDateFormat("HH");
80+
int score;
9181
for (final JsonNode child : node.get("data").get("children")) {
92-
category = (child.get("data").get("score").asInt() < minScore) ? "bad" : "good";
82+
score = child.get("data").get("score").asInt();
9383
words = Splitter.onPattern("\\W").omitEmptyStrings().splitToList(child.get("data").get("title").asText());
94-
final Date date = new Date(child.get("data").get("created_utc").asLong() * 1000);
84+
timestamp = child.get("data").get("created_utc").asLong();
9585

96-
line = category + ";";
97-
line += df.format(date) + ";";
86+
line = score + ";";
87+
line += timestamp + ";";
9888
line += words.size() + ";" + Joiner.on(' ').join(words) + ";";
9989
line += child.get("data").get("domain").asText() + "\n";
100-
90+
System.out.println(line);
10191
writer.write(line);
10292
}
10393
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
package org.baeldung.reddit.util;
2+
3+
import org.togglz.core.Feature;
4+
import org.togglz.core.annotation.EnabledByDefault;
5+
import org.togglz.core.annotation.Label;
6+
import org.togglz.core.context.FeatureContext;
7+
8+
public enum MyFeatures implements Feature {
9+
10+
@EnabledByDefault
11+
@Label("Prediction feature")
12+
PREDICTION_FEATURE;
13+
14+
public boolean isActive() {
15+
return FeatureContext.getFeatureManager().isActive(this);
16+
}
17+
18+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
PREDICTION_FEATURE=false

0 commit comments

Comments
 (0)