並列処理でWebコンテンツをダウンロードする方法 - Groovy, Scala, C#, Java, Ruby
複数のWebコンテンツ（HTMLや画像など）をダウンロードする際に 1件ずつ処理していたのでは非効率です。
というわけで、並列的にWebコンテンツをダウンロードするプログラムを Groovy, Scala, C#, Java, Ruby で実装してみました。
主な仕様は以下で、外部ライブラリを使用せずに実装しました。
- 実行時の第1引数で出力先ディレクトリを指定
- ダウンロード対象の URL を標準入力で指定（改行区切りで複数指定）
- URL 内のファイル名を出力ファイル名として使用
実行例
groovy download_web.groovy destdir < urls.txt
サンプルソースは http://github.com/fits/try_samples/tree/master/blog/20110925/
Groovy の場合
Groovy 1.8 では GPars が同梱されているので、GPars による並列コレクションを使えば簡単に実装できます。
GParsExecutorsPool.withPool に渡したクロージャ内のコレクションで並列処理用のメソッド（下記の eachParallel）が使えるようになります。
- Groovy 1.8.2
download_web.groovy
import groovyx.gpars.GParsExecutorsPool

// Downloads every URL read from standard input (one per line) in parallel
// and stores each one in the directory given as the first argument.
def dir = args[0]

// Inside withPool, collections gain the parallel methods such as eachParallel.
// To fix the degree of parallelism, pass the pool size instead:
//   GParsExecutorsPool.withPool(5) { ... }
GParsExecutorsPool.withPool {
    // Blank lines are skipped so that a trailing newline is not reported as a failure.
    System.in.readLines().findAll { it.trim() }.eachParallel { u ->
        try {
            // Parsed inside the try block so that one malformed line is reported
            // as a failure instead of aborting the whole parallel run.
            def url = new URL(u)
            // url.path excludes the query string, so "a.png?x=1" is saved as "a.png".
            def file = new File(dir, new File(url.path).name)

            url.withInputStream { input ->
                file.bytes = input.bytes
            }

            println "downloaded: $url => $file"
        }
        catch (e) {
            // The raw input line is printed because the URL may not have parsed.
            println "failed: $u, $e"
        }
    }
}
Scala の場合
Scala 2.9 では並列コレクションが使えます。
コレクションに対して par メソッドを呼び出すと並列コレクション化され、後は foreach 等を実行すれば並列に処理されます。
ただし、デフォルトでは JVM が使用できるプロセッサ数※までしか並列化されないようなので、今回のような用途では並列数が少ないかもしれません。
※ scala.collection.parallel.availableProcessors で数値を参照可 実際には java.lang.Runtime.getRuntime().availableProcessors() の値が設定されている
なお、ファイル保存処理を簡単に実装するため、JavaSE 7 で導入された java.nio.file.Files クラス等を使用しています。
- Scala 2.9.1（JavaSE 7 依存）
download_web.scala
import scala.io.Source
import java.io.{File, InputStream}
import java.net.URL
import java.nio.file.{Paths, Files}
import java.nio.file.StandardCopyOption._

// Downloads every URL read from standard input (one per line) in parallel
// and stores each one in the directory given as the first argument.
val dir = args(0)

/** Loan pattern: runs `block` with the stream and always closes the stream afterwards. */
def using[A](st: InputStream)(block: InputStream => A): A =
  try block(st) finally st.close()

// `.par` turns the array into a parallel collection, so foreach runs concurrently.
// Blank lines are skipped so that a trailing newline is not reported as a failure.
Source.stdin.getLines.toArray.filter(_.trim.nonEmpty).par.foreach { u =>
  try {
    // Parsed inside the try block so that one malformed line is reported
    // as a failure instead of aborting the whole parallel foreach.
    val url = new URL(u)
    // getPath excludes the query string, so "a.png?x=1" is saved as "a.png".
    val filePath = Paths.get(dir, new File(url.getPath()).getName())

    using(url.openStream()) { stream =>
      Files.copy(stream, filePath, REPLACE_EXISTING)
    }

    printf("downloaded: %s => %s\n", url, filePath)
  } catch {
    // The raw input line is printed because the URL may not have parsed.
    case e: Exception => printf("failed: %s, %s\n", u, e)
  }
}
C# の場合
.NET Framework 4 では並列タスクが使えます。
Parallel.ForEach にコレクションとその処理内容を渡せば並列化されます。
DownloadWeb.cs
using System;
using System.IO;
using System.Net;
using System.Threading.Tasks;

/// <summary>
/// Downloads every URL read from standard input (one per line) in parallel
/// and stores each one in the directory given as the first argument.
/// </summary>
public class DownloadWeb
{
    public static void Main(string[] args)
    {
        // Split on both CRLF and LF so the input works regardless of which
        // line-ending convention the URL list was written with.
        var urls = Console.In.ReadToEnd().Split(
            new string[] { "\r\n", "\n" },
            StringSplitOptions.RemoveEmptyEntries);

        var dir = args[0];

        Parallel.ForEach(urls, (u) =>
        {
            try
            {
                // Parsed inside the try block so that one malformed line is reported
                // as a failure instead of failing the whole Parallel.ForEach call.
                var url = new Uri(u);
                var filePath = Path.Combine(dir, Path.GetFileName(url.LocalPath));

                // WebClient is IDisposable; release it as soon as the download ends.
                using (var client = new WebClient())
                {
                    client.DownloadFile(url, filePath);
                }

                Console.WriteLine("downloaded: {0} => {1}", url, filePath);
            }
            catch (Exception e)
            {
                // The raw input line is printed because the URI may not have parsed.
                Console.WriteLine("failed: {0}, {1}", u, e);
            }
        });
    }
}
Java の場合
Java の場合、今のところ並列コレクション等の仕組みが用意されていないようなので Concurrency Utilities を使って実装しました。
ファイルの保存処理には JavaSE 7 で導入された java.nio.file.Files クラス等を使用しています。
下記では URL クラスの代わりに URI を使っていますが、特に深い理由は無く Scala のサンプルと同様に URL クラスを使っても問題ありません。
なお、Paths.get() の引数に URI を渡せますが、現バージョンでは "http://・・・" から作成した URI を渡す事はできませんでした。（java.nio.file.FileSystemNotFoundException: Provider "http" not installed となる）
- JavaSE 7
DownloadWeb.java
import java.io.*;
import java.net.URI;
import java.util.concurrent.Executors;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;

/**
 * Downloads every URL read from standard input (one per line) in parallel
 * and stores each one in the directory given as the first argument.
 */
public class DownloadWeb {
    public static void main(String[] args) throws Exception {
        BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));

        ExecutorService exec = Executors.newCachedThreadPool();
        // To fix the degree of parallelism, use a fixed-size pool instead:
        // ExecutorService exec = Executors.newFixedThreadPool(5);

        final String dir = args[0];

        try {
            String line;
            while ((line = reader.readLine()) != null) {
                // Blank lines are skipped so a trailing newline is not reported as a failure.
                if (line.trim().isEmpty()) {
                    continue;
                }
                final String url = line;

                exec.submit(new Runnable() {
                    @Override
                    public void run() {
                        try {
                            // Parsed inside the task so that one malformed line is reported
                            // as a failure instead of aborting the submission loop.
                            URI uri = URI.create(url);
                            Path filePath = Paths.get(dir, new File(uri.getPath()).getName());

                            try (InputStream in = uri.toURL().openStream()) {
                                Files.copy(in, filePath, StandardCopyOption.REPLACE_EXISTING);
                                System.out.printf("downloaded: %s => %s\n", uri, filePath);
                            }
                        } catch (Exception e) {
                            // The raw input line is printed because the URI may not have parsed.
                            System.out.printf("failed: %s, %s\n", url, e);
                        }
                    }
                });
            }
        } finally {
            // Stop accepting new tasks even if reading stdin failed.
            exec.shutdown();
        }

        // shutdown() alone returns immediately; block here until every download has finished.
        exec.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
    }
}
Ruby の場合
Ruby の場合、今のところ並列コレクション等の仕組みが用意されていないみたいなので Queue と Thread を使って実装してみました。
スレッド数を固定化しているので、Java で Executors.newFixedThreadPool(数値) を使ったケースや Groovy で GParsExecutorsPool.withPool(数値) {・・・} を使ったケースと同様の処理になると思います。
download_web.rb
require "thread"
require "uri"
require "net/http"

# Downloads every URL read from standard input (one per line) in parallel
# and stores each one in the directory given as the first argument.

# Degree of parallelism (number of worker threads)
poolSize = 5

dir = ARGV[0]

q = Queue.new
# Enqueue the URLs; blank lines are skipped so that a trailing newline
# is not reported as a failure.
$stdin.readlines.each do |l|
  line = l.chomp
  q.push(line) unless line.strip.empty?
end

threads = []
poolSize.times do
  threads << Thread.start(q) do |tq|
    loop do
      # Non-blocking pop: another worker may take the last entry between an
      # empty? check and the pop, so the ThreadError raised on an empty queue
      # is used as the loop's exit signal instead of a separate empty? test.
      begin
        u = tq.pop(true)
      rescue ThreadError
        break
      end

      begin
        url = URI.parse(u)
        filePath = File.join(dir, File.basename(url.path))

        res = Net::HTTP.get_response(url)
        # write (not puts): puts appends a newline when the body does not end
        # with one, which would corrupt binary content such as images.
        File.open(filePath, 'wb') {|f| f.write res.body}

        puts "downloaded: #{url} => #{filePath}"
      rescue => e
        # The raw input line is printed because the URL may not have parsed.
        puts "failed: #{u}, #{e}"
      end
    end
  end
end

# Wait until every worker has finished downloading
threads.each {|t| t.join}