Generative AI in Computer Vision you own this product

'); $(document.body).append('
loading reading lists ...
'); function adjustReadingListIcon(isInReadingList){ $readingListToggle.toggleClass("fa-plus", !isInReadingList); $readingListToggle.toggleClass("fa-check", isInReadingList); var tooltipMessage = isInReadingList ? "edit in reading lists" : "add to reading list"; $readingListToggle.attr("title", tooltipMessage); $readingListToggle.attr("data-original-title", tooltipMessage); } $.ajax({ url: "/readingList/isInReadingList", data: { productId: 3607 } }).done(function (data) { adjustReadingListIcon(data && data.hasProductInReadingList); }).catch(function(e){ console.log(e); adjustReadingListIcon(false); }); $readingListToggle.on("click", function(){ if(codePromise == null){ showToast() } loadCode().then(function(store){ store.requestReadingListSpecificationForProduct({ id: window.readingListsServerVars.externalId, manningId: window.readingListsServerVars.productId, title: window.readingListsServerVars.title }); ReadingLists.ReactDOM.render( ReadingLists.React.createElement(ReadingLists.ManningOnlineReadingListModal, { store: store, }), document.getElementById("reading-lists-modal") ); }).catch(function(e){ console.log("Error loading code reading list code"); }); }); var codePromise var readingListStore function loadCode(){ if(codePromise) { return codePromise } return codePromise = new Promise(function (resolve, reject){ $.getScript(window.readingListsServerVars.libraryLocation).done(function(){ hideToast() readingListStore = new ReadingLists.ReadingListStore( new ReadingLists.ReadingListProvider( new ReadingLists.ReadingListWebProvider( ReadingLists.SourceApp.marketplace, getDeploymentType() ) ) ); readingListStore.onReadingListChange(handleChange); readingListStore.onReadingListModalChange(handleChange); resolve(readingListStore); }).catch(function(){ hideToast(); console.log("Error downloading reading lists source"); $readingListToggle.css("display", "none"); reject(); }); }); } function handleChange(){ if(readingListStore != null) { adjustReadingListIcon(readingListStore.isInAtLeastOneReadingList({ id: window.readingListsServerVars.externalId, manningId: window.readingListsServerVars.productId })); } } var $readingListToast = $("#reading-list-toast"); function showToast(){ $readingListToast.css("display", "flex"); setTimeout(function(){ $readingListToast.addClass("shown"); }, 16); } function hideToast(){ $readingListToast.removeClass("shown"); setTimeout(function(){ $readingListToast.css("display", "none"); }, 150); } function getDeploymentType(){ switch(window.readingListsServerVars.deploymentType){ case "development": case "test": return ReadingLists.DeploymentType.dev; case "qa": return ReadingLists.DeploymentType.qa; case "production": return ReadingLists.DeploymentType.prod; case "docker": return ReadingLists.DeploymentType.docker; default: console.error("Unknown deployment environment, defaulting to production"); return ReadingLists.DeploymentType.prod; } } }); } });
GANs, diffusion models, and transformers
Vladimir Bok
  • MEAP began September 2024
  • Publication in Summer 2025 (estimated)
  • ISBN 9781633437449
  • 350 pages (estimated)
  • printed in black & white

pro $24.99 per month

  • access to all Manning books, MEAPs, liveVideos, liveProjects, and audiobooks!
  • choose one free eBook per month to keep
  • exclusive 50% discount on all purchases

lite $19.99 per month

  • access to all Manning books, including MEAPs!

team

5, 10 or 20 seats+ for your team - learn more


Look inside
Master the essential models, algorithms, tools, and techniques for interpreting and generating images using AI.

From digital special effects to medical image augmentation and analysis, generative AI is revolutionizing the way we create and interpret visual information. Innovations including diffusion models, text-to-image generators, GANs, and more help you create photorealistic graphics, empower creative expression with text-to-image tools, and accurately recognize and describe visual elements for applications like image search. Generative AI in Computer Vision will teach you the foundations of modern computer vision and equip you with the practical techniques you need to bring your ideas to life.

In Generative AI in Computer Vision you’ll learn about:

  • Variational autoencoders (VAEs) and generative adversarial networks (GANs)
  • Diffusion models for high-quality image generation
  • Evaluating models with metrics such as inception score and Fréchet inception distance
  • Conditional and guided generation techniques
  • Bridging language and vision using transformers and models like CLIP
  • Implementing text-to-image models

Generative AI in Computer Vision guides you from core concepts of digital image creation to the cutting edge of AI-powered visual computing. You’ll unpack tools like DALL-E and Stable Diffusion and learn to build your own by following the detailed code samples and practical tutorials.

about the book

Generative AI in Computer Vision explores the inner workings of the generative AI models behind modern computer vision. You’ll start by developing a simple autoencoder and extending it into a variational autoencoder for image generation. Next, you’ll dive deep into GANs and discover how to upgrade their performance with next-generation techniques like Wasserstein GAN. Create your own denoising diffusion probabilistic models that can generate original imagery, and even implement a simplified text-to-image model! Plus, you’ll explore hybrid models that benefit from the strengths of multiple approaches, and even video-based generative AI. Throughout, real-world case studies demonstrate how these models can be put into action.

about the reader

For AI enthusiasts, developers, and data scientists familiar with machine learning basics and Python programming.

about the author

Vladimir Bok is a founding Applied AI Researcher at Mirage Security, a VC-backed startup developing AI simulations for enterprise security training and awareness. Prior to Mirage, Vladimir led AI/ML initiatives at tech industry giants including Meta and Microsoft, as well as various startups in ad tech, fintech, and biotech. He is a coauthor of GANs in Action. Vladimir holds a Computer Science degree from Harvard University.

choose your plan

team

monthly
annual
$49.99
$399.99
only $33.33 per month
  • five seats for your team
  • access to all Manning books, MEAPs, liveVideos, liveProjects, and audiobooks!
  • choose another free product every time you renew
  • choose twelve free products per year
  • exclusive 50% discount on all purchases
  • Generative AI in Computer Vision ebook for free

choose your plan

team

monthly
annual
$49.99
$399.99
only $33.33 per month
  • five seats for your team
  • access to all Manning books, MEAPs, liveVideos, liveProjects, and audiobooks!
  • choose another free product every time you renew
  • choose twelve free products per year
  • exclusive 50% discount on all purchases
  • Generative AI in Computer Vision ebook for free

You can see this entire book for free. Click anywhere in the table of contents to start reading detailed TOC'; var tocServerVars = { productId: 3607, getTocUrl: "/ajax/getTocHtml" } var tocStatusResolved = false; $(document).ready(loadTocIfVisible); function loadTocIfVisible(){ if(tocStatusResolved) { return; } if($('.toc-loading-container').isInViewport()){ window.removeEventListener("scroll", loadTocIfVisible); $(window).off("scroll", loadTocIfVisible); loadToc(); } } try { window.addEventListener("scroll", loadTocIfVisible, { passive: true}); } catch(e){ $(window).on("scroll", loadTocIfVisible); } function loadToc(){ tocStatusResolved = true; window.TocProvider.getToc(function (tocHtml) { var $tocLoadingContainer = $('.toc-loading-container'); $tocLoadingContainer.closest(".table-of-contents").prepend(titleHtml); $tocLoadingContainer.replaceWith(tocHtml); initializeToc(); }, function(){ $('.toc-loading-container').remove() }); } window.TocProvider = { toc: "", getToc: function(success, failure){ if(this.toc) { success && success(this.toc); return; } $.ajax({ url: tocServerVars.getTocUrl, data: { id: tocServerVars.productId } }).done(function(data){ if(data && data.tocHtml){ this.toc = data.tocHtml; success && success(this.toc); } else { failure && failure(); } }).fail(function(error){ failure && failure(); }); } }; } catch (e) {} })(); function initializeToc(){ // Append this only when children of their sectionbody siblings exist (appendices generally aren't expandable) $(".sectionbody *").children("*").parents(".sectionbody").siblings("h2").wrapInner("").append(controllo); $(".toc h2 > .chap-link, .toc h1, .toc h3, .toc h4").each(function() { $(this).html($(this).html().replace(/^(\b\w[0-9A-Z]{0,1}\b\.?)+\s/gi, "$&")); }); $(".sectionbody").addClass("hidden-toc"); $("body").on("click", ".toc-expando", function(e) { e.stopPropagation(); $(this).removeClass("toc-expando").addClass("toc-retracto").closest(".sect1").find(".sectionbody").removeClass("hidden-toc").addClass("shown-toc"); }); $("body").on("click", ".toc-retracto", function(e) { e.stopPropagation(); $(this).removeClass("toc-retracto").addClass("toc-expando").closest(".sect1").find(".sectionbody").removeClass("shown-toc").addClass("hidden-toc"); }); $("body").on("click", ".available h2", function() { $(this).find(".toc-controllo").click(); }); $("body").on("click", "#show-hide", function() { if ($("#show-hide").hasClass("hide-full-toc")) { $(".table-of-contents .sect2, .table-of-contents .sect3").removeClass("shown-toc").addClass("hidden-toc"); $("#show-hide").removeClass("hide-full-toc").html(" detailed TOC"); $(".table-of-contents .body").removeClass("full-toc"); $(".toc-retracto").click(); } else { $(".table-of-contents .sect2, .table-of-contents .sect3").addClass("shown-toc").removeClass("hidden-toc"); $("#show-hide").addClass("hide-full-toc").html(" detailed TOC"); $(".table-of-contents .body").addClass("full-toc"); $(".toc-expando").click(); } }); var wrapTocWithLink = function(tocElement, livebookUrl, chapterNumber) { var urlParams = "?origin=product-toc"; $(tocElement).children(".chap-link").wrap(""); if (!$(tocElement).children().length) { $(tocElement).html("" + $(tocElement).text() + ""); } $(tocElement).parent().prepend('Read in liveBook'); $(tocElement).parent().children("div[class*='sectionbody']").each(function(j, sectionsParent) { $(sectionsParent).children().each(function(k, sectionParent) { $(sectionParent).children("h3").each(function(l, section) { //wrap section in an a tag linking to livebook section k + 1 var sectionUrl = livebookUrl + "/section-" + chapterNumber + "-" + (k + 1); $(section).wrap(""); }); $(sectionParent).children("div[class='sect3']").each(function(m, subsectionParent) { $(subsectionParent).children("h4").each(function(n, subsection) { //wrap section in an a tag linking to livebook subsection m + 1 var subsectionUrl = livebookUrl + "/section-" + chapterNumber + "-" + (k + 1) + "-" + (m + 1); $(subsection).wrap(""); }); }); }); }); }; var $availableChapters = $("h2[id*='chapter_id_']").filter(function(i, chapter) { return $(chapter).parent().hasClass('available'); }); // All elements containing an '_' filtering out those starting by chapter_id_ and not having their parent the class available. var $availableNonChapterElements = $("h2[id*='_']").filter(function(i, element) { return !element.id.match(/^chapter_id_/) && $(element).parent().hasClass('available'); }); $availableChapters.each(function(i, chapter) { var chapterNumber = chapter.id.split(/_/).pop(); var livebookUrl = "https://livebook.manning.com/book/generative-ai-in-computer-vision/chapter-" + chapterNumber; wrapTocWithLink(chapter, livebookUrl, chapterNumber); }); var isSingleAppendix = $("h2[id*='_']").filter(function(i, element) { return !element.id.match(/^chapter_id_/) ; }).length === 1; $availableNonChapterElements.each(function(i, appendix) { var appendixGroups = appendix.textContent.match(/[aA]ppendix (\w)/); // i.e "Appendix A: Installation => ['Appendix A', 'A', index:0, input: 'Appendix A: Installation'] var section = isSingleAppendix ? 'a' : appendixGroups && appendixGroups.length === 2 && appendixGroups[1].toLowerCase(); var appendixPathName = isSingleAppendix ? 'appendix' : 'appendix-' + section; var livebookUrl = "https://livebook.manning.com/book/generative-ai-in-computer-vision/" + appendixPathName; wrapTocWithLink(appendix, livebookUrl, section); }); var $availableMiscElements = $("h2[id='foreword'], h2[id='preface'], h2[id='epilogue']").filter(function(i, element) { return $(element).parent().hasClass('available'); }); $availableMiscElements.each(function(i, miscElement){ var livebookUrl = "https://livebook.manning.com/book/generative-ai-in-computer-vision/" + $(miscElement).attr("id"); wrapTocWithLink(miscElement, livebookUrl, ""); }); $(".available h2, .available h3").attr("data-toggle", "tooltip"); $(".available h2, .available h3").attr("data-placement", "left"); $(".available h2, .available h3").attr("title", "Available"); $('[data-toggle="tooltip"]').tooltip(); } var addToWishlistUrl = "/wishList/addItemFromPage" + "?"; var removeFromWishlistUrl = "/wishList/removeItemFromPage" + "?"; $("body").on("click", ".wishlist-login", function() { localStorage.removeItem('dynamicloadcache'); }); $("body").on("click", ".wishlist-toggle.wishlist-add, .wishlist-toggle.wishlist-remove", function() { var productId = $(this).data("product-id"); var url = addToWishlistUrl; if ($(this).hasClass("wishlist-remove")) { url = removeFromWishlistUrl; $(".wishlist-container").removeClass("on-wishlist"); } else { $(".wishlist-container").addClass("on-wishlist"); } $.ajax({ type: "GET", url: url + "id=" + productId, timeout: 3000, dataType: "json", headers: { 'accept': "application/json" } }); }); $(document).trigger('activity-product-browse', {productId: '3607'}); })(jQuery); }

RECENTLY VIEWED