Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 41 additions & 5 deletions dashscope/aigc/video_synthesis.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

from typing import Any, Dict, Union
from typing import Any, Dict, Union, List

from dashscope.api_entities.dashscope_response import (DashScopeAPIResponse,
VideoSynthesisResponse)
from dashscope.client.base_api import BaseAsyncApi, BaseAsyncAioApi
from dashscope.common.constants import PROMPT
from dashscope.common.constants import PROMPT, REFERENCE_VIDEO_URLS
from dashscope.common.utils import _get_task_group_and_task
from dashscope.utils.oss_utils import check_and_upload_local

Expand Down Expand Up @@ -39,6 +39,8 @@ def call(cls,
template: str = None,
img_url: str = None,
audio_url: str = None,
reference_video_urls: List[str] = None,
reference_video_description: List[str] = None,
api_key: str = None,
extra_input: Dict = None,
workspace: str = None,
Expand All @@ -58,6 +60,8 @@ def call(cls,
template (str): LoRa input, such as gufeng, katong, etc.
img_url (str): The input image url, Generate the URL of the image referenced by the video.
audio_url (str): The input audio url
reference_video_urls (List[str]): list of character reference video file urls uploaded by the user
reference_video_description (List[str]): For the description information of the picture and sound of the reference video, corresponding to ref video, it needs to be in the order of the url. If the quantity is different, an error will be reported
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The docstring for reference_video_description could be rephrased for better clarity and conciseness.

Suggested change
reference_video_description (List[str]): For the description information of the picture and sound of the reference video, corresponding to ref video, it needs to be in the order of the url. If the quantity is different, an error will be reported
reference_video_description (List[str]): Descriptions for each reference video. The order must correspond to the order of URLs in `reference_video_urls`. An error will be reported if the number of descriptions does not match the number of URLs.

api_key (str, optional): The API key. Defaults to None.
workspace (str): The dashscope workspace id.
extra_input (Dict): The extra input parameters.
Expand All @@ -79,6 +83,8 @@ def call(cls,
prompt,
img_url=img_url,
audio_url=audio_url,
reference_video_urls=reference_video_urls,
reference_video_description=reference_video_description,
api_key=api_key,
extend_prompt=extend_prompt,
negative_prompt=negative_prompt,
Expand All @@ -98,6 +104,8 @@ def _get_input(cls,
prompt: Any = None,
img_url: str = None,
audio_url: str = None,
reference_video_urls: List[str] = None,
reference_video_description: List[str] = None,
# """@deprecated, use prompt_extend in parameters """
extend_prompt: bool = True,
negative_prompt: str = None,
Expand All @@ -119,6 +127,8 @@ def _get_input(cls,
inputs['template'] = template
if function:
inputs['function'] = function
if reference_video_description:
inputs['reference_video_description'] = reference_video_description

has_upload = False
upload_certificate = None
Expand Down Expand Up @@ -165,6 +175,17 @@ def _get_input(cls,
has_upload = True
inputs['last_frame_url'] = res_last_frame_url

if (reference_video_urls is not None
and reference_video_urls and len(reference_video_urls) > 0):
new_videos = []
for video in reference_video_urls:
is_upload, new_video, upload_certificate = check_and_upload_local(
model, video, api_key, upload_certificate)
if is_upload:
has_upload = True
new_videos.append(new_video)
inputs[REFERENCE_VIDEO_URLS] = new_videos

if extra_input is not None and extra_input:
inputs = {**inputs, **extra_input}
if has_upload:
Expand All @@ -185,6 +206,8 @@ def async_call(cls,
prompt: Any = None,
img_url: str = None,
audio_url: str = None,
reference_video_urls: List[str] = None,
reference_video_description: List[str] = None,
# """@deprecated, use prompt_extend in parameters """
extend_prompt: bool = True,
negative_prompt: str = None,
Expand All @@ -208,6 +231,8 @@ def async_call(cls,
template (str): LoRa input, such as gufeng, katong, etc.
img_url (str): The input image url, Generate the URL of the image referenced by the video.
audio_url (str): The input audio url.
reference_video_urls (List[str]): list of character reference video file urls uploaded by the user
reference_video_description (List[str]): Descriptions of the picture and sound of each reference video. The order must correspond to the order of URLs in `reference_video_urls`. An error will be reported if the number of descriptions does not match the number of URLs.
api_key (str, optional): The API key. Defaults to None.
workspace (str): The dashscope workspace id.
extra_input (Dict): The extra input parameters.
Expand All @@ -229,7 +254,8 @@ def async_call(cls,
task_group, function = _get_task_group_and_task(__name__)

inputs, kwargs, task = cls._get_input(
model, prompt, img_url, audio_url, extend_prompt, negative_prompt, template, api_key,
model, prompt, img_url, audio_url, reference_video_urls, reference_video_description,
extend_prompt, negative_prompt, template, api_key,
extra_input, task, function, head_frame, tail_frame,
first_frame_url, last_frame_url, **kwargs)

Expand Down Expand Up @@ -354,6 +380,8 @@ async def call(cls,
prompt: Any = None,
img_url: str = None,
audio_url: str = None,
reference_video_urls: List[str] = None,
reference_video_description: List[str] = None,
# """@deprecated, use prompt_extend in parameters """
extend_prompt: bool = True,
negative_prompt: str = None,
Expand All @@ -377,6 +405,8 @@ async def call(cls,
template (str): LoRa input, such as gufeng, katong, etc.
img_url (str): The input image url, Generate the URL of the image referenced by the video.
audio_url (str): The input audio url.
reference_video_urls (List[str]): list of character reference video file urls uploaded by the user
reference_video_description (List[str]): Descriptions of the picture and sound of each reference video. The order must correspond to the order of URLs in `reference_video_urls`. An error will be reported if the number of descriptions does not match the number of URLs.
api_key (str, optional): The API key. Defaults to None.
workspace (str): The dashscope workspace id.
extra_input (Dict): The extra input parameters.
Expand All @@ -396,7 +426,8 @@ async def call(cls,
"""
task_group, f = _get_task_group_and_task(__name__)
inputs, kwargs, task = VideoSynthesis._get_input(
model, prompt, img_url, audio_url, extend_prompt, negative_prompt, template, api_key,
model, prompt, img_url, audio_url, reference_video_urls, reference_video_description,
extend_prompt, negative_prompt, template, api_key,
extra_input, task, f, head_frame, tail_frame,
first_frame_url, last_frame_url, **kwargs)
response = await super().call(model, inputs, task_group, task, f, api_key, workspace, **kwargs)
Expand All @@ -408,6 +439,8 @@ async def async_call(cls,
prompt: Any = None,
img_url: str = None,
audio_url: str = None,
reference_video_urls: List[str] = None,
reference_video_description: List[str] = None,
# """@deprecated, use prompt_extend in parameters """
extend_prompt: bool = True,
negative_prompt: str = None,
Expand All @@ -431,6 +464,8 @@ async def async_call(cls,
template (str): LoRa input, such as gufeng, katong, etc.
img_url (str): The input image url, Generate the URL of the image referenced by the video.
audio_url (str): The input audio url.
reference_video_urls (List[str]): list of character reference video file urls uploaded by the user
reference_video_description (List[str]): Descriptions of the picture and sound of each reference video. The order must correspond to the order of URLs in `reference_video_urls`. An error will be reported if the number of descriptions does not match the number of URLs.
api_key (str, optional): The API key. Defaults to None.
workspace (str): The dashscope workspace id.
extra_input (Dict): The extra input parameters.
Expand All @@ -452,7 +487,8 @@ async def async_call(cls,
task_group, function = _get_task_group_and_task(__name__)

inputs, kwargs, task = VideoSynthesis._get_input(
model, prompt, img_url, audio_url, extend_prompt, negative_prompt, template, api_key,
model, prompt, img_url, audio_url, reference_video_urls, reference_video_description,
extend_prompt, negative_prompt, template, api_key,
extra_input, task, function, head_frame, tail_frame,
first_frame_url, last_frame_url, **kwargs)

Expand Down
1 change: 1 addition & 0 deletions dashscope/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
HISTORY = 'history'
CUSTOMIZED_MODEL_ID = 'customized_model_id'
IMAGES = 'images'
REFERENCE_VIDEO_URLS = 'reference_video_urls'
TEXT_EMBEDDING_INPUT_KEY = 'texts'
SERVICE_503_MESSAGE = 'Service temporarily unavailable, possibly overloaded or not ready.' # noqa E501
WEBSOCKET_ERROR_CODE = 44
Expand Down
12 changes: 8 additions & 4 deletions samples/test_video_synthesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,21 @@
from dashscope import VideoSynthesis
import os

prompt = "一幅史诗级可爱的场景。一只小巧可爱的卡通小猫将军,身穿细节精致的金色盔甲,头戴一个稍大的头盔,勇敢地站在悬崖上。他骑着一匹虽小但英勇的战马。悬崖下方,一支由老鼠组成的、数量庞大、无穷无尽的军队正带着临时制作的武器向前冲锋。这是一个戏剧性的、大规模的战斗场景,灵感来自中国古代的战争史诗。远处的雪山上空,天空乌云密布。整体氛围是“可爱”与“霸气”的搞笑和史诗般的融合"
prompt = "一只小猫在月光下奔跑"
audio_url = 'https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20250925/ozwpvi/rap.mp3'
reference_video_urls = ["https://test-data-center.oss-accelerate.aliyuncs.com/wanx/video/resources/with_human_voice_11s.mov"]
api_key = os.getenv("DASHSCOPE_API_KEY")


def simple_call():
print('----sync call, please wait a moment----')
rsp = VideoSynthesis.call(api_key=api_key,
model="wan2.5-t2v-preview",
prompt=prompt,
audio_url=audio_url)
model="wan2.6-r2v",
reference_video_urls=reference_video_urls,
shot_type="multi",
audio=True,
watermark=True,
prompt=prompt)
Comment on lines 13 to +19
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This updated call to VideoSynthesis.call no longer uses audio_url. The audio_url variable defined on line 6 is now unused and can be removed to improve code clarity.

if rsp.status_code == HTTPStatus.OK:

print('response: %s' % rsp)
Expand Down