-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgradio_utils.py
325 lines (258 loc) · 19.9 KB
/
gradio_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
import os
import cv2
import json
import time
import requests
import subprocess
import shutil
from openai import OpenAI
# SECURITY: the OpenAI credential is read from the environment instead of being
# committed to source control. The key that used to be hard-coded on this line
# has been published with the file and should be treated as compromised and
# revoked. Export OPENAI_API_KEY before starting the app.
# `api_key` is kept as a module-level name for backward compatibility; it is an
# empty string when the variable is not set.
api_key = os.environ.get("OPENAI_API_KEY", "")
# System prompt sent with every request. It instructs the model to answer with a
# JSON object holding exactly two fields: Image_prompt and Video_prompt.
_PROMPT_SYSTEM_ROLE = """You are an expert in Generative AI image and Video generation
Your task is to write prompts that could result in best generation of images and videos given user query
User query would be a script that needs to be reformatted into a well-formed prompt
<Important>
For each user query, you need to provide 2 prompts:
1. Image_prompt: Detailed description of the initial image/scene of the video
2. Video_prompt: Description of the key events/transitions that should occur over the course of the video. The events should be brief and to the point. Make sure not to add events that include drastic changes.
User can also guide you in generating prompts by providing insights from their script
Do NOT provide an introductory paragraph or sentence.
Do NOT provide a conclusion.
Your response should strictly be in JSON format only, with no preamble at the start or end. JSON fields should only be Video_prompt and Image_prompt.
</Important>
<Example1>
User script: city scene of young women walking early morning on sidewalk
Image_prompt: "Early morning in a bustling city. The rising sun casts long shadows across the crowded sidewalk. Focus on a young woman in her mid-20s with short brown hair, wearing casual business attire and comfortable shoes. She walks with purpose, a coffee cup in hand, ready to start her day. Towering skyscrapers and urban scenery fill the background."
Video_prompt: "The video opens with the young woman exiting her apartment building, stepping out onto the busy city sidewalk. She navigates through the rush of morning commuters, narrowly avoiding collisions. As she waits to cross a street, a bicycle messenger zooms past, startling her. She checks her watch, realizing she's running late, and picks up her pace. The video concludes with her hurrying into a large office building, disappearing into the revolving door."
<Example2>
User script: women tying shoes on a bench
Image_prompt: "Close-up shot of a young woman in her mid-20s sitting on a city bench. She has a vibrant, lively face and is focused on tying the laces of her bright blue Falcon Footwear sneakers. The bustling city environment is blurred in the background, emphasizing her action."
Video_prompt: "The scene begins with the woman jogging up to the bench, slightly out of breath. She sits down and begins untying her shoelaces. As she starts to re-tie them, a gust of wind blows a newspaper onto her lap, distracting her momentarily. She brushes it off and resumes tying her laces with determination. Once finished, she hops up from the bench and continues her jog, blending into the city crowd."
<Example3>
User script: women navigating crowded city street and green trail
Image_prompt: "Split-screen image: On the left, a young woman named Emily navigates a dense city street, surrounded by tall buildings and busy traffic. On the right, the same woman runs freely along a serene, green nature trail. In both scenarios, she wears versatile, sporty clothing suitable for the contrasting environments."
Video_prompt: "The video alternates between scenes of Emily in the city and on the nature trail. In the city, she dodges pedestrians, waits at crosswalks, and jogs in place at stoplights. On the trail, she runs at a steady pace, jumps over small obstacles, and takes a moment to appreciate the tranquil surroundings. The video concludes with a shot of Emily standing at the intersection of the city and the trail, representing the balance she's found."
<Example4>
User script: Emily stands at overlook and running on beach with friends
Image_prompt: "Dual scene: In one scene, Emily stands alone at a breathtaking overlook, catching her breath and admiring the expansive view of nature below. In the other scene, she's running along the shore of a sandy beach, laughing and joking with a group of friends all dressed in casual sportswear."
Video_prompt: "The video starts with Emily taking in the view at the overlook, then transitions to her running down a trail to meet her friends on the beach. Together, they jog along the shoreline, splashing in the shallow water and playfully racing each other. They take a break to do some stretches and enjoy the ocean view. The video ends with the group walking off into the distance, with the sun setting over the water."
<Example5>
User script: Emily and friends playing basketball in urban park
Image_prompt: "An energetic outdoor scene in an urban park. Emily and her diverse group of friends are in the midst of a lively basketball game on a weathered court. Emily is dribbling the ball, wearing her signature Falcon Footwear sneakers. The surrounding park and distant city skyline are visible in the background."
Video_prompt: "The video begins with Emily's friends warming up on the court, passing balls and taking practice shots. Emily arrives, laces up her sneakers, and joins in. The game starts, and the camera follows the fast-paced action, focusing on Emily's skills and teamwork. After a successful shot from Emily, the group celebrates. The game wraps up with high-fives and friendly banter as they leave the court, discussing their next adventure."
<Example6>
User script: Emily in cozy café wearing Falcon Footwear
Image_prompt: "Interior of a charming, cozy café. Emily sits comfortably in a booth by the window, wearing her Falcon Footwear sneakers, crossed at the ankles. She has a thoughtful, content expression as she gazes out at the city street. The café decor is modern and inviting, with warm lighting and an ambiance of relaxation."
Video_prompt: "The scene opens with an exterior shot of the café, then cuts to Emily entering and being greeted warmly by the barista. She orders her drink and finds a seat by the window. As she settles in, she reflects on her day, shown through a montage of flashbacks to her various adventures. The video concludes with Emily looking out the window, smiling softly, and then taking a sip of her drink, content and recharged."
<Example7>
User script: Falcon Footwear sneakers on wooden floor
Image_prompt: "Close-up of a pair of vibrant blue Falcon Footwear sneakers on a polished wooden floor. The lighting spotlights the shoes, highlighting their sleek design and bold color. In the soft, blurred background, a cityscape is visible through a large window, bathed in the warm glow of the setting sun."
Video_prompt: "The video begins with a close-up of the sneakers, then slowly zooms out to reveal the full room. As the camera pans around the space, it showcases various urban-inspired decor elements that complement the shoes' aesthetic. The video then transitions to a montage of scenes featuring the sneakers in action - on city streets, in parks, at a dance studio. It concludes with the shoes back on the wooden floor, representing the end of an eventful day."
"""


def llm_prompt_generator(User_Input, client=None):
    """Turn a free-form user script into an image prompt and a video prompt.

    Sends ``User_Input`` to the chat-completions endpoint together with the
    system prompt above, requests a JSON-object response, and returns the
    decoded JSON.

    Args:
        User_Input: The user's script / scene description (inserted verbatim
            into the user message, surrounded by newlines).
        client: Optional pre-built OpenAI client to reuse. When omitted a new
            ``OpenAI()`` client is created, which takes its credential from the
            ``OPENAI_API_KEY`` environment variable. No key is embedded in this
            function any more.

    Returns:
        The parsed JSON reply. The system prompt asks for a dict with the keys
        ``Image_prompt`` and ``Video_prompt``; the shape is not validated here.

    Raises:
        ValueError: If the model reply is missing or is not valid JSON. The
            previous implementation retried ``json.loads`` on the same string
            in an endless loop in that situation.
    """
    if client is None:
        client = OpenAI()

    user_query = f"\n{User_Input}\n"
    completion_response = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=[
            {"role": "system", "content": _PROMPT_SYSTEM_ROLE},
            {"role": "user", "content": user_query},
        ],
        temperature=0.0,
        response_format={"type": "json_object"},
    )
    raw_output = completion_response.choices[0].message.content

    # Parse exactly once: re-parsing an unchanged string can never succeed.
    try:
        return json.loads(raw_output)
    except (TypeError, json.JSONDecodeError) as err:
        raise ValueError(
            f"Model reply is not valid JSON: {raw_output!r}"
        ) from err
class Generator:
    """Orchestrates one image-to-video job.

    The job generates a seed image with DALL-E 3, writes a one-line input list
    (``<image path>|||<video prompt>``), runs the external inference script one
    or more times, and finally calls a shell script to combine the clips.

    Attributes:
        name: Constant label, ``"Generator"``.
        image_prompt: Prompt used for the DALL-E image.
        video_prompt: Prompt written into the input list for the video model.
        file_path: Input-list file name (``<file_path>.txt``).
        image_name: Absolute path the seed image / last frame is written to.
        command: Argument list used to launch the inference script.
        errors: Human-readable failure messages collected during a run.
    """

    def __init__(self, image_prompt, video_prompt, file_path):
        """Store the prompts and derive file names from ``file_path``.

        Args:
            image_prompt: Text prompt for the seed image.
            video_prompt: Text prompt for the video generation step.
            file_path: Base name (no extension) used for both the ``.txt``
                input list and the ``.jpg`` seed image.
        """
        self.name = "Generator"
        self.image_prompt = image_prompt
        self.video_prompt = video_prompt
        self.file_path = file_path + ".txt"
        self.image_name = "/root/VideoGenerator/data/test_images/" + file_path + ".jpg"
        self.command = [
            '/opt/conda/envs/exp/bin/python', '/root/VideoGenerator/inference.py',
            '--cfg', 'configs/i2vgen_xl_infer.yaml',
            'test_list_path', self.file_path,
            'test_model', 'models/i2vgen_xl_00854500.pth',
        ]
        self.errors = []

    def image_generator(self):
        """Generate the seed image with DALL-E 3 and save it to ``self.image_name``.

        The generation request is attempted at most twice. Any failure is
        recorded in ``self.errors`` instead of being raised.

        Returns:
            True when the image was generated, downloaded and written to disk;
            False otherwise.
        """
        max_attempts = 2  # same retry budget as before
        response = None
        for attempt in range(1, max_attempts + 1):
            try:
                response = OpenAI().images.generate(
                    model="dall-e-3",
                    prompt=self.image_prompt,
                    size="1024x1024",
                    quality="standard",
                    n=1,
                )
                break
            except Exception as exc:
                # `Exception` (not a bare except) so Ctrl-C / SystemExit still
                # stop the process instead of being counted as a failed try.
                print(f"DallE generation attempt {attempt} failed: {exc}")
        if response is None:
            self.errors.append("DallE Generation Failed.")
            return False

        image_url = response.data[0].url

        # Bounded download with a status check, so a hung connection or an HTTP
        # error page is never written to disk as if it were the image.
        try:
            image_response = requests.get(image_url, timeout=60)
            image_response.raise_for_status()
        except requests.RequestException as exc:
            self.errors.append(f"DallE Image Download Failed: {exc}")
            return False

        target_dir = os.path.dirname(self.image_name)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        with open(self.image_name, 'wb') as file:
            file.write(image_response.content)
        print(f"Image saved as {self.image_name}")
        return True

    def create_input_list(self):
        """Write ``<image_name>|||<video_prompt>`` to ``self.file_path``.

        The file is created if missing and overwritten otherwise.
        """
        test_data = f"{self.image_name}|||{self.video_prompt}"
        with open(self.file_path, 'w') as file:
            file.write(test_data)

    def save_last_frame(self, video_path):
        """Save the final frame of ``video_path`` to ``self.image_name``.

        The video is read sequentially to its end and the last decoded frame is
        written over the current seed image. Nothing is written when the video
        cannot be opened or has no frames.

        Args:
            video_path: Path of the video file to read.
        """
        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                print("Error: Couldn't open video file.")
                return

            last_frame = None
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                last_frame = frame

            if last_frame is not None:
                cv2.imwrite(self.image_name, last_frame)
                print(f"Last frame saved to {self.image_name}")
            else:
                print("No frames to save.")
        finally:
            # Release the capture on every path, including the early return.
            cap.release()

    def run_iterations(self, number_of_iterations=1):
        """Run the full pipeline.

        Steps: generate the seed image, write the input list, clear the old
        output directory, then for each iteration launch the inference command,
        rename the newest output file to ``<iteration>.mp4`` and store its last
        frame as the image for the next iteration. Finally the clips are
        combined.

        Args:
            number_of_iterations: How many times to run the inference command.

        Returns:
            True when every step succeeded; False when image generation, an
            inference run, or locating its output failed (details are appended
            to ``self.errors``).
        """
        print("Generating Image from Dalle-3")
        image_generation_response = self.image_generator()
        if image_generation_response is False:
            return False

        print("Creating Input list")
        self.create_input_list()

        print("Removing earlier generated video")
        # NOTE(review): this path is hard-coded while the output directory below
        # is derived from self.file_path - confirm they are meant to coincide.
        self.clear_directory('/root/VideoGenerator/workspace/experiments/gradio_test')

        print("Runnning video generation")
        # Loop-invariant. The split('.')[0] naming is kept as-is because the
        # directory is created by the external script - presumably with the same
        # rule; verify against inference.py before changing it.
        directory = f"workspace/experiments/{self.file_path.split('.')[0]}/"

        for i in range(number_of_iterations):
            completed = subprocess.run(self.command)
            if completed.returncode != 0:
                self.errors.append(
                    f"Video generation failed on run {i+1} "
                    f"(exit code {completed.returncode})."
                )
                return False
            print(f"Run {i+1} of the script completed")

            # Candidate outputs: regular files whose name does not contain 'log'.
            files = []
            if os.path.isdir(directory):
                files = [
                    os.path.join(directory, f)
                    for f in os.listdir(directory)
                    if os.path.isfile(os.path.join(directory, f)) and 'log' not in f
                ]
            if not files:
                # Previously this crashed inside max()/listdir().
                self.errors.append(
                    f"No generated video found in {directory} after run {i+1}."
                )
                return False

            latest_file = max(files, key=os.path.getctime)
            new_name = os.path.join(directory, f'{i+1}.mp4')
            os.rename(latest_file, new_name)
            print(f'Renamed "{latest_file}" to "{new_name}"')

            # The last frame becomes the input image of the next iteration.
            self.save_last_frame(new_name)
            print(f"Saved Last Video at {new_name}")

        self.combine_videos()
        return True

    def clear_directory(self, directory):
        """Remove everything inside ``directory`` (the directory itself stays).

        Sub-directories are removed recursively; plain files and symlinks are
        unlinked. A missing ``directory`` is a no-op.

        Args:
            directory: Path of the directory to empty.
        """
        if not os.path.exists(directory):
            return
        for item in os.listdir(directory):
            item_path = os.path.join(directory, item)
            if os.path.isdir(item_path) and not os.path.islink(item_path):
                # os.remove() cannot delete directories.
                shutil.rmtree(item_path)
            else:
                os.remove(item_path)

    def combine_videos(self):
        """Run the external ``combine_videos.sh`` script with ``output.mp4``."""
        os.system("/root/VideoGenerator/combine_videos.sh output.mp4")

    @staticmethod
    def _clip_sort_key(file_name):
        """Order clips numerically by stem (1, 2, ..., 10); other names go last."""
        stem = os.path.splitext(file_name)[0]
        if stem.isdigit():
            return (0, int(stem), file_name)
        return (1, 0, file_name)

    def combine_videos_old(self):
        """Concatenate the generated ``.mp4`` clips with OpenCV (legacy path).

        Reads every ``.mp4`` in ``workspace/experiments/<file_path stem>`` in
        numeric order and appends all frames to
        ``gradio_videos/<first 60 chars of video_prompt>.mp4`` at 8 fps with a
        frame size of (1280, 704).
        """
        folder_path = "workspace/experiments/" + self.file_path.split(".")[0]

        video_files = [f for f in os.listdir(folder_path) if f.endswith('.mp4')]
        # Numeric ordering: a plain string sort would place "10.mp4" before "2.mp4".
        video_files.sort(key=self._clip_sort_key)

        os.makedirs("gradio_videos", exist_ok=True)
        output_path = "gradio_videos/" + self.video_prompt[:60] + ".mp4"
        fourcc = cv2.VideoWriter_fourcc(*'xvid')  # Specify the codec
        out = cv2.VideoWriter(output_path, fourcc, 8, (1280, 704))
        try:
            for video_file in video_files:
                print(video_file)
                video_path = os.path.join(folder_path, video_file)
                video_capture = cv2.VideoCapture(video_path)
                try:
                    while True:
                        success, frame = video_capture.read()
                        if not success:
                            break
                        out.write(frame)
                        print(frame.shape)
                finally:
                    video_capture.release()
        finally:
            # Always finalize the output file, even if a clip fails mid-way.
            out.release()
        print("Concatenated video saved successfully!")
        print("=======================================================================================")