Now that we have a function to process a single image, the next step is to parse the content of the Excel spreadsheet, iterate over all the image URLs it contains and call process_image() for each of them.
import openpyxl
import concurrent.futures
def iterate_over_spreadsheet(spreadsheet_path, column_name, result_directory_path):
# Load the spreadsheet
wb = openpyxl.load_workbook(spreadsheet_path)
image_urls = []
# Loop over all sheets in the workbook
for sheet_name in wb.sheetnames:
sheet = wb[sheet_name]
for col_num, col_cells in enumerate(sheet.iter_cols(values_only=True)):
if col_cells[0] == column_name:
break
else:
print(f"Column {column_name} not found in sheet {sheet_name}.")
continue
for cell in sheet.iter_rows(min_row=2, min_col=col_num + 1, max_col=col_num + 1):
if cell[0].value:
image_urls.append(cell[0].value)
The code above contains the first half of the function iterate_over_spreadsheet(), here are its important steps:
we open the spreadsheet located at spreadsheet_path using the library openpyxl
we iterate over all the sheets contained in the document
we look for the column whose first cell contains the value column_name (in the case of the spreadsheet we use as an example, that value would be "Image URL")
we iterate over all the rows in that column, and collect their value in the array image_urls
Now that we have collected all the image_urls, we can write the second part of the function iterate_over_spreadsheet():
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
for image_url in image_urls:
# Extracting the filename from the URL, stripping query parameters if any
base_name = os.path.basename(image_url.split('?')[0])
# Getting the filename without its original extension and appending '.png'
result_file_name = os.path.splitext(base_name)[0] + '.png'
result_path = os.path.join(result_directory_path, result_file_name)
if not os.path.exists(result_path): # don't re-process images
executor.submit(process_image, image_url, result_path)
In this second part of the function we:
iterate over the image_urls
compose the result_path where the result image will be saved
check that a file doesn't already exist at result_path (so as to not process the same image twice)
call the function process_image() through an executor, which allows us to execute 4 API calls in parallel
If you want, you can increase the value of the argument max_workers to run more API calls in parallel. Keep in mind though that the API is rate limited to 500 calls/minutes.
Step 3: Running the script
We're almost there, the last thing we need is to actually run the script.
To do this, we'll add this final piece of code:
if __name__ == "__main__":
SPREADSHEET_PATH = "./path_to_spreadsheet.xlsx"
COLUMN_NAME = "Image URL"
OUTPUT_DIRECTORY = "./output/"
if not os.path.exists(OUTPUT_DIRECTORY):
os.makedirs(OUTPUT_DIRECTORY)
iterate_over_spreadsheet(spreadsheet_path=SPREADSHEET_PATH, column_name=COLUMN_NAME, result_directory_path=OUTPUT_DIRECTORY)
Then, all that's left is to actually run the script using the terminal:
$ pip install requests openpyxl # run once to install the third-party libraries
$ python script.py
Conclusion
In this tutorial, we saw how to use a Python script to easily process images whose URLs are store inside an Excel spreadsheet with the Photoroom API.
We took the example of processing images through the Image Editing API, but this approach would also work perfectly with our Generate Background API.
Download the code sample
Here's the entire code sample, if you want to easily save it to a file:
import concurrent.futures
import openpyxl
import os
import requests
API_KEY = "REPLACE_WITH_YOUR_API_KEY"
def process_image(input_image_url, output_image_path):
try:
url = "https://image-api.photoroom.com/v2/edit"
query_string = {
"background.color": "FFFFFFFF",
"outputSize": "1000x1000",
"padding": "0.1",
"imageUrl": input_image_url
}
headers = {
"Accept": "image/png",
"x-api-key": API_KEY
}
response = requests.get(url, headers=headers, params=query_string)
response.raise_for_status()
with open(output_image_path, 'wb') as f:
f.write(response.content)
print(f"Image downloaded and saved to {output_image_path}")
except requests.RequestException as e:
print(f"Error: {str(e)}")
return str(e)
def iterate_over_spreadsheet(spreadsheet_path, column_name, result_directory_path):
# Load the spreadsheet
wb = openpyxl.load_workbook(spreadsheet_path)
image_urls = []
# Loop over all sheets in the workbook
for sheet_name in wb.sheetnames:
sheet = wb[sheet_name]
for col_num, col_cells in enumerate(sheet.iter_cols(values_only=True)):
if col_cells[0] == column_name:
break
else:
print(f"Column {column_name} not found in sheet {sheet_name}.")
continue
for cell in sheet.iter_rows(min_row=2, min_col=col_num + 1, max_col=col_num + 1):
if cell[0].value:
image_urls.append(cell[0].value)
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
for image_url in image_urls:
# Extracting the filename from the URL, stripping query parameters if any
base_name = os.path.basename(image_url.split('?')[0])
# Getting the filename without its original extension and appending '.png'
result_file_name = os.path.splitext(base_name)[0] + '.png'
result_path = os.path.join(result_directory_path, result_file_name)
if not os.path.exists(result_path): # don't re-process images
executor.submit(process_image, image_url, result_path)
if __name__ == "__main__":
SPREADSHEET_PATH = "./path_to_spreadsheet.xlsx"
COLUMN_NAME = "Image URL"
OUTPUT_DIRECTORY = "./output/"
if not os.path.exists(OUTPUT_DIRECTORY):
os.makedirs(OUTPUT_DIRECTORY)
iterate_over_spreadsheet(spreadsheet_path=SPREADSHEET_PATH, column_name=COLUMN_NAME, result_directory_path=OUTPUT_DIRECTORY)