How to process images from an Excel spreadsheet
A common use case for the Image Editing API is to process a large amount of images, whose URLs are stored inside an Excel spreadsheet.
In this tutorial, we'll see how it's possible to process these images through a Python script that will:
parse the Excel spreadsheet
make the calls to the Image Editing API
save the result images to the disk.
Let's take the example of this spreadsheet:

Step 1: Calling the Image Editing API
First, we'll write a function that can process a single image through the Image Editing API and save the result image returned by the API to the disk:
import os
import requests
API_KEY = "REPLACE_WITH_YOUR_API_KEY"
def process_image(input_image_url, output_image_path):
try:
url = "https://image-api.photoroom.com/v2/edit"
query_string = {
"background.color": "FFFFFFFF",
"outputSize": "1000x1000",
"padding": "0.1",
"imageUrl": input_image_url
}
headers = {
"Accept": "image/png, application/json",
"x-api-key": API_KEY
}
response = requests.get(url, headers=headers, params=query_string)
response.raise_for_status()
with open(output_image_path, 'wb') as f:
f.write(response.content)
print(f"Image downloaded and saved to {output_image_path}")
except requests.RequestException as e:
print(f"Error: {str(e)}")
return str(e)
This code is pretty straightforward:
it configures the parameters of the call to the Image Editing API:
white background
output size of 1000x1000px
padding of 10%
uses the
requests
library to make the GET HTTP call to the APIsaves the result image at
output_image_path
Notice that you will need to update the value of the constant API_KEY
with your own API key.
Step 2: Parsing the Excel spreadsheet
Now that we have a function to process a single image, the next step is to parse the content of the Excel spreadsheet, iterate over all the image URLs it contains and call process_image()
for each of them.
import openpyxl
import concurrent.futures
def iterate_over_spreadsheet(spreadsheet_path, column_name, result_directory_path):
# Load the spreadsheet
wb = openpyxl.load_workbook(spreadsheet_path)
image_urls = []
# Loop over all sheets in the workbook
for sheet_name in wb.sheetnames:
sheet = wb[sheet_name]
for col_num, col_cells in enumerate(sheet.iter_cols(values_only=True)):
if col_cells[0] == column_name:
break
else:
print(f"Column {column_name} not found in sheet {sheet_name}.")
continue
for cell in sheet.iter_rows(min_row=2, min_col=col_num + 1, max_col=col_num + 1):
if cell[0].value:
image_urls.append(cell[0].value)
The code above contains the first half of the function iterate_over_spreadsheet()
, here are its important steps:
we open the spreadsheet located at
spreadsheet_path
using the libraryopenpyxl
we iterate over all the sheets contained in the document
we look for the column whose first cell contains the value
column_name
(in the case of the spreadsheet we use as an example, that value would be"Image URL"
)we iterate over all the rows in that column, and collect their
value
in the arrayimage_urls
Now that we have collected all the image_urls
, we can write the second part of the function iterate_over_spreadsheet()
:
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
for image_url in image_urls:
# Extracting the filename from the URL, stripping query parameters if any
base_name = os.path.basename(image_url.split('?')[0])
# Getting the filename without its original extension and appending '.png'
result_file_name = os.path.splitext(base_name)[0] + '.png'
result_path = os.path.join(result_directory_path, result_file_name)
if not os.path.exists(result_path): # don't re-process images
executor.submit(process_image, image_url, result_path)
In this second part of the function we:
iterate over the
image_urls
compose the
result_path
where the result image will be savedcheck that a file doesn't already exist at
result_path
(so as to not process the same image twice)call the function
process_image()
through anexecutor
, which allows us to execute 4 API calls in parallel
Step 3: Running the script
We're almost there, the last thing we need is to actually run the script.
To do this, we'll add this final piece of code:
if __name__ == "__main__":
SPREADSHEET_PATH = "./path_to_spreadsheet.xlsx"
COLUMN_NAME = "Image URL"
OUTPUT_DIRECTORY = "./output/"
if not os.path.exists(OUTPUT_DIRECTORY):
os.makedirs(OUTPUT_DIRECTORY)
iterate_over_spreadsheet(spreadsheet_path=SPREADSHEET_PATH, column_name=COLUMN_NAME, result_directory_path=OUTPUT_DIRECTORY)
Then, all that's left is to actually run the script using the terminal:
$ pip install requests openpyxl # run once to install the third-party libraries
$ python script.py
Conclusion
In this tutorial, we saw how to use a Python script to easily process images whose URLs are store inside an Excel spreadsheet with the Photoroom API.
We took the example of processing images through the Image Editing API, but this approach would also work perfectly with our Generate Background API.
Download the code sample
Here's the entire code sample, if you want to easily save it to a file:
import concurrent.futures
import openpyxl
import os
import requests
API_KEY = "REPLACE_WITH_YOUR_API_KEY"
def process_image(input_image_url, output_image_path):
try:
url = "https://image-api.photoroom.com/v2/edit"
query_string = {
"background.color": "FFFFFFFF",
"outputSize": "1000x1000",
"padding": "0.1",
"imageUrl": input_image_url
}
headers = {
"Accept": "image/png",
"x-api-key": API_KEY
}
response = requests.get(url, headers=headers, params=query_string)
response.raise_for_status()
with open(output_image_path, 'wb') as f:
f.write(response.content)
print(f"Image downloaded and saved to {output_image_path}")
except requests.RequestException as e:
print(f"Error: {str(e)}")
return str(e)
def iterate_over_spreadsheet(spreadsheet_path, column_name, result_directory_path):
# Load the spreadsheet
wb = openpyxl.load_workbook(spreadsheet_path)
image_urls = []
# Loop over all sheets in the workbook
for sheet_name in wb.sheetnames:
sheet = wb[sheet_name]
for col_num, col_cells in enumerate(sheet.iter_cols(values_only=True)):
if col_cells[0] == column_name:
break
else:
print(f"Column {column_name} not found in sheet {sheet_name}.")
continue
for cell in sheet.iter_rows(min_row=2, min_col=col_num + 1, max_col=col_num + 1):
if cell[0].value:
image_urls.append(cell[0].value)
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
for image_url in image_urls:
# Extracting the filename from the URL, stripping query parameters if any
base_name = os.path.basename(image_url.split('?')[0])
# Getting the filename without its original extension and appending '.png'
result_file_name = os.path.splitext(base_name)[0] + '.png'
result_path = os.path.join(result_directory_path, result_file_name)
if not os.path.exists(result_path): # don't re-process images
executor.submit(process_image, image_url, result_path)
if __name__ == "__main__":
SPREADSHEET_PATH = "./path_to_spreadsheet.xlsx"
COLUMN_NAME = "Image URL"
OUTPUT_DIRECTORY = "./output/"
if not os.path.exists(OUTPUT_DIRECTORY):
os.makedirs(OUTPUT_DIRECTORY)
iterate_over_spreadsheet(spreadsheet_path=SPREADSHEET_PATH, column_name=COLUMN_NAME, result_directory_path=OUTPUT_DIRECTORY)
Last updated
Was this helpful?