The scraping agent API is used to manage web scraping agents under an Agenty account. Use this API to create a new web scraper, modify an existing scraper, etc.
Create a scraping agent
Endpoint:
Method: POST
URL: https://api.agenty.com/v1/agents/scraping/create
Headers:
Key | Value | Description |
---|---|---|
Content-Type | application/json |
Query params:
Key | Value | Description |
---|---|---|
apikey | {{API_KEY}} |
Body:
{
"name": "Books price scraping agent",
"description": "This agent will extract the product list, prices, image and detail page hyperlink from books.toscrape.com website",
"type": "scraping",
"config": {
"sourceurl": "http://books.toscrape.com/",
"collections": [
{
"name": "Collection1",
"fields": [
{
"name": "NAME",
"type": "CSS",
"selector": "h3 a",
"extract": "TEXT",
"attribute": null,
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [],
"formatter": []
},
{
"name": "PRICE",
"type": "CSS",
"selector": ".price_color",
"extract": "TEXT",
"attribute": "",
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [
{
"function": "Insert",
"parameters": [
{
"name": null,
"value": "http://books.toscrape.com/"
}
]
}
],
"formatter": []
},
{
"name": "IMAGE",
"type": "CSS",
"selector": ".thumbnail",
"extract": "ATTR",
"attribute": "src",
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [
{
"function": "Insert",
"parameters": [
{
"name": "Input",
"value": "http://books.toscrape.com/"
}
]
}
],
"formatter": []
},
{
"name": "DETAILS_PAGE_URL",
"type": "CSS",
"selector": ".product_pod h3 a",
"extract": "ATTR",
"attribute": "href",
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [
{
"function": "Insert",
"parameters": [
{
"name": "Input",
"value": "http://books.toscrape.com/"
}
]
}
],
"formatter": []
}
]
}
],
"engine": {
"name": "default",
"loadjavascript": true,
"loadimages": false,
"timeout": 30,
"viewport": {
"width": 1280,
"height": 600
}
},
"waitafterpageload": null,
"login": {
"enabled": false,
"type": null,
"data": []
},
"logout": null,
"pagination": {
"enabled": true,
"type": "CLICK",
"selector": ".next a",
"maxpages": 50
},
"header": {
"method": "GET",
"encoding": "utf-8",
"data": [
{
"key": "Accept",
"value": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
},
{
"key": "User-Agent",
"value": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
},
{
"key": "Accept-Language",
"value": "*"
}
]
},
"autoredirect": {
"enabled": true,
"maxautoredirect": 3
},
"failretry": {
"enabled": true,
"maxtry": 3,
"tryinterval": 2,
"timeout": 0
},
"proxy": {
"enabled": false,
"type": null,
"reference": null
},
"throttling": {
"enabled": false,
"type": null,
"seconds": 0
},
"formsubmit": {
"enabled": false,
"data": []
},
"meta": null,
"input": {
"type": "SOURCE",
"reference": null
}
}
}
Responses:
Status: OK | Code: 200
{
"status_code": 200,
"message": "A new scraping agent with id: gowlyyg3dp created successfully"
}
Get a scraping agent by ID
Endpoint:
Method: GET
URL: https://api.agenty.com/v1/agents/scraping/gowlyyg3dp
Headers:
Key | Value | Description |
---|---|---|
Content-Type | application/json |
Query params:
Key | Value | Description |
---|---|---|
apikey | {{API_KEY}} |
Responses:
Status: OK | Code: 200
{
"config": {
"sourceurl": "http://books.toscrape.com/",
"collections": [
{
"name": "Collection1",
"fields": [
{
"name": "PRODUCT_NAME",
"type": "CSS",
"selector": "h3 a",
"extract": "TEXT",
"attribute": null,
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [],
"formatter": []
},
{
"name": "PRICE",
"type": "CSS",
"selector": ".price_color",
"extract": "TEXT",
"attribute": "",
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [
{
"function": "Insert",
"parameters": [
{
"name": null,
"value": "http://books.toscrape.com/"
}
]
}
],
"formatter": []
},
{
"name": "IMAGE",
"type": "CSS",
"selector": ".thumbnail",
"extract": "ATTR",
"attribute": "src",
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [
{
"function": "Insert",
"parameters": [
{
"name": "Input",
"value": "http://books.toscrape.com/"
}
]
}
],
"formatter": []
},
{
"name": "DETAILS_PAGE_URL",
"type": "CSS",
"selector": ".product_pod h3 a",
"extract": "ATTR",
"attribute": "href",
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [
{
"function": "Insert",
"parameters": [
{
"name": "Input",
"value": "http://books.toscrape.com/"
}
]
}
],
"formatter": []
}
]
}
],
"engine": {
"name": "default",
"loadjavascript": true,
"loadimages": false,
"timeout": 30,
"viewport": {
"width": 1280,
"height": 600
}
},
"waitafterpageload": null,
"login": {
"enabled": false,
"type": null,
"data": []
},
"logout": null,
"pagination": {
"enabled": true,
"type": "CLICK",
"selector": ".next a",
"maxpages": 50
},
"header": {
"method": "GET",
"encoding": "utf-8",
"data": [
{
"key": "Accept",
"value": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
},
{
"key": "User-Agent",
"value": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
},
{
"key": "Accept-Language",
"value": "*"
}
]
},
"autoredirect": {
"enabled": true,
"maxautoredirect": 3
},
"failretry": {
"enabled": true,
"maxtry": 3,
"tryinterval": 2,
"timeout": 0
},
"proxy": {
"enabled": false,
"type": null,
"reference": null
},
"throttling": {
"enabled": false,
"type": null,
"seconds": 0
},
"formsubmit": {
"enabled": false,
"data": []
},
"profiles": null,
"meta": null,
"input": {
"type": "SOURCE",
"reference": null
}
},
"agent_id": "gowlyyg3dp",
"project_id": null,
"name": "Books scraping agent",
"description": "This agent will extract the product list, prices, image and detail page hyperlink from books.toscrape.com website",
"type": "scraping",
"version": 2,
"created_at": "2019-03-09T03:01:19",
"updated_at": "2019-03-09T03:03:07",
"cron_expression": null,
"schedule_description": null,
"next_auto_run_at": null,
"access_group_id": 0
}
Update a scraping agent
Endpoint:
Method: PUT
URL: https://api.agenty.com/v1/agents/scraping/{{AGENT_ID}}
Headers:
Key | Value | Description |
---|---|---|
Content-Type | application/json |
Query params:
Key | Value | Description |
---|---|---|
apikey | {{API_KEY}} |
Body:
{
"name": "Books scraping agent",
"description": "This agent will extract the product list, prices, image and detail page hyperlink from books.toscrape.com website",
"type": "scraping",
"config": {
"sourceurl": "http://books.toscrape.com/",
"collections": [
{
"name": "Collection1",
"fields": [
{
"name": "PRODUCT_NAME",
"type": "CSS",
"selector": "h3 a",
"extract": "TEXT",
"attribute": null,
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [],
"formatter": []
},
{
"name": "PRICE",
"type": "CSS",
"selector": ".price_color",
"extract": "TEXT",
"attribute": "",
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [
{
"function": "Insert",
"parameters": [
{
"name": null,
"value": "http://books.toscrape.com/"
}
]
}
],
"formatter": []
},
{
"name": "IMAGE",
"type": "CSS",
"selector": ".thumbnail",
"extract": "ATTR",
"attribute": "src",
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [
{
"function": "Insert",
"parameters": [
{
"name": "Input",
"value": "http://books.toscrape.com/"
}
]
}
],
"formatter": []
},
{
"name": "DETAILS_PAGE_URL",
"type": "CSS",
"selector": ".product_pod h3 a",
"extract": "ATTR",
"attribute": "href",
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [
{
"function": "Insert",
"parameters": [
{
"name": "Input",
"value": "http://books.toscrape.com/"
}
]
}
],
"formatter": []
}
]
}
],
"engine": {
"name": "default",
"loadjavascript": true,
"loadimages": false,
"timeout": 30,
"viewport": {
"width": 1280,
"height": 600
}
},
"waitafterpageload": null,
"login": {
"enabled": false,
"type": null,
"data": []
},
"logout": null,
"pagination": {
"enabled": true,
"type": "CLICK",
"selector": ".next a",
"maxpages": 50
},
"header": {
"method": "GET",
"encoding": "utf-8",
"data": [
{
"key": "Accept",
"value": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
},
{
"key": "User-Agent",
"value": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
},
{
"key": "Accept-Language",
"value": "*"
}
]
},
"autoredirect": {
"enabled": true,
"maxautoredirect": 3
},
"failretry": {
"enabled": true,
"maxtry": 3,
"tryinterval": 2,
"timeout": 0
},
"proxy": {
"enabled": false,
"type": null,
"reference": null
},
"throttling": {
"enabled": false,
"type": null,
"seconds": 0
},
"formsubmit": {
"enabled": false,
"data": []
},
"meta": null,
"input": {
"type": "SOURCE",
"reference": null
}
}
}
Responses:
Status: OK | Code: 200
{
"status_code": 200,
"message": "Agent with id: gowlyyg3dp updated successfully"
}