Scraping Agent API

The Scraping Agent API is used to manage web scraping agents under an Agenty account. Use this API to create a new web scraper, modify an existing scraper, etc.

Create a scraping agent

Endpoint:

Method: POST
URL: https://api.agenty.com/v1/agents/scraping/create

Headers:

Key Value Description
Content-Type application/json

Query params:

Key Value Description
apikey {{API_KEY}}

Body:

{
  "name": "Books price scraping agent",
  "description": "This agent will extract the product list, prices, image and detail page hyperlink from books.toscrape.com website",
  "type": "scraping",
  "config": {
    "sourceurl": "http://books.toscrape.com/",
    "collections": [
      {
        "name": "Collection1",
        "fields": [
          {
            "name": "NAME",
            "type": "CSS",
            "selector": "h3 a",
            "extract": "TEXT",
            "attribute": null,
            "from": null,
            "visible": true,
            "cleantrim": true,
            "joinresult": false,
            "postprocessing": [],
            "formatter": []
          },
          {
            "name": "PRICE",
            "type": "CSS",
            "selector": ".price_color",
            "extract": "TEXT",
            "attribute": "",
            "from": null,
            "visible": true,
            "cleantrim": true,
            "joinresult": false,
            "postprocessing": [
              {
                "function": "Insert",
                "parameters": [
                  {
                    "name": null,
                    "value": "http://books.toscrape.com/"
                  }
                ]
              }
            ],
            "formatter": []
          },
          {
            "name": "IMAGE",
            "type": "CSS",
            "selector": ".thumbnail",
            "extract": "ATTR",
            "attribute": "src",
            "from": null,
            "visible": true,
            "cleantrim": true,
            "joinresult": false,
            "postprocessing": [
              {
                "function": "Insert",
                "parameters": [
                  {
                    "name": "Input",
                    "value": "http://books.toscrape.com/"
                  }
                ]
              }
            ],
            "formatter": []
          },
          {
            "name": "DETAILS_PAGE_URL",
            "type": "CSS",
            "selector": ".product_pod h3 a",
            "extract": "ATTR",
            "attribute": "href",
            "from": null,
            "visible": true,
            "cleantrim": true,
            "joinresult": false,
            "postprocessing": [
              {
                "function": "Insert",
                "parameters": [
                  {
                    "name": "Input",
                    "value": "http://books.toscrape.com/"
                  }
                ]
              }
            ],
            "formatter": []
          }
        ]
      }
    ],
    "engine": {
      "name": "default",
      "loadjavascript": true,
      "loadimages": false,
      "timeout": 30,
      "viewport": {
        "width": 1280,
        "height": 600
      }
    },
    "waitafterpageload": null,
    "login": {
      "enabled": false,
      "type": null,
      "data": []
    },
    "logout": null,
    "pagination": {
      "enabled": true,
      "type": "CLICK",
      "selector": ".next a",
      "maxpages": 50
    },
    "header": {
      "method": "GET",
      "encoding": "utf-8",
      "data": [
        {
          "key": "Accept",
          "value": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
        },
        {
          "key": "User-Agent",
          "value": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
        },
        {
          "key": "Accept-Language",
          "value": "*"
        }
      ]
    },
    "autoredirect": {
      "enabled": true,
      "maxautoredirect": 3
    },
    "failretry": {
      "enabled": true,
      "maxtry": 3,
      "tryinterval": 2,
      "timeout": 0
    },
    "proxy": {
      "enabled": false,
      "type": null,
      "reference": null
    },
    "throttling": {
      "enabled": false,
      "type": null,
      "seconds": 0
    },
    "formsubmit": {
      "enabled": false,
      "data": []
    },
    "meta": null,
    "input": {
      "type": "SOURCE",
      "reference": null
    }
  }
}

Responses:

Status: OK | Code: 200

{
    "status_code": 200,
    "message": "A new scraping agent with id: gowlyyg3dp created successfully"
}

Get scraping agent by id

Endpoint:

Method: GET
URL: https://api.agenty.com/v1/agents/scraping/{{AGENT_ID}}

Headers:

Key Value Description
Content-Type application/json

Query params:

Key Value Description
apikey {{API_KEY}}

Responses:

Status: OK | Code: 200

{
    "config": {
        "sourceurl": "http://books.toscrape.com/",
        "collections": [
            {
                "name": "Collection1",
                "fields": [
                    {
                        "name": "PRODUCT_NAME",
                        "type": "CSS",
                        "selector": "h3 a",
                        "extract": "TEXT",
                        "attribute": null,
                        "from": null,
                        "visible": true,
                        "cleantrim": true,
                        "joinresult": false,
                        "postprocessing": [],
                        "formatter": []
                    },
                    {
                        "name": "PRICE",
                        "type": "CSS",
                        "selector": ".price_color",
                        "extract": "TEXT",
                        "attribute": "",
                        "from": null,
                        "visible": true,
                        "cleantrim": true,
                        "joinresult": false,
                        "postprocessing": [
                            {
                                "function": "Insert",
                                "parameters": [
                                    {
                                        "name": null,
                                        "value": "http://books.toscrape.com/"
                                    }
                                ]
                            }
                        ],
                        "formatter": []
                    },
                    {
                        "name": "IMAGE",
                        "type": "CSS",
                        "selector": ".thumbnail",
                        "extract": "ATTR",
                        "attribute": "src",
                        "from": null,
                        "visible": true,
                        "cleantrim": true,
                        "joinresult": false,
                        "postprocessing": [
                            {
                                "function": "Insert",
                                "parameters": [
                                    {
                                        "name": "Input",
                                        "value": "http://books.toscrape.com/"
                                    }
                                ]
                            }
                        ],
                        "formatter": []
                    },
                    {
                        "name": "DETAILS_PAGE_URL",
                        "type": "CSS",
                        "selector": ".product_pod h3 a",
                        "extract": "ATTR",
                        "attribute": "href",
                        "from": null,
                        "visible": true,
                        "cleantrim": true,
                        "joinresult": false,
                        "postprocessing": [
                            {
                                "function": "Insert",
                                "parameters": [
                                    {
                                        "name": "Input",
                                        "value": "http://books.toscrape.com/"
                                    }
                                ]
                            }
                        ],
                        "formatter": []
                    }
                ]
            }
        ],
        "engine": {
            "name": "default",
            "loadjavascript": true,
            "loadimages": false,
            "timeout": 30,
            "viewport": {
                "width": 1280,
                "height": 600
            }
        },
        "waitafterpageload": null,
        "login": {
            "enabled": false,
            "type": null,
            "data": []
        },
        "logout": null,
        "pagination": {
            "enabled": true,
            "type": "CLICK",
            "selector": ".next a",
            "maxpages": 50
        },
        "header": {
            "method": "GET",
            "encoding": "utf-8",
            "data": [
                {
                    "key": "Accept",
                    "value": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
                },
                {
                    "key": "User-Agent",
                    "value": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
                },
                {
                    "key": "Accept-Language",
                    "value": "*"
                }
            ]
        },
        "autoredirect": {
            "enabled": true,
            "maxautoredirect": 3
        },
        "failretry": {
            "enabled": true,
            "maxtry": 3,
            "tryinterval": 2,
            "timeout": 0
        },
        "proxy": {
            "enabled": false,
            "type": null,
            "reference": null
        },
        "throttling": {
            "enabled": false,
            "type": null,
            "seconds": 0
        },
        "formsubmit": {
            "enabled": false,
            "data": []
        },
        "profiles": null,
        "meta": null,
        "input": {
            "type": "SOURCE",
            "reference": null
        }
    },
    "agent_id": "gowlyyg3dp",
    "project_id": null,
    "name": "Books scraping agent",
    "description": "This agent will extract the product list, prices, image and detail page hyperlink from books.toscrape.com website",
    "type": "scraping",
    "version": 2,
    "created_at": "2019-03-09T03:01:19",
    "updated_at": "2019-03-09T03:03:07",
    "cron_expression": null,
    "schedule_description": null,
    "next_auto_run_at": null,
    "access_group_id": 0
}

Update a scraping agent

Endpoint:

Method: PUT
URL: https://api.agenty.com/v1/agents/scraping/{{AGENT_ID}}

Headers:

Key Value Description
Content-Type application/json

Query params:

Key Value Description
apikey {{API_KEY}}

Body:

{
  "name": "Books scraping agent",
  "description": "This agent will extract the product list, prices, image and detail page hyperlink from books.toscrape.com website",
  "type": "scraping",
  "config": {
    "sourceurl": "http://books.toscrape.com/",
    "collections": [
      {
        "name": "Collection1",
        "fields": [
          {
            "name": "PRODUCT_NAME",
            "type": "CSS",
            "selector": "h3 a",
            "extract": "TEXT",
            "attribute": null,
            "from": null,
            "visible": true,
            "cleantrim": true,
            "joinresult": false,
            "postprocessing": [],
            "formatter": []
          },
          {
            "name": "PRICE",
            "type": "CSS",
            "selector": ".price_color",
            "extract": "TEXT",
            "attribute": "",
            "from": null,
            "visible": true,
            "cleantrim": true,
            "joinresult": false,
            "postprocessing": [
              {
                "function": "Insert",
                "parameters": [
                  {
                    "name": null,
                    "value": "http://books.toscrape.com/"
                  }
                ]
              }
            ],
            "formatter": []
          },
          {
            "name": "IMAGE",
            "type": "CSS",
            "selector": ".thumbnail",
            "extract": "ATTR",
            "attribute": "src",
            "from": null,
            "visible": true,
            "cleantrim": true,
            "joinresult": false,
            "postprocessing": [
              {
                "function": "Insert",
                "parameters": [
                  {
                    "name": "Input",
                    "value": "http://books.toscrape.com/"
                  }
                ]
              }
            ],
            "formatter": []
          },
          {
            "name": "DETAILS_PAGE_URL",
            "type": "CSS",
            "selector": ".product_pod h3 a",
            "extract": "ATTR",
            "attribute": "href",
            "from": null,
            "visible": true,
            "cleantrim": true,
            "joinresult": false,
            "postprocessing": [
              {
                "function": "Insert",
                "parameters": [
                  {
                    "name": "Input",
                    "value": "http://books.toscrape.com/"
                  }
                ]
              }
            ],
            "formatter": []
          }
        ]
      }
    ],
    "engine": {
      "name": "default",
      "loadjavascript": true,
      "loadimages": false,
      "timeout": 30,
      "viewport": {
        "width": 1280,
        "height": 600
      }
    },
    "waitafterpageload": null,
    "login": {
      "enabled": false,
      "type": null,
      "data": []
    },
    "logout": null,
    "pagination": {
      "enabled": true,
      "type": "CLICK",
      "selector": ".next a",
      "maxpages": 50
    },
    "header": {
      "method": "GET",
      "encoding": "utf-8",
      "data": [
        {
          "key": "Accept",
          "value": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
        },
        {
          "key": "User-Agent",
          "value": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
        },
        {
          "key": "Accept-Language",
          "value": "*"
        }
      ]
    },
    "autoredirect": {
      "enabled": true,
      "maxautoredirect": 3
    },
    "failretry": {
      "enabled": true,
      "maxtry": 3,
      "tryinterval": 2,
      "timeout": 0
    },
    "proxy": {
      "enabled": false,
      "type": null,
      "reference": null
    },
    "throttling": {
      "enabled": false,
      "type": null,
      "seconds": 0
    },
    "formsubmit": {
      "enabled": false,
      "data": []
    },
    "meta": null,
    "input": {
      "type": "SOURCE",
      "reference": null
    }
  }
}

Responses:

Status: OK | Code: 200

{
    "status_code": 200,
    "message": "Agent with id: gowlyyg3dp updated successfully"
}