Skip to content

Commit

Permalink
Add slugs to org backend (webrecorder#1250)
Browse files Browse the repository at this point in the history
- Add slug field with uniqueness constraint to Organization
- Use python-slugify to generate slug from name and import that in migration
- Require name in all /rename and org creation requests
- Auto-generate slug for new org with no slug or when /rename is called w/o a slug
- Auto-generate slug for 'default-org' based on name

- Add /api/orgs/slugs GET endpoint to return all slugs in use

- tests: extend backend test-requirements.txt from requirements to allow testing slugify
- tests: move get_redis_crawl_stats() to avoid extra dependency in utils
  • Loading branch information
tw4l authored Oct 11, 2023
1 parent 16e7a1d commit 266afdf
Show file tree
Hide file tree
Showing 14 changed files with 138 additions and 29 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/k3d-ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ jobs:
python-version: '3.9'

- name: Install Python Libs
run: pip install pytest requests
run: pip install -r ./backend/test-requirements.txt

- name: Wait for all pods to be ready
run: kubectl wait --for=condition=ready pod --all --timeout=240s
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/k3d-nightly-ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ jobs:
python-version: '3.9'

- name: Install Python Libs
run: pip install pytest requests
run: pip install -r ./backend/test-requirements.txt

- name: Wait for all pods to be ready
run: kubectl wait --for=condition=ready pod --all --timeout=240s
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/microk8s-ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:
python-version: '3.9'

- name: Install Python Libs
run: pip install pytest requests
run: pip install -r ./backend/test-requirements.txt

- name: Wait for all pods to be ready
run: sudo microk8s kubectl wait --for=condition=ready pod --all --timeout=240s
Expand Down
2 changes: 1 addition & 1 deletion backend/btrixcloud/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from .migrations import BaseMigration


CURR_DB_VERSION = "0018"
CURR_DB_VERSION = "0019"


# ============================================================================
Expand Down
33 changes: 33 additions & 0 deletions backend/btrixcloud/migrations/migration_0019_org_slug.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""
Migration 0019 - Organization slug
"""
from btrixcloud.migrations import BaseMigration
from btrixcloud.utils import slug_from_name


MIGRATION_VERSION = "0019"


class Migration(BaseMigration):
    """Migration 0019: store a slug on organizations that lack one."""

    def __init__(self, mdb, migration_version=MIGRATION_VERSION):
        super().__init__(mdb, migration_version)

    async def migrate_up(self):
        """Perform migration up.

        Walks the ``organizations`` collection for documents matched by the
        ``{"slug": {"$eq": None}}`` filter (in MongoDB this matches both a
        null and a missing field) and sets ``slug`` to the value generated
        from the organization's name.

        A failed update is printed and the loop continues with the next
        organization.
        """
        # pylint: disable=duplicate-code
        orgs_coll = self.mdb["organizations"]
        missing_slug_query = {"slug": {"$eq": None}}

        async for org_doc in orgs_coll.find(missing_slug_query):
            org_id = org_doc["_id"]
            new_slug = slug_from_name(org_doc["name"])
            try:
                await orgs_coll.find_one_and_update(
                    {"_id": org_id}, {"$set": {"slug": new_slug}}
                )
            # pylint: disable=broad-exception-caught
            except Exception as err:
                print(f"Error adding slug to org {org_id}: {err}", flush=True)
3 changes: 3 additions & 0 deletions backend/btrixcloud/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,7 @@ class RenameOrg(BaseModel):
"""Request to rename an organization"""

# new name for the organization
name: str
# optional new slug; None when the request does not supply one
slug: Optional[str] = None


# ============================================================================
Expand Down Expand Up @@ -664,6 +665,7 @@ class Organization(BaseMongoModel):
id: UUID4

name: str
slug: str

users: Dict[str, UserRole]

Expand Down Expand Up @@ -751,6 +753,7 @@ class OrgOut(BaseMongoModel):

id: UUID4
name: str
slug: str
users: Optional[Dict[str, Any]]
usage: Optional[Dict[str, int]]
crawlExecSeconds: Optional[Dict[str, int]]
Expand Down
20 changes: 18 additions & 2 deletions backend/btrixcloud/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@
from pydantic import BaseModel, Field

from kubernetes.utils import parse_quantity
from redis import asyncio as exceptions

from .utils import (
from_k8s_date,
to_k8s_date,
dt_now,
get_redis_crawl_stats,
)
from .k8sapi import K8sAPI

Expand Down Expand Up @@ -1075,10 +1075,26 @@ def is_crawl_stopping(self, crawl, size):

return False

async def get_redis_crawl_stats(self, redis, crawl_id):
    """Read page and size stats for a crawl from redis.

    Returns a ``(stats, sizes)`` tuple. ``stats`` is a dict with the
    ``found``, ``done`` and ``size`` totals; ``sizes`` is the raw
    ``{crawl_id}:size`` hash whose values were summed into ``size``.
    """
    done_key = f"{crawl_id}:d"
    try:
        # crawler >0.9.0, done key is a value
        done_count = int(await redis.get(done_key) or 0)
    except exceptions.ResponseError:
        # crawler <=0.9.0, done key is a list
        done_count = await redis.llen(done_key)

    found_count = await redis.scard(f"{crawl_id}:s")
    sizes = await redis.hgetall(f"{crawl_id}:size")

    # the size hash stores one value per entry; add them up as ints
    total_size = 0
    for raw_size in sizes.values():
        total_size += int(raw_size)

    return {"found": found_count, "done": done_count, "size": total_size}, sizes

async def update_crawl_state(self, redis, crawl, status, pods, done):
"""update crawl state and check if crawl is now done"""
results = await redis.hgetall(f"{crawl.id}:status")
stats, sizes = await get_redis_crawl_stats(redis, crawl.id)
stats, sizes = await self.get_redis_crawl_stats(redis, crawl.id)

# need to add size of previously completed WACZ files as well!
stats["size"] += status.filesAddedSize
Expand Down
27 changes: 26 additions & 1 deletion backend/btrixcloud/orgs.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
PaginatedResponse,
)
from .pagination import DEFAULT_PAGE_SIZE, paginated_format
from .utils import slug_from_name


DEFAULT_ORG = os.environ.get("DEFAULT_ORG", "My Organization")
Expand Down Expand Up @@ -61,7 +62,8 @@ async def init_index(self):
"""init lookup index"""
while True:
try:
return await self.orgs.create_index("name", unique=True)
await self.orgs.create_index("name", unique=True)
return await self.orgs.create_index("slug", unique=True)
# pylint: disable=duplicate-code
except AutoReconnect:
print(
Expand Down Expand Up @@ -92,6 +94,7 @@ async def create_new_org_for_user(
org = Organization(
id=id_,
name=org_name,
slug=slug_from_name(org_name),
users={str(user.id): UserRole.OWNER},
storage=DefaultStorage(name=storage_name, path=storage_path),
)
Expand Down Expand Up @@ -162,6 +165,7 @@ async def create_default_org(self, storage_name="default"):
print("Default organization already exists - skipping", flush=True)
else:
default_org.name = DEFAULT_ORG
default_org.slug = slug_from_name(DEFAULT_ORG)
await self.update(default_org)
print(f'Default organization renamed to "{DEFAULT_ORG}"', flush=True)
return
Expand All @@ -171,6 +175,7 @@ async def create_default_org(self, storage_name="default"):
org = Organization(
id=id_,
name=DEFAULT_ORG,
slug=slug_from_name(DEFAULT_ORG),
users={},
storage=DefaultStorage(name=storage_name, path=storage_path),
default=True,
Expand Down Expand Up @@ -392,6 +397,11 @@ async def get_org_metrics(self, org: Organization):
"publicCollectionsCount": public_collections_count,
}

async def get_all_org_slugs(self):
    """Return the distinct org slugs, wrapped as ``{"slugs": [...]}``."""
    return {"slugs": await self.orgs.distinct("slug", {})}


# ============================================================================
# pylint: disable=too-many-statements
Expand Down Expand Up @@ -479,9 +489,15 @@ async def create_org(

id_ = uuid.uuid4()
storage_path = str(id_) + "/"

slug = new_org.slug
if not slug:
slug = slug_from_name(new_org.name)

org = Organization(
id=id_,
name=new_org.name,
slug=slug,
users={},
storage=DefaultStorage(name="default", path=storage_path),
)
Expand All @@ -502,6 +518,11 @@ async def rename_org(
org: Organization = Depends(org_owner_dep),
):
org.name = rename.name
if rename.slug:
org.slug = rename.slug
else:
org.slug = slug_from_name(rename.name)

try:
await ops.update(org)
except DuplicateKeyError:
Expand Down Expand Up @@ -649,4 +670,8 @@ async def add_new_user_to_org(
async def get_org_metrics(org: Organization = Depends(org_dep)):
return await ops.get_org_metrics(org)

# GET /orgs/slugs: responds with whatever ops.get_all_org_slugs() returns.
# NOTE(review): this handler declares no Depends(...) auth dependency, unlike
# the neighbouring handlers that take org_dep / org_owner_dep. Confirm that
# listing every org slug without authentication is intended.
@app.get("/orgs/slugs", tags=["organizations"])
async def get_all_org_slugs():
    return await ops.get_all_org_slugs()

return ops
24 changes: 6 additions & 18 deletions backend/btrixcloud/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from datetime import datetime

from redis import asyncio as exceptions
from slugify import slugify


def get_templates_dir():
Expand Down Expand Up @@ -38,23 +38,6 @@ def ts_now():
return str(dt_now())


async def get_redis_crawl_stats(redis, crawl_id):
    """Fetch page counts and archive size for a crawl from redis.

    Returns ``(stats, sizes)``: ``stats`` holds the ``found``, ``done`` and
    ``size`` totals, ``sizes`` is the ``{crawl_id}:size`` hash as read.
    """
    pages_done_key = f"{crawl_id}:d"
    try:
        # crawler >0.9.0, done key is a value
        raw_done = await redis.get(pages_done_key)
        num_done = int(raw_done or 0)
    except exceptions.ResponseError:
        # crawler <=0.9.0, done key is a list
        num_done = await redis.llen(pages_done_key)

    num_found = await redis.scard(f"{crawl_id}:s")
    sizes = await redis.hgetall(f"{crawl_id}:size")

    stats = {
        "found": num_found,
        "done": num_done,
        "size": sum(map(int, sizes.values())),
    }
    return stats, sizes


def run_once_lock(name):
"""run once lock via temp directory
- if dir doesn't exist, return true
Expand Down Expand Up @@ -109,3 +92,8 @@ def is_bool(stri: Optional[str]) -> bool:
if stri:
return stri.lower() in ("true", "1", "yes")
return False


def slug_from_name(name: str) -> str:
    """Generate slug from name.

    Apostrophes are removed from ``name`` before the remainder is passed to
    ``slugify``.
    """
    apostrophe_free = name.replace("'", "")
    return slugify(apostrophe_free)
1 change: 1 addition & 0 deletions backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ pathvalidate
https://github.com/ikreymer/stream-zip/archive/refs/heads/stream-uncompress.zip
boto3
backoff>=2.2.1
python-slugify>=8.0.1
4 changes: 4 additions & 0 deletions backend/test-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-r requirements.txt

pytest
requests
2 changes: 1 addition & 1 deletion backend/test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def non_default_org_id(admin_auth_headers):
r = requests.post(
f"{API_PREFIX}/orgs/create",
headers=admin_auth_headers,
json={"name": NON_DEFAULT_ORG_NAME},
json={"name": NON_DEFAULT_ORG_NAME, "slug": "non-default-org"},
)
assert r.status_code == 200

Expand Down
26 changes: 23 additions & 3 deletions backend/test/test_org.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ def test_get_org_crawler(crawler_auth_headers, default_org_id):

def test_rename_org(admin_auth_headers, default_org_id):
UPDATED_NAME = "updated org name"
rename_data = {"name": UPDATED_NAME}
UPDATED_SLUG = "updated-org-name"
rename_data = {"name": UPDATED_NAME, "slug": UPDATED_SLUG}
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/rename",
headers=admin_auth_headers,
Expand All @@ -61,19 +62,20 @@ def test_rename_org(admin_auth_headers, default_org_id):
data = r.json()
assert data["updated"]

# Verify that name is now updated.
# Verify that name and slug are now updated.
r = requests.get(f"{API_PREFIX}/orgs/{default_org_id}", headers=admin_auth_headers)
assert r.status_code == 200
data = r.json()
assert data["name"] == UPDATED_NAME
assert data["slug"] == UPDATED_SLUG


def test_create_org(admin_auth_headers):
NEW_ORG_NAME = "New Org"
r = requests.post(
f"{API_PREFIX}/orgs/create",
headers=admin_auth_headers,
json={"name": NEW_ORG_NAME},
json={"name": NEW_ORG_NAME, "slug": "new-org"},
)

assert r.status_code == 200
Expand Down Expand Up @@ -389,3 +391,21 @@ def test_org_metrics(crawler_auth_headers, default_org_id):
assert data["workflowsQueuedCount"] >= 0
assert data["collectionsCount"] > 0
assert data["publicCollectionsCount"] >= 0


def test_get_org_slugs(admin_auth_headers):
    """Check /orgs/slugs against the org listing returned by /orgs."""
    # Reference data: org total and slugs as reported by /orgs
    orgs_resp = requests.get(f"{API_PREFIX}/orgs", headers=admin_auth_headers)
    assert orgs_resp.status_code == 200
    orgs_data = orgs_resp.json()
    expected_count = orgs_data["total"]
    known_slugs = [org["slug"] for org in orgs_data["items"]]

    # /orgs/slugs must return one slug per org, each also listed by /orgs
    slugs_resp = requests.get(f"{API_PREFIX}/orgs/slugs", headers=admin_auth_headers)
    assert slugs_resp.status_code == 200
    returned_slugs = slugs_resp.json()["slugs"]

    assert len(returned_slugs) == expected_count
    unknown_slugs = [slug for slug in returned_slugs if slug not in known_slugs]
    assert not unknown_slugs
19 changes: 19 additions & 0 deletions backend/test/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""utils tests"""
import pytest

from btrixcloud.utils import slug_from_name


# Each case pairs an org name with the slug that slug_from_name() must return.
@pytest.mark.parametrize(
    "name,expected_slug",
    [
        ("Default org", "default-org"),
        # apostrophes are dropped rather than becoming a separator
        ("User's org", "users-org"),
        ("User's @ org", "users-org"),
        # accented letters are expected to come out as plain ASCII
        ("Org with åccénted charactêrs", "org-with-accented-characters"),
        ("Org with åccénted! charactêrs@!", "org-with-accented-characters"),
        # punctuation and emoji are expected to leave nothing in the slug
        ("cATs! 🐈🐈‍⬛", "cats"),
    ],
)
def test_slug_from_name(name: str, expected_slug: str):
    """slug_from_name() returns the expected slug for each sample name."""
    assert slug_from_name(name) == expected_slug

0 comments on commit 266afdf

Please sign in to comment.