Use Public Resources to answer a biological question

public 1yr ago Version: Version 1 0 bookmarks

View Workflow

use-public-resources-to-answer-a-biological-questi — View Workflow

Help improve this workflow!

This workflow has been published but could be further improved with some additional meta data:

Lack of a description for a new keyword tool/pypi/skimage .

You can help improve this workflow by suggesting the addition or removal of keywords, suggesting changes and reporting issues, or requesting to become a maintainer of the workflow.

Learning Objectives

How to access a genomic resource via its Python API
How to access an image resource via its Python API
How to relate image data to genomic data

Diabetes related genes expressed in pancreas

This notebook looks at the question: "Which diabetes-related genes are expressed in the pancreas?" The tissue and the disease can be modified.

Steps:

Query humanmine.org, an integrated database of Homo sapiens genomic data, using the InterMine API to find the genes.
Using the list of genes found, search the Image Data Resource (IDR) for images linked to the genes, tissue and disease.
Analyse the images found.

Launch

This notebook uses the environment.yml file.

See Setup .

Code Snippets

# Package required to interact with HumanMine
%pip install git+https://github.com/jburel/intermine-ws-python.git@python_3_10

# Package required to interact with IDR or OMERO
%pip install omero-py

Jupyter Notebook From line 2 of Day_4/PublicResources.ipynb

# libraries to interact with intermine
from intermine.webservice import Service

# libraries to interact with IDR
import requests
import json

Jupyter Notebook JSON requests InterMine From line 10 of Day_4/PublicResources.ipynb

19	service = Service("https://www.humanmine.org/humanmine/service")

Jupyter Notebook From line 19 of Day_4/PublicResources.ipynb

23	query = service.new_query("Gene")

Jupyter Notebook From line 23 of Day_4/PublicResources.ipynb

query.add_view(
    "primaryIdentifier", "symbol", "proteinAtlasExpression.cellType",
    "proteinAtlasExpression.level", "proteinAtlasExpression.reliability",
    "proteinAtlasExpression.tissue.name"
)

Jupyter Notebook From line 27 of Day_4/PublicResources.ipynb

TISSUE = "Pancreas"
DISEASE = "diabetes"

Jupyter Notebook From line 35 of Day_4/PublicResources.ipynb

query.add_constraint("proteinAtlasExpression.tissue.name", "=", TISSUE)
query.add_constraint("proteinAtlasExpression.level", "ONE OF", ["Medium", "High"])
query.add_constraint("organism.name", "=", "Homo sapiens")
query.add_constraint("diseases.name", "CONTAINS", DISEASE)

Jupyter Notebook From line 40 of Day_4/PublicResources.ipynb

# Collect the distinct gene symbols returned by the query, then order them
# in reverse alphabetical order.
upin_tissue = {row["symbol"] for row in query.rows()}
genes = sorted(upin_tissue, reverse=True)

Jupyter Notebook From line 47 of Day_4/PublicResources.ipynb

# Print the gene symbols separated by spaces, eight per line.
for position, symbol in enumerate(genes, start=1):
    print(symbol, end=' ')
    if position % 8 == 0:
        print("")

Jupyter Notebook From line 54 of Day_4/PublicResources.ipynb

INDEX_PAGE = "https://idr.openmicroscopy.org/webclient/?experimenter=-1"

# Create an HTTP session and keep it open: `session` is reused further down
# to query the IDR search engine, so it must not be closed by a `with` block
# at the end of this cell.
session = requests.Session()
response = session.get(INDEX_PAGE)
# raise_for_status() only raises for 4xx/5xx responses, so it can be called
# unconditionally.
response.raise_for_status()

Jupyter Notebook From line 61 of Day_4/PublicResources.ipynb

SEARCH_URL = "https://idr.openmicroscopy.org/searchengine/api/v1/resources/{type}/search/"
KEY_VALUE_SEARCH = SEARCH_URL + "?key={key}&value={value}"
KEY = "Gene Symbol"

Jupyter Notebook From line 73 of Day_4/PublicResources.ipynb

%%time
import collections
from collections import defaultdict

# Map each gene symbol to the list of IDR images annotated with it.
results = {}
for gene in genes:
    qs1 = {'type': 'image', 'key': KEY, 'value': gene}
    url = KEY_VALUE_SEARCH.format(**qs1)
    response = session.get(url)
    # Fail with a clear HTTP error instead of an obscure decoding/KeyError
    # when the search engine rejects the request.
    response.raise_for_status()
    # Do not bind the payload to the name `json`: that would shadow the json
    # module imported at the top of the notebook.
    payload = response.json()
    images = payload['results']['results']
    results[gene] = images

Jupyter Notebook JSON From line 79 of Day_4/PublicResources.ipynb

# Annotation key in IDR to find and filter by.
EXPRESSION_KEY = "Expression Pattern Description"
EXPRESSION = "Islets"
STAGE = "Developmental Stage"

Jupyter Notebook From line 93 of Day_4/PublicResources.ipynb

# For every gene, group the ids of the images whose expression pattern
# mentions EXPRESSION by their developmental stage.
development_stage = {}
for k in results:
    images = results[k]
    result_images = defaultdict(list)
    for image in images:
        values = image["key_values"]
        # First pass: find the developmental stage. Doing this before looking
        # at the expression pattern makes the grouping independent of the
        # order in which the key/value pairs are listed.
        stage = ""
        for v in values:
            if v["name"] == STAGE:
                stage = v["value"]
        # Second pass: keep the image if its expression pattern matches.
        for v in values:
            if v["name"] == EXPRESSION_KEY and EXPRESSION in v["value"]:
                result_images[stage].append(image["id"])
    development_stage[k] = result_images.items()

Jupyter Notebook From line 100 of Day_4/PublicResources.ipynb

118	print(development_stage)

Jupyter Notebook From line 118 of Day_4/PublicResources.ipynb

# URLs to retrieve the thumbnails and link to the images in IDR
BASE_URL = "https://idr.openmicroscopy.org/webclient"
IMAGE_DATA_URL = BASE_URL + "/render_thumbnail/{id}"
LINK_URL = BASE_URL + "/?show=image-{id}"

Jupyter Notebook From line 122 of Day_4/PublicResources.ipynb

# Display the images
from html import escape
from ipywidgets import AppLayout, widgets

table_widget = widgets.HTML("")

# Build the markup as a list of fragments and join once. Every heading and
# every strip of thumbnails gets its own, properly closed, table row.
fragments = ["<table>"]
for gene in development_stage:
    images = development_stage[gene]
    if len(images) > 0:
        # Annotation text comes from a remote service: escape it before
        # inserting it into the markup.
        fragments.append('<tr><td><h2>Gene: ' + escape(gene) + '</h2></td></tr>')
        for k, v in images:
            fragments.append('<tr><td><h4>Developmental stage: ' + escape(k) + '</h4></td></tr>')
            fragments.append('<tr>')
            for i in v:
                qs = {'id': i}
                url = IMAGE_DATA_URL.format(**qs)
                url_link = LINK_URL.format(**qs)
                fragments.append('<td><a href="'+url_link+'" target="_blank"><img src="'+url+'"/></a></td>')
            fragments.append('</tr>')
fragments.append("</table>")
html = "".join(fragments)

table_widget.value = html
AppLayout(header=None,
          left_sidebar=None,
          center=table_widget,
          right_sidebar=None,
          footer=None)

Jupyter Notebook ipywidgets From line 129 of Day_4/PublicResources.ipynb

PART_KEY = "Organism Part"
PATHOLOGY_KEY = "Pathology"
PATHOLOGY_NORMAL_VALUE = "Normal"

Jupyter Notebook From line 159 of Day_4/PublicResources.ipynb

# For every gene, group the ids of the images taken from the tissue of
# interest by pathology. All "normal" variants are merged under
# PATHOLOGY_NORMAL_VALUE.
pathology_images = {}
for k in results:
    images = results[k]
    result_images = defaultdict(list)
    for image in images:
        values = image["key_values"]
        part = None
        for v in values:
            name = v["name"]
            value = v['value']
            # Both operands of `or` must be membership tests: a bare
            # `TISSUE or ...` is always truthy and would accept every
            # organism part.
            if name is not None and PART_KEY in name and (TISSUE in value or EXPRESSION in value):
                part = value
        if part is None:
            # Not the tissue of interest: nothing to record for this image.
            continue
        for v in values:
            name = v["name"]
            value = v['value']
            if name == PATHOLOGY_KEY:
                if PATHOLOGY_NORMAL_VALUE in value:
                    result_images[PATHOLOGY_NORMAL_VALUE].append(image["id"])
                else:
                    result_images[value].append(image["id"])
    pathology_images[k] = result_images.items()

Jupyter Notebook From line 165 of Day_4/PublicResources.ipynb

import pandas as pd
import matplotlib.pyplot as plt
# Count, for one gene, the number of images per (non-normal) pathology and
# plot the counts as a horizontal bar chart.
disease_map = {}
gene = "PDX1"
# .get() avoids a KeyError when the gene was not part of the search results.
images = pathology_images.get(gene, [])
# Bound up front so the name exists even when no images are found.
disease_ordered = collections.OrderedDict()
if len(images) == 0:
    print("No images found")
else:
    for k, v in images:
        if k != PATHOLOGY_NORMAL_VALUE:
            disease_map[k] = len(v)

    disease_ordered = collections.OrderedDict(sorted(disease_map.items()))
    # The 'disease' column holds the pathology names (the keys), not the
    # (name, count) pairs returned by .items().
    df = pd.DataFrame({'disease': list(disease_ordered.keys()),
                       'number of images': list(disease_ordered.values())})
    df.plot(kind='barh', x='disease', y='number of images', figsize=(10,10))

Jupyter Notebook Pandas matplotlib From line 189 of Day_4/PublicResources.ipynb

from ipywidgets import GridspecLayout, widgets

increase = 8
max_value = increase
min_value = 0

disease = ""

def display_images(images, min, max):
    """
    Render one page of thumbnails into ``html_widget`` and update the
    image counter shown in ``count_widget``.

    :param images: Sequence of image ids
    :param min: Index of the first image to show; negative values are
                treated as 0
    :param max: Index one past the last image to show; capped at
                ``len(images)``
    """
    total = len(images)
    start = min if min >= 0 else 0
    stop = total if max >= total else max

    cells = []
    for image_id in images[start:stop]:
        ids = {'id': image_id}
        thumbnail = IMAGE_DATA_URL.format(**ids)
        target = LINK_URL.format(**ids)
        cells.append('<td><a href="'+target+'" target="_blank"><img src="'+thumbnail+'"/></a>&nbsp;</td>')
    html_widget.value = "<table>" + '<tr>' + "".join(cells) + "</tr>" + "</table>"

    # Set the number of images found
    count_widget.value = "<b>Number of images found: " + str(total) + "</b>"

def on_selection_change(change):
    """
    React to a new selection in the disease dropdown: remember the selected
    disease, go back to the first page and display it.

    :param change: The change dictionary passed by the widget; only changes
                   whose ``name`` is ``'value'`` are handled
    """
    # The paging offsets are module-level state shared with the Next and
    # Previous handlers; without the global declaration the assignments
    # below would only create locals and the old offsets would be kept.
    global disease, min_value, max_value
    if change['name'] != 'value':
        return
    values = get_images(change['new'])
    if values is None:
        return
    disease = change['new']
    min_value = 0
    max_value = increase
    # Back on the first page: nothing before it, and nothing after it when
    # every image already fits on this page.
    button_previous.disabled = True
    button_next.disabled = max_value >= len(values)
    display_images(values, min_value, max_value)

def get_images(disease):
    """
    Look up the image ids recorded for a disease.

    :param disease: The disease (pathology) name to look for
    :return: The value paired with the first matching name in ``images``,
             or None when there is no match
    """
    return next((ids for label, ids in images if label == disease), None)

def on_click_next(b):
    """
    Show the next page of thumbnails for the currently selected disease.

    :param b: The argument supplied by the button's click callback (unused)
    """
    global min_value
    global max_value
    values = get_images(disease)
    if values is None:
        # Nothing to page through: leave the offsets untouched.
        return
    if min_value + increase >= len(values):
        # Already on the last page: do not advance onto an empty page.
        button_next.disabled = True
        return
    min_value = min_value + increase
    max_value = max_value + increase
    button_previous.disabled = False
    # ">=" rather than ">": when the page ends exactly on the last image
    # there is nothing left to show.
    button_next.disabled = max_value >= len(values)

    display_images(values, min_value, max_value)

def on_click_previous(b):
    """
    Show the previous page of thumbnails for the currently selected disease.

    :param b: The argument supplied by the button's click callback (unused)
    """
    global min_value, max_value
    min_value, max_value = min_value - increase, max_value - increase
    button_next.disabled = False
    if min_value <= 0:
        # Reached (or passed) the first page: clamp to it.
        min_value, max_value = 0, increase
        button_previous.disabled = True
    page_source = get_images(disease)
    if page_source is None:
        return
    display_images(page_source, min_value, max_value)

def dropdown_widget(disease_list,
                    dropdown_widget_name,
                    displaywidget=False):
    """
    Create the disease dropdown, register the selection handler and display
    the first page of images for the initially selected disease.

    :param disease_list: The options to offer; the first one is preselected
    :param dropdown_widget_name: The label shown next to the dropdown
    :param displaywidget: Not used by this function
    :return: The dropdown widget
    """
    initial_choice = disease_list[0]
    selection = widgets.Dropdown(
        options=disease_list,
        value=initial_choice,
        description=dropdown_widget_name,
        disabled=False,
    )
    selection.observe(on_selection_change)

    first_page = get_images(selection.value)
    display_images(first_page, min_value, max_value)
    return selection

# Assemble the paged image browser for the selected gene.
disease_list = list(disease_ordered.keys())
disease = disease_list[0]
# The three HTML widgets must exist before dropdown_widget() is called:
# it renders the first page through display_images(), which writes to
# html_widget and count_widget.
gene_widget = widgets.HTML("")
count_widget = widgets.HTML("")
html_widget = widgets.HTML("")
disease_box = dropdown_widget(
    disease_list,
    'Disease: ', True
)

button_next = widgets.Button(description="Next>>")
button_next.on_click(on_click_next)

# Starts disabled: the first page is the one displayed initially.
button_previous = widgets.Button(description="<<Previous", disabled=True)
button_previous.on_click(on_click_previous)

gene_widget.value = "Gene: <b>" + gene + "</b>"

# 3x3 layout: row 0 holds the gene label, the dropdown and the counter,
# row 1 the thumbnails (all columns), row 2 the paging buttons.
grid = GridspecLayout(3, 3)
grid[0, 0] = gene_widget
grid[0, 1] = disease_box
grid[0, 2] = count_widget
grid[2, 0] = button_previous
grid[1, :] = html_widget
grid[2, 2] = button_next
grid

Jupyter Notebook ipywidgets From line 208 of Day_4/PublicResources.ipynb

BASE_SEARCH_URL = "https://idr.openmicroscopy.org/searchengine/api/v1/"
IMAGE_SEARCH = "/resources/image/searchannotation/"
IMAGE_SEARCH_PAGE = "/resources/image/searchannotation_page/"

Jupyter Notebook From line 325 of Day_4/PublicResources.ipynb

'''
**Query 1**
Organism Part           Small intestine OR Duodenum
Pathology               Adenocarcinoma (all) ==> contains (adenocarcinoma)
Gene Symbol             PDX1

'''
query_1 = {
    "query_details": {
        "and_filters": [
            {
                "name": "Gene Symbol",
                "value": "PDX1",
                "operator": "equals",
                "resource": "image"
            },
            {
                "name": "Pathology",
                "value": "adenocarcinoma",
                "operator": "contains",
                "resource": "image"
            }
        ],
        "or_filters": [[
            {
                "name": "Organism Part",
                "value": "Duodenum",
                "operator": "equals",
                "resource": "image"
            },
            {
                "name": "Organism Part",
                "value": "Small intestine",
                "operator": "equals",
                "resource": "image"
            }

        ]

        ],
        "case_sensitive": False
    }
}

Jupyter Notebook From line 331 of Day_4/PublicResources.ipynb

'''
**Query 2**
Organism Part           Small intestine OR Duodenum
Pathology               normal nos ==> normal tissue, nos
Gene Symbol             PDX1
'''
query_2 = {
    "query_details": {
        "and_filters": [
            {
                "name": "Gene Symbol",
                "value": "PDX1",
                "operator": "equals",
                "resource": "image"
            },
            {
                "name": "Pathology",
                "value": "normal tissue, nos",
                "operator": "equals",
                "resource": "image"
            }
        ],
        "or_filters": [[
            {
                "name": "Organism Part",
                "value": "Duodenum",
                "operator": "equals",
                "resource": "image"
            },
            {
                "name": "Organism Part",
                "value": "Small intestine",
                "operator": "equals",
                "resource": "image"
            }

        ]

        ],
        "case_sensitive": False
    }
}

Jupyter Notebook From line 377 of Day_4/PublicResources.ipynb

import json
def query_the_search_ending(query):
    """
    Run a query against the search engine and collect the results of all
    pages.

    The first page is requested from ``IMAGE_SEARCH``; the following pages
    are requested from ``IMAGE_SEARCH_PAGE`` using the bookmark returned
    with the previous page.

    :param query: The query dictionary (containing ``query_details``)
    :return: A list with the result records. The list is empty when the
             query returns nothing or the first response cannot be parsed,
             and partial when a later page cannot be parsed.
    """
    received_results_data = []
    query_data = {"query": query}
    resp = requests.post(
        url="%s%s" % (BASE_SEARCH_URL, IMAGE_SEARCH), data=json.dumps(query_data)
    )
    try:
        returned_results = json.loads(resp.text)
    except ValueError:
        # The response body is not valid JSON.
        return []
    if not returned_results.get("results"):
        print("Your query returns no results")
        return []
    total_results = returned_results["results"]["size"]
    print("Total no of result records %s" % total_results)
    received_results_data.extend(returned_results["results"]["results"])

    bookmark = returned_results["results"]["bookmark"]
    while len(received_results_data) < total_results:
        query_data = {
            "query": {"query_details": returned_results["query_details"]},
            "bookmark": bookmark,
        }
        # The page endpoint lives under the search engine base URL, like the
        # first request, not under the webclient URL (BASE_URL).
        resp = requests.post(
            url="%s%s" % (BASE_SEARCH_URL, IMAGE_SEARCH_PAGE),
            data=json.dumps(query_data),
        )
        try:
            returned_results = json.loads(resp.text)
        except ValueError:
            # Always hand back a list so callers can iterate over the result.
            print("Could not parse a result page, returning partial results")
            return received_results_data
        page_results = returned_results["results"]["results"]
        if not page_results:
            # An empty page would never advance the count: stop instead of
            # looping forever.
            break
        received_results_data.extend(page_results)
        bookmark = returned_results["results"]["bookmark"]
    return received_results_data

Jupyter Notebook JSON From line 422 of Day_4/PublicResources.ipynb

469	results_1 = query_the_search_ending(query_1)

Jupyter Notebook From line 469 of Day_4/PublicResources.ipynb

473	results_2 = query_the_search_ending(query_2)

Jupyter Notebook From line 473 of Day_4/PublicResources.ipynb

# Display the thumbnails of the images returned by the second query, each
# one below a row showing its image id.
fragments = ["<table>"]
# `or []` keeps the cell working when the query function returned None.
for r in results_2 or []:
    # Named result_id rather than `id` so the builtin is not shadowed.
    result_id = r["id"]
    qs = {'id': result_id}
    url = IMAGE_DATA_URL.format(**qs)
    url_link = LINK_URL.format(**qs)
    fragments.append('<tr><td><b>Image ID: '+str(result_id)+'</b></td></tr>')
    # The thumbnail cell needs its own row: a <td> outside a <tr> is invalid.
    fragments.append('<tr><td><a href="'+url_link+'" target="_blank"><img src="'+url+'"/></a>&nbsp;</td></tr>')
fragments.append("</table>")
html = "".join(fragments)
table_widget = widgets.HTML("")
table_widget.value = html
AppLayout(header=None, left_sidebar=None,
              center=table_widget,
              right_sidebar=None,
              footer=None)

Jupyter Notebook From line 477 of Day_4/PublicResources.ipynb

494	image_id = 4387380

Jupyter Notebook From line 494 of Day_4/PublicResources.ipynb

from omero.gateway import BlitzGateway
HOST = 'ws://idr.openmicroscopy.org/omero-ws'
conn = BlitzGateway('public', 'public',
                    host=HOST, secure=True)
print(conn.connect())
conn.c.enableKeepAlive(60)

Jupyter Notebook omero-py From line 498 of Day_4/PublicResources.ipynb

image = conn.getObject("Image", image_id)
print(image.getName())

Jupyter Notebook From line 507 of Day_4/PublicResources.ipynb

import numpy
def load_numpy_array(image):
    """
    Load the first plane (z=0, t=0) of every channel of an image and stack
    the planes along a third axis.

    :param image: The image to load; it must provide getPrimaryPixels(),
                  getSizeC() and getName()
    :return: The array returned by numpy.dstack for the loaded planes, with
             one entry per channel along the last axis (each plane is
             presumably a 2D YX array)
    """
    pixels = image.getPrimaryPixels()
    z, t = 0, 0  # first plane of the image

    # One (z, c, t) index per channel
    c_list = [(z, c, t) for c in range(image.getSizeC())]

    planes = pixels.getPlanes(c_list)
    print("Downloading image %s" % image.getName())
    return numpy.dstack(list(planes))

Jupyter Notebook numpy From line 512 of Day_4/PublicResources.ipynb

data = load_numpy_array(image)

Jupyter Notebook From line 533 of Day_4/PublicResources.ipynb

def disconnect(conn):
    """
    Close the connection to an OMERO server.

    :param conn: The BlitzGateway whose connection is closed
    """
    conn.close()

disconnect(conn)

Jupyter Notebook From line 537 of Day_4/PublicResources.ipynb

from skimage.color import rgb2hed
# Convert the image to HED using the pre-built skimage method
ihc_hed = rgb2hed(data)

Jupyter Notebook skimage From line 548 of Day_4/PublicResources.ipynb

# Create an RGB image for each of the stains
from skimage.color import hed2rgb
null = numpy.zeros_like(ihc_hed[:, :, 0],)
ihc_h = hed2rgb(numpy.stack((ihc_hed[:, :, 0], null, null), axis=-1))
ihc_e = hed2rgb(numpy.stack((null, ihc_hed[:, :, 1], null), axis=-1))
ihc_d = hed2rgb(numpy.stack((null, null, ihc_hed[:, :, 2]), axis=-1))

# Display
fig, axes = plt.subplots(2, 2, figsize=(7, 6), sharex=True, sharey=True)
ax = axes.ravel()

ax[0].imshow(data)
ax[0].set_title("Original image")

ax[1].imshow(ihc_h)
ax[1].set_title("Hematoxylin")

ax[2].imshow(ihc_e)
ax[2].set_title("Eosin")  # Note that there is no Eosin stain in this image

ax[3].imshow(ihc_d)
ax[3].set_title("DAB")

for a in ax.ravel():
    a.axis('off')

fig.tight_layout()

Jupyter Notebook skimage From line 554 of Day_4/PublicResources.ipynb

from skimage.exposure import rescale_intensity

# Rescale hematoxylin and DAB channels and give them a fluorescence look

h = rescale_intensity(ihc_hed[:, :, 0], out_range=(0, 1),
                      in_range=(0, numpy.percentile(ihc_hed[:, :, 0], 99)))
d = rescale_intensity(ihc_hed[:, :, 2], out_range=(0, 1),
                      in_range=(0, numpy.percentile(ihc_hed[:, :, 2], 99)))

# Cast the two channels into an RGB image, as the blue and green channels
# respectively
zdh = numpy.dstack((null, d, h))

fig = plt.figure()
axis = plt.subplot(1, 1, 1, sharex=ax[0], sharey=ax[0])
axis.imshow(zdh)
axis.set_title('Stain-separated image (rescaled)')
axis.axis('off')
plt.show()

Jupyter Notebook skimage From line 584 of Day_4/PublicResources.ipynb

def image_show(image, nrows=1, ncols=1, cmap='gray', **kwargs):
    """
    Show an image without axes in a new 16x16 figure.

    :param image: The image to display
    :param nrows: Number of rows of the subplot grid
    :param ncols: Number of columns of the subplot grid
    :param cmap: The colormap passed to imshow
    :param kwargs: Extra keyword arguments passed to imshow
    :return: The figure and the axes object(s) returned by plt.subplots
    """
    fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=(16, 16))
    # plt.subplots returns an array of axes for grids larger than 1x1;
    # fig.axes is always a flat list, so draw on its first entry.
    all_axes = fig.axes
    all_axes[0].imshow(image, cmap=cmap, **kwargs)
    for axis in all_axes:
        axis.axis('off')
    return fig, ax

Jupyter Notebook From line 606 of Day_4/PublicResources.ipynb

# Compare global (Otsu) and local (Niblack, Sauvola) thresholding of the
# DAB stain image in a 2x2 grid of subplots.
# NOTE(review): this import rebinds the name `data`, which an earlier cell
# assigned to the image array loaded from IDR; `data` is not used in this
# cell.
from skimage import data
from skimage import filters
from skimage.color import rgb2gray
import matplotlib.pyplot as plt

# Setting plot size to 15, 15
plt.figure(figsize=(15, 15))

# Grayscale version of the DAB image; all thresholds are computed on it
gray_ihc_d = rgb2gray(ihc_d)

# Computing Otsu's thresholding value
threshold = filters.threshold_otsu(gray_ihc_d)

# Computing binarized values using the obtained
# threshold
binarized_ihc_d = (gray_ihc_d > threshold)*1
plt.subplot(2,2,1)
plt.title("Threshold: >"+str(threshold))

# Displaying the binarized image
plt.imshow(binarized_ihc_d, cmap = "gray")

# Computing Niblack's local pixel
# threshold values for every pixel
threshold = filters.threshold_niblack(gray_ihc_d)

# Computing binarized values using the obtained
# threshold
binarized_ihc_d = (gray_ihc_d > threshold)*1
plt.subplot(2,2,2)
plt.title("Niblack Thresholding")

# Displaying the binarized image
plt.imshow(binarized_ihc_d, cmap = "gray")

# Computing Sauvola's local pixel threshold
# values for every pixel - Not Binarized
threshold = filters.threshold_sauvola(gray_ihc_d)
plt.subplot(2,2,3)
plt.title("Sauvola Thresholding")

# Displaying the local threshold values
plt.imshow(threshold, cmap = "gray")

# Computing Sauvola's local pixel
# threshold values for every pixel - Binarized
# (reuses the Sauvola threshold computed above)
binarized_ihc_d = (gray_ihc_d > threshold)*1
plt.subplot(2,2,4)
plt.title("Sauvola Thresholding - Converting to 0's and 1's")

# Displaying the binarized image
plt.imshow(binarized_ihc_d, cmap = "gray")