Reese#

Conversion Example#

Presented at ONICE meeting 2022-04-01. Sample dataset to be uploaded separately

Demonstrates reading trialwise behavior data for odor concentration experiment, including

  1
  2import numpy as np
  3import os
  4from datetime import datetime
  5from pynwb import NWBFile
  6from pynwb import TimeSeries
  7from pynwb.behavior import Position
  8from pynwb import NWBHDF5IO
  9from pynwb.file import Subject
 10
 11data_dir = "./session data/"
 12save_dir = "./NWB1.5.1/"
 13
 14subjects = os.listdir(data_dir)
 15
 16for subject in subjects:
 17     subject_dir = data_dir + subject + "/"
 18     subject_number = subject[-4:]
 19     experiments = os.listdir(subject_dir)
 20
 21     for experiment in experiments:
 22         experiment_dir = subject_dir + experiment + "/"
 23         sessions = os.listdir(experiment_dir)
 24         if experiment == 'ARHMM': #leave out ARHMM data for now
 25             continue
 26
 27         frame_data_exists = False
 28         trial_data_exists = False
 29         sniff_signal_exists = False
 30
 31         for session in sessions:
 32             session_dir = experiment_dir + session + "/"
 33             session_number = session[-2:]
 34             if session_number == 'nt': #skip pre-implant data
 35                 continue
 36
 37             #Neurodata Without Borders file settings
 38             nwb_description = "Experiment type: " + experiment + ", " + subject + ", " + session
 39             print(nwb_description)
 40             nwb_identifier = subject_number + '_' + experiment + '_' + session_number
 41             io = NWBHDF5IO(save_dir + nwb_identifier + '.nwb', mode='w')
 42
 43             dateinfo = "2021-01-01, 01:00:00"
 44             session_date_info = datetime.strptime(dateinfo, "%Y-%m-%d,%H:%M:%S")
 45             if os.path.exists(session_dir + "notes.txt") == True:
 46                 with open(session_dir + "notes.txt") as f:
 47                     notes = f.readlines()
 48                     for line in range(0,len(notes)):
 49                         if 'Date' in notes[line]:
 50                             dateinfo = notes[line]
 51                             dateinfo = dateinfo[6:26]
 52                             if dateinfo[14] == '-':
 53                                 session_date_info = datetime.strptime(dateinfo, "%Y-%m-%d: %H-%M-%S")
 54                             else:
 55                                 session_date_info = datetime.strptime(dateinfo, "%Y-%m-%d, %H:%M:%S")
 56
 57             if os.path.exists(session_dir + "trial_params.txt") == True:
 58                 #sampled by trial
 59                 trial_data_exists = True
 60                 trial_params = np.genfromtxt(session_dir + "trial_params.txt",delimiter = ',',skip_header=0)
 61                 concentration_level = trial_params[:,0]
 62                 stimulus_side = trial_params[:,1]
 63                 chosen_side = trial_params[:,2]
 64                 trial_start = trial_params[:,3]
 65                 trial_end = trial_params[:,4]
 66
 67             if os.path.exists(session_dir + "frame_params_wITI.txt") == True: #sampled at 80 Hz
 68                 frame_data_exists = True
 69                 frame_params = np.genfromtxt(session_dir + "frame_params_wITI.txt",delimiter = ',',skip_header=0)
 70                 nose_x = frame_params[:,0]; nose_y = frame_params[:,1]
 71                 head_x = frame_params[:,2]; head_y = frame_params[:,3]
 72                 body_x = frame_params[:,4]; body_y = frame_params[:,5]
 73                 frame_msec = frame_params[:,7]
 74
 75             if os.path.exists(session_dir + "sniff.bin") == True:
 76                 #sampled at 800 Hz
 77                 sniff_signal_exists = True
 78                 sniff_signal = np.fromfile(session_dir + "sniff.bin",dtype = 'float')
 79
 80             #create a session-specific neurodata without borders file
 81             subject_info = Subject(age=None, description=None,
 82                                    genotype=None, sex=None, species='Mouse', subject_id=subject,
 83                                    weight=None, date_of_birth=None, strain='B6')
 84             nwbfile = NWBFile(session_description=nwb_description,  #required
 85                               identifier=nwb_identifier,  # required
 86                               session_start_time=session_date_info,  #required
 87                               subject = subject_info,
 88                               session_id=session, #optional
 89                               file_create_date= session_date_info)  #optional
 90
 91             if trial_data_exists == True:
 92                 nwbfile.add_trial_column(name='level', description='the level of odor concentration stimulus presented')
 93                 nwbfile.add_trial_column(name='side', description='which side of the assay the correct stimulus is presented on')
 94                 nwbfile.add_trial_column(name='chosen', description='which side of the assay the mouse chose (correct or incorrect)')
 95
 96                 for trial in range(0,len(concentration_level)):
 97                     nwbfile.add_trial(start_time=trial_start[trial],
 98                                       stop_time=trial_end[trial],
 99                                       level=concentration_level[trial],side=stimulus_side[trial],
100                                       chosen=chosen_side[trial])
101
102             if frame_data_exists == True:
103                 tracking = Position() #create a position container for tracking data
104                 tracking.create_spatial_series(name='nose x-position',
105                                                data=nose_x,
106                                                timestamps=frame_msec,
107                                                reference_frame='session start')
108                 tracking.create_spatial_series(name='nose y-position',
109                                                data=nose_y,
110                                                timestamps=frame_msec,
111                                                reference_frame='session start')
112                 tracking.create_spatial_series(name='head x-position',
113                                                data=head_x,
114                                                timestamps=frame_msec,
115                                                reference_frame='session start')
116                 tracking.create_spatial_series(name='head y-position',
117                                                data=head_y,
118                                                timestamps=frame_msec,
119                                                reference_frame='session start')
120                 tracking.create_spatial_series(name='body x-position',
121                                                data=body_x,
122                                                timestamps=frame_msec,
123                                                reference_frame='session start')
124                 tracking.create_spatial_series(name='body y-position',
125                                                data=body_y,
126                                                timestamps=frame_msec,
127                                                reference_frame='session start')
128
129             if sniff_signal_exists == True:
130                 sniff = TimeSeries(name='sniff_signal', data=sniff_signal, unit='V', starting_time=0.0, rate=0.00125)
131                 nwbfile.add_acquisition(sniff)
132
133             io.write(nwbfile)
134             io.close()

Opportunities for Reuse#

Some opportunities to make this code able to be integrated into this package, and opportunities to learn a bit of Python!

Breaking into Functions#

Most analysis code in science is written as scripts — code that is usually written in the “root” of the document, usually not enclosed within functions or classes (or in the case of matlab, a single function per file). Scripts can be a very direct way of approaching a problem, but they can be difficult to reuse and maintain.

One first step in making code a bit more resuable is to break the code into separate functions. There are a lot of ways of thinking about what makes a “good” function, but in my opinion a good function is one that (among other things)

  • Does “one”(ish) thing, or, has a well-defined purpose. Functions should be short – it’s always possible to combine multiple functions in a larger “wrapper” function.

  • Allows alternative behavior with arguments using optional arguments with reasonable defaults. A function should encapsulate a particular operation that you might need to do multiple times, so rather than copying code with minor modifications, a function should offer the caller optionoal arguments that change its behavior. The function should require as few things as it needs to perform the option, so optional arguments should have default values that make the function do what one would expect it to do (ie. what its documentation says it does)

  • Has no side effects, and conversely doesn’t rely on anything outside of the function. A function should never change its surrounding environment (unless that’s specifically what it is for): it shouldn’t change the working directory, implicitly make or load files, etc. It also shouldn’t rely on other variables/objects in the surrounding environment that aren’t explicitly passed in as arguments. Functions with side effects are very brittle, as they need to be carefully orchestrated with other functions, making their functionality linked and thus harder to maintain because any change in one function affects the others. Instead, functions should require what they need with arguments, and return their result. Instead of saving a file at the end of a long analysis script, return the result of the analysis and then use a separate saving function that takes the data and a path to save it.

Among many other considerations…

To start breaking into functions, it’s always a good idea to make a brief outline of what we need our code to do! In this case, we

  • Analyze data from multiple subjects, which have biographical data contained in pynwb.file.Subject object.

  • Each of which has multiple experiments

  • Which in turn have multiple sessions – each of which has its own nwb file.

  • Each session can contain

    • notes.txt, which contained the date of the experiment

    • trial_params.txt, which describes the metadata for each of the sniff trials.

    • sniff.bin, a binary file containing analog sniffing data

  • After loading, the data is then saved using one of several objects or methods described at the top of this page.

That’s a reasonable place to start breaking up the code! We can start by breaking up the nested for loops by putting them in separate functions that look like this…

import os

def convert_session(experiment_dir:str, session:str):
    # ... code to conver session
    pass

def convert_experiment(subject_dir:str, experiment:str):
    experiment_dir = subject_dir + experiment + "/"
    sessions = os.listdir(experiment_dir)
    for session in sessions:
        convert_session(experiment_dir, session)
  
def convert_subject(data_dir: str, subject:str):
    subject_dir = data_dir + subject + "/"
    subject_number = subject[-4:]
    experiments = os.listdir(subject_dir)
    for experiment in experiments:
        if experiment == 'ARHMM':
            continue
        
        convert_experiment(subject_dir, experiment)

but even those are likely to be too “large” — the convert_session function would effectively have the rest of the code in the script! It’s also a good idea for both readability and reusability to encapsulate other separable operations in functions.

So we can also break up each of the three different types of data we have into their own functions. For example, the position data could look like this:

from typing import List, Tuple
import numpy as np
from dataclasses import dataclass

from pynwb.behavior import Position
from pynwb import NWBFile, ProcessingModule

@dataclass
class Frame_Data:
    name: str
    """
    What to call this spatial series
    """
    position: np.ndarray
    """
    Value of the position in (unit)
    """
    timestamps: np.ndarray
    """
    Array of timestamps (ms) for each position
    """
    
    def add_series(self, position:Position, reference_frame:str='session_start') -> Position:
        """
        Method to add this data series to a pynwb Position object.
        """
        position.create_spatial_series(
          name=self.name,
          data=self.position,
          timestamps=self.timestamps,
          reference_frame=reference_frame
        )
        return position

def load_frame_data(data_path:str, delimiter:str = ',', skip_header:int=0) -> List[Frame_Data]:
    """
    Load an individual session's frame data from a file into a list of Frame_Data containers
    """  
    frame_params = np.genfromtxt(
        data_path,
        delimiter = delimiter,
        skip_header=skip_header)
    
    names = ['nose_x', 'nose_y', 'head_x', 'head_y', 'body_x', 'body_y']
    position_series = []
    for i, name in enumerate(names):
        position_series.append(
          Frame_Data(name, frame_params[:,i], frame_params[:,7]))
    
    return position_series

def add_frame_data(nwbfile: NWBFile, frame_data:List[Frame_Data],
                   module_name:str="position", description:str='') -> Tuple[Position, ProcessingModule]:
    """
    Load the data from a given frame data file and add it to an NWB IO file.
    
    Creates a :class:`~pynwb.base.ProcessingModule` 
    
    Args:
        nwbfile: File to add to!
        frame_data (List[:class:`.Frame_Data`]): see :func:`.load_frame_data`
        module_name (str): Name to give to the created ProcessingModule
        description (str): Description to give to the created ProcessingModule
        
    References:
        https://pynwb.readthedocs.io/en/stable/tutorials/general/file.html?highlight=Position#spatial-series-and-position
    """
    position = Position()
    for data_series in frame_data:
        position = data_series.add_series(position)
    
    position_module = nwbfile.create_processing_module(
      name=module_name, description=description
    )

    position_module.add(position)
    
    return position, position_module
                 

Which we would use like::

>>> frame_data = load_frame_data(data_path)
>>> position, position_module = add_frame_data(nwbfile, frame_data)

This is really nice because we have made a relatively general Frame_Data class and add_frame_data that doesn’t depend on the particularity of the data at hand – only the load_frame_data has information that’s unique to us! (names and how to load and index the data frame). So someone else could then extend our functions by adding their own loading function without needing to rewrite the rest!

By writing docstrings as we go (and using types and type hints, which we’ll cover another time!!), we help keep track of what everything does, so this function would have its documentation rendered like:

onice_conversion.add_frame_data(nwbfile: NWBFile, frame_data: Frame_Data, module_name: str, description: str)#

Load the data from a given frame data file and add it to an NWB IO file.

Creates a ProcessingModule

Args:

nwbfile (NWBFile) : File to add to! frame_data (List[Frame_Data]) : see load_frame_data() module_name (str) : Name to give to the created ProcessingModule description (str) : Description to give to the created ProcessingModule

References:

https://pynwb.readthedocs.io/en/stable/tutorials/general/file.html?highlight=Position#spatial-series-and-position

The next thing we would start doing is structuring our code into separate modules: i/o operations, processing operations, and so on, but that’s for another time!

Warnings#

todo

(warn about using default values)

import os
from datetime import datetime
import warnings

dateinfo = "2021-01-01, 01:00:00"
session_date_info = datetime.strptime(dateinfo, "%Y-%m-%d,%H:%M:%S")
if os.path.exists(session_dir + "notes.txt") == True:
    # get date from file
    pass
else:
    warnings.warn(f'Couldnt get data from notes.txt, using default date {dateinfo}')

pathlib!#

todo

show use of pathlib vs os.path

from pathlib import Path

# for example
data_dir = Path().home() / 'my_data'
text_files = data_dir.glob('**/*.txt')
data_dir.exists()
my_file = data_dir / 'myfile.txt'
my_file2 = my_file.with_stem(my_file.stem + "_two")
# myfile_two.txt