Reese#
Conversion Example#
Presented at ONICE meeting 2022-04-01. Sample dataset to be uploaded separately
Demonstrates reading trialwise behavior data for odor concentration experiment, including
Making an
pynwb.NWBFile
with apynwb.file.Subject
descriptionTrialwise data using
pynwb.file.NWBFile.add_trial()
Spatial data from pose tracking using
pynwb.behavior.Position
Generic Timeseries data for analog sniff signal using
pynwb.base.TimeSeries
Writing an NWBFile with
pynwb.NWBHDF5IO
1
2import numpy as np
3import os
4from datetime import datetime
5from pynwb import NWBFile
6from pynwb import TimeSeries
7from pynwb.behavior import Position
8from pynwb import NWBHDF5IO
9from pynwb.file import Subject
10
11data_dir = "./session data/"
12save_dir = "./NWB1.5.1/"
13
14subjects = os.listdir(data_dir)
15
16for subject in subjects:
17 subject_dir = data_dir + subject + "/"
18 subject_number = subject[-4:]
19 experiments = os.listdir(subject_dir)
20
21 for experiment in experiments:
22 experiment_dir = subject_dir + experiment + "/"
23 sessions = os.listdir(experiment_dir)
24 if experiment == 'ARHMM': #leave out ARHMM data for now
25 continue
26
27 frame_data_exists = False
28 trial_data_exists = False
29 sniff_signal_exists = False
30
31 for session in sessions:
32 session_dir = experiment_dir + session + "/"
33 session_number = session[-2:]
34 if session_number == 'nt': #skip pre-implant data
35 continue
36
37 #Neurodata Without Borders file settings
38 nwb_description = "Experiment type: " + experiment + ", " + subject + ", " + session
39 print(nwb_description)
40 nwb_identifier = subject_number + '_' + experiment + '_' + session_number
41 io = NWBHDF5IO(save_dir + nwb_identifier + '.nwb', mode='w')
42
43 dateinfo = "2021-01-01, 01:00:00"
44 session_date_info = datetime.strptime(dateinfo, "%Y-%m-%d,%H:%M:%S")
45 if os.path.exists(session_dir + "notes.txt") == True:
46 with open(session_dir + "notes.txt") as f:
47 notes = f.readlines()
48 for line in range(0,len(notes)):
49 if 'Date' in notes[line]:
50 dateinfo = notes[line]
51 dateinfo = dateinfo[6:26]
52 if dateinfo[14] == '-':
53 session_date_info = datetime.strptime(dateinfo, "%Y-%m-%d: %H-%M-%S")
54 else:
55 session_date_info = datetime.strptime(dateinfo, "%Y-%m-%d, %H:%M:%S")
56
57 if os.path.exists(session_dir + "trial_params.txt") == True:
58 #sampled by trial
59 trial_data_exists = True
60 trial_params = np.genfromtxt(session_dir + "trial_params.txt",delimiter = ',',skip_header=0)
61 concentration_level = trial_params[:,0]
62 stimulus_side = trial_params[:,1]
63 chosen_side = trial_params[:,2]
64 trial_start = trial_params[:,3]
65 trial_end = trial_params[:,4]
66
67 if os.path.exists(session_dir + "frame_params_wITI.txt") == True: #sampled at 80 Hz
68 frame_data_exists = True
69 frame_params = np.genfromtxt(session_dir + "frame_params_wITI.txt",delimiter = ',',skip_header=0)
70 nose_x = frame_params[:,0]; nose_y = frame_params[:,1]
71 head_x = frame_params[:,2]; head_y = frame_params[:,3]
72 body_x = frame_params[:,4]; body_y = frame_params[:,5]
73 frame_msec = frame_params[:,7]
74
75 if os.path.exists(session_dir + "sniff.bin") == True:
76 #sampled at 800 Hz
77 sniff_signal_exists = True
78 sniff_signal = np.fromfile(session_dir + "sniff.bin",dtype = 'float')
79
80 #create a session-specific neurodata without borders file
81 subject_info = Subject(age=None, description=None,
82 genotype=None, sex=None, species='Mouse', subject_id=subject,
83 weight=None, date_of_birth=None, strain='B6')
84 nwbfile = NWBFile(session_description=nwb_description, #required
85 identifier=nwb_identifier, # required
86 session_start_time=session_date_info, #required
87 subject = subject_info,
88 session_id=session, #optional
89 file_create_date= session_date_info) #optional
90
91 if trial_data_exists == True:
92 nwbfile.add_trial_column(name='level', description='the level of odor concentration stimulus presented')
93 nwbfile.add_trial_column(name='side', description='which side of the assay the correct stimulus is presented on')
94 nwbfile.add_trial_column(name='chosen', description='which side of the assay the mouse chose (correct or incorrect)')
95
96 for trial in range(0,len(concentration_level)):
97 nwbfile.add_trial(start_time=trial_start[trial],
98 stop_time=trial_end[trial],
99 level=concentration_level[trial],side=stimulus_side[trial],
100 chosen=chosen_side[trial])
101
102 if frame_data_exists == True:
103 tracking = Position() #create a position container for tracking data
104 tracking.create_spatial_series(name='nose x-position',
105 data=nose_x,
106 timestamps=frame_msec,
107 reference_frame='session start')
108 tracking.create_spatial_series(name='nose y-position',
109 data=nose_y,
110 timestamps=frame_msec,
111 reference_frame='session start')
112 tracking.create_spatial_series(name='head x-position',
113 data=head_x,
114 timestamps=frame_msec,
115 reference_frame='session start')
116 tracking.create_spatial_series(name='head y-position',
117 data=head_y,
118 timestamps=frame_msec,
119 reference_frame='session start')
120 tracking.create_spatial_series(name='body x-position',
121 data=body_x,
122 timestamps=frame_msec,
123 reference_frame='session start')
124 tracking.create_spatial_series(name='body y-position',
125 data=body_y,
126 timestamps=frame_msec,
127 reference_frame='session start')
128
129 if sniff_signal_exists == True:
130 sniff = TimeSeries(name='sniff_signal', data=sniff_signal, unit='V', starting_time=0.0, rate=0.00125)
131 nwbfile.add_acquisition(sniff)
132
133 io.write(nwbfile)
134 io.close()
Opportunities for Reuse#
Some opportunities to make this code able to be integrated into this package, and opportunities to learn a bit of Python!
Breaking into Functions#
Most analysis code in science is written as scripts — code that is usually written in the “root” of the document, usually not enclosed within functions or classes (or in the case of matlab, a single function per file). Scripts can be a very direct way of approaching a problem, but they can be difficult to reuse and maintain.
One first step in making code a bit more resuable is to break the code into separate functions. There are a lot of ways of thinking about what makes a “good” function, but in my opinion a good function is one that (among other things)
Does “one”(ish) thing, or, has a well-defined purpose. Functions should be short – it’s always possible to combine multiple functions in a larger “wrapper” function.
Allows alternative behavior with arguments using optional arguments with reasonable defaults. A function should encapsulate a particular operation that you might need to do multiple times, so rather than copying code with minor modifications, a function should offer the caller optionoal arguments that change its behavior. The function should require as few things as it needs to perform the option, so optional arguments should have default values that make the function do what one would expect it to do (ie. what its documentation says it does)
Has no side effects, and conversely doesn’t rely on anything outside of the function. A function should never change its surrounding environment (unless that’s specifically what it is for): it shouldn’t change the working directory, implicitly make or load files, etc. It also shouldn’t rely on other variables/objects in the surrounding environment that aren’t explicitly passed in as arguments. Functions with side effects are very brittle, as they need to be carefully orchestrated with other functions, making their functionality linked and thus harder to maintain because any change in one function affects the others. Instead, functions should require what they need with arguments, and return their result. Instead of saving a file at the end of a long analysis script, return the result of the analysis and then use a separate saving function that takes the data and a path to save it.
Among many other considerations…
To start breaking into functions, it’s always a good idea to make a brief outline of what we need our code to do! In this case, we
Analyze data from multiple
subjects
, which have biographical data contained inpynwb.file.Subject
object.Each of which has multiple
experiments
Which in turn have multiple
sessions
– each of which has its own nwb file.Each session can contain
notes.txt
, which contained the date of the experimenttrial_params.txt
, which describes the metadata for each of the sniff trials.sniff.bin
, a binary file containing analog sniffing data
After loading, the data is then saved using one of several objects or methods described at the top of this page.
That’s a reasonable place to start breaking up the code! We can start by breaking up the nested for loops by putting them in separate functions that look like this…
import os
def convert_session(experiment_dir:str, session:str):
# ... code to conver session
pass
def convert_experiment(subject_dir:str, experiment:str):
experiment_dir = subject_dir + experiment + "/"
sessions = os.listdir(experiment_dir)
for session in sessions:
convert_session(experiment_dir, session)
def convert_subject(data_dir: str, subject:str):
subject_dir = data_dir + subject + "/"
subject_number = subject[-4:]
experiments = os.listdir(subject_dir)
for experiment in experiments:
if experiment == 'ARHMM':
continue
convert_experiment(subject_dir, experiment)
but even those are likely to be too “large” — the convert_session
function would effectively
have the rest of the code in the script! It’s also a good idea for both readability and reusability
to encapsulate other separable operations in functions.
So we can also break up each of the three different types of data we have into their own functions. For example, the position data could look like this:
from typing import List, Tuple
import numpy as np
from dataclasses import dataclass
from pynwb.behavior import Position
from pynwb import NWBFile, ProcessingModule
@dataclass
class Frame_Data:
name: str
"""
What to call this spatial series
"""
position: np.ndarray
"""
Value of the position in (unit)
"""
timestamps: np.ndarray
"""
Array of timestamps (ms) for each position
"""
def add_series(self, position:Position, reference_frame:str='session_start') -> Position:
"""
Method to add this data series to a pynwb Position object.
"""
position.create_spatial_series(
name=self.name,
data=self.position,
timestamps=self.timestamps,
reference_frame=reference_frame
)
return position
def load_frame_data(data_path:str, delimiter:str = ',', skip_header:int=0) -> List[Frame_Data]:
"""
Load an individual session's frame data from a file into a list of Frame_Data containers
"""
frame_params = np.genfromtxt(
data_path,
delimiter = delimiter,
skip_header=skip_header)
names = ['nose_x', 'nose_y', 'head_x', 'head_y', 'body_x', 'body_y']
position_series = []
for i, name in enumerate(names):
position_series.append(
Frame_Data(name, frame_params[:,i], frame_params[:,7]))
return position_series
def add_frame_data(nwbfile: NWBFile, frame_data:List[Frame_Data],
module_name:str="position", description:str='') -> Tuple[Position, ProcessingModule]:
"""
Load the data from a given frame data file and add it to an NWB IO file.
Creates a :class:`~pynwb.base.ProcessingModule`
Args:
nwbfile: File to add to!
frame_data (List[:class:`.Frame_Data`]): see :func:`.load_frame_data`
module_name (str): Name to give to the created ProcessingModule
description (str): Description to give to the created ProcessingModule
References:
https://pynwb.readthedocs.io/en/stable/tutorials/general/file.html?highlight=Position#spatial-series-and-position
"""
position = Position()
for data_series in frame_data:
position = data_series.add_series(position)
position_module = nwbfile.create_processing_module(
name=module_name, description=description
)
position_module.add(position)
return position, position_module
Which we would use like::
>>> frame_data = load_frame_data(data_path)
>>> position, position_module = add_frame_data(nwbfile, frame_data)
This is really nice because we have made a relatively general Frame_Data
class and add_frame_data
that doesn’t
depend on the particularity of the data at hand – only the load_frame_data
has information that’s
unique to us! (names and how to load and index the data frame). So someone else could then extend our functions
by adding their own loading function without needing to rewrite the rest!
By writing docstrings as we go (and using types and type hints, which we’ll cover another time!!), we help keep track of what everything does, so this function would have its documentation rendered like:
- onice_conversion.add_frame_data(nwbfile: NWBFile, frame_data: Frame_Data, module_name: str, description: str)#
Load the data from a given frame data file and add it to an NWB IO file.
Creates a
ProcessingModule
- Args:
nwbfile (NWBFile) : File to add to! frame_data (List[
Frame_Data
]) : seeload_frame_data()
module_name (str) : Name to give to the created ProcessingModule description (str) : Description to give to the created ProcessingModule- References:
The next thing we would start doing is structuring our code into separate modules: i/o operations, processing operations, and so on, but that’s for another time!
Warnings#
todo
(warn about using default values)
import os
from datetime import datetime
import warnings
dateinfo = "2021-01-01, 01:00:00"
session_date_info = datetime.strptime(dateinfo, "%Y-%m-%d,%H:%M:%S")
if os.path.exists(session_dir + "notes.txt") == True:
# get date from file
pass
else:
warnings.warn(f'Couldnt get data from notes.txt, using default date {dateinfo}')
pathlib
!#
todo
show use of pathlib vs os.path
from pathlib import Path
# for example
data_dir = Path().home() / 'my_data'
text_files = data_dir.glob('**/*.txt')
data_dir.exists()
my_file = data_dir / 'myfile.txt'
my_file2 = my_file.with_stem(my_file.stem + "_two")
# myfile_two.txt