
Neurosuite

Class and functions for loading data processed with the Neurosuite (Klusters, Neuroscope, NDmanager)

NeuroSuite (BaseLoader)

Loader for Klusters data

Source code in pynapple/io/neurosuite.py
class NeuroSuite(BaseLoader):
    """
    Loader for Klusters data
    """

    def __init__(self, path):
        """
        Instantiate the data class from a neurosuite folder.

        Parameters
        ----------
        path : str
            The path to the data.
        """
        self.basename = os.path.basename(path)
        self.time_support = None

        super().__init__(path)

        # Need to check if nwb file exists and if data are there
        loading_neurosuite = True
        if self.path is not None:
            nwb_path = os.path.join(self.path, "pynapplenwb")
            if os.path.exists(nwb_path):
                files = os.listdir(nwb_path)
                if len([f for f in files if f.endswith(".nwb")]):
                    success = self.load_nwb_spikes(path)
                    if success:
                        loading_neurosuite = False

        # Bypass if data have already been transfered to nwb
        if loading_neurosuite:
            self.load_neurosuite_xml(path)
            # print("XML loaded")
            # To label the electrodes groups
            app = App()
            window = EphysGUI(app, path=path, groups=self.group_to_channel)
            app.mainloop()
            try:
                app.update()
            except Exception:
                pass

            # print("GUI DONE")
            if window.status:
                self.ephys_information = window.ephys_information
                self.load_neurosuite_spikes(path, self.basename, self.time_support)
                self.save_data(path)

    def load_neurosuite_spikes(self, path, basename, time_support=None, fs=20000.0):
        """
        Read the clu and res files and convert to NWB.
        Automatically instantiates a TsGroup object.

        Parameters
        ----------
        path : str
            The path to the data
        basename : str
            Basename of the clu and res files.
        time_support : IntervalSet, optional
            The time support of the data
        fs : float, optional
            Sampling rate of the recording.

        Raises
        ------
        RuntimeError
            If the numbers of clu and res files are not equal.

        """
        files = os.listdir(path)
        clu_files = np.sort([f for f in files if ".clu." in f and f[0] != "."])
        res_files = np.sort([f for f in files if ".res." in f and f[0] != "."])
        clu1 = np.sort([int(f.split(".")[-1]) for f in clu_files])
        clu2 = np.sort([int(f.split(".")[-1]) for f in res_files])
        if len(clu_files) != len(res_files) or not (clu1 == clu2).all():
            raise RuntimeError(
                "Not the same number of clu and res files in " + path + "; Exiting ..."
            )

        count = 0
        spikes = {}
        group = pd.Series(dtype=np.int32)
        for i, s in zip(range(len(clu_files)), clu1):
            clu = np.genfromtxt(
                os.path.join(path, basename + ".clu." + str(s)), dtype=np.int32
            )[1:]
            if np.max(clu) > 1:  # getting rid of mua and noise
                res = np.genfromtxt(os.path.join(path, basename + ".res." + str(s)))
                tmp = np.unique(clu).astype(int)
                idx_clu = tmp[tmp > 1]
                idx_out = np.arange(count, count + len(idx_clu))

                for j, k in zip(idx_clu, idx_out):
                    t = res[clu == j] / fs
                    spikes[k] = nap.Ts(t=t, time_units="s")
                    group.loc[k] = s

                count += len(idx_clu)

        group = group - 1  # group indices start at 0

        self.spikes = nap.TsGroup(
            spikes, time_support=time_support, time_units="s", group=group
        )

        # adding some information to help parse the neurons
        names = pd.Series(
            index=group.index,
            data=[self.ephys_information[group.loc[i]]["name"] for i in group.index],
        )
        if ~np.all(names.values == ""):
            self.spikes.set_info(name=names)
        locations = pd.Series(
            index=group.index,
            data=[
                self.ephys_information[group.loc[i]]["location"] for i in group.index
            ],
        )
        if ~np.all(locations.values == ""):
            self.spikes.set_info(location=locations)

        return

    def load_neurosuite_xml(self, path):
        """
        Path should be the session folder containing the XML file.

        The function reads:
        1. the number of channels
        2. the sampling frequencies of the dat file and the eeg file
        3. the mapping from shanks to channels as a dict

        Parameters
        ----------
        path: str
            The path to the data

        Raises
        ------
        RuntimeError
            If the path does not contain an XML file.
        """
        listdir = os.listdir(path)
        xmlfiles = [f for f in listdir if f.endswith(".xml")]
        if not len(xmlfiles):
            raise RuntimeError("Path {} contains no xml files;".format(path))
            sys.exit()
        new_path = os.path.join(path, xmlfiles[0])

        self.xmldoc = minidom.parse(new_path)
        self.nChannels = int(
            self.xmldoc.getElementsByTagName("acquisitionSystem")[0]
            .getElementsByTagName("nChannels")[0]
            .firstChild.data
        )
        self.fs_dat = int(
            self.xmldoc.getElementsByTagName("acquisitionSystem")[0]
            .getElementsByTagName("samplingRate")[0]
            .firstChild.data
        )
        self.fs_eeg = int(
            self.xmldoc.getElementsByTagName("fieldPotentials")[0]
            .getElementsByTagName("lfpSamplingRate")[0]
            .firstChild.data
        )

        self.group_to_channel = {}
        groups = (
            self.xmldoc.getElementsByTagName("anatomicalDescription")[0]
            .getElementsByTagName("channelGroups")[0]
            .getElementsByTagName("group")
        )
        for i in range(len(groups)):
            self.group_to_channel[i] = np.array(
                [
                    int(child.firstChild.data)
                    for child in groups[i].getElementsByTagName("channel")
                ]
            )

        return

    def save_data(self, path):
        """
        Save the data to NWB format.

        Parameters
        ----------
        path : str
            The path to save the data

        """
        self.nwb_path = os.path.join(path, "pynapplenwb")
        if not os.path.exists(self.nwb_path):
            raise RuntimeError("Path {} does not exist.".format(self.nwb_path))
        self.nwbfilename = [f for f in os.listdir(self.nwb_path) if "nwb" in f][0]
        self.nwbfilepath = os.path.join(self.nwb_path, self.nwbfilename)

        io = NWBHDF5IO(self.nwbfilepath, "r+")
        nwbfile = io.read()

        electrode_groups = {}

        for g in self.group_to_channel:
            device = nwbfile.create_device(
                name=self.ephys_information[g]["device"]["name"] + "-" + str(g),
                description=self.ephys_information[g]["device"]["description"],
                manufacturer=self.ephys_information[g]["device"]["manufacturer"],
            )

            if (
                len(self.ephys_information[g]["position"])
                and type(self.ephys_information[g]["position"]) is str
            ):
                self.ephys_information[g]["position"] = re.split(
                    ";|,| ", self.ephys_information[g]["position"]
                )
            elif self.ephys_information[g]["position"] == "":
                self.ephys_information[g]["position"] = None

            electrode_groups[g] = nwbfile.create_electrode_group(
                name="group" + str(g) + "_" + self.ephys_information[g]["name"],
                description=self.ephys_information[g]["description"],
                position=self.ephys_information[g]["position"],
                location=self.ephys_information[g]["location"],
                device=device,
            )

            for idx in self.group_to_channel[g]:
                nwbfile.add_electrode(
                    id=idx,
                    x=0.0,
                    y=0.0,
                    z=0.0,
                    imp=0.0,
                    location=self.ephys_information[g]["location"],
                    filtering="none",
                    group=electrode_groups[g],
                )

        # Adding units
        nwbfile.add_unit_column("location", "the anatomical location of this unit")
        nwbfile.add_unit_column("group", "the group of the unit")
        for u in self.spikes.keys():
            nwbfile.add_unit(
                id=u,
                spike_times=self.spikes[u].as_units("s").index.values,
                electrode_group=electrode_groups[self.spikes.get_info("group").loc[u]],
                location=self.ephys_information[self.spikes.get_info("group").loc[u]][
                    "location"
                ],
                group=self.spikes.get_info("group").loc[u],
            )

        io.write(nwbfile)
        io.close()

        return

    def load_nwb_spikes(self, path):
        """
        Read the NWB spikes to extract the spike times.

        Parameters
        ----------
        path : str
            The path to the data

        Returns
        -------
        bool
            True if spikes were loaded from the NWB file, False otherwise.
        """
        self.nwb_path = os.path.join(path, "pynapplenwb")
        if not os.path.exists(self.nwb_path):
            raise RuntimeError("Path {} does not exist.".format(self.nwb_path))
        self.nwbfilename = [f for f in os.listdir(self.nwb_path) if "nwb" in f][0]
        self.nwbfilepath = os.path.join(self.nwb_path, self.nwbfilename)

        io = NWBHDF5IO(self.nwbfilepath, "r")
        nwbfile = io.read()

        if nwbfile.units is None:
            io.close()
            return False
        else:
            units = nwbfile.units.to_dataframe()
            spikes = {
                n: nap.Ts(t=units.loc[n, "spike_times"], time_units="s")
                for n in units.index
            }

            self.spikes = nap.TsGroup(
                spikes,
                time_support=self.time_support,
                time_units="s",
                group=units["group"],
            )

            if ~np.all(units["location"] == ""):
                self.spikes.set_info(location=units["location"])

            io.close()
            return True

    def load_lfp(
        self,
        filename=None,
        channel=None,
        extension=".eeg",
        frequency=1250.0,
        precision="int16",
        bytes_size=2,
    ):
        """
        Load the LFP.

        Parameters
        ----------
        filename : str, optional
            The filename of the LFP file.
            Useful if multiple dat files are present in the data directory.
        channel : int or list of int, optional
            The channel(s) to load. If None, returns a memory map of the file to avoid memory errors.
        extension : str, optional
            The file extension (.eeg, .dat, .lfp). Make sure the frequency matches.
        frequency : float, optional
            Sampling rate of the file; default 1250 Hz for the eeg file.
        precision : str, optional
            The precision of the binary file.
        bytes_size : int, optional
            Byte size of one sample in the LFP file.

        Raises
        ------
        RuntimeError
            If the lfp/eeg/dat file cannot be found.

        Returns
        -------
        Tsd or TsdFrame
            The lfp in a time series format
        """
        if filename is not None:
            filepath = os.path.join(self.path, filename)
        else:
            listdir = os.listdir(self.path)
            eegfile = [f for f in listdir if f.endswith(extension)]
            if not len(eegfile):
                raise RuntimeError(
                    "Path {} contains no {} files;".format(self.path, extension)
                )

            filepath = os.path.join(self.path, eegfile[0])

        self.load_neurosuite_xml(self.path)

        n_channels = int(self.nChannels)

        f = open(filepath, "rb")
        startoffile = f.seek(0, 0)
        endoffile = f.seek(0, 2)
        f.close()
        # use the precision argument instead of hard-coding int16
        dtype = np.dtype(precision)
        bytes_size = dtype.itemsize
        n_samples = int((endoffile - startoffile) / n_channels / bytes_size)
        duration = n_samples / frequency
        fp = np.memmap(filepath, dtype, "r", shape=(n_samples, n_channels))
        timestep = np.arange(0, n_samples) / frequency

        time_support = nap.IntervalSet(start=0, end=duration, time_units="s")

        if channel is None:
            return nap.TsdFrame(
                t=timestep, d=fp, time_units="s", time_support=time_support
            )
        elif type(channel) is int:
            return nap.Tsd(
                t=timestep, d=fp[:, channel], time_units="s", time_support=time_support
            )
        elif type(channel) is list:
            return nap.TsdFrame(
                t=timestep,
                d=fp[:, channel],
                time_units="s",
                time_support=time_support,
                columns=channel,
            )

    def read_neuroscope_intervals(self, name=None, path2file=None):
        """
        This function reads .evt files, in which odd rows indicate the starts
        of the intervals and even rows indicate the ends.
        If the epoch is already stored in the NWB file, provide just the name.
        Otherwise the events are loaded from the .evt file in the session directory.
        If just the path is provided but not the name, the name is taken from the file.

        Parameters
        ----------
        name: str
            Name of the epoch in the NWB file (e.g. "rem"), or the name under
            which to save the data in the NWB.

        path2file: str
            Path of the file you want to load.

        Returns
        -------
        IntervalSet
            Contains two columns corresponding to the start and end of the intervals.

        """
        if name:
            isets = self.load_nwb_intervals(name)
            if isinstance(isets, nap.IntervalSet):
                return isets
        if name is not None and path2file is None:
            path2file = os.path.join(self.path, self.basename + "." + name + ".evt")
        if path2file is not None:
            try:
                tmp = np.genfromtxt(path2file)[:, 0]
                df = tmp.reshape(len(tmp) // 2, 2)
            except ValueError:
                # df would be undefined below; raise instead of just printing
                raise ValueError("{} is not a valid evt file".format(path2file))
            isets = nap.IntervalSet(df[:, 0], df[:, 1], time_units="ms")
            if name is None:
                name = path2file.split(".")[-2]
                print("*** saving file in the nwb as", name)
            self.save_nwb_intervals(isets, name)
        else:
            raise ValueError("specify a valid path")
        return isets

    def write_neuroscope_intervals(self, extension, isets, name):
        """Write events to load with neuroscope (e.g. ripples start and ends)

        Parameters
        ----------
        extension : str
            The extension of the file (e.g. basename.evt.py.rip)
        isets : IntervalSet
            The IntervalSet to write
        name : str
            The name of the events (e.g. Ripples)
        """
        start = isets.as_units("ms")["start"].values
        ends = isets.as_units("ms")["end"].values

        datatowrite = np.vstack((start, ends)).T.flatten()

        n = len(isets)

        texttowrite = np.vstack(
            (
                (np.repeat(np.array([name + " start"]), n)),
                (np.repeat(np.array([name + " end"]), n)),
            )
        ).T.flatten()

        evt_file = os.path.join(self.path, self.basename + extension)

        f = open(evt_file, "w")
        for t, n in zip(datatowrite, texttowrite):
            f.writelines("{:1.6f}".format(t) + "\t" + n + "\n")
        f.close()

        return

    def load_mean_waveforms(self, epoch=None, waveform_window=None, spike_count=1000):
        """
        Load the mean waveforms from a dat file.

        Parameters
        ----------
        epoch : IntervalSet, optional
            default = None
            Restrict spikes to an epoch.
        waveform_window : IntervalSet, optional
            default = nap.IntervalSet(start=-0.5, end=1, time_units='ms')
            Limit waveform extraction before and after spike time.
        spike_count : int, optional
            default = 1000
            Number of spikes used per neuron for the calculation of waveforms.

        Returns
        -------
        dictionary
            the waveforms for all neurons
        pandas.Series
            the channel with the maximum waveform for each neuron

        """
        if not isinstance(waveform_window, nap.IntervalSet):
            waveform_window = nap.IntervalSet(start=-0.5, end=1, time_units="ms")

        spikes = self.spikes
        if not os.path.exists(self.path):  # check if path exists
            print("The path " + self.path + " doesn't exist; Exiting ...")
            sys.exit()

        # Load XML INFO
        self.load_neurosuite_xml(self.path)
        n_channels = self.nChannels
        fs = self.fs_dat
        group_to_channel = self.group_to_channel
        group = spikes.get_info("group")

        # Check if there is an epoch, restrict spike times to epoch
        if epoch is not None:
            if type(epoch) is not nap.IntervalSet:
                print("Epoch must be an IntervalSet")
                sys.exit()
            else:
                print("Restricting spikes to epoch")
                spikes = spikes.restrict(epoch)
                epstart = int(epoch.as_units("s")["start"].values[0] * fs)
                epend = int(epoch.as_units("s")["end"].values[0] * fs)

        # Find dat file
        files = os.listdir(self.path)
        dat_files = np.sort([f for f in files if "dat" in f and f[0] != "."])

        # Need n_samples collected in the entire recording from dat file to load
        file = os.path.join(self.path, dat_files[0])
        f = open(
            file, "rb"
        )  # open file to get number of samples collected in the entire recording
        startoffile = f.seek(0, 0)
        endoffile = f.seek(0, 2)
        bytes_size = 2
        n_samples = int((endoffile - startoffile) / n_channels / bytes_size)
        f.close()
        # map to memory all samples for all channels, channels are numbered according to neuroscope number
        fp = np.memmap(file, np.int16, "r", shape=(n_samples, n_channels))

        # convert spike times to spikes in sample number
        sample_spikes = {
            neuron: (spikes[neuron].as_units("s").index.values * fs).astype("int")
            for neuron in spikes
        }

        # prep for waveforms
        overlap = int(
            waveform_window.tot_length(time_units="s") * fs
        )  # one spike's worth of overlap between windows, in samples
        waveform_window = abs(np.array(waveform_window.as_units("s"))[0] * fs).astype(
            int
        )  # convert time to sample number
        neuron_waveforms = {
            n: np.zeros([np.sum(waveform_window), len(group_to_channel[group[n]])])
            for n in sample_spikes
        }

        # divide dat file into batches that slightly overlap for faster loading
        batch_size = 3000000
        windows = np.arange(0, int(endoffile / n_channels / bytes_size), batch_size)
        if epoch is not None:
            print("Restricting dat file to epoch")
            windows = windows[(windows >= epstart) & (windows <= epend)]
        batches = []
        for (
            i
        ) in windows:  # make overlapping batches from the beginning to end of recording
            if i == windows[-1]:  # the last batch cannot overlap with the next one
                batches.append([i, n_samples])
            else:
                batches.append([i, i + batch_size + overlap])
        batches = [np.int32(batch) for batch in batches]

        sample_counted_spikes = {}
        for index, neuron in enumerate(sample_spikes):
            if len(sample_spikes[neuron]) >= spike_count:
                sample_counted_spikes[neuron] = np.array(
                    np.random.choice(list(sample_spikes[neuron]), spike_count)
                )
            elif len(sample_spikes[neuron]) < spike_count:
                print(
                    "Not enough spikes in neuron " + str(index) + "... using all spikes"
                )
                sample_counted_spikes[neuron] = sample_spikes[neuron]

        # Make one array containing all selected spike times of all neurons - will be used to check for spikes before loading dat file
        spike_check = np.array(
            [
                int(spikes_neuron)
                for neuron in sample_counted_spikes
                for spikes_neuron in sample_counted_spikes[neuron]
            ]
        )

        for index, timestep in enumerate(batches):
            print(
                f"Extracting waveforms from dat file: window {index+1} / {len(windows)}",
                end="\r",
            )

            if (
                len(
                    spike_check[
                        (timestep[0] < spike_check) & (timestep[1] > spike_check)
                    ]
                )
                == 0
            ):
                continue  # if there are no spikes for any neurons in this batch, skip and go to the next one

            # Load dat file for timestep
            tmp = pd.DataFrame(
                data=fp[timestep[0] : timestep[1], :],
                columns=np.arange(n_channels),
                index=range(timestep[0], timestep[1]),
            )  # load dat file

            # Check if any spikes are present
            for neuron in sample_counted_spikes:
                neurontmp = sample_counted_spikes[neuron]
                tmp2 = neurontmp[(timestep[0] < neurontmp) & (timestep[1] > neurontmp)]
                if len(tmp2) == 0:
                    continue  # skip neuron if it has no spikes in this batch
                tmpn = tmp[
                    group_to_channel[group[neuron]]
                ]  # restrict dat file to the channel group of the neuron

                for time in tmp2:  # add each spike waveform to neuron_waveform
                    spikewindow = tmpn.loc[
                        time - waveform_window[0] : time + waveform_window[1] - 1
                    ]  # waveform for this spike time
                    try:
                        neuron_waveforms[neuron] += spikewindow.values
                    except (
                        Exception
                    ):  # ignore if full waveform is not present in this batch
                        pass

        meanwf = {
            n: pd.DataFrame(
                # average over the number of spikes actually used for this neuron
                data=np.array(neuron_waveforms[n]) / len(sample_counted_spikes[n]),
                columns=np.arange(len(group_to_channel[group[n]])),
                index=np.array(np.arange(-waveform_window[0], waveform_window[1])) / fs,
            )
            for n in sample_counted_spikes
        }

        # find the max channel for each neuron
        maxch = pd.Series(
            data=[meanwf[n][meanwf[n].loc[0].idxmin()].name for n in meanwf],
            index=spikes.keys(),
        )

        return meanwf, maxch

__init__(self, path)

Instantiate the data class from a neurosuite folder.

Parameters:

path : str, required
    The path to the data.

Source code in pynapple/io/neurosuite.py
def __init__(self, path):
    """
    Instantiate the data class from a neurosuite folder.

    Parameters
    ----------
    path : str
        The path to the data.
    """
    self.basename = os.path.basename(path)
    self.time_support = None

    super().__init__(path)

    # Need to check if nwb file exists and if data are there
    loading_neurosuite = True
    if self.path is not None:
        nwb_path = os.path.join(self.path, "pynapplenwb")
        if os.path.exists(nwb_path):
            files = os.listdir(nwb_path)
            if len([f for f in files if f.endswith(".nwb")]):
                success = self.load_nwb_spikes(path)
                if success:
                    loading_neurosuite = False

    # Bypass if data have already been transfered to nwb
    if loading_neurosuite:
        self.load_neurosuite_xml(path)
        # print("XML loaded")
        # To label the electrodes groups
        app = App()
        window = EphysGUI(app, path=path, groups=self.group_to_channel)
        app.mainloop()
        try:
            app.update()
        except Exception:
            pass

        # print("GUI DONE")
        if window.status:
            self.ephys_information = window.ephys_information
            self.load_neurosuite_spikes(path, self.basename, self.time_support)
            self.save_data(path)

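A minimal usage sketch (the session path is hypothetical; the loader is typically obtained through nap.load_session rather than instantiated directly):

import pynapple as nap

# The first load parses the XML, opens the electrode-labeling GUI and writes
# an NWB file; subsequent loads read the spikes straight from pynapplenwb/.
data = nap.load_session("/path/to/A2929-200711", "neurosuite")
print(data.spikes)  # TsGroup with one Ts per unit
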
load_neurosuite_spikes(self, path, basename, time_support=None, fs=20000.0)

Read the clu and res files and convert to NWB.

Automatically instantiates a TsGroup object.

Parameters:

path : str, required
    The path to the data.
basename : str, required
    Basename of the clu and res files.
time_support : IntervalSet, optional (default: None)
    The time support of the data.
fs : float, optional (default: 20000.0)
    Sampling rate of the recording.

Exceptions:

RuntimeError
    If the numbers of clu and res files are not equal.

Source code in pynapple/io/neurosuite.py
def load_neurosuite_spikes(self, path, basename, time_support=None, fs=20000.0):
    """
    Read the clu and res files and convert to NWB.
    Automatically instantiates a TsGroup object.

    Parameters
    ----------
    path : str
        The path to the data
    basename : str
        Basename of the clu and res files.
    time_support : IntervalSet, optional
        The time support of the data
    fs : float, optional
        Sampling rate of the recording.

    Raises
    ------
    RuntimeError
        If the numbers of clu and res files are not equal.

    """
    files = os.listdir(path)
    clu_files = np.sort([f for f in files if ".clu." in f and f[0] != "."])
    res_files = np.sort([f for f in files if ".res." in f and f[0] != "."])
    clu1 = np.sort([int(f.split(".")[-1]) for f in clu_files])
    clu2 = np.sort([int(f.split(".")[-1]) for f in res_files])
    if len(clu_files) != len(res_files) or not (clu1 == clu2).all():
        raise RuntimeError(
            "Not the same number of clu and res files in " + path + "; Exiting ..."
        )

    count = 0
    spikes = {}
    group = pd.Series(dtype=np.int32)
    for i, s in zip(range(len(clu_files)), clu1):
        clu = np.genfromtxt(
            os.path.join(path, basename + ".clu." + str(s)), dtype=np.int32
        )[1:]
        if np.max(clu) > 1:  # getting rid of mua and noise
            res = np.genfromtxt(os.path.join(path, basename + ".res." + str(s)))
            tmp = np.unique(clu).astype(int)
            idx_clu = tmp[tmp > 1]
            idx_out = np.arange(count, count + len(idx_clu))

            for j, k in zip(idx_clu, idx_out):
                t = res[clu == j] / fs
                spikes[k] = nap.Ts(t=t, time_units="s")
                group.loc[k] = s

            count += len(idx_clu)

    group = group - 1  # group indices start at 0

    self.spikes = nap.TsGroup(
        spikes, time_support=time_support, time_units="s", group=group
    )

    # adding some information to help parse the neurons
    names = pd.Series(
        index=group.index,
        data=[self.ephys_information[group.loc[i]]["name"] for i in group.index],
    )
    if ~np.all(names.values == ""):
        self.spikes.set_info(name=names)
    locations = pd.Series(
        index=group.index,
        data=[
            self.ephys_information[group.loc[i]]["location"] for i in group.index
        ],
    )
    if ~np.all(locations.values == ""):
        self.spikes.set_info(location=locations)

    return
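
Once loaded, the units form a regular pynapple TsGroup, so the usual operations apply; a short sketch (reusing data from the example above, with a hypothetical epoch):

import pynapple as nap

# Per-unit metadata set by this loader: 'group', plus 'name' and 'location'
# when they were filled in the GUI
print(data.spikes.get_info("group"))

# Restrict all units to an epoch of interest
ep = nap.IntervalSet(start=0, end=600, time_units="s")
print(data.spikes.restrict(ep))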

load_neurosuite_xml(self, path)

Path should be the session folder containing the XML file.

The function reads:

1. the number of channels
2. the sampling frequencies of the dat file and the eeg file
3. the mapping from shanks to channels as a dict

Parameters:

path : str, required
    The path to the data.

Exceptions:

RuntimeError
    If the path does not contain an XML file.

Source code in pynapple/io/neurosuite.py
def load_neurosuite_xml(self, path):
    """
    Path should be the session folder containing the XML file.

    The function reads:
    1. the number of channels
    2. the sampling frequencies of the dat file and the eeg file
    3. the mapping from shanks to channels as a dict

    Parameters
    ----------
    path: str
        The path to the data

    Raises
    ------
    RuntimeError
        If the path does not contain an XML file.
    """
    listdir = os.listdir(path)
    xmlfiles = [f for f in listdir if f.endswith(".xml")]
    if not len(xmlfiles):
    raise RuntimeError("Path {} contains no xml files;".format(path))
    new_path = os.path.join(path, xmlfiles[0])

    self.xmldoc = minidom.parse(new_path)
    self.nChannels = int(
        self.xmldoc.getElementsByTagName("acquisitionSystem")[0]
        .getElementsByTagName("nChannels")[0]
        .firstChild.data
    )
    self.fs_dat = int(
        self.xmldoc.getElementsByTagName("acquisitionSystem")[0]
        .getElementsByTagName("samplingRate")[0]
        .firstChild.data
    )
    self.fs_eeg = int(
        self.xmldoc.getElementsByTagName("fieldPotentials")[0]
        .getElementsByTagName("lfpSamplingRate")[0]
        .firstChild.data
    )

    self.group_to_channel = {}
    groups = (
        self.xmldoc.getElementsByTagName("anatomicalDescription")[0]
        .getElementsByTagName("channelGroups")[0]
        .getElementsByTagName("group")
    )
    for i in range(len(groups)):
        self.group_to_channel[i] = np.array(
            [
                int(child.firstChild.data)
                for child in groups[i].getElementsByTagName("channel")
            ]
        )

    return
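
The parse populates the attributes that the other methods rely on; a quick inspection sketch (reusing data from the example above):

data.load_neurosuite_xml(data.path)

print(data.nChannels)   # channel count from <acquisitionSystem>
print(data.fs_dat)      # wideband (.dat) sampling rate
print(data.fs_eeg)      # LFP (.eeg/.lfp) sampling rate
for g, channels in data.group_to_channel.items():
    print(g, channels)  # shank index -> numpy array of channel numbers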

save_data(self, path)

Save the data to NWB format.

Parameters:

path : str, required
    The path to save the data.

Source code in pynapple/io/neurosuite.py
def save_data(self, path):
    """
    Save the data to NWB format.

    Parameters
    ----------
    path : str
        The path to save the data

    """
    self.nwb_path = os.path.join(path, "pynapplenwb")
    if not os.path.exists(self.nwb_path):
        raise RuntimeError("Path {} does not exist.".format(self.nwb_path))
    self.nwbfilename = [f for f in os.listdir(self.nwb_path) if "nwb" in f][0]
    self.nwbfilepath = os.path.join(self.nwb_path, self.nwbfilename)

    io = NWBHDF5IO(self.nwbfilepath, "r+")
    nwbfile = io.read()

    electrode_groups = {}

    for g in self.group_to_channel:
        device = nwbfile.create_device(
            name=self.ephys_information[g]["device"]["name"] + "-" + str(g),
            description=self.ephys_information[g]["device"]["description"],
            manufacturer=self.ephys_information[g]["device"]["manufacturer"],
        )

        if (
            len(self.ephys_information[g]["position"])
            and type(self.ephys_information[g]["position"]) is str
        ):
            self.ephys_information[g]["position"] = re.split(
                ";|,| ", self.ephys_information[g]["position"]
            )
        elif self.ephys_information[g]["position"] == "":
            self.ephys_information[g]["position"] = None

        electrode_groups[g] = nwbfile.create_electrode_group(
            name="group" + str(g) + "_" + self.ephys_information[g]["name"],
            description=self.ephys_information[g]["description"],
            position=self.ephys_information[g]["position"],
            location=self.ephys_information[g]["location"],
            device=device,
        )

        for idx in self.group_to_channel[g]:
            nwbfile.add_electrode(
                id=idx,
                x=0.0,
                y=0.0,
                z=0.0,
                imp=0.0,
                location=self.ephys_information[g]["location"],
                filtering="none",
                group=electrode_groups[g],
            )

    # Adding units
    nwbfile.add_unit_column("location", "the anatomical location of this unit")
    nwbfile.add_unit_column("group", "the group of the unit")
    for u in self.spikes.keys():
        nwbfile.add_unit(
            id=u,
            spike_times=self.spikes[u].as_units("s").index.values,
            electrode_group=electrode_groups[self.spikes.get_info("group").loc[u]],
            location=self.ephys_information[self.spikes.get_info("group").loc[u]][
                "location"
            ],
            group=self.spikes.get_info("group").loc[u],
        )

    io.write(nwbfile)
    io.close()

    return

load_nwb_spikes(self, path)

Read the NWB spikes to extract the spike times.

Parameters:

path : str, required
    The path to the data.

Returns:

bool
    True if spikes were loaded from the NWB file, False otherwise.

Source code in pynapple/io/neurosuite.py
def load_nwb_spikes(self, path):
    """
    Read the NWB spikes to extract the spike times.

    Parameters
    ----------
    path : str
        The path to the data

    Returns
    -------
    bool
        True if spikes were loaded from the NWB file, False otherwise.
    """
    self.nwb_path = os.path.join(path, "pynapplenwb")
    if not os.path.exists(self.nwb_path):
        raise RuntimeError("Path {} does not exist.".format(self.nwb_path))
    self.nwbfilename = [f for f in os.listdir(self.nwb_path) if "nwb" in f][0]
    self.nwbfilepath = os.path.join(self.nwb_path, self.nwbfilename)

    io = NWBHDF5IO(self.nwbfilepath, "r")
    nwbfile = io.read()

    if nwbfile.units is None:
        io.close()
        return False
    else:
        units = nwbfile.units.to_dataframe()
        spikes = {
            n: nap.Ts(t=units.loc[n, "spike_times"], time_units="s")
            for n in units.index
        }

        self.spikes = nap.TsGroup(
            spikes,
            time_support=self.time_support,
            time_units="s",
            group=units["group"],
        )

        if ~np.all(units["location"] == ""):
            self.spikes.set_info(location=units["location"])

        io.close()
        return True

load_lfp(self, filename=None, channel=None, extension='.eeg', frequency=1250.0, precision='int16', bytes_size=2)

Load the LFP.

Parameters:

filename : str, optional (default: None)
    The filename of the LFP file. Useful if multiple dat files are present in the data directory.
channel : int or list of int, optional (default: None)
    The channel(s) to load. If None, returns a memory map of the file to avoid memory errors.
extension : str, optional (default: '.eeg')
    The file extension (.eeg, .dat, .lfp). Make sure the frequency matches.
frequency : float, optional (default: 1250.0)
    Sampling rate of the file; 1250 Hz for the eeg file.
precision : str, optional (default: 'int16')
    The precision of the binary file.
bytes_size : int, optional (default: 2)
    Byte size of one sample in the LFP file.

Exceptions:

RuntimeError
    If the lfp/eeg/dat file cannot be found.

Returns:

Tsd or TsdFrame
    The LFP in a time series format.

Source code in pynapple/io/neurosuite.py
def load_lfp(
    self,
    filename=None,
    channel=None,
    extension=".eeg",
    frequency=1250.0,
    precision="int16",
    bytes_size=2,
):
    """
    Load the LFP.

    Parameters
    ----------
    filename : str, optional
        The filename of the LFP file.
        Useful if multiple dat files are present in the data directory.
    channel : int or list of int, optional
        The channel(s) to load. If None, returns a memory map of the file to avoid memory errors.
    extension : str, optional
        The file extension (.eeg, .dat, .lfp). Make sure the frequency matches.
    frequency : float, optional
        Sampling rate of the file; default 1250 Hz for the eeg file.
    precision : str, optional
        The precision of the binary file.
    bytes_size : int, optional
        Byte size of one sample in the LFP file.

    Raises
    ------
    RuntimeError
        If the lfp/eeg/dat file cannot be found.

    Returns
    -------
    Tsd or TsdFrame
        The lfp in a time series format
    """
    if filename is not None:
        filepath = os.path.join(self.path, filename)
    else:
        listdir = os.listdir(self.path)
        eegfile = [f for f in listdir if f.endswith(extension)]
        if not len(eegfile):
            raise RuntimeError(
                "Path {} contains no {} files;".format(self.path, extension)
            )

        filepath = os.path.join(self.path, eegfile[0])

    self.load_neurosuite_xml(self.path)

    n_channels = int(self.nChannels)

    f = open(filepath, "rb")
    startoffile = f.seek(0, 0)
    endoffile = f.seek(0, 2)
    f.close()
    # use the precision argument instead of hard-coding int16
    dtype = np.dtype(precision)
    bytes_size = dtype.itemsize
    n_samples = int((endoffile - startoffile) / n_channels / bytes_size)
    duration = n_samples / frequency
    fp = np.memmap(filepath, dtype, "r", shape=(n_samples, n_channels))
    timestep = np.arange(0, n_samples) / frequency

    time_support = nap.IntervalSet(start=0, end=duration, time_units="s")

    if channel is None:
        return nap.TsdFrame(
            t=timestep, d=fp, time_units="s", time_support=time_support
        )
    elif type(channel) is int:
        return nap.Tsd(
            t=timestep, d=fp[:, channel], time_units="s", time_support=time_support
        )
    elif type(channel) is list:
        return nap.TsdFrame(
            t=timestep,
            d=fp[:, channel],
            time_units="s",
            time_support=time_support,
            columns=channel,
        )
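
A sketch of the three return types (channel indices are hypothetical; make sure frequency matches the chosen extension):

# int -> Tsd with a single channel
lfp0 = data.load_lfp(channel=0, extension=".eeg", frequency=1250.0)

# list -> TsdFrame with the listed channels as columns
lfp = data.load_lfp(channel=[0, 1, 2], extension=".eeg", frequency=1250.0)

# None -> memory-mapped TsdFrame over all channels
lfp_all = data.load_lfp(channel=None, extension=".eeg", frequency=1250.0)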

read_neuroscope_intervals(self, name=None, path2file=None)

This function reads .evt files, in which odd rows indicate the starts of the intervals and even rows indicate the ends. If the epoch is already stored in the NWB file, provide just the name. Otherwise the events are loaded from the .evt file in the session directory. If just the path is provided but not the name, the name is taken from the file.

Parameters:

name : str, optional (default: None)
    Name of the epoch in the NWB file (e.g. "rem"), or the name under which to save the data in the NWB.
path2file : str, optional (default: None)
    Path of the file to load.

Returns:

IntervalSet
    Contains two columns corresponding to the start and end of the intervals.

Source code in pynapple/io/neurosuite.py
def read_neuroscope_intervals(self, name=None, path2file=None):
    """
    This function reads .evt files, in which odd rows indicate the starts
    of the intervals and even rows indicate the ends.
    If the epoch is already stored in the NWB file, provide just the name.
    Otherwise the events are loaded from the .evt file in the session directory.
    If just the path is provided but not the name, the name is taken from the file.

    Parameters
    ----------
    name: str
        Name of the epoch in the NWB file (e.g. "rem"), or the name under
        which to save the data in the NWB.

    path2file: str
        Path of the file you want to load.

    Returns
    -------
    IntervalSet
        Contains two columns corresponding to the start and end of the intervals.

    """
    if name:
        isets = self.load_nwb_intervals(name)
        if isinstance(isets, nap.IntervalSet):
            return isets
    if name is not None and path2file is None:
        path2file = os.path.join(self.path, self.basename + "." + name + ".evt")
    if path2file is not None:
        try:
            tmp = np.genfromtxt(path2file)[:, 0]
            df = tmp.reshape(len(tmp) // 2, 2)
        except ValueError:
            # df would be undefined below; raise instead of just printing
            raise ValueError("{} is not a valid evt file".format(path2file))
        isets = nap.IntervalSet(df[:, 0], df[:, 1], time_units="ms")
        if name is None:
            name = path2file.split(".")[-2]
            print("*** saving file in the nwb as", name)
        self.save_nwb_intervals(isets, name)
    else:
        raise ValueError("specify a valid path")
    return isets
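
For example, loading sleep-scored REM epochs (assuming a <basename>.rem.evt file exists in the session folder; on first read the intervals are also saved to the NWB under that name):

rem_ep = data.read_neuroscope_intervals(name="rem")
print(rem_ep)  # IntervalSet with start and end columns, in seconds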

write_neuroscope_intervals(self, extension, isets, name)

Write events to load with neuroscope (e.g. ripples start and ends)

Parameters:

extension : str, required
    The extension of the file (e.g. basename.evt.py.rip).
isets : IntervalSet, required
    The IntervalSet to write.
name : str, required
    The name of the events (e.g. Ripples).

Source code in pynapple/io/neurosuite.py
def write_neuroscope_intervals(self, extension, isets, name):
    """Write events to load with neuroscope (e.g. ripples start and ends)

    Parameters
    ----------
    extension : str
        The extension of the file (e.g. basename.evt.py.rip)
    isets : IntervalSet
        The IntervalSet to write
    name : str
        The name of the events (e.g. Ripples)
    """
    start = isets.as_units("ms")["start"].values
    ends = isets.as_units("ms")["end"].values

    datatowrite = np.vstack((start, ends)).T.flatten()

    n = len(isets)

    texttowrite = np.vstack(
        (
            (np.repeat(np.array([name + " start"]), n)),
            (np.repeat(np.array([name + " end"]), n)),
        )
    ).T.flatten()

    evt_file = os.path.join(self.path, self.basename + extension)

    f = open(evt_file, "w")
    for t, n in zip(datatowrite, texttowrite):
        f.writelines("{:1.6f}".format(t) + "\t" + n + "\n")
    f.close()

    return
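
A sketch of exporting detected events (rip_ep is a hypothetical IntervalSet of ripple epochs):

# Writes <basename>.evt.py.rip with alternating "Ripples start"/"Ripples end"
# lines timestamped in milliseconds, readable by Neuroscope
data.write_neuroscope_intervals(".evt.py.rip", rip_ep, "Ripples")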

load_mean_waveforms(self, epoch=None, waveform_window=None, spike_count=1000)

Load the mean waveforms from a dat file.

Parameters:

epoch : IntervalSet, optional (default: None)
    Restrict spikes to an epoch.
waveform_window : IntervalSet, optional (default: nap.IntervalSet(start=-0.5, end=1, time_units='ms'))
    Limit waveform extraction before and after spike time.
spike_count : int, optional (default: 1000)
    Number of spikes used per neuron for the calculation of waveforms.

Returns:

dictionary
    The waveforms for all neurons.
pandas.Series
    The channel with the maximum waveform for each neuron.

Source code in pynapple/io/neurosuite.py
def load_mean_waveforms(self, epoch=None, waveform_window=None, spike_count=1000):
    """
    Load the mean waveforms from a dat file.

    Parameters
    ----------
    epoch : IntervalSet, optional
        default = None
        Restrict spikes to an epoch.
    waveform_window : IntervalSet, optional
        default = nap.IntervalSet(start=-0.5, end=1, time_units='ms')
        Limit waveform extraction before and after spike time.
    spike_count : int, optional
        default = 1000
        Number of spikes used per neuron for the calculation of waveforms.

    Returns
    -------
    dictionary
        the waveforms for all neurons
    pandas.Series
        the channel with the maximum waveform for each neuron

    """
    if not isinstance(waveform_window, nap.IntervalSet):
        waveform_window = nap.IntervalSet(start=-0.5, end=1, time_units="ms")

    spikes = self.spikes
    if not os.path.exists(self.path):  # check if path exists
        print("The path " + self.path + " doesn't exist; Exiting ...")
        sys.exit()

    # Load XML INFO
    self.load_neurosuite_xml(self.path)
    n_channels = self.nChannels
    fs = self.fs_dat
    group_to_channel = self.group_to_channel
    group = spikes.get_info("group")

    # Check if there is an epoch, restrict spike times to epoch
    if epoch is not None:
        if type(epoch) is not nap.IntervalSet:
            print("Epoch must be an IntervalSet")
            sys.exit()
        else:
            print("Restricting spikes to epoch")
            spikes = spikes.restrict(epoch)
            epstart = int(epoch.as_units("s")["start"].values[0] * fs)
            epend = int(epoch.as_units("s")["end"].values[0] * fs)

    # Find dat file
    files = os.listdir(self.path)
    dat_files = np.sort([f for f in files if "dat" in f and f[0] != "."])

    # Need n_samples collected in the entire recording from dat file to load
    file = os.path.join(self.path, dat_files[0])
    f = open(
        file, "rb"
    )  # open file to get number of samples collected in the entire recording
    startoffile = f.seek(0, 0)
    endoffile = f.seek(0, 2)
    bytes_size = 2
    n_samples = int((endoffile - startoffile) / n_channels / bytes_size)
    f.close()
    # map to memory all samples for all channels, channels are numbered according to neuroscope number
    fp = np.memmap(file, np.int16, "r", shape=(n_samples, n_channels))

    # convert spike times to spikes in sample number
    sample_spikes = {
        neuron: (spikes[neuron].as_units("s").index.values * fs).astype("int")
        for neuron in spikes
    }

    # prep for waveforms
    overlap = int(
        waveform_window.tot_length(time_units="s") * fs
    )  # one spike's worth of overlap between windows, in samples
    waveform_window = abs(np.array(waveform_window.as_units("s"))[0] * fs).astype(
        int
    )  # convert time to sample number
    neuron_waveforms = {
        n: np.zeros([np.sum(waveform_window), len(group_to_channel[group[n]])])
        for n in sample_spikes
    }

    # divide dat file into batches that slightly overlap for faster loading
    batch_size = 3000000
    windows = np.arange(0, int(endoffile / n_channels / bytes_size), batch_size)
    if epoch is not None:
        print("Restricting dat file to epoch")
        windows = windows[(windows >= epstart) & (windows <= epend)]
    batches = []
    for (
        i
    ) in windows:  # make overlapping batches from the beginning to end of recording
        if i == windows[-1]:  # the last batch cannot overlap with the next one
            batches.append([i, n_samples])
        else:
            batches.append([i, i + batch_size + overlap])
    batches = [np.int32(batch) for batch in batches]

    sample_counted_spikes = {}
    for index, neuron in enumerate(sample_spikes):
        if len(sample_spikes[neuron]) >= spike_count:
            sample_counted_spikes[neuron] = np.array(
                np.random.choice(list(sample_spikes[neuron]), spike_count)
            )
        elif len(sample_spikes[neuron]) < spike_count:
            print(
                "Not enough spikes in neuron " + str(index) + "... using all spikes"
            )
            sample_counted_spikes[neuron] = sample_spikes[neuron]

    # Make one array containing all selected spike times of all neurons - will be used to check for spikes before loading dat file
    spike_check = np.array(
        [
            int(spikes_neuron)
            for neuron in sample_counted_spikes
            for spikes_neuron in sample_counted_spikes[neuron]
        ]
    )

    for index, timestep in enumerate(batches):
        print(
            f"Extracting waveforms from dat file: window {index+1} / {len(windows)}",
            end="\r",
        )

        if (
            len(
                spike_check[
                    (timestep[0] < spike_check) & (timestep[1] > spike_check)
                ]
            )
            == 0
        ):
            continue  # if there are no spikes for any neurons in this batch, skip and go to the next one

        # Load dat file for timestep
        tmp = pd.DataFrame(
            data=fp[timestep[0] : timestep[1], :],
            columns=np.arange(n_channels),
            index=range(timestep[0], timestep[1]),
        )  # load dat file

        # Check if any spikes are present
        for neuron in sample_counted_spikes:
            neurontmp = sample_counted_spikes[neuron]
            tmp2 = neurontmp[(timestep[0] < neurontmp) & (timestep[1] > neurontmp)]
            if len(tmp2) == 0:
                continue  # skip neuron if it has no spikes in this batch
            tmpn = tmp[
                group_to_channel[group[neuron]]
            ]  # restrict dat file to the channel group of the neuron

            for time in tmp2:  # add each spike waveform to neuron_waveform
                spikewindow = tmpn.loc[
                    time - waveform_window[0] : time + waveform_window[1] - 1
                ]  # waveform for this spike time
                try:
                    neuron_waveforms[neuron] += spikewindow.values
                except (
                    Exception
                ):  # ignore if full waveform is not present in this batch
                    pass

    meanwf = {
        n: pd.DataFrame(
            # average over the number of spikes actually used for this neuron
            data=np.array(neuron_waveforms[n]) / len(sample_counted_spikes[n]),
            columns=np.arange(len(group_to_channel[group[n]])),
            index=np.array(np.arange(-waveform_window[0], waveform_window[1])) / fs,
        )
        for n in sample_counted_spikes
    }

    # find the max channel for each neuron
    maxch = pd.Series(
        data=[meanwf[n][meanwf[n].loc[0].idxmin()].name for n in meanwf],
        index=spikes.keys(),
    )

    return meanwf, maxch
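
A usage sketch (the epoch is hypothetical; a .dat file must be present in the session folder):

import pynapple as nap

ep = nap.IntervalSet(start=0, end=600, time_units="s")
meanwf, maxch = data.load_mean_waveforms(epoch=ep, spike_count=500)

# meanwf[n]: rows = time relative to the spike (s), columns = channels
# of the unit's shank
print(meanwf[0])
print(maxch)  # per unit, the channel with the deepest trough at t=0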