1
0
mirror of https://github.com/sharkdp/bat.git synced 2025-09-02 03:12:25 +01:00

Extract some private submodules from 'bat::assets' (#1850)

This commit is contained in:
Martin Nordholts
2021-09-15 07:59:33 +02:00
committed by GitHub
parent 6226eba52a
commit e84b702309
7 changed files with 166 additions and 126 deletions

View File

@@ -0,0 +1,82 @@
use std::fs::File;
use std::path::Path;
use std::time::SystemTime;
use semver::Version;
use serde::{Deserialize, Serialize};
use crate::error::*;
/// Metadata describing a folder of cached assets.
///
/// It is (de)serialized with serde; both fields are optional so that
/// `AssetsMetadata::default()` can represent "no information available".
#[derive(Debug, PartialEq, Default, Serialize, Deserialize)]
pub struct AssetsMetadata {
/// Version string of the bat build that created the assets, if known.
bat_version: Option<String>,
/// Point in time at which this metadata was created, if known.
creation_time: Option<SystemTime>,
}
/// Name of the metadata file inside an assets folder.
const FILENAME: &str = "metadata.yaml";
impl AssetsMetadata {
#[cfg(feature = "build-assets")]
pub(crate) fn new(current_version: &str) -> AssetsMetadata {
AssetsMetadata {
bat_version: Some(current_version.to_owned()),
creation_time: Some(SystemTime::now()),
}
}
#[cfg(feature = "build-assets")]
pub(crate) fn save_to_folder(&self, path: &Path) -> Result<()> {
let file = File::create(path.join(FILENAME))?;
serde_yaml::to_writer(file, self)?;
Ok(())
}
fn try_load_from_folder(path: &Path) -> Result<Self> {
let file = File::open(path.join(FILENAME))?;
Ok(serde_yaml::from_reader(file)?)
}
/// Load metadata about the stored cache file from the given folder.
///
/// There are several possibilities:
/// - We find a metadata.yaml file and are able to parse it
/// => return the contained information
/// - We find a metadata.yaml file and but are not able to parse it
/// => return a SerdeYamlError
/// - We do not find a metadata.yaml file but a syntaxes.bin or themes.bin file
/// => assume that these were created by an old version of bat and return
/// AssetsMetadata::default() without version information
/// - We do not find a metadata.yaml file and no cached assets
/// => no user provided assets are available, return None
pub fn load_from_folder(path: &Path) -> Result<Option<Self>> {
match Self::try_load_from_folder(path) {
Ok(metadata) => Ok(Some(metadata)),
Err(e) => {
if let Error::SerdeYamlError(_) = e {
Err(e)
} else if path.join("syntaxes.bin").exists() || path.join("themes.bin").exists() {
Ok(Some(Self::default()))
} else {
Ok(None)
}
}
}
}
pub fn is_compatible_with(&self, current_version: &str) -> bool {
let current_version =
Version::parse(current_version).expect("bat follows semantic versioning");
let stored_version = self
.bat_version
.as_ref()
.and_then(|ver| Version::parse(ver).ok());
if let Some(stored_version) = stored_version {
current_version.major == stored_version.major
&& current_version.minor == stored_version.minor
} else {
false
}
}
}

397
src/assets/build_assets.rs Normal file
View File

@@ -0,0 +1,397 @@
use std::collections::HashMap;
use std::path::Path;
use syntect::highlighting::ThemeSet;
use syntect::parsing::syntax_definition::{
ContextReference, MatchOperation, MatchPattern, Pattern, SyntaxDefinition,
};
use syntect::parsing::{Scope, SyntaxSet, SyntaxSetBuilder};
use crate::assets::*;
/// The name of a syntax, used as the key when looking up its dependencies
type SyntaxName = String;
/// Used to look up what dependencies a given [SyntaxDefinition] has
type SyntaxToDependencies = HashMap<SyntaxName, Vec<Dependency>>;
/// Used to look up which [SyntaxDefinition] corresponds to a given [Dependency]
type DependencyToSyntax<'a> = HashMap<Dependency, &'a SyntaxDefinition>;
/// Represents a dependency on an external `.sublime-syntax` file.
///
/// The type is hashable and comparable so it can be used as a map key when
/// resolving which syntax a dependency refers to.
#[derive(Debug, Eq, PartialEq, Clone, Hash)]
enum Dependency {
/// By name. Example YAML: `include: C.sublime-syntax`
ByName(String),
/// By scope. Example YAML: `embed: scope:source.c`
ByScope(Scope),
}
/// Builds themes, syntaxes and minimal syntax sets from `source_dir` and
/// writes them, together with metadata for `current_version`, to
/// `target_dir`.
///
/// When `include_integrated_assets` is set, the integrated assets are used
/// as the starting point instead of empty sets.
pub fn build(
    source_dir: &Path,
    include_integrated_assets: bool,
    target_dir: &Path,
    current_version: &str,
) -> Result<()> {
    let themes = build_theme_set(source_dir, include_integrated_assets);

    // The minimal syntaxes are derived from the builder, so they have to be
    // computed before the builder is consumed by `build()`.
    let builder = build_syntax_set_builder(source_dir, include_integrated_assets)?;
    let minimal = build_minimal_syntaxes(&builder, include_integrated_assets)?;
    let syntaxes = builder.build();

    print_unlinked_contexts(&syntaxes);

    write_assets(&themes, &syntaxes, &minimal, target_dir, current_version)?;
    Ok(())
}
/// Creates the [ThemeSet], optionally starting from the integrated themes,
/// and adds the themes found in the `themes` sub-folder of `source_dir`.
///
/// Problems are reported on stdout and never abort the build.
fn build_theme_set(source_dir: &Path, include_integrated_assets: bool) -> ThemeSet {
    let mut themes = if include_integrated_assets {
        crate::assets::get_integrated_themeset()
    } else {
        ThemeSet::new()
    };

    let theme_dir = source_dir.join("themes");
    if !theme_dir.exists() {
        println!(
            "No themes were found in '{}', using the default set",
            theme_dir.to_string_lossy()
        );
        return themes;
    }

    if let Err(err) = themes.add_from_folder(&theme_dir) {
        println!(
            "Failed to load one or more themes from '{}' (reason: '{}')",
            theme_dir.to_string_lossy(),
            err,
        );
    }

    themes
}
fn build_syntax_set_builder(
source_dir: &Path,
include_integrated_assets: bool,
) -> Result<SyntaxSetBuilder> {
let mut syntax_set_builder = if !include_integrated_assets {
let mut builder = syntect::parsing::SyntaxSetBuilder::new();
builder.add_plain_text_syntax();
builder
} else {
from_binary::<SyntaxSet>(get_serialized_integrated_syntaxset(), COMPRESS_SYNTAXES)
.into_builder()
};
let syntax_dir = source_dir.join("syntaxes");
if syntax_dir.exists() {
syntax_set_builder.add_from_folder(syntax_dir, true)?;
} else {
println!(
"No syntaxes were found in '{}', using the default set.",
syntax_dir.to_string_lossy()
);
}
Ok(syntax_set_builder)
}
/// Prints every context that is referenced in `syntax_set` but could not be
/// found. Prints nothing when all contexts are linked.
fn print_unlinked_contexts(syntax_set: &SyntaxSet) {
    let unlinked = syntax_set.find_unlinked_contexts();
    if unlinked.is_empty() {
        return;
    }

    println!("Some referenced contexts could not be found!");
    unlinked
        .iter()
        .for_each(|context| println!("- {}", context));
}
/// Writes all built assets and the accompanying metadata to `target_dir`.
///
/// The target directory is created first if it does not exist yet. Progress
/// is printed to stdout.
///
/// # Errors
///
/// Returns an error if the target directory cannot be created, or if any of
/// the assets or the metadata cannot be serialized or written.
fn write_assets(
    theme_set: &ThemeSet,
    syntax_set: &SyntaxSet,
    minimal_syntaxes: &MinimalSyntaxes,
    target_dir: &Path,
    current_version: &str,
) -> Result<()> {
    // Report a failure to create the directory right away instead of letting
    // it surface later as a confusing "Could not save ..." error.
    std::fs::create_dir_all(target_dir).map_err(|_| {
        format!(
            "Could not create directory {}",
            target_dir.to_string_lossy()
        )
    })?;

    asset_to_cache(
        theme_set,
        &target_dir.join("themes.bin"),
        "theme set",
        COMPRESS_THEMES,
    )?;
    asset_to_cache(
        syntax_set,
        &target_dir.join("syntaxes.bin"),
        "syntax set",
        COMPRESS_SYNTAXES,
    )?;
    asset_to_cache(
        minimal_syntaxes,
        &target_dir.join("minimal_syntaxes.bin"),
        "minimal syntax sets",
        COMPRESS_MINIMAL_SYNTAXES,
    )?;

    print!(
        "Writing metadata to folder {} ... ",
        target_dir.to_string_lossy()
    );
    crate::assets_metadata::AssetsMetadata::new(current_version).save_to_folder(target_dir)?;
    println!("okay");

    Ok(())
}
/// Prints the names of all syntaxes in `syntax_set` as a debug-formatted
/// list on a single line.
fn print_syntax_set_names(syntax_set: &SyntaxSet) {
    let mut names = Vec::new();
    for syntax in syntax_set.syntaxes() {
        names.push(&syntax.name);
    }
    println!("{:?}", names);
}
/// Builds the [MinimalSyntaxes] structure: serialized minimal syntax sets
/// plus a lookup from lowercased syntax name to the index of its set.
///
/// Returns an empty structure when `include_integrated_assets` is set.
///
/// # Errors
///
/// Returns an error if a minimal syntax set cannot be serialized.
fn build_minimal_syntaxes(
    syntax_set_builder: &'_ SyntaxSetBuilder,
    include_integrated_assets: bool,
) -> Result<MinimalSyntaxes> {
    let mut minimal_syntaxes = MinimalSyntaxes {
        by_name: HashMap::new(),
        serialized_syntax_sets: vec![],
    };

    if include_integrated_assets {
        // Dependency info is not present in integrated assets, so we can't
        // calculate minimal syntax sets. Return early without any data filled
        // in. This means that no minimal syntax sets will be available to use, and
        // the full, slow-to-deserialize, fallback syntax set will be used instead.
        return Ok(minimal_syntaxes);
    }

    let minimal_syntax_sets_to_serialize = build_minimal_syntax_sets(syntax_set_builder)
        // For now, only store syntax sets with one syntax, otherwise
        // the binary grows by several megs
        .filter(|syntax_set| syntax_set.syntaxes().len() == 1);

    for minimal_syntax_set in minimal_syntax_sets_to_serialize {
        // Remember what index it is found at
        let current_index = minimal_syntaxes.serialized_syntax_sets.len();

        for syntax in minimal_syntax_set.syntaxes() {
            // `to_ascii_lowercase` already returns an owned `String`
            minimal_syntaxes
                .by_name
                .insert(syntax.name.to_ascii_lowercase(), current_index);
        }

        let serialized_syntax_set = asset_to_contents(
            &minimal_syntax_set,
            // Only name the asset here: `asset_to_contents` builds the
            // "Could not serialize ..." message around this description.
            &format!("minimal syntax set {}", current_index),
            COMPRESS_SERIALIZED_MINIMAL_SYNTAXES,
        )?;

        // Add last so that it ends up at `current_index`
        minimal_syntaxes
            .serialized_syntax_sets
            .push(serialized_syntax_set);
    }

    Ok(minimal_syntaxes)
}
/// Resolves the dependencies between the syntaxes of a [SyntaxSetBuilder]
/// and lazily yields one minimal [SyntaxSet] per non-hidden syntax, each
/// containing that syntax plus everything it depends on.
fn build_minimal_syntax_sets(
    syntax_set_builder: &'_ SyntaxSetBuilder,
) -> impl Iterator<Item = SyntaxSet> + '_ {
    let syntaxes = syntax_set_builder.syntaxes();

    // The lookup tables needed for dependency resolution
    let (syntax_to_dependencies, dependency_to_syntax) = generate_maps(syntaxes);

    syntaxes
        .iter()
        // Hidden syntaxes do not get a minimal set of their own
        .filter(|syntax| !syntax.hidden)
        .map(move |syntax| {
            let mut builder = SyntaxSetDependencyBuilder::new();
            builder.add_with_dependencies(syntax, &syntax_to_dependencies, &dependency_to_syntax);
            let minimal_set = builder.build();

            // Debug aid. To trigger this code, run:
            // BAT_PRINT_SYNTAX_DEPENDENCIES=1 cargo run -- cache --build --source assets --blank --target /tmp
            let print_requested = std::env::var("BAT_PRINT_SYNTAX_DEPENDENCIES").is_ok();
            if print_requested {
                print_syntax_set_names(&minimal_set);
            }

            minimal_set
        })
}
/// Dependency analysis needs two lookup tables, both built here:
/// - from a syntax name to the [Dependency] list of that [SyntaxDefinition]
/// - from a [Dependency] (by name or by scope) to the [SyntaxDefinition]
///   it refers to
fn generate_maps(syntaxes: &[SyntaxDefinition]) -> (SyntaxToDependencies, DependencyToSyntax) {
    let syntax_to_dependencies = syntaxes
        .iter()
        .map(|definition| (definition.name.clone(), dependencies_for_syntax(definition)))
        .collect();

    let mut dependency_to_syntax = HashMap::new();
    for definition in syntaxes {
        // Every syntax can be referred to both by its name and by its scope
        let by_name = Dependency::ByName(definition.name.clone());
        let by_scope = Dependency::ByScope(definition.scope);
        dependency_to_syntax.insert(by_name, definition);
        dependency_to_syntax.insert(by_scope, definition);
    }

    (syntax_to_dependencies, dependency_to_syntax)
}
/// Gets what external dependencies a given [SyntaxDefinition] has.
/// An external dependency is another `.sublime-syntax` file.
/// It does that by looking for variants of the following YAML patterns:
/// - `include: C.sublime-syntax`
/// - `embed: scope:source.c`
///
/// Every dependency is returned at most once, in order of first appearance.
fn dependencies_for_syntax(syntax: &SyntaxDefinition) -> Vec<Dependency> {
    // No need to track a dependency more than once. `Vec::dedup` would only
    // drop *adjacent* duplicates, so remember everything seen so far instead.
    let mut seen = std::collections::HashSet::new();

    syntax
        .contexts
        .values()
        .flat_map(|context| &context.patterns)
        .flat_map(dependencies_from_pattern)
        .filter(|dependency| seen.insert(dependency.clone()))
        .collect()
}
/// Collects the external dependencies referenced by a single [Pattern].
///
/// Context references are taken from match patterns that `push` or `set`
/// contexts, and from `include` patterns. References that do not point to
/// another syntax are skipped.
fn dependencies_from_pattern(pattern: &Pattern) -> Vec<Dependency> {
    match *pattern {
        // `set` carries the same kind of context references as `push`, so
        // both have to be inspected to not miss any dependency.
        Pattern::Match(MatchPattern {
            operation: MatchOperation::Push(ref context_references),
            ..
        })
        | Pattern::Match(MatchPattern {
            operation: MatchOperation::Set(ref context_references),
            ..
        }) => context_references
            .iter()
            .filter_map(dependency_from_context_reference)
            .collect(),
        Pattern::Include(ref context_reference) => {
            dependency_from_context_reference(context_reference)
                .into_iter()
                .collect()
        }
        _ => vec![],
    }
}
/// Converts a [ContextReference] into the [Dependency] it implies.
///
/// Only references to a file or to a scope count as dependencies; every
/// other kind of reference yields `None`.
fn dependency_from_context_reference(context_reference: &ContextReference) -> Option<Dependency> {
    let dependency = match context_reference {
        ContextReference::File { name, .. } => Dependency::ByName(name.clone()),
        ContextReference::ByScope { scope, .. } => Dependency::ByScope(*scope),
        _ => return None,
    };
    Some(dependency)
}
/// Helper to construct a [SyntaxSetBuilder] that contains only [SyntaxDefinition]s
/// that have dependencies among them.
struct SyntaxSetDependencyBuilder {
/// The builder that syntaxes and their dependencies are added to.
syntax_set_builder: SyntaxSetBuilder,
}
impl SyntaxSetDependencyBuilder {
    /// Creates a helper wrapping an empty [SyntaxSetBuilder].
    fn new() -> Self {
        let syntax_set_builder = SyntaxSetBuilder::new();
        SyntaxSetDependencyBuilder { syntax_set_builder }
    }

    /// Adds `syntax` to the wrapped [SyntaxSetBuilder] and then, recursively,
    /// every [SyntaxDefinition] it depends on.
    ///
    /// A syntax whose name was already added is skipped, which also ends the
    /// recursion for circular dependencies. Dependencies that cannot be
    /// resolved to a syntax are ignored.
    fn add_with_dependencies(
        &mut self,
        syntax: &SyntaxDefinition,
        syntax_to_dependencies: &SyntaxToDependencies,
        dependency_to_syntax: &DependencyToSyntax,
    ) {
        let name = &syntax.name;
        if self.is_syntax_already_added(name) {
            return;
        }

        self.syntax_set_builder.add(syntax.clone());

        let dependencies = match syntax_to_dependencies.get(name) {
            Some(dependencies) => dependencies,
            None => {
                eprintln!("ERROR: Unknown dependencies for {}", name);
                return;
            }
        };

        let resolved = dependencies
            .iter()
            .filter_map(|dependency| dependency_to_syntax.get(dependency));
        for dependency_syntax in resolved {
            self.add_with_dependencies(
                dependency_syntax,
                syntax_to_dependencies,
                dependency_to_syntax,
            );
        }
    }

    /// Returns whether a syntax called `name` is already part of the builder.
    fn is_syntax_already_added(&self, name: &str) -> bool {
        self.syntax_set_builder
            .syntaxes()
            .iter()
            .map(|syntax| syntax.name.as_str())
            .any(|existing| existing == name)
    }

    /// Consumes the helper and builds the final [SyntaxSet].
    fn build(self) -> SyntaxSet {
        let SyntaxSetDependencyBuilder { syntax_set_builder } = self;
        syntax_set_builder.build()
    }
}
/// Serializes `asset` with bincode into a byte vector, zlib-compressing the
/// output when `compressed` is set.
///
/// `description` names the asset in the error message that is returned when
/// serialization fails.
fn asset_to_contents<T: serde::Serialize>(
    asset: &T,
    description: &str,
    compressed: bool,
) -> Result<Vec<u8>> {
    let mut buffer = Vec::new();

    let outcome = if compressed {
        let encoder = flate2::write::ZlibEncoder::new(&mut buffer, flate2::Compression::best());
        bincode::serialize_into(encoder, asset)
    } else {
        bincode::serialize_into(&mut buffer, asset)
    };

    match outcome {
        Ok(()) => Ok(buffer),
        Err(_) => Err(format!("Could not serialize {}", description).into()),
    }
}
/// Serializes `asset` and writes the result to the file at `path`,
/// reporting progress on stdout.
///
/// `description` names the asset in progress and error messages;
/// `compressed` is forwarded to the serialization step.
fn asset_to_cache<T: serde::Serialize>(
    asset: &T,
    path: &Path,
    description: &str,
    compressed: bool,
) -> Result<()> {
    print!("Writing {} to {} ... ", description, path.to_string_lossy());

    let bytes = asset_to_contents(asset, description, compressed)?;
    match std::fs::write(path, &bytes) {
        Ok(()) => {
            println!("okay");
            Ok(())
        }
        Err(_) => Err(format!(
            "Could not save {} to {}",
            description,
            path.to_string_lossy()
        )
        .into()),
    }
}

View File

@@ -0,0 +1,42 @@
use std::ffi::OsStr;
use std::path::Path;
use crate::error::*;
/// File name suffixes that are stripped before retrying a lookup on the
/// remaining file name. They are tried in the order listed here.
const IGNORED_SUFFIXES: [&str; 13] = [
// Editor etc backups
"~",
".bak",
".old",
".orig",
// Debian and derivatives apt/dpkg/ucf backups
".dpkg-dist",
".dpkg-old",
".ucf-dist",
".ucf-new",
".ucf-old",
// Red Hat and derivatives rpm backups
".rpmnew",
".rpmorig",
".rpmsave",
// Build system input/template files
".in",
];
/// Retries a lookup with an ignored suffix (e.g. '~') removed from the file
/// name.
///
/// The first entry of `IGNORED_SUFFIXES` that `file_name` ends with is
/// stripped and `func` is called once with the shortened name; its result is
/// returned as is. If no suffix matches, or the name is not valid UTF-8,
/// `func` is not called and `Ok(None)` is returned.
pub fn try_with_stripped_suffix<T, F>(file_name: &OsStr, func: F) -> Result<Option<T>>
where
    F: Fn(&OsStr) -> Result<Option<T>>,
{
    let stripped = Path::new(file_name).to_str().and_then(|file_str| {
        IGNORED_SUFFIXES
            .iter()
            .find_map(|suffix| file_str.strip_suffix(*suffix))
    });

    match stripped {
        Some(stripped_filename) => func(OsStr::new(stripped_filename)),
        None => Ok(None),
    }
}

View File

@@ -0,0 +1,72 @@
use std::collections::HashMap;
use lazycell::LazyCell;
use syntect::parsing::SyntaxSet;
use super::*;
/// Holds the serialized minimal syntax sets together with a cache of the
/// ones that have already been deserialized.
#[derive(Debug)]
pub(crate) struct MinimalAssets {
/// The serialized minimal syntax sets and the name lookup into them.
minimal_syntaxes: MinimalSyntaxes,
/// Lazily load serialized [SyntaxSet]s from [Self.minimal_syntaxes]. The
/// index in this vec matches the index in
/// [Self.minimal_syntaxes.serialized_syntax_sets]
deserialized_minimal_syntaxes: Vec<LazyCell<SyntaxSet>>,
}
/// Stores and allows lookup of minimal [SyntaxSet]s. The [SyntaxSet]s are
/// stored in serialized form, and are deserialized on-demand. This gives good
/// startup performance since only the necessary [SyntaxReference]s need to be
/// deserialized.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
pub(crate) struct MinimalSyntaxes {
/// Lookup the index into `serialized_syntax_sets` of a [SyntaxSet] by the
/// name of any [SyntaxReference] inside the [SyntaxSet]
/// (We will later add `by_extension`, `by_first_line`, etc.)
pub(crate) by_name: HashMap<String, usize>,
/// Serialized [SyntaxSet]s. Whether or not this data is compressed is
/// decided by [COMPRESS_SERIALIZED_MINIMAL_SYNTAXES]
pub(crate) serialized_syntax_sets: Vec<Vec<u8>>,
}
impl MinimalAssets {
    /// Wraps `minimal_syntaxes` and prepares one empty cache cell per
    /// serialized syntax set.
    pub(crate) fn new(minimal_syntaxes: MinimalSyntaxes) -> Self {
        // One cell per set lets us load lazily without a mut reference
        let set_count = minimal_syntaxes.serialized_syntax_sets.len();
        Self {
            deserialized_minimal_syntaxes: vec![LazyCell::new(); set_count],
            minimal_syntaxes,
        }
    }

    /// Returns the minimal [SyntaxSet] registered for `name`, which is
    /// lowercased (ASCII) before the lookup.
    ///
    /// Returns `None` if no set is registered under that name or if the set
    /// cannot be deserialized.
    pub fn get_syntax_set_by_name(&self, name: &str) -> Option<&SyntaxSet> {
        let key = name.to_ascii_lowercase();
        let index = *self.minimal_syntaxes.by_name.get(&key)?;
        self.get_minimal_syntax_set_with_index(index)
    }

    /// Deserializes the syntax set stored at `index`.
    ///
    /// Panics if `index` is out of bounds for the serialized sets.
    fn load_minimal_syntax_set_with_index(&self, index: usize) -> Result<SyntaxSet> {
        let bytes = &self.minimal_syntaxes.serialized_syntax_sets[index];
        let description = format!("minimal syntax set {}", index);
        let parsed = asset_from_contents(
            &bytes[..],
            &description,
            COMPRESS_SERIALIZED_MINIMAL_SYNTAXES,
        );
        parsed.map_err(|_| format!("Could not parse minimal syntax set {}", index).into())
    }

    /// Returns the syntax set at `index`, deserializing it on first use.
    ///
    /// Returns `None` for an unknown index or when deserialization fails.
    fn get_minimal_syntax_set_with_index(&self, index: usize) -> Option<&SyntaxSet> {
        let cell = self.deserialized_minimal_syntaxes.get(index)?;
        cell.try_borrow_with(|| self.load_minimal_syntax_set_with_index(index))
            .ok()
    }
}

View File

@@ -0,0 +1,27 @@
use std::path::PathBuf;
use syntect::parsing::SyntaxSet;
use super::*;
/// A SyntaxSet in serialized form, i.e. bincoded and flate2 compressed.
/// We keep it in this format since we want to load it lazily.
/// Use `deserialize` to obtain the actual [SyntaxSet].
#[derive(Debug)]
pub enum SerializedSyntaxSet {
/// The data comes from a user-generated cache file.
FromFile(PathBuf),
/// The data to use is embedded into the bat binary.
FromBinary(&'static [u8]),
}
impl SerializedSyntaxSet {
    /// Turns the serialized data into a [SyntaxSet].
    ///
    /// Data from a file is loaded with `asset_from_cache` and its error, if
    /// any, is returned. Embedded data is decoded with `from_binary` and
    /// always wrapped in `Ok`.
    pub fn deserialize(&self) -> Result<SyntaxSet> {
        match self {
            Self::FromFile(path) => asset_from_cache(path, "syntax set", COMPRESS_SYNTAXES),
            Self::FromBinary(data) => {
                let syntax_set = from_binary(data, COMPRESS_SYNTAXES);
                Ok(syntax_set)
            }
        }
    }
}