Closed yzobus closed 2 months ago
Can you share a workbook?
Sure: dragged_formula.xlsx
Only Field A2 and A3 are recognized as formula in this case.
f
tag with no specify value, can be calculated with reference to the shared formula.
consider a worksheet xml below
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" mc:Ignorable="x14ac xr xr2 xr3"
xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac"
xmlns:xr="http://schemas.microsoft.com/office/spreadsheetml/2014/revision"
xmlns:xr2="http://schemas.microsoft.com/office/spreadsheetml/2015/revision2"
xmlns:xr3="http://schemas.microsoft.com/office/spreadsheetml/2016/revision3" xr:uid="{00000000-0001-0000-0000-000000000000}">
<dimension ref="A1:B11"/>
<sheetViews>
<sheetView tabSelected="1" workbookViewId="0">
<selection activeCell="B1" sqref="B1:B11"/>
</sheetView>
</sheetViews>
<sheetFormatPr baseColWidth="10" defaultColWidth="8.83203125" defaultRowHeight="15"/>
<sheetData>
<row r="1" spans="1:2">
<c r="A1">
<v>1</v>
</c>
<c r="B1">
<f>A1</f>
<v>1</v>
</c>
</row>
<row r="2" spans="1:2">
<c r="B2">
<f t="shared" ref="B2:B11" si="0">A2</f>
<v>0</v>
</c>
</row>
<row r="3" spans="1:2">
<c r="B3">
<f t="shared" si="0"/>
<v>0</v>
</c>
</row>
<row r="4" spans="1:2">
<c r="B4">
<f t="shared" si="0"/>
<v>0</v>
</c>
</row>
<row r="5" spans="1:2">
<c r="B5">
<f t="shared" si="0"/>
<v>0</v>
</c>
</row>
<row r="6" spans="1:2">
<c r="B6">
<f t="shared" si="0"/>
<v>0</v>
</c>
</row>
<row r="7" spans="1:2">
<c r="B7">
<f t="shared" si="0"/>
<v>0</v>
</c>
</row>
<row r="8" spans="1:2">
<c r="B8">
<f t="shared" si="0"/>
<v>0</v>
</c>
</row>
<row r="9" spans="1:2">
<c r="B9">
<f t="shared" si="0"/>
<v>0</v>
</c>
</row>
<row r="10" spans="1:2">
<c r="B10">
<f t="shared" si="0"/>
<v>0</v>
</c>
</row>
<row r="11" spans="1:2">
<c r="B11">
<f t="shared" si="0"/>
<v>0</v>
</c>
</row>
</sheetData>
<phoneticPr fontId="1" type="noConversion"/>
<pageMargins left="0.7" right="0.7" top="0.75" bottom="0.75" header="0.3" footer="0.3"/>
<f t="shared" ref="B2:B11" si="0">A2</f>
f
tag ablove is the orgrinal formula cell.
cells in row or column can be select to take this cell as reference, and new formula can be calculated, offset will be applied.
helper function
/// Convert the integer to Excelsheet column title.
/// If the column number not in 1~16384, an Error is returned.
pub(crate) fn column_number_to_name(num: u32) -> Result<String, XlsxError> {
if num < 1 || num > MAX_COLUMNS {
return Err(XlsxError::Unexpected("column number overflow"));
}
let mut col: Vec<u8> = Vec::new();
let mut num = num;
while num > 0 {
let integer: u8 = (num as u8 - 1) % 26 + 65;
col.push(integer);
num = (num - 1) / 26;
}
col.reverse();
match String::from_utf8(col) {
Ok(s) => Ok(s),
Err(_) => Err(XlsxError::NumericColumn(num as u8)),
}
}
pub(crate) fn dimension_to_title(cell: (u32, u32)) -> Result<String, XlsxError> {
let col = column_number_to_name(cell.0)?;
Ok(format!("{col}{}", cell.1).to_owned())
}
shared formula should be add to xlsxcellreader instance
pub struct XlsxCellReader<'a> {
xml: XlReader<'a>,
strings: &'a [String],
formats: &'a [CellFormat],
is_1904: bool,
dimensions: Dimensions,
row_index: u32,
col_index: u32,
buf: Vec<u8>,
cell_buf: Vec<u8>,
formulas: Vec<Option<(String, HashMap<String, (i32, i32)>)>>,
}
shared formula related code should be added in next_formula
method.
pub fn next_formula(&mut self) -> Result<Option<Cell<String>>, XlsxError> {
loop {
self.buf.clear();
match self.xml.read_event_into(&mut self.buf) {
Ok(Event::Start(ref row_element))
if row_element.local_name().as_ref() == b"row" =>
{
let attribute = get_attribute(row_element.attributes(), QName(b"r"))?;
if let Some(range) = attribute {
let row = get_row(range)?;
self.row_index = row;
}
}
Ok(Event::End(ref row_element)) if row_element.local_name().as_ref() == b"row" => {
self.row_index += 1;
self.col_index = 0;
}
Ok(Event::Start(ref c_element)) if c_element.local_name().as_ref() == b"c" => {
let attribute = get_attribute(c_element.attributes(), QName(b"r"))?;
let pos = if let Some(range) = attribute {
let (row, col) = get_row_column(range)?;
self.col_index = col;
(row, col)
} else {
(self.row_index, self.col_index)
};
let mut value = None;
loop {
self.cell_buf.clear();
match self.xml.read_event_into(&mut self.cell_buf) {
Ok(Event::Start(ref e)) => {
let mut offset_map: HashMap<String, (i32, i32)> = HashMap::new();
let mut shared_index = None;
let mut shared_ref = None;
let shared =
get_attribute(e.attributes(), QName(b"t")).unwrap_or(None);
match shared {
Some(b"shared") => {
shared_index = Some(
String::from_utf8(
get_attribute(e.attributes(), QName(b"si"))?
.unwrap()
.to_vec(),
)
.unwrap()
.parse::<u32>()?,
);
match get_attribute(e.attributes(), QName(b"ref"))? {
Some(res) => {
let reference = get_dimension(res)?;
if reference.start.0 != reference.end.0 {
for i in
0..=(reference.end.0 - reference.start.0)
{
offset_map.insert(
dimension_to_title((
reference.start.0 + i,
reference.start.1,
))?,
(
(reference.start.0 as i64
- pos.0 as i64
+ i as i64)
as i32,
0,
),
);
}
} else if reference.start.1 != reference.end.1 {
for i in
0..=(reference.end.1 - reference.start.1)
{
offset_map.insert(
dimension_to_title((
reference.start.0,
reference.start.1 + i,
))?,
(
0,
(reference.start.1 as i64
- pos.1 as i64
+ i as i64)
as i32,
),
);
}
}
shared_ref = Some(reference);
}
None => {}
}
}
_ => {}
}
if let Some(f) = read_formula(&mut self.xml, e)? {
value = Some(f.clone());
if shared_index.is_some() && shared_ref.is_some() {
// original shared formula
while self.formulas.len() < shared_index.unwrap() as usize {
self.formulas.push(None);
}
self.formulas.push(Some((f, offset_map)));
}
}
if shared_index.is_some() && shared_ref.is_none() {
// shared formula
let cell_regex = Regex::new(r"[A-Z]+[0-9]+").unwrap();
if let Some((f, offset)) =
self.formulas[shared_index.unwrap() as usize].clone()
{
let cells = cell_regex
.find_iter(f.as_str())
.map(|x| get_row_column(x.as_str().as_bytes()));
let mut template = cell_regex
.replace_all(f.as_str(), r"\uffff")
.into_owned();
let ffff_regex = Regex::new(r"\\uffff").unwrap();
for res in cells {
match res {
Ok(cell) => {
let (row, col) = offset
.get(&dimension_to_title(pos)?)
.unwrap();
// calculate new formula cell pos
let name = dimension_to_title((
(cell.0 as i64 + *row as i64) as u32,
(cell.1 as i64 + *col as i64) as u32,
))?;
template = ffff_regex
.replace(&template, name.as_str())
.into_owned();
}
Err(_) => {}
};
}
value = Some(template.clone());
};
}
}
Ok(Event::End(ref e)) if e.local_name().as_ref() == b"c" => break,
Ok(Event::Eof) => return Err(XlsxError::XmlEof("c")),
Err(e) => return Err(XlsxError::Xml(e)),
_ => (),
}
}
self.col_index += 1;
return Ok(Some(Cell::new(pos, value.unwrap_or_default())));
}
Ok(Event::End(ref e)) if e.local_name().as_ref() == b"sheetData" => {
return Ok(None);
}
Ok(Event::Eof) => return Err(XlsxError::XmlEof("sheetData")),
Err(e) => return Err(XlsxError::Xml(e)),
_ => (),
}
}
}
code above imply the original formula cell is always the first to be found, which is not always the case. but reference the formula at the end need a full scan of xlsxcellreaderm. I dont find a effeicent way to do this.
There is seemingly still a problem in detecting formulas when they are created via dragging another formula.
For example, I set A1 to 1 and define a formula that is "=A1 + 1" in the field A2 and drag it down to field A5. In the xlsx file on can see that in all fields from A2:A5, a formula is defined, however, only the ones in A2 and A3 are detected.
The reason for this seems to lie in the formatting of the xml file, whose sheetdata will look like this:
From this view, the output of the worksheet_formula functions makes sense, because there is no value stored between and , but a defined shard functionality.
I think it would be nice to also detect formulas which are created by dragging, however, it also seems to be a nightmare to correctly get the formulas